From eef3dc34a1e0b01d53328b88c25237bcc7323777 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Wed, 17 Oct 2018 17:54:00 -0700 Subject: ARM: OMAP2+: prm44xx: Fix section annotation on omap44xx_prm_enable_io_wakeup When building the kernel with Clang, the following section mismatch warning appears: WARNING: vmlinux.o(.text+0x38b3c): Section mismatch in reference from the function omap44xx_prm_late_init() to the function .init.text:omap44xx_prm_enable_io_wakeup() The function omap44xx_prm_late_init() references the function __init omap44xx_prm_enable_io_wakeup(). This is often because omap44xx_prm_late_init lacks a __init annotation or the annotation of omap44xx_prm_enable_io_wakeup is wrong. Remove the __init annotation from omap44xx_prm_enable_io_wakeup so there is no more mismatch. Signed-off-by: Nathan Chancellor Signed-off-by: Tony Lindgren --- arch/arm/mach-omap2/prm44xx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/mach-omap2/prm44xx.c b/arch/arm/mach-omap2/prm44xx.c index 7b95729e8359..38a1be6c3694 100644 --- a/arch/arm/mach-omap2/prm44xx.c +++ b/arch/arm/mach-omap2/prm44xx.c @@ -351,7 +351,7 @@ static void omap44xx_prm_reconfigure_io_chain(void) * to occur, WAKEUPENABLE bits must be set in the pad mux registers, and * omap44xx_prm_reconfigure_io_chain() must be called. No return value. */ -static void __init omap44xx_prm_enable_io_wakeup(void) +static void omap44xx_prm_enable_io_wakeup(void) { s32 inst = omap4_prmst_get_prm_dev_inst(); -- cgit v1.2.3-70-g09d2 From 13682e524167cbd7e2a26c5e91bec765f0f96273 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Wed, 17 Oct 2018 11:18:30 +0200 Subject: arm64: dts: rockchip: remove vdd_log from rock960 to fix a stability issue When the performance governor is set as default, the rock960 hangs around one minute after booting, whatever the activity is (idle, key pressed, loaded, ...). Based on the commit log found at https://patchwork.kernel.org/patch/10092377/ "vdd_log has no consumer and therefore will not be set to a specific voltage. Still the PWM output pin gets configured and thence the vdd_log output voltage will change from its default. Depending on the idle state of the PWM this will slightly over or undervoltage the logic supply of the RK3399 and cause instability with GbE (undervoltage) and PCIe (overvoltage). Since the default value set by a voltage divider is the correct supply voltage and we don't need to change it during runtime we remove the rail from the devicetree completely so the PWM pin will not be configured." After removing the vdd-log from the rock960's specific DT, the board no longer hangs and shows stable behavior. Apply the same change to the rock960 by removing the vdd-log node from its DT.
Fixes: 874846f1fccd ("arm64: dts: rockchip: add 96boards RK3399 Ficus board") Cc: stable@vger.kernel.org Tested-by: Manivannan Sadhasivam Signed-off-by: Daniel Lezcano Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3399-rock960.dtsi | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'arch') diff --git a/arch/arm64/boot/dts/rockchip/rk3399-rock960.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-rock960.dtsi index 6c8c4ab044aa..56abbb08c133 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-rock960.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399-rock960.dtsi @@ -57,18 +57,6 @@ regulator-always-on; vin-supply = <&vcc_sys>; }; - - vdd_log: vdd-log { - compatible = "pwm-regulator"; - pwms = <&pwm2 0 25000 0>; - regulator-name = "vdd_log"; - regulator-min-microvolt = <800000>; - regulator-max-microvolt = <1400000>; - regulator-always-on; - regulator-boot-on; - vin-supply = <&vcc_sys>; - }; - }; &cpu_l0 { -- cgit v1.2.3-70-g09d2 From e46daee53bb50bde38805f1823a182979724c229 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 30 Oct 2018 22:12:56 +0100 Subject: ARM: 8806/1: kprobes: Fix false positive with FORTIFY_SOURCE The arm compiler internally interprets an inline assembly label as an unsigned long value, not a pointer. As a result, under CONFIG_FORTIFY_SOURCE, the address of a label has a size of 4 bytes, which was tripping the runtime checks. Instead, we can just cast the label (as done with the size calculations earlier). Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1639397 Reported-by: William Cohen Fixes: 6974f0c4555e ("include/linux/string.h: add the option of fortified string.h functions") Cc: stable@vger.kernel.org Acked-by: Laura Abbott Acked-by: Masami Hiramatsu Tested-by: William Cohen Signed-off-by: Kees Cook Signed-off-by: Russell King --- arch/arm/probes/kprobes/opt-arm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/probes/kprobes/opt-arm.c b/arch/arm/probes/kprobes/opt-arm.c index b2aa9b32bff2..2c118a6ab358 100644 --- a/arch/arm/probes/kprobes/opt-arm.c +++ b/arch/arm/probes/kprobes/opt-arm.c @@ -247,7 +247,7 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *or } /* Copy arch-dep-instance from template. */ - memcpy(code, &optprobe_template_entry, + memcpy(code, (unsigned char *)optprobe_template_entry, TMPL_END_IDX * sizeof(kprobe_opcode_t)); /* Adjust buffer according to instruction. */ -- cgit v1.2.3-70-g09d2 From e7f4ffffa972db4820c23ff9831a6a4f3f6d8cc5 Mon Sep 17 00:00:00 2001 From: Adam Ford Date: Sun, 28 Oct 2018 15:13:48 -0500 Subject: ARM: dts: am3517: Fix pinmuxing for CD on MMC1 The MMC1 card detect (CD) signal is active low, not active high. For some reason, this worked with different combinations of U-Boot and kernels, but it is supposed to be active low and is currently broken.
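As an illustration (a sketch based on the generic MMC bindings, not taken from this patch), the GPIO flag in cd-gpios is what tells the MMC core which electrical level means "card present":

	&mmc1 {
		/* card insertion pulls the line low, so CD is active low */
		cd-gpios = <&gpio4 31 GPIO_ACTIVE_LOW>;	/* gpio_127 */
	};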
Fixes: cfaa856a2510 ("ARM: dts: am3517: Add pinmuxing, CD and WP for MMC1") # v4.18+ Signed-off-by: Adam Ford Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/am3517-evm.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/am3517-evm.dts b/arch/arm/boot/dts/am3517-evm.dts index d4d33cd7adad..1e2bb68231ad 100644 --- a/arch/arm/boot/dts/am3517-evm.dts +++ b/arch/arm/boot/dts/am3517-evm.dts @@ -228,7 +228,7 @@ vmmc-supply = <&vmmc_fixed>; bus-width = <4>; wp-gpios = <&gpio4 30 GPIO_ACTIVE_HIGH>; /* gpio_126 */ - cd-gpios = <&gpio4 31 GPIO_ACTIVE_HIGH>; /* gpio_127 */ + cd-gpios = <&gpio4 31 GPIO_ACTIVE_LOW>; /* gpio_127 */ }; &mmc3 { -- cgit v1.2.3-70-g09d2 From 6809564d64ff1301d44c13334cc0e8369e825a20 Mon Sep 17 00:00:00 2001 From: Adam Ford Date: Sun, 28 Oct 2018 15:28:32 -0500 Subject: ARM: dts: LogicPD Torpedo: Fix mmc3_dat1 interrupt When the Torpedo was first introduced back in kernel 4.2, the interrupts-extended property was set incorrectly. The WiFi bindings were subsequently moved to this file, so this fix applies to kernels 4.18 and later. Fixes: a38867305203 ("ARM: dts: Move move WiFi bindings to logicpd-torpedo-37xx-devkit") # v4.18+ Signed-off-by: Adam Ford Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/logicpd-torpedo-37xx-devkit.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/logicpd-torpedo-37xx-devkit.dts b/arch/arm/boot/dts/logicpd-torpedo-37xx-devkit.dts index 9d5d53fbe9c0..c39cf2ca54da 100644 --- a/arch/arm/boot/dts/logicpd-torpedo-37xx-devkit.dts +++ b/arch/arm/boot/dts/logicpd-torpedo-37xx-devkit.dts @@ -35,7 +35,7 @@ * jumpering combinations for the long run. */ &mmc3 { - interrupts-extended = <&intc 94 &omap3_pmx_core2 0x46>; + interrupts-extended = <&intc 94 &omap3_pmx_core 0x136>; pinctrl-0 = <&mmc3_pins &mmc3_core2_pins>; pinctrl-names = "default"; vmmc-supply = <&wl12xx_vmmc>; -- cgit v1.2.3-70-g09d2 From 3d8b804bc528d3720ec0c39c212af92dafaf6e84 Mon Sep 17 00:00:00 2001 From: Adam Ford Date: Sun, 28 Oct 2018 15:29:27 -0500 Subject: ARM: dts: logicpd-somlv: Fix interrupt on mmc3_dat1 The interrupt on mmc3_dat1 is wrong, which prevents it from appearing in /proc/interrupts. Fixes: ab8dd3aed011 ("ARM: DTS: Add minimal Support for Logic PD DM3730 SOM-LV") # v4.9+ Signed-off-by: Adam Ford Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/logicpd-som-lv.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/logicpd-som-lv.dtsi b/arch/arm/boot/dts/logicpd-som-lv.dtsi index ac343330d0c8..98b682a8080c 100644 --- a/arch/arm/boot/dts/logicpd-som-lv.dtsi +++ b/arch/arm/boot/dts/logicpd-som-lv.dtsi @@ -129,7 +129,7 @@ }; &mmc3 { - interrupts-extended = <&intc 94 &omap3_pmx_core2 0x46>; + interrupts-extended = <&intc 94 &omap3_pmx_core 0x136>; pinctrl-0 = <&mmc3_pins &wl127x_gpio>; pinctrl-names = "default"; vmmc-supply = <&wl12xx_vmmc>; -- cgit v1.2.3-70-g09d2 From 419b194cdedc76d0d3cd5b0900db0fa8177c4a52 Mon Sep 17 00:00:00 2001 From: Adam Ford Date: Sun, 28 Oct 2018 15:34:21 -0500 Subject: ARM: dts: am3517-som: Fix WL127x Wifi interrupt At the same time the AM3517 EVM was gaining WiFi support, separate patches were introduced to move the interrupt from LEVEL_HIGH to EDGE_RISING. Because the two series overlapped, this change was not applied to the AM3517-EVM.
This patch applies the same fix; it is needed for kernels 4.19 and later. Fixes: 6bf5e3410f19 ("ARM: dts: am3517-som: Add WL127x Wifi") # v4.19+ Signed-off-by: Adam Ford Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/am3517-som.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/am3517-som.dtsi b/arch/arm/boot/dts/am3517-som.dtsi index dae6e458e59f..b1c988eed87c 100644 --- a/arch/arm/boot/dts/am3517-som.dtsi +++ b/arch/arm/boot/dts/am3517-som.dtsi @@ -163,7 +163,7 @@ compatible = "ti,wl1271"; reg = <2>; interrupt-parent = <&gpio6>; - interrupts = <10 IRQ_TYPE_LEVEL_HIGH>; /* gpio_170 */ + interrupts = <10 IRQ_TYPE_EDGE_RISING>; /* gpio_170 */ ref-clock-frequency = <26000000>; tcxo-clock-frequency = <26000000>; }; -- cgit v1.2.3-70-g09d2 From cec83ff1241ec98113a19385ea9e9cfa9aa4125b Mon Sep 17 00:00:00 2001 From: Janusz Krzysztofik Date: Wed, 7 Nov 2018 22:30:31 +0100 Subject: ARM: OMAP1: ams-delta: Fix possible use of uninitialized field While playing with the initialization order of the modem device, it has been discovered that under some circumstances (early console init, I believe) its .pm() callback may be called before the uart_port->private_data pointer is initialized from plat_serial8250_port->private_data, resulting in a NULL pointer dereference. Fix it by checking for an uninitialized pointer before using it in modem_pm(). Fixes: aabf31737a6a ("ARM: OMAP1: ams-delta: update the modem to use regulator API") Signed-off-by: Janusz Krzysztofik Signed-off-by: Tony Lindgren --- arch/arm/mach-omap1/board-ams-delta.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch') diff --git a/arch/arm/mach-omap1/board-ams-delta.c b/arch/arm/mach-omap1/board-ams-delta.c index 3d191fd52910..17886744dbe6 100644 --- a/arch/arm/mach-omap1/board-ams-delta.c +++ b/arch/arm/mach-omap1/board-ams-delta.c @@ -750,6 +750,9 @@ static void modem_pm(struct uart_port *port, unsigned int state, unsigned old) struct modem_private_data *priv = port->private_data; int ret; + if (!priv) + return; + if (IS_ERR(priv->regulator)) return; -- cgit v1.2.3-70-g09d2 From 8588eac3ff66e77fb681591714fd76da3a4da80d Mon Sep 17 00:00:00 2001 From: Vignesh R Date: Thu, 27 Sep 2018 10:31:46 +0530 Subject: arm64: dts: ti: k3-am654: Fix wakeup_uart reg address The cbass_wakeup interconnect, which is the parent of the wakeup_uart node, defines #address-cells = <1> and #size-cells = <1>; therefore fix up the reg property of the wakeup_uart node accordingly. Otherwise, this UART instance fails to probe if enabled.
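To make the parent/child relationship concrete, here is a minimal sketch (node names and the compatible string are illustrative, not copied from the SoC dtsi) of how a child's reg must follow the cell sizes of its parent bus:

	cbass_wakeup: bus@42000000 {
		compatible = "simple-bus";
		#address-cells = <1>;	/* addresses are one cell wide */
		#size-cells = <1>;	/* sizes are one cell wide */

		wkup_uart0: serial@42300000 {
			/* one address cell plus one size cell, matching the parent */
			reg = <0x42300000 0x100>;
		};
	};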
Fixes: 4201af2544b3 ("arm64: dts: ti: am654: Add uart nodes") Signed-off-by: Vignesh R Reviewed-by: Lokesh Vutla Signed-off-by: Tero Kristo --- arch/arm64/boot/dts/ti/k3-am65-wakeup.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/boot/dts/ti/k3-am65-wakeup.dtsi b/arch/arm64/boot/dts/ti/k3-am65-wakeup.dtsi index affc3c309353..8d7b47f9dfbf 100644 --- a/arch/arm64/boot/dts/ti/k3-am65-wakeup.dtsi +++ b/arch/arm64/boot/dts/ti/k3-am65-wakeup.dtsi @@ -36,7 +36,7 @@ wkup_uart0: serial@42300000 { compatible = "ti,am654-uart"; - reg = <0x00 0x42300000 0x00 0x100>; + reg = <0x42300000 0x100>; reg-shift = <2>; reg-io-width = <4>; interrupts = ; -- cgit v1.2.3-70-g09d2 From 5f8d3ab136d0ccb59c4d628d8f85e0d8f2761d07 Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Fri, 2 Nov 2018 14:45:32 -0700 Subject: arm64: dts: sdm845-mtp: Reserve reserved gpios With the introduction of commit 3edfb7bd76bd ("gpiolib: Show correct direction from the beginning") the gpiolib will attempt to read the direction of all pins, which triggers a read from protected register regions. The pins 0 through 3 and 81 through 84 are protected, so mark these as reserved. Signed-off-by: Bjorn Andersson Reviewed-by: Stephen Boyd Reviewed-by: Linus Walleij Signed-off-by: Andy Gross --- arch/arm64/boot/dts/qcom/sdm845-mtp.dts | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch') diff --git a/arch/arm64/boot/dts/qcom/sdm845-mtp.dts b/arch/arm64/boot/dts/qcom/sdm845-mtp.dts index eedfaf8922e2..d667eee4e6d0 100644 --- a/arch/arm64/boot/dts/qcom/sdm845-mtp.dts +++ b/arch/arm64/boot/dts/qcom/sdm845-mtp.dts @@ -352,6 +352,10 @@ status = "okay"; }; +&tlmm { + gpio-reserved-ranges = <0 4>, <81 4>; +}; + &uart9 { status = "okay"; }; -- cgit v1.2.3-70-g09d2 From 9134586715e389fe90d9d28cb37a668f374d686a Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Mon, 29 Oct 2018 22:45:54 -0700 Subject: arm64: dts: qcom: msm8998: Reserve gpio ranges on MTP GPIOs 0 through 3 and 81 through 84 are configured to not be accessible from the application CPUs. Mark them as reserved to allow the MSM8998 MTP to boot after the introduction of 3edfb7bd76bd ("gpiolib: Show correct direction from the beginning"). Signed-off-by: Bjorn Andersson Reviewed-by: Jeffrey Hugo Signed-off-by: Andy Gross --- arch/arm64/boot/dts/qcom/msm8998-mtp.dtsi | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch') diff --git a/arch/arm64/boot/dts/qcom/msm8998-mtp.dtsi b/arch/arm64/boot/dts/qcom/msm8998-mtp.dtsi index b4276da1fb0d..11fd1fe8bdb5 100644 --- a/arch/arm64/boot/dts/qcom/msm8998-mtp.dtsi +++ b/arch/arm64/boot/dts/qcom/msm8998-mtp.dtsi @@ -241,3 +241,7 @@ }; }; }; + +&tlmm { + gpio-reserved-ranges = <0 4>, <81 4>; +}; -- cgit v1.2.3-70-g09d2 From 2c519f583e84eb578d4db48e38160f58aafad2ac Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Wed, 10 Oct 2018 16:46:17 +0200 Subject: ARC: remove redundant 'default n' from Kconfig 'default n' is the default value for any bool or tristate Kconfig setting so there is no need to write it explicitly. Also since commit f467c5640c29 ("kconfig: only write '# CONFIG_FOO is not set' for visible symbols") the Kconfig behavior is the same regardless of 'default n' being present or not: ... 
One side effect of (and the main motivation for) this change is making the following two definitions behave exactly the same: config FOO bool config FOO bool default n With this change, neither of these will generate a '# CONFIG_FOO is not set' line (assuming FOO isn't selected/implied). That might make it clearer to people that a bare 'default n' is redundant. ... Signed-off-by: Bartlomiej Zolnierkiewicz Signed-off-by: Vineet Gupta --- arch/arc/Kconfig | 11 ----------- 1 file changed, 11 deletions(-) (limited to 'arch') diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index c9e2a1323536..5fcbda6b37cc 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -176,13 +176,11 @@ endchoice config CPU_BIG_ENDIAN bool "Enable Big Endian Mode" - default n help Build kernel for Big Endian Mode of ARC CPU config SMP bool "Symmetric Multi-Processing" - default n select ARC_MCIP if ISA_ARCV2 help This enables support for systems with more than one CPU. @@ -254,7 +252,6 @@ config ARC_CACHE_PAGES config ARC_CACHE_VIPT_ALIASING bool "Support VIPT Aliasing D$" depends on ARC_HAS_DCACHE && ISA_ARCOMPACT - default n endif #ARC_CACHE @@ -262,7 +259,6 @@ config ARC_HAS_ICCM bool "Use ICCM" help Single Cycle RAMS to store Fast Path Code - default n config ARC_ICCM_SZ int "ICCM Size in KB" @@ -273,7 +269,6 @@ config ARC_HAS_DCCM bool "Use DCCM" help Single Cycle RAMS to store Fast Path Data - default n config ARC_DCCM_SZ int "DCCM Size in KB" @@ -366,13 +361,11 @@ if ISA_ARCOMPACT config ARC_COMPACT_IRQ_LEVELS bool "Setup Timer IRQ as high Priority" - default n # if SMP, LV2 enabled ONLY if ARC implementation has LV2 re-entrancy depends on !SMP config ARC_FPU_SAVE_RESTORE bool "Enable FPU state persistence across context switch" - default n help Double Precision Floating Point unit had dedicated regs which need to be saved/restored across context-switch. @@ -453,7 +446,6 @@ config HIGHMEM config ARC_HAS_PAE40 bool "Support for the 40-bit Physical Address Extension" - default n depends on ISA_ARCV2 select HIGHMEM select PHYS_ADDR_T_64BIT @@ -496,7 +488,6 @@ config HZ config ARC_METAWARE_HLINK bool "Support for Metaware debugger assisted Host access" - default n help This options allows a Linux userland apps to directly access host file system (open/creat/read/write etc) with help from @@ -524,13 +515,11 @@ config ARC_DW2_UNWIND config ARC_DBG_TLB_PARANOIA bool "Paranoia Checks in Low Level TLB Handlers" - default n endif config ARC_UBOOT_SUPPORT bool "Support uboot arg Handling" - default n help ARC Linux by default checks for uboot provided args as pointers to external cmdline or DTB. This however breaks in absence of uboot, -- cgit v1.2.3-70-g09d2 From 3624379d90ad2b65f9dbb30d7f7ce5498d2fe322 Mon Sep 17 00:00:00 2001 From: Eugeniy Paltsev Date: Thu, 4 Oct 2018 16:12:12 +0300 Subject: ARC: IOC: panic if kernel was started with previously enabled IOC If IOC was already enabled (due to the bootloader) it technically needs to be reconfigured with an aperture base and size corresponding to the Linux memory map, which will certainly be different from U-Boot's. But disabling and reenabling IOC when DMA might be potentially active is tricky business. To avoid random memory issues later, just panic here and ask the user to upgrade the bootloader to one which doesn't enable IOC. This was actually seen as an issue on some of the HSDK boards with a version of U-Boot which enabled IOC. There were random issues later when starting X or peripherals.
Also while I'm at it, replace hardcoded bits in ARC_REG_IO_COH_PARTIAL and ARC_REG_IO_COH_ENABLE registers by definitions. Inspired by: https://lkml.org/lkml/2018/1/19/557 Signed-off-by: Eugeniy Paltsev Signed-off-by: Vineet Gupta --- arch/arc/include/asm/cache.h | 2 ++ arch/arc/mm/cache.c | 20 +++++++++++++++++--- 2 files changed, 19 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/arc/include/asm/cache.h b/arch/arc/include/asm/cache.h index ff7d3232764a..f393b663413e 100644 --- a/arch/arc/include/asm/cache.h +++ b/arch/arc/include/asm/cache.h @@ -113,7 +113,9 @@ extern unsigned long perip_base, perip_end; /* IO coherency related Auxiliary registers */ #define ARC_REG_IO_COH_ENABLE 0x500 +#define ARC_IO_COH_ENABLE_BIT BIT(0) #define ARC_REG_IO_COH_PARTIAL 0x501 +#define ARC_IO_COH_PARTIAL_BIT BIT(0) #define ARC_REG_IO_COH_AP0_BASE 0x508 #define ARC_REG_IO_COH_AP0_SIZE 0x509 diff --git a/arch/arc/mm/cache.c b/arch/arc/mm/cache.c index f2701c13a66b..cf9619d4efb4 100644 --- a/arch/arc/mm/cache.c +++ b/arch/arc/mm/cache.c @@ -1144,6 +1144,20 @@ noinline void __init arc_ioc_setup(void) { unsigned int ioc_base, mem_sz; + /* + * If IOC was already enabled (due to bootloader) it technically needs to + * be reconfigured with aperture base,size corresponding to Linux memory map + * which will certainly be different than uboot's. But disabling and + * reenabling IOC when DMA might be potentially active is tricky business. + * To avoid random memory issues later, just panic here and ask user to + * upgrade bootloader to one which doesn't enable IOC + */ + if (read_aux_reg(ARC_REG_IO_COH_ENABLE) & ARC_IO_COH_ENABLE_BIT) + panic("IOC already enabled, please upgrade bootloader!\n"); + + if (!ioc_enable) + return; + /* * As for today we don't support both IOC and ZONE_HIGHMEM enabled * simultaneously. 
This happens because as of today IOC aperture covers @@ -1187,8 +1201,8 @@ noinline void __init arc_ioc_setup(void) panic("IOC Aperture start must be aligned to the size of the aperture"); write_aux_reg(ARC_REG_IO_COH_AP0_BASE, ioc_base >> 12); - write_aux_reg(ARC_REG_IO_COH_PARTIAL, 1); - write_aux_reg(ARC_REG_IO_COH_ENABLE, 1); + write_aux_reg(ARC_REG_IO_COH_PARTIAL, ARC_IO_COH_PARTIAL_BIT); + write_aux_reg(ARC_REG_IO_COH_ENABLE, ARC_IO_COH_ENABLE_BIT); /* Re-enable L1 dcache */ __dc_enable(); @@ -1265,7 +1279,7 @@ void __init arc_cache_init_master(void) if (is_isa_arcv2() && l2_line_sz && !slc_enable) arc_slc_disable(); - if (is_isa_arcv2() && ioc_enable) + if (is_isa_arcv2() && ioc_exists) arc_ioc_setup(); if (is_isa_arcv2() && l2_line_sz && slc_enable) { -- cgit v1.2.3-70-g09d2 From afba5d157fe1f2e64bc3df46fe83841657ec8fdd Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Mon, 29 Oct 2018 09:44:18 -0700 Subject: ARCv2: boot log unaligned access in use ARC gcc 8.x generates unaligned accesses by default, so call that out Signed-off-by: Vineet Gupta --- arch/arc/kernel/setup.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c index b2cae79a25d7..eea8c5ce6335 100644 --- a/arch/arc/kernel/setup.c +++ b/arch/arc/kernel/setup.c @@ -243,7 +243,7 @@ static char *arc_cpu_mumbojumbo(int cpu_id, char *buf, int len) { struct cpuinfo_arc *cpu = &cpuinfo_arc700[cpu_id]; struct bcr_identity *core = &cpu->core; - int i, n = 0; + int i, n = 0, ua = 0; FIX_PTR(cpu); @@ -263,10 +263,13 @@ static char *arc_cpu_mumbojumbo(int cpu_id, char *buf, int len) IS_AVAIL2(cpu->extn.rtc, "RTC [UP 64-bit] ", CONFIG_ARC_TIMERS_64BIT), IS_AVAIL2(cpu->extn.gfrc, "GFRC [SMP 64-bit] ", CONFIG_ARC_TIMERS_64BIT)); - n += i = scnprintf(buf + n, len - n, "%s%s%s%s%s", +#ifdef __ARC_UNALIGNED__ + ua = 1; +#endif + n += i = scnprintf(buf + n, len - n, "%s%s%s%s%s%s", IS_AVAIL2(cpu->isa.atomic, "atomic ", CONFIG_ARC_HAS_LLSC), IS_AVAIL2(cpu->isa.ldd, "ll64 ", CONFIG_ARC_HAS_LL64), - IS_AVAIL1(cpu->isa.unalign, "unalign (not used)")); + IS_AVAIL1(cpu->isa.unalign, "unalign "), IS_USED_RUN(ua)); if (i) n += scnprintf(buf + n, len - n, "\n\t\t: "); -- cgit v1.2.3-70-g09d2 From 4592f11e47a2b28562d6cfe165d5ea7495ff4dca Mon Sep 17 00:00:00 2001 From: Eugeniy Paltsev Date: Tue, 23 Oct 2018 15:09:19 +0300 Subject: ARC: [plat-hsdk] Enable DW APB GPIO support Enable GPIO support on HSDK. HSDK SoC includes Synopsys DesignWare DW_apb_gpio IP with 24 GPIOs mapped onto port A. 
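As a usage sketch (the consumer node below is made up for illustration and is not part of this patch), board code can now reference one of the 24 port-A lines with the usual two-cell GPIO specifier:

	leds {
		compatible = "gpio-leds";

		led0 {
			/* line 3 of DW GPIO port A, assumed active high */
			gpios = <&gpio_port_a 3 GPIO_ACTIVE_HIGH>;
		};
	};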
Signed-off-by: Eugeniy Paltsev Acked-by: Alexey Brodkin Signed-off-by: Vineet Gupta --- arch/arc/boot/dts/hsdk.dts | 15 +++++++++++++++ arch/arc/configs/hsdk_defconfig | 3 +++ 2 files changed, 18 insertions(+) (limited to 'arch') diff --git a/arch/arc/boot/dts/hsdk.dts b/arch/arc/boot/dts/hsdk.dts index ef149f59929a..43f17b51ee89 100644 --- a/arch/arc/boot/dts/hsdk.dts +++ b/arch/arc/boot/dts/hsdk.dts @@ -222,6 +222,21 @@ bus-width = <4>; dma-coherent; }; + + gpio: gpio@3000 { + compatible = "snps,dw-apb-gpio"; + reg = <0x3000 0x20>; + #address-cells = <1>; + #size-cells = <0>; + + gpio_port_a: gpio-controller@0 { + compatible = "snps,dw-apb-gpio-port"; + gpio-controller; + #gpio-cells = <2>; + snps,nr-gpios = <24>; + reg = <0>; + }; + }; }; memory@80000000 { diff --git a/arch/arc/configs/hsdk_defconfig b/arch/arc/configs/hsdk_defconfig index 1dec2b4bc5e6..eca10b8baea5 100644 --- a/arch/arc/configs/hsdk_defconfig +++ b/arch/arc/configs/hsdk_defconfig @@ -45,6 +45,9 @@ CONFIG_SERIAL_8250_CONSOLE=y CONFIG_SERIAL_8250_DW=y CONFIG_SERIAL_OF_PLATFORM=y # CONFIG_HW_RANDOM is not set +CONFIG_GPIOLIB=y +CONFIG_GPIO_SYSFS=y +CONFIG_GPIO_DWAPB=y # CONFIG_HWMON is not set CONFIG_DRM=y # CONFIG_DRM_FBDEV_EMULATION is not set -- cgit v1.2.3-70-g09d2 From 121e38e5acdc8e1e4cdb750fcdcc72f94e420968 Mon Sep 17 00:00:00 2001 From: Eugeniy Paltsev Date: Wed, 7 Nov 2018 15:12:49 +0300 Subject: ARC: mm: fix uninitialised signal code in do_page_fault Commit 15773ae938d8 ("signal/arc: Use force_sig_fault where appropriate") introduced undefined behaviour by leaving si_code uninitialized and leaking random kernel values to user space. Fixes: 15773ae938d8 ("signal/arc: Use force_sig_fault where appropriate") Signed-off-by: Eugeniy Paltsev Signed-off-by: Vineet Gupta --- arch/arc/mm/fault.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arc/mm/fault.c b/arch/arc/mm/fault.c index c9da6102eb4f..e2d9fc3fea01 100644 --- a/arch/arc/mm/fault.c +++ b/arch/arc/mm/fault.c @@ -66,7 +66,7 @@ void do_page_fault(unsigned long address, struct pt_regs *regs) struct vm_area_struct *vma = NULL; struct task_struct *tsk = current; struct mm_struct *mm = tsk->mm; - int si_code; + int si_code = 0; int ret; vm_fault_t fault; int write = regs->ecr_cause & ECR_C_PROTV_STORE; /* ST/EX */ -- cgit v1.2.3-70-g09d2 From c1d91f86a1b4c9c05854d59c6a0abd5d0f75b849 Mon Sep 17 00:00:00 2001 From: Christoph Muellner Date: Tue, 13 Nov 2018 11:25:35 +0100 Subject: arm64: dts: rockchip: Fix PCIe reset polarity for rk3399-puma-haikou. This patch fixes the wrong polarity setting for the PCIe host driver's PERST# reset pin (ep-gpios) on rk3399-puma-haikou. Without this patch, link training will most likely fail.
Fixes: 60fd9f72ce8a ("arm64: dts: rockchip: add Haikou baseboard with RK3399-Q7 SoM") Cc: stable@vger.kernel.org Signed-off-by: Christoph Muellner Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3399-puma-haikou.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/boot/dts/rockchip/rk3399-puma-haikou.dts b/arch/arm64/boot/dts/rockchip/rk3399-puma-haikou.dts index 2dceeea29b83..1e6a71066c16 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-puma-haikou.dts +++ b/arch/arm64/boot/dts/rockchip/rk3399-puma-haikou.dts @@ -153,7 +153,7 @@ }; &pcie0 { - ep-gpios = <&gpio4 RK_PC6 GPIO_ACTIVE_LOW>; + ep-gpios = <&gpio4 RK_PC6 GPIO_ACTIVE_HIGH>; num-lanes = <4>; pinctrl-names = "default"; pinctrl-0 = <&pcie_clkreqn_cpm>; -- cgit v1.2.3-70-g09d2 From 613a41b0d16e617f46776a93b975a1eeea96417c Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Tue, 13 Nov 2018 15:38:22 +0000 Subject: s390/cpum_cf: Reject request for sampling in event initialization On s390 the command perf top fails: [root@s35lp76 perf] # ./perf top -F100000 --stdio Error: cycles: PMU Hardware doesn't support sampling/overflow-interrupts. Try 'perf stat' [root@s35lp76 perf] # Using event -e rb0000 works as designed. Event rb0000 is the event number of the sampling facility for basic sampling. During system start up the following PMUs are installed in the kernel's PMU list (from head to tail): cpum_cf --> s390 PMU counter facility device driver cpum_sf --> s390 PMU sampling facility device driver uprobe kprobe tracepoint task_clock cpu_clock Perf top executes the following functions and calls the perf_event_open(2) system call with different parameters many times: cmd_top --> __cmd_top --> perf_evlist__add_default --> __perf_evlist__add_default --> perf_evlist__new_cycles (creates event type:0 (HW), config 0 (CPU_CYCLES)) --> perf_event_attr__set_max_precise_ip Uses perf_event_open(2) to detect correct precise_ip level. Fails 3 times on s390 which is ok. Then functions cmd_top --> __cmd_top --> perf_top__start_counters --> perf_evlist__config --> perf_can_comm_exec --> perf_probe_api These functions test support for the following events: "cycles:u", "instructions:u", "cpu-clock:u" using --> perf_do_probe_api --> perf_event_open_cloexec Test the close on exec flag support with perf_event_open(2). perf_do_probe_api returns true if the event is supported. The function returns true because event cpu-clock is supported by the PMU cpu_clock. This is achieved by many calls to perf_event_open(2). Function perf_top__start_counters now calls perf_evsel__open() for every event, which is the default event cpu_cycles (config:0) and type HARDWARE (type:0) with a predefined frequency of 4000. Given the above order of the PMU list, the PMU cpum_cf gets called first and returns 0, which indicates support for this sampling event. The event is fully allocated in the function perf_event_open() (file kernel/events/core.c, near line 10521) and the following check fails: event = perf_event_alloc(&attr, cpu, task, group_leader, NULL, NULL, NULL, cgroup_fd); if (IS_ERR(event)) { err = PTR_ERR(event); goto err_cred; } if (is_sampling_event(event)) { if (event->pmu->capabilities & PERF_PMU_CAP_NO_INTERRUPT) { err = -EOPNOTSUPP; goto err_alloc; } } The check for the interrupt capabilities fails and the system call perf_event_open() returns -EOPNOTSUPP (-95). Add a check to return -ENOENT when sampling is requested in PMU cpum_cf.
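For context, "sampling" in this path simply means a non-zero sample period (or frequency, which shares the same union); the core helper the new check relies on is defined in include/linux/perf_event.h as:

	static inline bool is_sampling_event(struct perf_event *event)
	{
		return event->attr.sample_period != 0;
	}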
This allows common kernel code in the perf_event_open() system call to try the next PMU in the above list. Fixes: 97b1198fece0 ("s390, perf: Use common PMU interrupt disabled code") Signed-off-by: Thomas Richter Reviewed-by: Hendrik Brueckner Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/perf_cpum_cf.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c index 74091fd3101e..d5523adeddbf 100644 --- a/arch/s390/kernel/perf_cpum_cf.c +++ b/arch/s390/kernel/perf_cpum_cf.c @@ -346,6 +346,8 @@ static int __hw_perf_event_init(struct perf_event *event) break; case PERF_TYPE_HARDWARE: + if (is_sampling_event(event)) /* No sampling support */ + return -ENOENT; ev = attr->config; /* Count user space (problem-state) only */ if (!attr->exclude_user && attr->exclude_kernel) { -- cgit v1.2.3-70-g09d2 From 86322ba9571a36b0b149a5f27fc3423c2dadbe39 Mon Sep 17 00:00:00 2001 From: Sabyasachi Gupta Date: Sat, 3 Nov 2018 10:54:52 +0530 Subject: arch/sparc: Use kzalloc_node Replaced kmalloc_node + memset with kzalloc_node. Signed-off-by: Sabyasachi Gupta Signed-off-by: David S. Miller --- arch/sparc/kernel/iommu.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/sparc/kernel/iommu.c b/arch/sparc/kernel/iommu.c index 40d008b0bd3e..05eb016fc41b 100644 --- a/arch/sparc/kernel/iommu.c +++ b/arch/sparc/kernel/iommu.c @@ -108,10 +108,9 @@ int iommu_table_init(struct iommu *iommu, int tsbsize, /* Allocate and initialize the free area map. */ sz = num_tsb_entries / 8; sz = (sz + 7UL) & ~7UL; - iommu->tbl.map = kmalloc_node(sz, GFP_KERNEL, numa_node); + iommu->tbl.map = kzalloc_node(sz, GFP_KERNEL, numa_node); if (!iommu->tbl.map) return -ENOMEM; - memset(iommu->tbl.map, 0, sz); iommu_tbl_pool_init(&iommu->tbl, num_tsb_entries, IO_PAGE_SHIFT, (tlb_type != hypervisor ? iommu_flushall : NULL), -- cgit v1.2.3-70-g09d2 From 672e60b72bbe7aace88721db55b380b6a51fb8f9 Mon Sep 17 00:00:00 2001 From: Heiko Stuebner Date: Sun, 18 Nov 2018 20:03:02 +0100 Subject: ARM: dts: rockchip: Remove @0 from the veyron memory node The Coreboot version on veyron ChromeOS devices seems to ignore memory@0 nodes when updating the available memory and instead inserts another memory node without the address. This leads to 4GB systems only ever using 2GB as the memory@0 node takes precedence. So remove the @0 for veyron devices. Fixes: 0b639b815f15 ("ARM: dts: rockchip: Add missing unit name to memory nodes in rk3288 boards") Cc: stable@vger.kernel.org Reported-by: Heikki Lindholm Signed-off-by: Heiko Stuebner --- arch/arm/boot/dts/rk3288-veyron.dtsi | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/rk3288-veyron.dtsi b/arch/arm/boot/dts/rk3288-veyron.dtsi index 2075120cfc4d..d8bf939a3aff 100644 --- a/arch/arm/boot/dts/rk3288-veyron.dtsi +++ b/arch/arm/boot/dts/rk3288-veyron.dtsi @@ -10,7 +10,11 @@ #include "rk3288.dtsi" / { - memory@0 { + /* + * The default coreboot on veyron devices ignores memory@0 nodes + * and would instead create another memory node. + */ + memory { device_type = "memory"; reg = <0x0 0x0 0x0 0x80000000>; }; -- cgit v1.2.3-70-g09d2 From 512cab3e7e0be11234f965d9898936dce2325382 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Sat, 10 Nov 2018 15:53:59 -0200 Subject: ARM: dts: imx51-zii-rdu1: Remove EEPROM node The EEPROM under I2C2 was put in the dts by mistake. Remove it, as it is not present on the real hardware.
Fixes: ceef0396f367 ("ARM: dts: imx: add ZII RDU1 board") Reported-by: Chris Healy Signed-off-by: Fabio Estevam Reviewed-by: Chris Healy Signed-off-by: Shawn Guo --- arch/arm/boot/dts/imx51-zii-rdu1.dts | 6 ------ 1 file changed, 6 deletions(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/imx51-zii-rdu1.dts b/arch/arm/boot/dts/imx51-zii-rdu1.dts index e45a15ceb94b..69d753cac89a 100644 --- a/arch/arm/boot/dts/imx51-zii-rdu1.dts +++ b/arch/arm/boot/dts/imx51-zii-rdu1.dts @@ -492,12 +492,6 @@ pinctrl-0 = <&pinctrl_i2c2>; status = "okay"; - eeprom@50 { - compatible = "atmel,24c04"; - pagesize = <16>; - reg = <0x50>; - }; - hpa1: amp@60 { compatible = "ti,tpa6130a2"; reg = <0x60>; -- cgit v1.2.3-70-g09d2 From 3841840449817ba6cf3e636008bc4e1061a03388 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Tue, 20 Nov 2018 08:25:28 +0100 Subject: x86/boot: Mostly revert commit ae7e1238e68f2a ("Add ACPI RSDP address to setup_header") Peter Anvin pointed out that commit: ae7e1238e68f2a ("x86/boot: Add ACPI RSDP address to setup_header") should be reverted as setup_header should only contain items set by the legacy BIOS. So revert said commit. Instead of fully reverting the dependent commit e7b66d16fe4172 ("x86/acpi, x86/boot: Take RSDP address from boot params if available"), just remove the setup_header reference in order to replace it with boot_params in a followup patch. Suggested-by: "H. Peter Anvin" Signed-off-by: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: boris.ostrovsky@oracle.com Cc: bp@alien8.de Cc: daniel.kiper@oracle.com Cc: sstabellini@kernel.org Cc: xen-devel@lists.xenproject.org Link: http://lkml.kernel.org/r/20181120072529.5489-2-jgross@suse.com Signed-off-by: Ingo Molnar --- Documentation/x86/boot.txt | 32 +------------------------------- arch/x86/boot/header.S | 6 +----- arch/x86/include/asm/x86_init.h | 2 -- arch/x86/include/uapi/asm/bootparam.h | 4 ---- arch/x86/kernel/acpi/boot.c | 2 +- arch/x86/kernel/head32.c | 1 - arch/x86/kernel/head64.c | 2 -- arch/x86/kernel/setup.c | 17 ----------------- 8 files changed, 3 insertions(+), 63 deletions(-) (limited to 'arch') diff --git a/Documentation/x86/boot.txt b/Documentation/x86/boot.txt index 7727db8f94bc..5e9b826b5f62 100644 --- a/Documentation/x86/boot.txt +++ b/Documentation/x86/boot.txt @@ -61,18 +61,6 @@ Protocol 2.12: (Kernel 3.8) Added the xloadflags field and extension fields to struct boot_params for loading bzImage and ramdisk above 4G in 64bit. -Protocol 2.13: (Kernel 3.14) Support 32- and 64-bit flags being set in - xloadflags to support booting a 64-bit kernel from 32-bit - EFI - -Protocol 2.14: (Kernel 4.20) Added acpi_rsdp_addr holding the physical - address of the ACPI RSDP table. - The bootloader updates version with: - 0x8000 | min(kernel-version, bootloader-version) - kernel-version being the protocol version supported by - the kernel and bootloader-version the protocol version - supported by the bootloader. - **** MEMORY LAYOUT The traditional memory map for the kernel loader, used for Image or @@ -209,7 +197,6 @@ Offset Proto Name Meaning 0258/8 2.10+ pref_address Preferred loading address 0260/4 2.10+ init_size Linear memory required during initialization 0264/4 2.11+ handover_offset Offset of handover entry point -0268/8 2.14+ acpi_rsdp_addr Physical address of RSDP table (1) For backwards compatibility, if the setup_sects field contains 0, the real value is 4. @@ -322,7 +309,7 @@ Protocol: 2.00+ Contains the magic number "HdrS" (0x53726448).
Field name: version -Type: modify +Type: read Offset/size: 0x206/2 Protocol: 2.00+ @@ -330,12 +317,6 @@ Protocol: 2.00+ e.g. 0x0204 for version 2.04, and 0x0a11 for a hypothetical version 10.17. - Up to protocol version 2.13 this information is only read by the - bootloader. From protocol version 2.14 onwards the bootloader will - write the used protocol version or-ed with 0x8000 to the field. The - used protocol version will be the minimum of the supported protocol - versions of the bootloader and the kernel. - Field name: realmode_swtch Type: modify (optional) Offset/size: 0x208/4 @@ -763,17 +744,6 @@ Offset/size: 0x264/4 See EFI HANDOVER PROTOCOL below for more details. -Field name: acpi_rsdp_addr -Type: write -Offset/size: 0x268/8 -Protocol: 2.14+ - - This field can be set by the boot loader to tell the kernel the - physical address of the ACPI RSDP table. - - A value of 0 indicates the kernel should fall back to the standard - methods to locate the RSDP. - **** THE IMAGE CHECKSUM diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S index 4c881c850125..850b8762e889 100644 --- a/arch/x86/boot/header.S +++ b/arch/x86/boot/header.S @@ -300,7 +300,7 @@ _start: # Part 2 of the header, from the old setup.S .ascii "HdrS" # header signature - .word 0x020e # header version number (>= 0x0105) + .word 0x020d # header version number (>= 0x0105) # or else old loadlin-1.5 will fail) .globl realmode_swtch realmode_swtch: .word 0, 0 # default_switch, SETUPSEG @@ -558,10 +558,6 @@ pref_address: .quad LOAD_PHYSICAL_ADDR # preferred load addr init_size: .long INIT_SIZE # kernel initialization size handover_offset: .long 0 # Filled in by build.c -acpi_rsdp_addr: .quad 0 # 64-bit physical pointer to the - # ACPI RSDP table, added with - # version 2.14 - # End of setup header ##################################################### .section ".entrytext", "ax" diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index 0f842104862c..b85a7c54c6a1 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -303,6 +303,4 @@ extern void x86_init_noop(void); extern void x86_init_uint_noop(unsigned int unused); extern bool x86_pnpbios_disabled(void); -void x86_verify_bootdata_version(void); - #endif diff --git a/arch/x86/include/uapi/asm/bootparam.h b/arch/x86/include/uapi/asm/bootparam.h index 22f89d040ddd..a06cbf019744 100644 --- a/arch/x86/include/uapi/asm/bootparam.h +++ b/arch/x86/include/uapi/asm/bootparam.h @@ -16,9 +16,6 @@ #define RAMDISK_PROMPT_FLAG 0x8000 #define RAMDISK_LOAD_FLAG 0x4000 -/* version flags */ -#define VERSION_WRITTEN 0x8000 - /* loadflags */ #define LOADED_HIGH (1<<0) #define KASLR_FLAG (1<<1) @@ -89,7 +86,6 @@ struct setup_header { __u64 pref_address; __u32 init_size; __u32 handover_offset; - __u64 acpi_rsdp_addr; } __attribute__((packed)); struct sys_desc_table { diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 92c76bf97ad8..fb3b1f3a5aba 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -1776,5 +1776,5 @@ void __init arch_reserve_mem_area(acpi_physical_address addr, size_t size) u64 x86_default_get_root_pointer(void) { - return boot_params.hdr.acpi_rsdp_addr; + return 0; } diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c index 76fa3b836598..ec6fefbfd3c0 100644 --- a/arch/x86/kernel/head32.c +++ b/arch/x86/kernel/head32.c @@ -37,7 +37,6 @@ asmlinkage __visible void __init i386_start_kernel(void) cr4_init_shadow(); sanitize_boot_params(&boot_params); - 
x86_verify_bootdata_version(); x86_early_init_platform_quirks(); diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 7663a8eb602b..16b1cbd3a61e 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -457,8 +457,6 @@ void __init x86_64_start_reservations(char *real_mode_data) if (!boot_params.hdr.version) copy_bootdata(__va(real_mode_data)); - x86_verify_bootdata_version(); - x86_early_init_platform_quirks(); switch (boot_params.hdr.hardware_subarch) { diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index b74e7bfed6ab..d494b9bfe618 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -1280,23 +1280,6 @@ void __init setup_arch(char **cmdline_p) unwind_init(); } -/* - * From boot protocol 2.14 onwards we expect the bootloader to set the - * version to "0x8000 | <used version>". In case we find a version >= 2.14 - * without the 0x8000 we assume the boot loader supports 2.13 only and - * reset the version accordingly. The 0x8000 flag is removed in any case. - */ -void __init x86_verify_bootdata_version(void) -{ - if (boot_params.hdr.version & VERSION_WRITTEN) - boot_params.hdr.version &= ~VERSION_WRITTEN; - else if (boot_params.hdr.version >= 0x020e) - boot_params.hdr.version = 0x020d; - if (boot_params.hdr.version < 0x020e) - boot_params.hdr.acpi_rsdp_addr = 0; -} - #ifdef CONFIG_X86_32 static struct resource video_ram_resource = { -- cgit v1.2.3-70-g09d2 From e6e094e053af75cbc164e950814d3d084fb1e698 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Tue, 20 Nov 2018 08:25:29 +0100 Subject: x86/acpi, x86/boot: Take RSDP address from boot params if available In case the RSDP address in struct boot_params is specified don't try to find the table by searching, but take the address directly as set by the boot loader. Suggested-by: "H. Peter Anvin" Signed-off-by: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: boris.ostrovsky@oracle.com Cc: bp@alien8.de Cc: daniel.kiper@oracle.com Cc: sstabellini@kernel.org Cc: xen-devel@lists.xenproject.org Link: http://lkml.kernel.org/r/20181120072529.5489-3-jgross@suse.com Signed-off-by: Ingo Molnar --- arch/x86/include/uapi/asm/bootparam.h | 3 ++- arch/x86/kernel/acpi/boot.c | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/uapi/asm/bootparam.h b/arch/x86/include/uapi/asm/bootparam.h index a06cbf019744..60733f137e9a 100644 --- a/arch/x86/include/uapi/asm/bootparam.h +++ b/arch/x86/include/uapi/asm/bootparam.h @@ -155,7 +155,8 @@ struct boot_params { __u8 _pad2[4]; /* 0x054 */ __u64 tboot_addr; /* 0x058 */ struct ist_info ist_info; /* 0x060 */ - __u8 _pad3[16]; /* 0x070 */ + __u64 acpi_rsdp_addr; /* 0x070 */ + __u8 _pad3[8]; /* 0x078 */ __u8 hd0_info[16]; /* obsolete! */ /* 0x080 */ __u8 hd1_info[16]; /* obsolete! */ /* 0x090 */ struct sys_desc_table sys_desc_table; /* obsolete!
*/ /* 0x0a0 */ diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index fb3b1f3a5aba..06635fbca81c 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -1776,5 +1776,5 @@ void __init arch_reserve_mem_area(acpi_physical_address addr, size_t size) u64 x86_default_get_root_pointer(void) { - return 0; + return boot_params.acpi_rsdp_addr; } -- cgit v1.2.3-70-g09d2 From 68239654acafe6aad5a3c1dc7237e60accfebc03 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Tue, 20 Nov 2018 11:26:35 +0100 Subject: x86/fpu: Disable bottom halves while loading FPU registers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The sequence fpu->initialized = 1; /* step A */ preempt_disable(); /* step B */ fpu__restore(fpu); preempt_enable(); in __fpu__restore_sig() is racy in regard to a context switch. For 32bit frames, __fpu__restore_sig() prepares the FPU state within fpu->state. To ensure that a context switch (switch_fpu_prepare() in particular) does not modify fpu->state it uses fpu__drop() which sets fpu->initialized to 0. After fpu->initialized is cleared, the CPU's FPU state is not saved to fpu->state during a context switch. The new state is loaded via fpu__restore(). It gets loaded into fpu->state from userland and ensured it is sane. fpu->initialized is then set to 1 in order to avoid fpu__initialize() doing anything (overwrite the new state) which is part of fpu__restore(). A context switch between step A and B above would save CPU's current FPU registers to fpu->state and overwrite the newly prepared state. This looks like a tiny race window but the Kernel Test Robot reported this back in 2016 while we had lazy FPU support. Borislav Petkov made the link between that report and another patch that has been posted. Since the removal of the lazy FPU support, this race goes unnoticed because the warning has been removed. Disable bottom halves around the restore sequence to avoid the race. BH need to be disabled because BH is allowed to run (even with preemption disabled) and might invoke kernel_fpu_begin() by doing IPsec. [ bp: massage commit message a bit. ] Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Borislav Petkov Acked-by: Ingo Molnar Acked-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Dave Hansen Cc: "H. Peter Anvin" Cc: "Jason A. 
Donenfeld" Cc: kvm ML Cc: Paolo Bonzini Cc: Radim Krčmář Cc: Rik van Riel Cc: stable@vger.kernel.org Cc: x86-ml Link: http://lkml.kernel.org/r/20181120102635.ddv3fvavxajjlfqk@linutronix.de Link: https://lkml.kernel.org/r/20160226074940.GA28911@pd.tnic --- arch/x86/kernel/fpu/signal.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index 61a949d84dfa..d99a8ee9e185 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c @@ -344,10 +344,10 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) sanitize_restored_xstate(tsk, &env, xfeatures, fx_only); } + local_bh_disable(); fpu->initialized = 1; - preempt_disable(); fpu__restore(fpu); - preempt_enable(); + local_bh_enable(); return err; } else { -- cgit v1.2.3-70-g09d2 From 2a5bf23d5b795d5df33dc284e8f5cf8b6a5b4042 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 20 Nov 2018 18:08:42 +0100 Subject: perf/x86/intel: Fix regression by default disabling perfmon v4 interrupt handling Kyle Huey reported that 'rr', a replay debugger, broke due to the following commit: af3bdb991a5c ("perf/x86/intel: Add a separate Arch Perfmon v4 PMI handler") Rework the 'disable_counter_freezing' __setup() parameter such that we can explicitly enable/disable it and switch to default disabled. To this purpose, rename the parameter to "perf_v4_pmi=" which is a much better description and allows requiring a bool argument. [ mingo: Improved the changelog some more. ] Reported-by: Kyle Huey Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Kan Liang Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Robert O'Callahan Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: acme@kernel.org Link: http://lkml.kernel.org/r/20181120170842.GZ2131@hirez.programming.kicks-ass.net Signed-off-by: Ingo Molnar --- Documentation/admin-guide/kernel-parameters.txt | 3 ++- arch/x86/events/intel/core.c | 12 ++++++++---- 2 files changed, 10 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 81d1d5a74728..5463d5a4d85c 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -856,7 +856,8 @@ causing system reset or hang due to sending INIT from AP to BSP. - disable_counter_freezing [HW] + perf_v4_pmi= [X86,INTEL] + Format: Disable Intel PMU counter freezing feature. The feature only exists starting from Arch Perfmon v4 (Skylake and newer). 
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 273c62e81546..af8bea9d4006 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -2306,14 +2306,18 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status) return handled; } -static bool disable_counter_freezing; +static bool disable_counter_freezing = true; static int __init intel_perf_counter_freezing_setup(char *s) { - disable_counter_freezing = true; - pr_info("Intel PMU Counter freezing feature disabled\n"); + bool res; + + if (kstrtobool(s, &res)) + return -EINVAL; + + disable_counter_freezing = !res; return 1; } -__setup("disable_counter_freezing", intel_perf_counter_freezing_setup); +__setup("perf_v4_pmi=", intel_perf_counter_freezing_setup); /* * Simplified handler for Arch Perfmon v4: -- cgit v1.2.3-70-g09d2 From 4ab7ca092c3c7ac8b16aa28eba723a8868f82f14 Mon Sep 17 00:00:00 2001 From: Romain Izard Date: Tue, 20 Nov 2018 17:57:37 +0100 Subject: ARM: dts: at91: sama5d2: use the divided clock for SMC The SAMA5D2 is different from SAMA5D3 and SAMA5D4, as there are two different clocks for the peripherals in the SoC. The Static Memory controller is connected to the divided master clock. Unfortunately, the device tree does not correctly show this and uses the master clock directly. This clock is then used by the code for the NAND controller to calculate the timings for the controller, and we end up with slow NAND Flash access. Fix the device tree, and the performance of Flash access is improved. Signed-off-by: Romain Izard Signed-off-by: Alexandre Belloni --- arch/arm/boot/dts/sama5d2.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/sama5d2.dtsi b/arch/arm/boot/dts/sama5d2.dtsi index 843052f14f1c..dd0dda6ed44b 100644 --- a/arch/arm/boot/dts/sama5d2.dtsi +++ b/arch/arm/boot/dts/sama5d2.dtsi @@ -314,7 +314,7 @@ 0x1 0x0 0x60000000 0x10000000 0x2 0x0 0x70000000 0x10000000 0x3 0x0 0x80000000 0x10000000>; - clocks = <&mck>; + clocks = <&h32ck>; status = "disabled"; nand_controller: nand-controller { -- cgit v1.2.3-70-g09d2 From c50cbd85cd7027d32ac5945bb60217936b4f7eaf Mon Sep 17 00:00:00 2001 From: "Dmitry V. Levin" Date: Wed, 21 Nov 2018 22:14:39 +0300 Subject: mips: fix mips_get_syscall_arg o32 check When checking for TIF_32BIT_REGS flag, mips_get_syscall_arg() should use the task specified as its argument instead of the current task. This potentially affects all syscall_get_arguments() users who specify tasks different from the current. Fixes: c0ff3c53d4f99 ("MIPS: Enable HAVE_ARCH_TRACEHOOK.") Signed-off-by: Dmitry V. 
Levin Signed-off-by: Paul Burton Patchwork: https://patchwork.linux-mips.org/patch/21185/ Cc: Elvira Khabirova Cc: Ralf Baechle Cc: James Hogan Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Cc: stable@vger.kernel.org # v3.13+ --- arch/mips/include/asm/syscall.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/mips/include/asm/syscall.h b/arch/mips/include/asm/syscall.h index 0170602a1e4e..6cf8ffb5367e 100644 --- a/arch/mips/include/asm/syscall.h +++ b/arch/mips/include/asm/syscall.h @@ -73,7 +73,7 @@ static inline unsigned long mips_get_syscall_arg(unsigned long *arg, #ifdef CONFIG_64BIT case 4: case 5: case 6: case 7: #ifdef CONFIG_MIPS32_O32 - if (test_thread_flag(TIF_32BIT_REGS)) + if (test_tsk_thread_flag(task, TIF_32BIT_REGS)) return get_user(*arg, (int *)usp + n); else #endif -- cgit v1.2.3-70-g09d2 From 6b04114f6fae5e84d33404c2970b1949c032546e Mon Sep 17 00:00:00 2001 From: Alexey Brodkin Date: Tue, 20 Nov 2018 13:30:19 +0300 Subject: arc: [devboards] Add support of NFSv3 ACL By default NFSv3 doesn't support ACLs (Access Control Lists), which are quite convenient to have so that a mounted NFS share behaves exactly like any other local file-system. In particular, missing ACL support makes umask useless. This, among other things, fixes Glibc's "nptl/tst-umask1". Signed-off-by: Alexey Brodkin Cc: Cupertino Miranda Cc: stable@vger.kernel.org #4.14+ Signed-off-by: Vineet Gupta --- arch/arc/configs/axs101_defconfig | 1 + arch/arc/configs/axs103_defconfig | 1 + arch/arc/configs/axs103_smp_defconfig | 1 + arch/arc/configs/hsdk_defconfig | 1 + arch/arc/configs/nps_defconfig | 1 + arch/arc/configs/nsimosci_defconfig | 1 + arch/arc/configs/nsimosci_hs_defconfig | 1 + arch/arc/configs/nsimosci_hs_smp_defconfig | 1 + arch/arc/configs/vdk_hs38_defconfig | 1 + arch/arc/configs/vdk_hs38_smp_defconfig | 1 + 10 files changed, 10 insertions(+) (limited to 'arch') diff --git a/arch/arc/configs/axs101_defconfig b/arch/arc/configs/axs101_defconfig index 41bc08be6a3b..8c23bd086cd0 100644 --- a/arch/arc/configs/axs101_defconfig +++ b/arch/arc/configs/axs101_defconfig @@ -95,6 +95,7 @@ CONFIG_VFAT_FS=y CONFIG_NTFS_FS=y CONFIG_TMPFS=y CONFIG_NFS_FS=y +CONFIG_NFS_V3_ACL=y CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_ISO8859_1=y # CONFIG_ENABLE_WARN_DEPRECATED is not set diff --git a/arch/arc/configs/axs103_defconfig b/arch/arc/configs/axs103_defconfig index 1e1c4a8011b5..666314fffc60 100644 --- a/arch/arc/configs/axs103_defconfig +++ b/arch/arc/configs/axs103_defconfig @@ -94,6 +94,7 @@ CONFIG_VFAT_FS=y CONFIG_NTFS_FS=y CONFIG_TMPFS=y CONFIG_NFS_FS=y +CONFIG_NFS_V3_ACL=y CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_ISO8859_1=y # CONFIG_ENABLE_WARN_DEPRECATED is not set diff --git a/arch/arc/configs/axs103_smp_defconfig b/arch/arc/configs/axs103_smp_defconfig index 6b0c0cfd5c30..429832b8560b 100644 --- a/arch/arc/configs/axs103_smp_defconfig +++ b/arch/arc/configs/axs103_smp_defconfig @@ -97,6 +97,7 @@ CONFIG_VFAT_FS=y CONFIG_NTFS_FS=y CONFIG_TMPFS=y CONFIG_NFS_FS=y +CONFIG_NFS_V3_ACL=y CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_ISO8859_1=y # CONFIG_ENABLE_WARN_DEPRECATED is not set diff --git a/arch/arc/configs/hsdk_defconfig b/arch/arc/configs/hsdk_defconfig index eca10b8baea5..87b23b7fb781 100644 --- a/arch/arc/configs/hsdk_defconfig +++ b/arch/arc/configs/hsdk_defconfig @@ -68,6 +68,7 @@ CONFIG_EXT3_FS=y CONFIG_VFAT_FS=y CONFIG_TMPFS=y CONFIG_NFS_FS=y +CONFIG_NFS_V3_ACL=y CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_ISO8859_1=y # CONFIG_ENABLE_WARN_DEPRECATED is not set diff --git
a/arch/arc/configs/nps_defconfig b/arch/arc/configs/nps_defconfig index 31ba224bbfb4..ae7a0d8be98d 100644 --- a/arch/arc/configs/nps_defconfig +++ b/arch/arc/configs/nps_defconfig @@ -73,6 +73,7 @@ CONFIG_PROC_KCORE=y CONFIG_TMPFS=y # CONFIG_MISC_FILESYSTEMS is not set CONFIG_NFS_FS=y +CONFIG_NFS_V3_ACL=y CONFIG_ROOT_NFS=y CONFIG_DEBUG_INFO=y # CONFIG_ENABLE_WARN_DEPRECATED is not set diff --git a/arch/arc/configs/nsimosci_defconfig b/arch/arc/configs/nsimosci_defconfig index f14eeff7d308..ad77f20e5aa6 100644 --- a/arch/arc/configs/nsimosci_defconfig +++ b/arch/arc/configs/nsimosci_defconfig @@ -66,5 +66,6 @@ CONFIG_EXT2_FS_XATTR=y CONFIG_TMPFS=y # CONFIG_MISC_FILESYSTEMS is not set CONFIG_NFS_FS=y +CONFIG_NFS_V3_ACL=y # CONFIG_ENABLE_WARN_DEPRECATED is not set # CONFIG_ENABLE_MUST_CHECK is not set diff --git a/arch/arc/configs/nsimosci_hs_defconfig b/arch/arc/configs/nsimosci_hs_defconfig index 025298a48305..1638e5bc9672 100644 --- a/arch/arc/configs/nsimosci_hs_defconfig +++ b/arch/arc/configs/nsimosci_hs_defconfig @@ -65,5 +65,6 @@ CONFIG_EXT2_FS_XATTR=y CONFIG_TMPFS=y # CONFIG_MISC_FILESYSTEMS is not set CONFIG_NFS_FS=y +CONFIG_NFS_V3_ACL=y # CONFIG_ENABLE_WARN_DEPRECATED is not set # CONFIG_ENABLE_MUST_CHECK is not set diff --git a/arch/arc/configs/nsimosci_hs_smp_defconfig b/arch/arc/configs/nsimosci_hs_smp_defconfig index df7b77b13b82..11cfbdb0f441 100644 --- a/arch/arc/configs/nsimosci_hs_smp_defconfig +++ b/arch/arc/configs/nsimosci_hs_smp_defconfig @@ -76,6 +76,7 @@ CONFIG_EXT2_FS_XATTR=y CONFIG_TMPFS=y # CONFIG_MISC_FILESYSTEMS is not set CONFIG_NFS_FS=y +CONFIG_NFS_V3_ACL=y # CONFIG_ENABLE_WARN_DEPRECATED is not set # CONFIG_ENABLE_MUST_CHECK is not set CONFIG_FTRACE=y diff --git a/arch/arc/configs/vdk_hs38_defconfig b/arch/arc/configs/vdk_hs38_defconfig index db47c3541f15..1e59a2e9c602 100644 --- a/arch/arc/configs/vdk_hs38_defconfig +++ b/arch/arc/configs/vdk_hs38_defconfig @@ -85,6 +85,7 @@ CONFIG_NTFS_FS=y CONFIG_TMPFS=y CONFIG_JFFS2_FS=y CONFIG_NFS_FS=y +CONFIG_NFS_V3_ACL=y CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_ISO8859_1=y # CONFIG_ENABLE_WARN_DEPRECATED is not set diff --git a/arch/arc/configs/vdk_hs38_smp_defconfig b/arch/arc/configs/vdk_hs38_smp_defconfig index a8ac5e917d9a..b5c3f6c54b03 100644 --- a/arch/arc/configs/vdk_hs38_smp_defconfig +++ b/arch/arc/configs/vdk_hs38_smp_defconfig @@ -90,6 +90,7 @@ CONFIG_NTFS_FS=y CONFIG_TMPFS=y CONFIG_JFFS2_FS=y CONFIG_NFS_FS=y +CONFIG_NFS_V3_ACL=y CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_ISO8859_1=y # CONFIG_ENABLE_WARN_DEPRECATED is not set -- cgit v1.2.3-70-g09d2 From ed6101bbf6266ee83e620b19faa7c6ad56bb41ab Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 21 Nov 2018 11:16:10 +0100 Subject: perf/x86/intel: Move branch tracing setup to the Intel-specific source file Move the branch tracing setup into a separate intel_pmu_bts_config() function in the Intel core object, because it is Intel-specific.
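Sketched from the hunks below, the resulting call chain for the core PMU simply stacks the Intel-only check on top of the generic one:

	static int core_pmu_hw_config(struct perf_event *event)
	{
		int ret = x86_pmu_hw_config(event);	/* generic x86 checks */

		if (ret)
			return ret;

		return intel_pmu_bts_config(event);	/* Intel BTS constraints */
	}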
Suggested-by: Peter Zijlstra Signed-off-by: Jiri Olsa Acked-by: Peter Zijlstra Cc: Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Link: http://lkml.kernel.org/r/20181121101612.16272-1-jolsa@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/events/core.c | 20 -------------------- arch/x86/events/intel/core.c | 41 ++++++++++++++++++++++++++++++++++++++++- 2 files changed, 40 insertions(+), 21 deletions(-) (limited to 'arch') diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 106911b603bd..374a19712e20 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -438,26 +438,6 @@ int x86_setup_perfctr(struct perf_event *event) if (config == -1LL) return -EINVAL; - /* - * Branch tracing: - */ - if (attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS && - !attr->freq && hwc->sample_period == 1) { - /* BTS is not supported by this architecture. */ - if (!x86_pmu.bts_active) - return -EOPNOTSUPP; - - /* BTS is currently only allowed for user-mode. */ - if (!attr->exclude_kernel) - return -EOPNOTSUPP; - - /* disallow bts if conflicting events are present */ - if (x86_add_exclusive(x86_lbr_exclusive_lbr)) - return -EBUSY; - - event->destroy = hw_perf_lbr_event_destroy; - } - hwc->config |= config; return 0; diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index af8bea9d4006..a95079abeb03 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -3102,10 +3102,49 @@ static unsigned long intel_pmu_large_pebs_flags(struct perf_event *event) return flags; } +static int intel_pmu_bts_config(struct perf_event *event) +{ + struct perf_event_attr *attr = &event->attr; + struct hw_perf_event *hwc = &event->hw; + + if (attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS && + !attr->freq && hwc->sample_period == 1) { + /* BTS is not supported by this architecture. */ + if (!x86_pmu.bts_active) + return -EOPNOTSUPP; + + /* BTS is currently only allowed for user-mode. */ + if (!attr->exclude_kernel) + return -EOPNOTSUPP; + + /* disallow bts if conflicting events are present */ + if (x86_add_exclusive(x86_lbr_exclusive_lbr)) + return -EBUSY; + + event->destroy = hw_perf_lbr_event_destroy; + } + + return 0; +} + +static int core_pmu_hw_config(struct perf_event *event) +{ + int ret = x86_pmu_hw_config(event); + + if (ret) + return ret; + + return intel_pmu_bts_config(event); +} + static int intel_pmu_hw_config(struct perf_event *event) { int ret = x86_pmu_hw_config(event); + if (ret) + return ret; + + ret = intel_pmu_bts_config(event); if (ret) return ret; @@ -3600,7 +3639,7 @@ static __initconst const struct x86_pmu core_pmu = { .enable_all = core_pmu_enable_all, .enable = core_pmu_enable_event, .disable = x86_pmu_disable_event, - .hw_config = x86_pmu_hw_config, + .hw_config = core_pmu_hw_config, .schedule_events = x86_schedule_events, .eventsel = MSR_ARCH_PERFMON_EVENTSEL0, .perfctr = MSR_ARCH_PERFMON_PERFCTR0, -- cgit v1.2.3-70-g09d2 From 67266c1080ad56c31af72b9c18355fde8ccc124a Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 21 Nov 2018 11:16:11 +0100 Subject: perf/x86/intel: Add generic branch tracing check to intel_pmu_has_bts() Currently we check the branch tracing only by checking for the PERF_COUNT_HW_BRANCH_INSTRUCTIONS event of PERF_TYPE_HARDWARE type. But we can define the same event with the PERF_TYPE_RAW type. 
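As an illustration (a sketch; the 0x00c4 raw encoding for retired branch instructions is an assumption about a particular Intel CPU, not something this patch defines), both attribute setups below can name the same BTS-eligible event:

	struct perf_event_attr hw_attr = {
		.type          = PERF_TYPE_HARDWARE,
		.config        = PERF_COUNT_HW_BRANCH_INSTRUCTIONS,
		.sample_period = 1,	/* fixed period of 1, no freq */
	};

	/* the raw form that the old PERF_TYPE_HARDWARE-only check missed: */
	struct perf_event_attr raw_attr = {
		.type          = PERF_TYPE_RAW,
		.config        = 0x00c4,	/* assumed: BR_INST_RETIRED.ALL_BRANCHES */
		.sample_period = 1,
	};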
Changing the intel_pmu_has_bts() code to check on event's final hw config value, so both HW types are covered. Adding unlikely to intel_pmu_has_bts() condition calls, because it was used in the original code in intel_bts_constraints. Signed-off-by: Jiri Olsa Acked-by: Peter Zijlstra Cc: Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Link: http://lkml.kernel.org/r/20181121101612.16272-2-jolsa@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/events/intel/core.c | 17 +++-------------- arch/x86/events/perf_event.h | 13 +++++++++---- 2 files changed, 12 insertions(+), 18 deletions(-) (limited to 'arch') diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index a95079abeb03..a15a73788693 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -2474,16 +2474,7 @@ done: static struct event_constraint * intel_bts_constraints(struct perf_event *event) { - struct hw_perf_event *hwc = &event->hw; - unsigned int hw_event, bts_event; - - if (event->attr.freq) - return NULL; - - hw_event = hwc->config & INTEL_ARCH_EVENT_MASK; - bts_event = x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS); - - if (unlikely(hw_event == bts_event && hwc->sample_period == 1)) + if (unlikely(intel_pmu_has_bts(event))) return &bts_constraint; return NULL; @@ -3105,10 +3096,8 @@ static unsigned long intel_pmu_large_pebs_flags(struct perf_event *event) static int intel_pmu_bts_config(struct perf_event *event) { struct perf_event_attr *attr = &event->attr; - struct hw_perf_event *hwc = &event->hw; - if (attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS && - !attr->freq && hwc->sample_period == 1) { + if (unlikely(intel_pmu_has_bts(event))) { /* BTS is not supported by this architecture. */ if (!x86_pmu.bts_active) return -EOPNOTSUPP; @@ -3170,7 +3159,7 @@ static int intel_pmu_hw_config(struct perf_event *event) /* * BTS is set up earlier in this path, so don't account twice */ - if (!intel_pmu_has_bts(event)) { + if (!unlikely(intel_pmu_has_bts(event))) { /* disallow lbr if conflicting events are present */ if (x86_add_exclusive(x86_lbr_exclusive_lbr)) return -EBUSY; diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index adae087cecdd..78d7b7031bfc 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -859,11 +859,16 @@ static inline int amd_pmu_init(void) static inline bool intel_pmu_has_bts(struct perf_event *event) { - if (event->attr.config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS && - !event->attr.freq && event->hw.sample_period == 1) - return true; + struct hw_perf_event *hwc = &event->hw; + unsigned int hw_event, bts_event; + + if (event->attr.freq) + return false; + + hw_event = hwc->config & INTEL_ARCH_EVENT_MASK; + bts_event = x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS); - return false; + return hw_event == bts_event && hwc->sample_period == 1; } int intel_pmu_save_and_restart(struct perf_event *event); -- cgit v1.2.3-70-g09d2 From 472de49fdc53365c880ab81ae2b5cfdd83db0b06 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 21 Nov 2018 11:16:12 +0100 Subject: perf/x86/intel: Disallow precise_ip on BTS events Vince reported a crash in the BTS flush code when touching the callchain data, which was supposed to be initialized as an 'early' callchain, but intel_pmu_drain_bts_buffer() does not do that: BUG: unable to handle kernel NULL pointer dereference at 0000000000000000 ... 
Call Trace: intel_pmu_drain_bts_buffer+0x151/0x220 ? intel_get_event_constraints+0x219/0x360 ? perf_assign_events+0xe2/0x2a0 ? select_idle_sibling+0x22/0x3a0 ? __update_load_avg_se+0x1ec/0x270 ? enqueue_task_fair+0x377/0xdd0 ? cpumask_next_and+0x19/0x20 ? load_balance+0x134/0x950 ? check_preempt_curr+0x7a/0x90 ? ttwu_do_wakeup+0x19/0x140 x86_pmu_stop+0x3b/0x90 x86_pmu_del+0x57/0x160 event_sched_out.isra.106+0x81/0x170 group_sched_out.part.108+0x51/0xc0 __perf_event_disable+0x7f/0x160 event_function+0x8c/0xd0 remote_function+0x3c/0x50 flush_smp_call_function_queue+0x35/0xe0 smp_call_function_single_interrupt+0x3a/0xd0 call_function_single_interrupt+0xf/0x20 It was triggered by fuzzer but can be easily reproduced by: # perf record -e cpu/branch-instructions/pu -g -c 1 Peter suggested not to allow branch tracing for precise events: > Now arguably, this is really stupid behaviour. Who in his right mind > wants callchain output on BTS entries. And even if they do, BTS + > precise_ip is nonsensical. > > So in my mind disallowing precise_ip on BTS would be the simplest fix. Suggested-by: Peter Zijlstra Reported-by: Vince Weaver Signed-off-by: Jiri Olsa Acked-by: Peter Zijlstra Cc: Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Stephane Eranian Cc: Thomas Gleixner Fixes: 6cbc304f2f36 ("perf/x86/intel: Fix unwind errors from PEBS entries (mk-II)") Link: http://lkml.kernel.org/r/20181121101612.16272-3-jolsa@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/events/intel/core.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch') diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index a15a73788693..ecc3e34ca955 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -3106,6 +3106,10 @@ static int intel_pmu_bts_config(struct perf_event *event) if (!attr->exclude_kernel) return -EOPNOTSUPP; + /* BTS is not allowed for precise events. */ + if (attr->precise_ip) + return -EOPNOTSUPP; + /* disallow bts if conflicting events are present */ if (x86_add_exclusive(x86_lbr_exclusive_lbr)) return -EBUSY; -- cgit v1.2.3-70-g09d2 From f2a5fef1248beccacec0deecb67c1be693d72ae6 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Mon, 19 Nov 2018 14:59:45 +0100 Subject: x86/xen: cleanup includes in arch/x86/xen/spinlock.c arch/x86/xen/spinlock.c includes several headers which are not needed. Remove the #includes. Signed-off-by: Juergen Gross Reviewed-by: Boris Ostrovsky Signed-off-by: Juergen Gross --- arch/x86/xen/spinlock.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c index 1c8a8816a402..3776122c87cc 100644 --- a/arch/x86/xen/spinlock.c +++ b/arch/x86/xen/spinlock.c @@ -3,22 +3,17 @@ * Split spinlock implementation out into its own file, so it can be * compiled in a FTRACE-compatible way. 
*/ -#include +#include #include -#include -#include -#include #include #include #include #include -#include #include #include "xen-ops.h" -#include "debugfs.h" static DEFINE_PER_CPU(int, lock_kicker_irq) = -1; static DEFINE_PER_CPU(char *, irq_name); -- cgit v1.2.3-70-g09d2 From 58a0afbf4c99ac355df16773af835b919b9432ee Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Wed, 21 Nov 2018 10:35:12 +0100 Subject: ARM: davinci: da8xx: define gpio interrupts as separate resources Since commit eb3744a2dd01 ("gpio: davinci: Do not assume continuous IRQ numbering") the davinci GPIO driver fails to probe if we boot in legacy mode from any of the board files. Since the driver now expects every interrupt to be defined as a separate resource, split the definition of IRQ resources instead of having a single continuous interrupt range. Fixes: eb3744a2dd01 ("gpio: davinci: Do not assume continuous IRQ numbering") Cc: stable@vger.kernel.org Signed-off-by: Bartosz Golaszewski Signed-off-by: Sekhar Nori --- arch/arm/mach-davinci/devices-da8xx.c | 40 +++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) (limited to 'arch') diff --git a/arch/arm/mach-davinci/devices-da8xx.c b/arch/arm/mach-davinci/devices-da8xx.c index 1fd3619f6a09..cf78da5ab054 100644 --- a/arch/arm/mach-davinci/devices-da8xx.c +++ b/arch/arm/mach-davinci/devices-da8xx.c @@ -701,6 +701,46 @@ static struct resource da8xx_gpio_resources[] = { }, { /* interrupt */ .start = IRQ_DA8XX_GPIO0, + .end = IRQ_DA8XX_GPIO0, + .flags = IORESOURCE_IRQ, + }, + { + .start = IRQ_DA8XX_GPIO1, + .end = IRQ_DA8XX_GPIO1, + .flags = IORESOURCE_IRQ, + }, + { + .start = IRQ_DA8XX_GPIO2, + .end = IRQ_DA8XX_GPIO2, + .flags = IORESOURCE_IRQ, + }, + { + .start = IRQ_DA8XX_GPIO3, + .end = IRQ_DA8XX_GPIO3, + .flags = IORESOURCE_IRQ, + }, + { + .start = IRQ_DA8XX_GPIO4, + .end = IRQ_DA8XX_GPIO4, + .flags = IORESOURCE_IRQ, + }, + { + .start = IRQ_DA8XX_GPIO5, + .end = IRQ_DA8XX_GPIO5, + .flags = IORESOURCE_IRQ, + }, + { + .start = IRQ_DA8XX_GPIO6, + .end = IRQ_DA8XX_GPIO6, + .flags = IORESOURCE_IRQ, + }, + { + .start = IRQ_DA8XX_GPIO7, + .end = IRQ_DA8XX_GPIO7, + .flags = IORESOURCE_IRQ, + }, + { + .start = IRQ_DA8XX_GPIO8, .end = IRQ_DA8XX_GPIO8, .flags = IORESOURCE_IRQ, }, -- cgit v1.2.3-70-g09d2 From 193c04374e281a56c7d4f96e66d329671945bebe Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Wed, 21 Nov 2018 10:35:13 +0100 Subject: ARM: davinci: dm365: define gpio interrupts as separate resources Since commit eb3744a2dd01 ("gpio: davinci: Do not assume continuous IRQ numbering") the davinci GPIO driver fails to probe if we boot in legacy mode from any of the board files. Since the driver now expects every interrupt to be defined as a separate resource, split the definition of IRQ resources instead of having a single continuous interrupt range. 
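The driver-side expectation behind this change, sketched (simplified from what the gpio-davinci probe path does, not a verbatim copy): each bank interrupt is fetched by resource index, which only works when every IRQ is its own resource:

    int bank, irq;

    for (bank = 0; bank < nbanks; bank++) {
            irq = platform_get_irq(pdev, bank);  /* one resource per index */
            if (irq < 0)
                    return irq;  /* a single start..end range only has index 0 */
    }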
Fixes: eb3744a2dd01 ("gpio: davinci: Do not assume continuous IRQ numbering") Cc: stable@vger.kernel.org Signed-off-by: Bartosz Golaszewski Signed-off-by: Sekhar Nori --- arch/arm/mach-davinci/dm365.c | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) (limited to 'arch') diff --git a/arch/arm/mach-davinci/dm365.c b/arch/arm/mach-davinci/dm365.c index abcf2a5ed89b..42665914166a 100644 --- a/arch/arm/mach-davinci/dm365.c +++ b/arch/arm/mach-davinci/dm365.c @@ -267,6 +267,41 @@ static struct resource dm365_gpio_resources[] = { }, { /* interrupt */ .start = IRQ_DM365_GPIO0, + .end = IRQ_DM365_GPIO0, + .flags = IORESOURCE_IRQ, + }, + { + .start = IRQ_DM365_GPIO1, + .end = IRQ_DM365_GPIO1, + .flags = IORESOURCE_IRQ, + }, + { + .start = IRQ_DM365_GPIO2, + .end = IRQ_DM365_GPIO2, + .flags = IORESOURCE_IRQ, + }, + { + .start = IRQ_DM365_GPIO3, + .end = IRQ_DM365_GPIO3, + .flags = IORESOURCE_IRQ, + }, + { + .start = IRQ_DM365_GPIO4, + .end = IRQ_DM365_GPIO4, + .flags = IORESOURCE_IRQ, + }, + { + .start = IRQ_DM365_GPIO5, + .end = IRQ_DM365_GPIO5, + .flags = IORESOURCE_IRQ, + }, + { + .start = IRQ_DM365_GPIO6, + .end = IRQ_DM365_GPIO6, + .flags = IORESOURCE_IRQ, + }, + { + .start = IRQ_DM365_GPIO7, .end = IRQ_DM365_GPIO7, .flags = IORESOURCE_IRQ, }, -- cgit v1.2.3-70-g09d2 From 2c9c83491f30afbce25796e185cd4d5e36080e31 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Wed, 21 Nov 2018 10:35:14 +0100 Subject: ARM: davinci: dm646x: define gpio interrupts as separate resources Since commit eb3744a2dd01 ("gpio: davinci: Do not assume continuous IRQ numbering") the davinci GPIO driver fails to probe if we boot in legacy mode from any of the board files. Since the driver now expects every interrupt to be defined as a separate resource, split the definition of IRQ resources instead of having a single continuous interrupt range. Fixes: eb3744a2dd01 ("gpio: davinci: Do not assume continuous IRQ numbering") Cc: stable@vger.kernel.org Signed-off-by: Bartosz Golaszewski Signed-off-by: Sekhar Nori --- arch/arm/mach-davinci/dm646x.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'arch') diff --git a/arch/arm/mach-davinci/dm646x.c b/arch/arm/mach-davinci/dm646x.c index 6bd2ed069d0d..d9b93e2806d2 100644 --- a/arch/arm/mach-davinci/dm646x.c +++ b/arch/arm/mach-davinci/dm646x.c @@ -442,6 +442,16 @@ static struct resource dm646x_gpio_resources[] = { }, { /* interrupt */ .start = IRQ_DM646X_GPIOBNK0, + .end = IRQ_DM646X_GPIOBNK0, + .flags = IORESOURCE_IRQ, + }, + { + .start = IRQ_DM646X_GPIOBNK1, + .end = IRQ_DM646X_GPIOBNK1, + .flags = IORESOURCE_IRQ, + }, + { + .start = IRQ_DM646X_GPIOBNK2, .end = IRQ_DM646X_GPIOBNK2, .flags = IORESOURCE_IRQ, }, -- cgit v1.2.3-70-g09d2 From 27db7baab640ea28d7994eda943fef170e347081 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Wed, 21 Nov 2018 10:35:15 +0100 Subject: ARM: davinci: dm355: define gpio interrupts as separate resources Since commit eb3744a2dd01 ("gpio: davinci: Do not assume continuous IRQ numbering") the davinci GPIO driver fails to probe if we boot in legacy mode from any of the board files. Since the driver now expects every interrupt to be defined as a separate resource, split the definition of IRQ resources instead of having a single continuous interrupt range. 
Fixes: eb3744a2dd01 ("gpio: davinci: Do not assume continuous IRQ numbering") Cc: stable@vger.kernel.org Signed-off-by: Bartosz Golaszewski Signed-off-by: Sekhar Nori --- arch/arm/mach-davinci/dm355.c | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) (limited to 'arch') diff --git a/arch/arm/mach-davinci/dm355.c b/arch/arm/mach-davinci/dm355.c index 9f7d38d12c88..2b0f5d97ab7c 100644 --- a/arch/arm/mach-davinci/dm355.c +++ b/arch/arm/mach-davinci/dm355.c @@ -548,6 +548,36 @@ static struct resource dm355_gpio_resources[] = { }, { /* interrupt */ .start = IRQ_DM355_GPIOBNK0, + .end = IRQ_DM355_GPIOBNK0, + .flags = IORESOURCE_IRQ, + }, + { + .start = IRQ_DM355_GPIOBNK1, + .end = IRQ_DM355_GPIOBNK1, + .flags = IORESOURCE_IRQ, + }, + { + .start = IRQ_DM355_GPIOBNK2, + .end = IRQ_DM355_GPIOBNK2, + .flags = IORESOURCE_IRQ, + }, + { + .start = IRQ_DM355_GPIOBNK3, + .end = IRQ_DM355_GPIOBNK3, + .flags = IORESOURCE_IRQ, + }, + { + .start = IRQ_DM355_GPIOBNK4, + .end = IRQ_DM355_GPIOBNK4, + .flags = IORESOURCE_IRQ, + }, + { + .start = IRQ_DM355_GPIOBNK5, + .end = IRQ_DM355_GPIOBNK5, + .flags = IORESOURCE_IRQ, + }, + { + .start = IRQ_DM355_GPIOBNK6, .end = IRQ_DM355_GPIOBNK6, .flags = IORESOURCE_IRQ, }, -- cgit v1.2.3-70-g09d2 From adcf60ce14c8250761af9de907eb6c7d096c26d3 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Wed, 21 Nov 2018 10:35:16 +0100 Subject: ARM: davinci: dm644x: define gpio interrupts as separate resources Since commit eb3744a2dd01 ("gpio: davinci: Do not assume continuous IRQ numbering") the davinci GPIO driver fails to probe if we boot in legacy mode from any of the board files. Since the driver now expects every interrupt to be defined as a separate resource, split the definition of IRQ resources instead of having a single continuous interrupt range. Fixes: eb3744a2dd01 ("gpio: davinci: Do not assume continuous IRQ numbering") Cc: stable@vger.kernel.org Signed-off-by: Bartosz Golaszewski Signed-off-by: Sekhar Nori --- arch/arm/mach-davinci/dm644x.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'arch') diff --git a/arch/arm/mach-davinci/dm644x.c b/arch/arm/mach-davinci/dm644x.c index 0720da7809a6..de1ec6dc01e9 100644 --- a/arch/arm/mach-davinci/dm644x.c +++ b/arch/arm/mach-davinci/dm644x.c @@ -492,6 +492,26 @@ static struct resource dm644_gpio_resources[] = { }, { /* interrupt */ .start = IRQ_GPIOBNK0, + .end = IRQ_GPIOBNK0, + .flags = IORESOURCE_IRQ, + }, + { + .start = IRQ_GPIOBNK1, + .end = IRQ_GPIOBNK1, + .flags = IORESOURCE_IRQ, + }, + { + .start = IRQ_GPIOBNK2, + .end = IRQ_GPIOBNK2, + .flags = IORESOURCE_IRQ, + }, + { + .start = IRQ_GPIOBNK3, + .end = IRQ_GPIOBNK3, + .flags = IORESOURCE_IRQ, + }, + { + .start = IRQ_GPIOBNK4, .end = IRQ_GPIOBNK4, .flags = IORESOURCE_IRQ, }, -- cgit v1.2.3-70-g09d2 From 45ed94b9e2d2e310fa7fb884b26080dfd4f0e31d Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Wed, 21 Nov 2018 10:35:18 +0100 Subject: ARM: davinci: da850: set the GPIO base to 0 Commit 587f7a694f01 ("gpio: davinci: Use dev name for label and automatic base selection") broke the network support in legacy boot mode for da850-evm since we can no longer request the MDIO clock GPIO. We now have the option to specify the GPIO base manually for davinci, so add the relevant fields to platform data. 
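A sketch of the dependency (the define below is the da850-evm board file's, quoted from memory, so treat the exact line as an assumption):

    /* Board files hard-code absolute GPIO numbers relative to base 0: */
    #define DA850_MDIO_CLKEN_PIN    GPIO_TO_PIN(2, 6)

    gpio_request(DA850_MDIO_CLKEN_PIN, "mdio_clk_en");

    /* With automatic base selection, gpiolib may register the chip at a
     * high dynamic base (e.g. 368), and the request above then resolves
     * to a nonexistent or wrong line. */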
Fixes: 587f7a694f01 ("gpio: davinci: Use dev name for label and automatic base selection") Cc: stable@vger.kernel.org Signed-off-by: Bartosz Golaszewski Signed-off-by: Sekhar Nori --- arch/arm/mach-davinci/da850.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/mach-davinci/da850.c b/arch/arm/mach-davinci/da850.c index 4528bbf0c861..e7b78df2bfef 100644 --- a/arch/arm/mach-davinci/da850.c +++ b/arch/arm/mach-davinci/da850.c @@ -719,7 +719,9 @@ int __init da850_register_vpif_capture(struct vpif_capture_config } static struct davinci_gpio_platform_data da850_gpio_platform_data = { - .ngpio = 144, + .no_auto_base = true, + .base = 0, + .ngpio = 144, }; int __init da850_register_gpio(void) -- cgit v1.2.3-70-g09d2 From 133cd2e48305887709675847da1f6ee2b7363929 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Wed, 21 Nov 2018 10:35:19 +0100 Subject: ARM: davinci: dm365: set the GPIO base to 0 Commit 587f7a694f01 ("gpio: davinci: Use dev name for label and automatic base selection") broke the GPIO support on DaVinci boards in legacy mode by allowing gpiolib to set the GPIO base automatically. DaVinci board files use the legacy GPIO API with hard-coded GPIO line numbers. Use the new fields in struct davinci_gpio_platform_data to manually set the GPIO base to 0. Fixes: 587f7a694f01 ("gpio: davinci: Use dev name for label and automatic base selection") Cc: stable@vger.kernel.org Signed-off-by: Bartosz Golaszewski Signed-off-by: Sekhar Nori --- arch/arm/mach-davinci/dm365.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/arm/mach-davinci/dm365.c b/arch/arm/mach-davinci/dm365.c index 42665914166a..01fb2b0c82de 100644 --- a/arch/arm/mach-davinci/dm365.c +++ b/arch/arm/mach-davinci/dm365.c @@ -308,6 +308,8 @@ static struct resource dm365_gpio_resources[] = { }; static struct davinci_gpio_platform_data dm365_gpio_platform_data = { + .no_auto_base = true, + .base = 0, .ngpio = 104, .gpio_unbanked = 8, }; -- cgit v1.2.3-70-g09d2 From fb9e7f0bba151281d7587c1262fae8bdf0497296 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Wed, 21 Nov 2018 10:35:20 +0100 Subject: ARM: davinci: dm646x: set the GPIO base to 0 Commit 587f7a694f01 ("gpio: davinci: Use dev name for label and automatic base selection") broke the GPIO support on DaVinci boards in legacy mode by allowing gpiolib to set the GPIO base automatically. DaVinci board files use the legacy GPIO API with hard-coded GPIO line numbers. Use the new fields in struct davinci_gpio_platform_data to manually set the GPIO base to 0. Fixes: 587f7a694f01 ("gpio: davinci: Use dev name for label and automatic base selection") Cc: stable@vger.kernel.org Signed-off-by: Bartosz Golaszewski Signed-off-by: Sekhar Nori --- arch/arm/mach-davinci/dm646x.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/arm/mach-davinci/dm646x.c b/arch/arm/mach-davinci/dm646x.c index d9b93e2806d2..7dc54b2a610f 100644 --- a/arch/arm/mach-davinci/dm646x.c +++ b/arch/arm/mach-davinci/dm646x.c @@ -458,6 +458,8 @@ static struct resource dm646x_gpio_resources[] = { }; static struct davinci_gpio_platform_data dm646x_gpio_platform_data = { + .no_auto_base = true, + .base = 0, .ngpio = 43, }; -- cgit v1.2.3-70-g09d2 From 7d35baa4e9ec4b717bc0e58a39cdb6a1c50f5465 Mon Sep 17 00:00:00 2001 From: Mathias Kresin Date: Mon, 26 Nov 2018 11:25:40 +0100 Subject: MIPS: ralink: Fix mt7620 nd_sd pinmux In case the nd_sd group is set to the sd-card function, Pins 45 + 46 are configured as GPIOs. 
If they are blocked by the sd function, they can't be used as GPIOs. Reported-by: Kristian Evensen Signed-off-by: Mathias Kresin Signed-off-by: Paul Burton Fixes: f576fb6a0700 ("MIPS: ralink: cleanup the soc specific pinmux data") Patchwork: https://patchwork.linux-mips.org/patch/21220/ Cc: John Crispin Cc: Ralf Baechle Cc: James Hogan Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Cc: stable@vger.kernel.org # v3.18+ --- arch/mips/ralink/mt7620.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/mips/ralink/mt7620.c b/arch/mips/ralink/mt7620.c index 41b71c4352c2..c1ce6f43642b 100644 --- a/arch/mips/ralink/mt7620.c +++ b/arch/mips/ralink/mt7620.c @@ -84,7 +84,7 @@ static struct rt2880_pmx_func pcie_rst_grp[] = { }; static struct rt2880_pmx_func nd_sd_grp[] = { FUNC("nand", MT7620_GPIO_MODE_NAND, 45, 15), - FUNC("sd", MT7620_GPIO_MODE_SD, 45, 15) + FUNC("sd", MT7620_GPIO_MODE_SD, 47, 13) }; static struct rt2880_pmx_group mt7620a_pinmux_data[] = { -- cgit v1.2.3-70-g09d2 From a6ca633e13f888cbb0a92309a1e090818cffcc86 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Wed, 21 Nov 2018 10:35:21 +0100 Subject: ARM: davinci: dm355: set the GPIO base to 0 Commit 587f7a694f01 ("gpio: davinci: Use dev name for label and automatic base selection") broke the GPIO support on DaVinci boards in legacy mode by allowing gpiolib to set the GPIO base automatically. DaVinci board files use the legacy GPIO API with hard-coded GPIO line numbers. Use the new fields in struct davinci_gpio_platform_data to manually set the GPIO base to 0. Fixes: 587f7a694f01 ("gpio: davinci: Use dev name for label and automatic base selection") Cc: stable@vger.kernel.org Signed-off-by: Bartosz Golaszewski Signed-off-by: Sekhar Nori --- arch/arm/mach-davinci/dm355.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/arm/mach-davinci/dm355.c b/arch/arm/mach-davinci/dm355.c index 2b0f5d97ab7c..4c6e0bef4509 100644 --- a/arch/arm/mach-davinci/dm355.c +++ b/arch/arm/mach-davinci/dm355.c @@ -584,6 +584,8 @@ static struct resource dm355_gpio_resources[] = { }; static struct davinci_gpio_platform_data dm355_gpio_platform_data = { + .no_auto_base = true, + .base = 0, .ngpio = 104, }; -- cgit v1.2.3-70-g09d2 From 55a891d0d0bd3db9303c580857147c37e2c76a5a Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Wed, 21 Nov 2018 10:35:22 +0100 Subject: ARM: davinci: da830: set the GPIO base to 0 Commit 587f7a694f01 ("gpio: davinci: Use dev name for label and automatic base selection") broke the GPIO support on DaVinci boards in legacy mode by allowing gpiolib to set the GPIO base automatically. DaVinci board files use the legacy GPIO API with hard-coded GPIO line numbers. Use the new fields in struct davinci_gpio_platform_data to manually set the GPIO base to 0. 
Fixes: 587f7a694f01 ("gpio: davinci: Use dev name for label and automatic base selection") Cc: stable@vger.kernel.org Signed-off-by: Bartosz Golaszewski Signed-off-by: Sekhar Nori --- arch/arm/mach-davinci/da830.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/mach-davinci/da830.c b/arch/arm/mach-davinci/da830.c index 0bc5bd2665df..2cc9fe4c3a91 100644 --- a/arch/arm/mach-davinci/da830.c +++ b/arch/arm/mach-davinci/da830.c @@ -759,7 +759,9 @@ static struct davinci_id da830_ids[] = { }; static struct davinci_gpio_platform_data da830_gpio_platform_data = { - .ngpio = 128, + .no_auto_base = true, + .base = 0, + .ngpio = 128, }; int __init da830_register_gpio(void) -- cgit v1.2.3-70-g09d2 From 27df7977099c2b8d32399cd1752f527c5a343dfa Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Wed, 21 Nov 2018 10:35:23 +0100 Subject: ARM: davinci: dm644x: set the GPIO base to 0 Commit 587f7a694f01 ("gpio: davinci: Use dev name for label and automatic base selection") broke the GPIO support on DaVinci boards in legacy mode by allowing gpiolib to set the GPIO base automatically. DaVinci board files use the legacy GPIO API with hard-coded GPIO line numbers. Use the new fields in struct davinci_gpio_platform_data to manually set the GPIO base to 0. Fixes: 587f7a694f01 ("gpio: davinci: Use dev name for label and automatic base selection") Cc: stable@vger.kernel.org Signed-off-by: Bartosz Golaszewski Signed-off-by: Sekhar Nori --- arch/arm/mach-davinci/dm644x.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/arm/mach-davinci/dm644x.c b/arch/arm/mach-davinci/dm644x.c index de1ec6dc01e9..38f92b7d413e 100644 --- a/arch/arm/mach-davinci/dm644x.c +++ b/arch/arm/mach-davinci/dm644x.c @@ -518,6 +518,8 @@ static struct resource dm644_gpio_resources[] = { }; static struct davinci_gpio_platform_data dm644_gpio_platform_data = { + .no_auto_base = true, + .base = 0, .ngpio = 71, }; -- cgit v1.2.3-70-g09d2 From 814cedbc0b78d75e335c96da9b9391142eab5600 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Tue, 27 Nov 2018 14:04:04 +0100 Subject: s390/mm: correct pgtable_bytes on page table downgrade The downgrade of a page table from 3 levels to 2 levels for a 31-bit compat process removes a pmd table which has to be counted against pgtable_bytes. Signed-off-by: Martin Schwidefsky --- arch/s390/mm/pgalloc.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c index 814f26520aa2..6791562779ee 100644 --- a/arch/s390/mm/pgalloc.c +++ b/arch/s390/mm/pgalloc.c @@ -131,6 +131,7 @@ void crst_table_downgrade(struct mm_struct *mm) } pgd = mm->pgd; + mm_dec_nr_pmds(mm); mm->pgd = (pgd_t *) (pgd_val(*pgd) & _REGION_ENTRY_ORIGIN); mm->context.asce_limit = _REGION3_SIZE; mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH | -- cgit v1.2.3-70-g09d2 From ac26d1f74cfc19c8dc9d533b5f20e99dbee3d9bd Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Tue, 27 Nov 2018 14:32:00 +0100 Subject: x86/fpu: Use the correct exception table macro in the XSTATE_OP wrapper Commit 75045f77f7a7 ("x86/extable: Introduce _ASM_EXTABLE_UA for uaccess fixups") incorrectly replaced the fixup entry for XSTATE_OP with a user-#PF-only fixup. XRSTOR can also raise #GP if the xstate content is invalid, and _ASM_EXTABLE_UA doesn't expect that. Change this fixup back to _ASM_EXTABLE so that #GP gets fixed up. 
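The construct in question follows the usual exception-table pattern, sketched below (simplified from the XSTATE_OP-style macro; op, err, st, lmask and hmask are macro parameters of the surrounding code): the potentially faulting instruction at label 1 gets a fixup at label 3 that records an error and resumes at label 2. _ASM_EXTABLE registers a fixup for any exception, while the _UA variant is meant for uaccess faults only:

    asm volatile("1: " op "\n\t"              /* XRSTOR may raise #GP or #PF */
                 "xor %[err], %[err]\n"
                 "2:\n\t"
                 ".pushsection .fixup,\"ax\"\n\t"
                 "3: movl $-2, %[err]\n\t"    /* record failure ... */
                 "jmp 2b\n\t"                 /* ... and resume after 2: */
                 ".popsection\n\t"
                 _ASM_EXTABLE(1b, 3b)         /* fix up any exception here */
                 : [err] "=r" (err)
                 : "D" (st), "m" (*st), "a" (lmask), "d" (hmask)
                 : "memory");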
Fixes: 75045f77f7a7 ("x86/extable: Introduce _ASM_EXTABLE_UA for uaccess fixups") Reported-by: Sebastian Andrzej Siewior Signed-off-by: Jann Horn Signed-off-by: Borislav Petkov Acked-by: Sebastian Andrzej Siewior Tested-by: Sebastian Andrzej Siewior Cc: "H. Peter Anvin" Cc: "Naveen N. Rao" Cc: Andy Lutomirski Cc: Ingo Molnar Cc: Thomas Gleixner Cc: kernel-hardening@lists.openwall.com Cc: x86-ml Link: https://lkml.kernel.org/r/20181126165957.xhsyu2dhyy45mrjo@linutronix.de Link: https://lkml.kernel.org/r/20181127133200.38322-1-jannh@google.com --- arch/x86/include/asm/fpu/internal.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index 5f7290e6e954..69dcdf195b61 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -226,7 +226,7 @@ static inline void copy_fxregs_to_kernel(struct fpu *fpu) "3: movl $-2,%[err]\n\t" \ "jmp 2b\n\t" \ ".popsection\n\t" \ - _ASM_EXTABLE_UA(1b, 3b) \ + _ASM_EXTABLE(1b, 3b) \ : [err] "=r" (err) \ : "D" (st), "m" (*st), "a" (lmask), "d" (hmask) \ : "memory") -- cgit v1.2.3-70-g09d2 From f6f8c1c09c224076a6c9ca3d97c24b81698f98f8 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Mon, 26 Nov 2018 17:59:40 +1100 Subject: sparc: suppress the implicit-fallthrough warning sparc builds with -Werror Signed-off-by: Stephen Rothwell Signed-off-by: David S. Miller --- arch/sparc/kernel/signal_64.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/sparc/kernel/signal_64.c b/arch/sparc/kernel/signal_64.c index e9de1803a22e..ca70787efd8e 100644 --- a/arch/sparc/kernel/signal_64.c +++ b/arch/sparc/kernel/signal_64.c @@ -533,6 +533,7 @@ static void do_signal(struct pt_regs *regs, unsigned long orig_i0) regs->tpc -= 4; regs->tnpc -= 4; pt_regs_clear_syscall(regs); + /* fall through */ case ERESTART_RESTARTBLOCK: regs->u_regs[UREG_G1] = __NR_restart_syscall; regs->tpc -= 4; -- cgit v1.2.3-70-g09d2 From e945067d95cc99a6e4601b6bda665f30c7917c3b Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Mon, 26 Nov 2018 18:11:46 +1100 Subject: sparc32: suppress an implicit-fallthrough warning Signed-off-by: Stephen Rothwell Signed-off-by: David S. Miller --- arch/sparc/kernel/signal32.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/sparc/kernel/signal32.c b/arch/sparc/kernel/signal32.c index 4c5b3fcbed94..e800ce13cc6e 100644 --- a/arch/sparc/kernel/signal32.c +++ b/arch/sparc/kernel/signal32.c @@ -683,6 +683,7 @@ void do_signal32(struct pt_regs * regs) regs->tpc -= 4; regs->tnpc -= 4; pt_regs_clear_syscall(regs); + /* fall through */ case ERESTART_RESTARTBLOCK: regs->u_regs[UREG_G1] = __NR_restart_syscall; regs->tpc -= 4; -- cgit v1.2.3-70-g09d2 From e399ef194171717aefa78838d02b1e92267be2b1 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Mon, 26 Nov 2018 18:45:26 +1100 Subject: sparc32: suppress another implicit-fallthrough warning Signed-off-by: Stephen Rothwell Signed-off-by: David S.
Miller --- arch/sparc/kernel/signal_32.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/sparc/kernel/signal_32.c b/arch/sparc/kernel/signal_32.c index 5665261cee37..83953780ca01 100644 --- a/arch/sparc/kernel/signal_32.c +++ b/arch/sparc/kernel/signal_32.c @@ -508,6 +508,7 @@ static void do_signal(struct pt_regs *regs, unsigned long orig_i0) regs->pc -= 4; regs->npc -= 4; pt_regs_clear_syscall(regs); + /* fall through */ case ERESTART_RESTARTBLOCK: regs->u_regs[UREG_G1] = __NR_restart_syscall; regs->pc -= 4; -- cgit v1.2.3-70-g09d2 From 07f7175b43827640d1e69c9eded89aa089a234b4 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Sun, 18 Nov 2018 17:14:10 -0500 Subject: x86/function_graph: Simplify with function_graph_enter() The function_graph_enter() function does the work of calling the function graph hook function and the management of the shadow stack, simplifying the work done in the architecture dependent prepare_ftrace_return(). Have x86 use the new code, and remove the shadow stack management as well as having to set up the trace structure. This is needed to prepare for a fix of a design bug on how the curr_ret_stack is used. Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Borislav Petkov Cc: "H. Peter Anvin" Cc: x86@kernel.org Cc: stable@kernel.org Fixes: 03274a3ffb449 ("tracing/fgraph: Adjust fgraph depth before calling trace return callback") Reviewed-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- arch/x86/kernel/ftrace.c | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 01ebcb6f263e..7ee8067cbf45 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -994,7 +994,6 @@ void prepare_ftrace_return(unsigned long self_addr, unsigned long *parent, { unsigned long old; int faulted; - struct ftrace_graph_ent trace; unsigned long return_hooker = (unsigned long) &return_to_handler; @@ -1046,19 +1045,7 @@ void prepare_ftrace_return(unsigned long self_addr, unsigned long *parent, return; } - trace.func = self_addr; - trace.depth = current->curr_ret_stack + 1; - - /* Only trace if the calling function expects to */ - if (!ftrace_graph_entry(&trace)) { + if (function_graph_enter(old, self_addr, frame_pointer, parent)) *parent = old; - return; - } - - if (ftrace_push_return_trace(old, self_addr, &trace.depth, - frame_pointer, parent) == -EBUSY) { - *parent = old; - return; - } } #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ -- cgit v1.2.3-70-g09d2 From f1f5b14afd7cce39e6a9b25c685e1ea34c231096 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Sun, 18 Nov 2018 17:19:26 -0500 Subject: ARM: function_graph: Simplify with function_graph_enter() The function_graph_enter() function does the work of calling the function graph hook function and the management of the shadow stack, simplifying the work done in the architecture dependent prepare_ftrace_return(). Have ARM use the new code, and remove the shadow stack management as well as having to set up the trace structure. This is needed to prepare for a fix of a design bug on how the curr_ret_stack is used. 
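For context, the consolidated helper being switched to looks roughly like this (a simplified sketch of the generic function introduced for this series in the tracing core, not a verbatim copy):

    int function_graph_enter(unsigned long ret, unsigned long func,
                             unsigned long frame_pointer, unsigned long *retp)
    {
            struct ftrace_graph_ent trace;

            trace.func = func;
            trace.depth = current->curr_ret_stack + 1;

            /* Only trace if the calling function expects to */
            if (!ftrace_graph_entry(&trace))
                    return -EBUSY;

            return ftrace_push_return_trace(ret, func, &trace.depth,
                                            frame_pointer, retp);
    }

Each architecture then only decides what to do with the return value, instead of duplicating the shadow stack bookkeeping.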
Cc: Russell King Cc: linux-arm-kernel@lists.infradead.org Cc: stable@kernel.org Fixes: 03274a3ffb449 ("tracing/fgraph: Adjust fgraph depth before calling trace return callback") Reviewed-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- arch/arm/kernel/ftrace.c | 17 +----------------- 1 file changed, 1 insertion(+), 16 deletions(-) (limited to 'arch') diff --git a/arch/arm/kernel/ftrace.c b/arch/arm/kernel/ftrace.c index 0142fcfcc3d3..bda949fd84e8 100644 --- a/arch/arm/kernel/ftrace.c +++ b/arch/arm/kernel/ftrace.c @@ -183,9 +183,7 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr, unsigned long frame_pointer) { unsigned long return_hooker = (unsigned long) &return_to_handler; - struct ftrace_graph_ent trace; unsigned long old; - int err; if (unlikely(atomic_read(&current->tracing_graph_pause))) return; @@ -193,21 +191,8 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr, old = *parent; *parent = return_hooker; - trace.func = self_addr; - trace.depth = current->curr_ret_stack + 1; - - /* Only trace if the calling function expects to */ - if (!ftrace_graph_entry(&trace)) { + if (function_graph_enter(old, self_addr, frame_pointer, NULL)) *parent = old; - return; - } - - err = ftrace_push_return_trace(old, self_addr, &trace.depth, - frame_pointer, NULL); - if (err == -EBUSY) { - *parent = old; - return; - } } #ifdef CONFIG_DYNAMIC_FTRACE -- cgit v1.2.3-70-g09d2 From 01e0ab2c4ff12358f15a856fd1a7bbea0670972b Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Sun, 18 Nov 2018 17:21:51 -0500 Subject: arm64: function_graph: Simplify with function_graph_enter() The function_graph_enter() function does the work of calling the function graph hook function and the management of the shadow stack, simplifying the work done in the architecture dependent prepare_ftrace_return(). Have arm64 use the new code, and remove the shadow stack management as well as having to set up the trace structure. This is needed to prepare for a fix of a design bug on how the curr_ret_stack is used.
Cc: Catalin Marinas Cc: linux-arm-kernel@lists.infradead.org Cc: stable@kernel.org Fixes: 03274a3ffb449 ("tracing/fgraph: Adjust fgraph depth before calling trace return callback") Acked-by: Will Deacon Reviewed-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- arch/arm64/kernel/ftrace.c | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c index 50986e388d2b..57e962290df3 100644 --- a/arch/arm64/kernel/ftrace.c +++ b/arch/arm64/kernel/ftrace.c @@ -216,8 +216,6 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr, { unsigned long return_hooker = (unsigned long)&return_to_handler; unsigned long old; - struct ftrace_graph_ent trace; - int err; if (unlikely(atomic_read(&current->tracing_graph_pause))) return; @@ -229,18 +227,7 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr, */ old = *parent; - trace.func = self_addr; - trace.depth = current->curr_ret_stack + 1; - - /* Only trace if the calling function expects to */ - if (!ftrace_graph_entry(&trace)) - return; - - err = ftrace_push_return_trace(old, self_addr, &trace.depth, - frame_pointer, NULL); - if (err == -EBUSY) - return; - else + if (!function_graph_enter(old, self_addr, frame_pointer, NULL)) *parent = return_hooker; } -- cgit v1.2.3-70-g09d2 From 556763e5a500d71879d632867b75826551acd49c Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Sun, 18 Nov 2018 17:23:30 -0500 Subject: microblaze: function_graph: Simplify with function_graph_enter() The function_graph_enter() function does the work of calling the function graph hook function and the management of the shadow stack, simplifying the work done in the architecture dependent prepare_ftrace_return(). Have microblaze use the new code, and remove the shadow stack management as well as having to set up the trace structure. This is needed to prepare for a fix of a design bug on how the curr_ret_stack is used.
Cc: Michal Simek Cc: stable@kernel.org Fixes: 03274a3ffb449 ("tracing/fgraph: Adjust fgraph depth before calling trace return callback") Reviewed-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- arch/microblaze/kernel/ftrace.c | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) (limited to 'arch') diff --git a/arch/microblaze/kernel/ftrace.c b/arch/microblaze/kernel/ftrace.c index d57563c58a26..224eea40e1ee 100644 --- a/arch/microblaze/kernel/ftrace.c +++ b/arch/microblaze/kernel/ftrace.c @@ -22,8 +22,7 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr) { unsigned long old; - int faulted, err; - struct ftrace_graph_ent trace; + int faulted; unsigned long return_hooker = (unsigned long) &return_to_handler; @@ -63,18 +62,8 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr) return; } - err = ftrace_push_return_trace(old, self_addr, &trace.depth, 0, NULL); - if (err == -EBUSY) { + if (function_graph_enter(old, self_addr, 0, NULL)) *parent = old; - return; - } - - trace.func = self_addr; - /* Only trace if the calling function expects to */ - if (!ftrace_graph_entry(&trace)) { - current->curr_ret_stack--; - *parent = old; - } } #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ -- cgit v1.2.3-70-g09d2 From 8712b27c5723c26400a2b350faf1d6d9fd7ffaad Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Sun, 18 Nov 2018 17:25:18 -0500 Subject: MIPS: function_graph: Simplify with function_graph_enter() The function_graph_enter() function does the work of calling the function graph hook function and the management of the shadow stack, simplifying the work done in the architecture dependent prepare_ftrace_return(). Have MIPS use the new code, and remove the shadow stack management as well as having to set up the trace structure. This is needed to prepare for a fix of a design bug on how the curr_ret_stack is used. Cc: Ralf Baechle Cc: Paul Burton Cc: James Hogan Cc: linux-mips@linux-mips.org Cc: stable@kernel.org Fixes: 03274a3ffb449 ("tracing/fgraph: Adjust fgraph depth before calling trace return callback") Reviewed-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- arch/mips/kernel/ftrace.c | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) (limited to 'arch') diff --git a/arch/mips/kernel/ftrace.c b/arch/mips/kernel/ftrace.c index 7f3dfdbc3657..b122cbb4aad1 100644 --- a/arch/mips/kernel/ftrace.c +++ b/arch/mips/kernel/ftrace.c @@ -322,7 +322,6 @@ void prepare_ftrace_return(unsigned long *parent_ra_addr, unsigned long self_ra, unsigned long fp) { unsigned long old_parent_ra; - struct ftrace_graph_ent trace; unsigned long return_hooker = (unsigned long) &return_to_handler; int faulted, insns; @@ -369,12 +368,6 @@ void prepare_ftrace_return(unsigned long *parent_ra_addr, unsigned long self_ra, if (unlikely(faulted)) goto out; - if (ftrace_push_return_trace(old_parent_ra, self_ra, &trace.depth, fp, - NULL) == -EBUSY) { - *parent_ra_addr = old_parent_ra; - return; - } - /* * Get the recorded ip of the current mcount calling site in the * __mcount_loc section, which will be used to filter the function @@ -382,13 +375,10 @@ void prepare_ftrace_return(unsigned long *parent_ra_addr, unsigned long self_ra, */ insns = core_kernel_text(self_ra) ? 
2 : MCOUNT_OFFSET_INSNS + 1; - trace.func = self_ra - (MCOUNT_INSN_SIZE * insns); + self_ra -= (MCOUNT_INSN_SIZE * insns); - /* Only trace if the calling function expects to */ - if (!ftrace_graph_entry(&trace)) { - current->curr_ret_stack--; + if (function_graph_enter(old_parent_ra, self_ra, fp, NULL)) *parent_ra_addr = old_parent_ra; - } return; out: ftrace_graph_stop(); -- cgit v1.2.3-70-g09d2 From d48ebb24866edea2c35be02a878f25bc65529370 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Sun, 18 Nov 2018 17:26:35 -0500 Subject: nds32: function_graph: Simplify with function_graph_enter() The function_graph_enter() function does the work of calling the function graph hook function and the management of the shadow stack, simplifying the work done in the architecture dependent prepare_ftrace_return(). Have nds32 use the new code, and remove the shadow stack management as well as having to set up the trace structure. This is needed to prepare for a fix of a design bug on how the curr_ret_stack is used. Cc: Greentime Hu Cc: stable@kernel.org Fixes: 03274a3ffb449 ("tracing/fgraph: Adjust fgraph depth before calling trace return callback") Reviewed-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- arch/nds32/kernel/ftrace.c | 18 ++---------------- 1 file changed, 2 insertions(+), 16 deletions(-) (limited to 'arch') diff --git a/arch/nds32/kernel/ftrace.c b/arch/nds32/kernel/ftrace.c index a0a9679ad5de..8a41372551ff 100644 --- a/arch/nds32/kernel/ftrace.c +++ b/arch/nds32/kernel/ftrace.c @@ -211,29 +211,15 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr, unsigned long frame_pointer) { unsigned long return_hooker = (unsigned long)&return_to_handler; - struct ftrace_graph_ent trace; unsigned long old; - int err; if (unlikely(atomic_read(&current->tracing_graph_pause))) return; old = *parent; - trace.func = self_addr; - trace.depth = current->curr_ret_stack + 1; - - /* Only trace if the calling function expects to */ - if (!ftrace_graph_entry(&trace)) - return; - - err = ftrace_push_return_trace(old, self_addr, &trace.depth, - frame_pointer, NULL); - - if (err == -EBUSY) - return; - - *parent = return_hooker; + if (!function_graph_enter(old, self_addr, frame_pointer, NULL)) + *parent = return_hooker; } noinline void ftrace_graph_caller(void) -- cgit v1.2.3-70-g09d2 From a87532c78d291265efadc4b20a8c7a70cd59ea29 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Sun, 18 Nov 2018 17:27:43 -0500 Subject: parisc: function_graph: Simplify with function_graph_enter() The function_graph_enter() function does the work of calling the function graph hook function and the management of the shadow stack, simplifying the work done in the architecture dependent prepare_ftrace_return(). Have parisc use the new code, and remove the shadow stack management as well as having to set up the trace structure. This is needed to prepare for a fix of a design bug on how the curr_ret_stack is used. Cc: "James E.J.
Bottomley" Cc: Helge Deller Cc: linux-parisc@vger.kernel.org Cc: stable@kernel.org Fixes: 03274a3ffb449 ("tracing/fgraph: Adjust fgraph depth before calling trace return callback") Reviewed-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- arch/parisc/kernel/ftrace.c | 17 +++-------------- 1 file changed, 3 insertions(+), 14 deletions(-) (limited to 'arch') diff --git a/arch/parisc/kernel/ftrace.c b/arch/parisc/kernel/ftrace.c index 6fa8535d3cce..e46a4157a894 100644 --- a/arch/parisc/kernel/ftrace.c +++ b/arch/parisc/kernel/ftrace.c @@ -30,7 +30,6 @@ static void __hot prepare_ftrace_return(unsigned long *parent, unsigned long self_addr) { unsigned long old; - struct ftrace_graph_ent trace; extern int parisc_return_to_handler; if (unlikely(ftrace_graph_is_dead())) @@ -41,19 +40,9 @@ static void __hot prepare_ftrace_return(unsigned long *parent, old = *parent; - trace.func = self_addr; - trace.depth = current->curr_ret_stack + 1; - - /* Only trace if the calling function expects to */ - if (!ftrace_graph_entry(&trace)) - return; - - if (ftrace_push_return_trace(old, self_addr, &trace.depth, - 0, NULL) == -EBUSY) - return; - - /* activate parisc_return_to_handler() as return point */ - *parent = (unsigned long) &parisc_return_to_handler; + if (!function_graph_enter(old, self_addr, 0, NULL)) + /* activate parisc_return_to_handler() as return point */ + *parent = (unsigned long) &parisc_return_to_handler; } #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ -- cgit v1.2.3-70-g09d2 From fe60522ec60082a1dd735691b82c64f65d4ad15e Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Sun, 18 Nov 2018 17:28:53 -0500 Subject: powerpc/function_graph: Simplify with function_graph_enter() The function_graph_enter() function does the work of calling the function graph hook function and the management of the shadow stack, simplifying the work done in the architecture dependent prepare_ftrace_return(). Have powerpc use the new code, and remove the shadow stack management as well as having to set up the trace structure. This is needed to prepare for a fix of a design bug on how the curr_ret_stack is used. 
Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Michael Ellerman Cc: linuxppc-dev@lists.ozlabs.org Cc: stable@kernel.org Fixes: 03274a3ffb449 ("tracing/fgraph: Adjust fgraph depth before calling trace return callback") Reviewed-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- arch/powerpc/kernel/trace/ftrace.c | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c index 4bf051d3e21e..b65c8a34ad6e 100644 --- a/arch/powerpc/kernel/trace/ftrace.c +++ b/arch/powerpc/kernel/trace/ftrace.c @@ -950,7 +950,6 @@ int ftrace_disable_ftrace_graph_caller(void) */ unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip) { - struct ftrace_graph_ent trace; unsigned long return_hooker; if (unlikely(ftrace_graph_is_dead())) @@ -961,18 +960,8 @@ unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip) return_hooker = ppc_function_entry(return_to_handler); - trace.func = ip; - trace.depth = current->curr_ret_stack + 1; - - /* Only trace if the calling function expects to */ - if (!ftrace_graph_entry(&trace)) - goto out; - - if (ftrace_push_return_trace(parent, ip, &trace.depth, 0, - NULL) == -EBUSY) - goto out; - - parent = return_hooker; + if (!function_graph_enter(parent, ip, 0, NULL)) + parent = return_hooker; out: return parent; } -- cgit v1.2.3-70-g09d2 From e949b6db51dc172a35c962bc4414ca148315fe21 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Sun, 18 Nov 2018 17:31:44 -0500 Subject: riscv/function_graph: Simplify with function_graph_enter() The function_graph_enter() function does the work of calling the function graph hook function and the management of the shadow stack, simplifying the work done in the architecture dependent prepare_ftrace_return(). Have riscv use the new code, and remove the shadow stack management as well as having to set up the trace structure. This is needed to prepare for a fix of a design bug on how the curr_ret_stack is used. 
Cc: Greentime Hu Cc: Alan Kao Cc: stable@kernel.org Fixes: 03274a3ffb449 ("tracing/fgraph: Adjust fgraph depth before calling trace return callback") Reviewed-by: Palmer Dabbelt Reviewed-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- arch/riscv/kernel/ftrace.c | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) (limited to 'arch') diff --git a/arch/riscv/kernel/ftrace.c b/arch/riscv/kernel/ftrace.c index 1157b6b52d25..c433f6d3dd64 100644 --- a/arch/riscv/kernel/ftrace.c +++ b/arch/riscv/kernel/ftrace.c @@ -132,7 +132,6 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr, { unsigned long return_hooker = (unsigned long)&return_to_handler; unsigned long old; - struct ftrace_graph_ent trace; int err; if (unlikely(atomic_read(&current->tracing_graph_pause)) @@ -144,17 +143,8 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr, */ old = *parent; - trace.func = self_addr; - trace.depth = current->curr_ret_stack + 1; - - if (!ftrace_graph_entry(&trace)) - return; - - err = ftrace_push_return_trace(old, self_addr, &trace.depth, - frame_pointer, parent); - if (err == -EBUSY) - return; - *parent = return_hooker; + if (function_graph_enter(old, self_addr, frame_pointer, parent)) + *parent = return_hooker; } #ifdef CONFIG_DYNAMIC_FTRACE -- cgit v1.2.3-70-g09d2 From 18588e1487b19e45bd90bd55ec8d3a1d44f3257f Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Sun, 18 Nov 2018 17:33:17 -0500 Subject: s390/function_graph: Simplify with function_graph_enter() The function_graph_enter() function does the work of calling the function graph hook function and the management of the shadow stack, simplifying the work done in the architecture dependent prepare_ftrace_return(). Have s390 use the new code, and remove the shadow stack management as well as having to set up the trace structure. This is needed to prepare for a fix of a design bug on how the curr_ret_stack is used. Acked-by: Martin Schwidefsky Cc: Heiko Carstens Cc: Julian Wiedmann Cc: linux-s390@vger.kernel.org Cc: stable@kernel.org Fixes: 03274a3ffb449 ("tracing/fgraph: Adjust fgraph depth before calling trace return callback") Reviewed-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- arch/s390/kernel/ftrace.c | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) (limited to 'arch') diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c index 84be7f02d0c2..39b13d71a8fe 100644 --- a/arch/s390/kernel/ftrace.c +++ b/arch/s390/kernel/ftrace.c @@ -203,22 +203,13 @@ device_initcall(ftrace_plt_init); */ unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip) { - struct ftrace_graph_ent trace; if (unlikely(ftrace_graph_is_dead())) goto out; if (unlikely(atomic_read(&current->tracing_graph_pause))) goto out; ip -= MCOUNT_INSN_SIZE; - trace.func = ip; - trace.depth = current->curr_ret_stack + 1; - /* Only trace if the calling function expects to.
*/ - if (!ftrace_graph_entry(&trace)) - goto out; - if (ftrace_push_return_trace(parent, ip, &trace.depth, 0, - NULL) == -EBUSY) - goto out; - parent = (unsigned long) return_to_handler; + if (!function_graph_enter(parent, ip, 0, NULL)) + parent = (unsigned long) return_to_handler; out: return parent; } -- cgit v1.2.3-70-g09d2 From bc715ee4dbc5db462c59b9cfba92d31b3274fe3a Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Sun, 18 Nov 2018 17:35:37 -0500 Subject: sh/function_graph: Simplify with function_graph_enter() The function_graph_enter() function does the work of calling the function graph hook function and the management of the shadow stack, simplifying the work done in the architecture dependent prepare_ftrace_return(). Have superh use the new code, and remove the shadow stack management as well as having to set up the trace structure. This is needed to prepare for a fix of a design bug on how the curr_ret_stack is used. Cc: Yoshinori Sato Cc: Rich Felker Cc: linux-sh@vger.kernel.org Cc: stable@kernel.org Fixes: 03274a3ffb449 ("tracing/fgraph: Adjust fgraph depth before calling trace return callback") Reviewed-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- arch/sh/kernel/ftrace.c | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) (limited to 'arch') diff --git a/arch/sh/kernel/ftrace.c b/arch/sh/kernel/ftrace.c index 96dd9f7da250..1b04270e5460 100644 --- a/arch/sh/kernel/ftrace.c +++ b/arch/sh/kernel/ftrace.c @@ -321,8 +321,7 @@ int ftrace_disable_ftrace_graph_caller(void) void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr) { unsigned long old; - int faulted, err; - struct ftrace_graph_ent trace; + int faulted; unsigned long return_hooker = (unsigned long)&return_to_handler; if (unlikely(ftrace_graph_is_dead())) @@ -365,18 +364,7 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr) return; } - err = ftrace_push_return_trace(old, self_addr, &trace.depth, 0, NULL); - if (err == -EBUSY) { + if (function_graph_enter(old, self_addr, 0, NULL)) __raw_writel(old, parent); - return; - } - - trace.func = self_addr; - - /* Only trace if the calling function expects to */ - if (!ftrace_graph_entry(&trace)) { - current->curr_ret_stack--; - __raw_writel(old, parent); - } } #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ -- cgit v1.2.3-70-g09d2 From 9c4bf5e0db164f330a2d3e128e9832661f69f0e9 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Sun, 18 Nov 2018 17:37:40 -0500 Subject: sparc/function_graph: Simplify with function_graph_enter() The function_graph_enter() function does the work of calling the function graph hook function and the management of the shadow stack, simplifying the work done in the architecture dependent prepare_ftrace_return(). Have sparc use the new code, and remove the shadow stack management as well as having to set up the trace structure. This is needed to prepare for a fix of a design bug on how the curr_ret_stack is used. Cc: "David S. 
Miller" Cc: sparclinux@vger.kernel.org Cc: stable@kernel.org Fixes: 03274a3ffb449 ("tracing/fgraph: Adjust fgraph depth before calling trace return callback") Reviewed-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- arch/sparc/kernel/ftrace.c | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/sparc/kernel/ftrace.c b/arch/sparc/kernel/ftrace.c index 915dda4ae412..684b84ce397f 100644 --- a/arch/sparc/kernel/ftrace.c +++ b/arch/sparc/kernel/ftrace.c @@ -126,20 +126,11 @@ unsigned long prepare_ftrace_return(unsigned long parent, unsigned long frame_pointer) { unsigned long return_hooker = (unsigned long) &return_to_handler; - struct ftrace_graph_ent trace; if (unlikely(atomic_read(¤t->tracing_graph_pause))) return parent + 8UL; - trace.func = self_addr; - trace.depth = current->curr_ret_stack + 1; - - /* Only trace if the calling function expects to */ - if (!ftrace_graph_entry(&trace)) - return parent + 8UL; - - if (ftrace_push_return_trace(parent, self_addr, &trace.depth, - frame_pointer, NULL) == -EBUSY) + if (function_graph_enter(parent, self_addr, frame_pointer, NULL)) return parent + 8UL; return return_hooker; -- cgit v1.2.3-70-g09d2 From d206e6b7ea3fdc8ec8f6be9a2ecfe58142b49e37 Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Mon, 5 Nov 2018 21:50:13 -0800 Subject: arm64: dts: qcom: sdm845-mtp: Mark protected gcc clocks As of v4.20-rc1 probing the GCC driver on a SDM845 device with the standard security implementation causes an access violation and an immediate system restart. Use the protected-clocks property to mark the offending clocks protected for the MTP, in order to allow it to boot. Cc: Stephen Boyd Signed-off-by: Bjorn Andersson Acked-by: Andy Gross Signed-off-by: Stephen Boyd --- arch/arm64/boot/dts/qcom/sdm845-mtp.dts | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch') diff --git a/arch/arm64/boot/dts/qcom/sdm845-mtp.dts b/arch/arm64/boot/dts/qcom/sdm845-mtp.dts index eedfaf8922e2..ef2d059c2db1 100644 --- a/arch/arm64/boot/dts/qcom/sdm845-mtp.dts +++ b/arch/arm64/boot/dts/qcom/sdm845-mtp.dts @@ -343,6 +343,12 @@ }; }; +&gcc { + protected-clocks = , + , + ; +}; + &i2c10 { status = "okay"; clock-frequency = <400000>; -- cgit v1.2.3-70-g09d2 From 60c8144afc287ef09ce8c1230c6aa972659ba1bb Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Tue, 27 Nov 2018 14:41:37 +0100 Subject: x86/MCE/AMD: Fix the thresholding machinery initialization order MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently, the code sets up the thresholding interrupt vector and only then goes about initializing the thresholding banks. Which is wrong, because an early thresholding interrupt would cause a NULL pointer dereference when accessing those banks and prevent the machine from booting. Therefore, set the thresholding interrupt vector only *after* having initialized the banks successfully. 
Fixes: 18807ddb7f88 ("x86/mce/AMD: Reset Threshold Limit after logging error") Reported-by: Rafał Miłecki Reported-by: John Clemens Signed-off-by: Borislav Petkov Tested-by: Rafał Miłecki Tested-by: John Clemens Cc: Aravind Gopalakrishnan Cc: linux-edac@vger.kernel.org Cc: stable@vger.kernel.org Cc: Tony Luck Cc: x86@kernel.org Cc: Yazen Ghannam Link: https://lkml.kernel.org/r/20181127101700.2964-1-zajec5@gmail.com Link: https://bugzilla.kernel.org/show_bug.cgi?id=201291 --- arch/x86/kernel/cpu/mcheck/mce_amd.c | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index dd33c357548f..e12454e21b8a 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -56,7 +56,7 @@ /* Threshold LVT offset is at MSR0xC0000410[15:12] */ #define SMCA_THR_LVT_OFF 0xF000 -static bool thresholding_en; +static bool thresholding_irq_en; static const char * const th_names[] = { "load_store", @@ -534,9 +534,8 @@ prepare_threshold_block(unsigned int bank, unsigned int block, u32 addr, set_offset: offset = setup_APIC_mce_threshold(offset, new); - - if ((offset == new) && (mce_threshold_vector != amd_threshold_interrupt)) - mce_threshold_vector = amd_threshold_interrupt; + if (offset == new) + thresholding_irq_en = true; done: mce_threshold_block_init(&b, offset); @@ -1357,9 +1356,6 @@ int mce_threshold_remove_device(unsigned int cpu) { unsigned int bank; - if (!thresholding_en) - return 0; - for (bank = 0; bank < mca_cfg.banks; ++bank) { if (!(per_cpu(bank_map, cpu) & (1 << bank))) continue; @@ -1377,9 +1373,6 @@ int mce_threshold_create_device(unsigned int cpu) struct threshold_bank **bp; int err = 0; - if (!thresholding_en) - return 0; - bp = per_cpu(threshold_banks, cpu); if (bp) return 0; @@ -1408,9 +1401,6 @@ static __init int threshold_init_device(void) { unsigned lcpu = 0; - if (mce_threshold_vector == amd_threshold_interrupt) - thresholding_en = true; - /* to hit CPUs online before the notifier is up */ for_each_online_cpu(lcpu) { int err = mce_threshold_create_device(lcpu); @@ -1419,6 +1409,9 @@ static __init int threshold_init_device(void) return err; } + if (thresholding_irq_en) + mce_threshold_vector = amd_threshold_interrupt; + return 0; } /* -- cgit v1.2.3-70-g09d2 From 4cd24de3a0980bf3100c9dcb08ef65ca7c31af48 Mon Sep 17 00:00:00 2001 From: Zhenzhong Duan Date: Fri, 2 Nov 2018 01:45:41 -0700 Subject: x86/retpoline: Make CONFIG_RETPOLINE depend on compiler support Since retpoline capable compilers are widely available, make CONFIG_RETPOLINE hard depend on the compiler capability. Break the build when CONFIG_RETPOLINE is enabled and the compiler does not support it. Emit an error message in that case: "arch/x86/Makefile:226: *** You are building kernel with non-retpoline compiler, please update your compiler.. Stop." [dwmw: Fail the build with non-retpoline compiler] Suggested-by: Peter Zijlstra Signed-off-by: Zhenzhong Duan Signed-off-by: Thomas Gleixner Cc: David Woodhouse Cc: Borislav Petkov Cc: Daniel Borkmann Cc: H. 
Peter Anvin Cc: Konrad Rzeszutek Wilk Cc: Andy Lutomirski Cc: Masahiro Yamada Cc: Michal Marek Cc: Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/cca0cb20-f9e2-4094-840b-fb0f8810cd34@default --- arch/x86/Kconfig | 4 ---- arch/x86/Makefile | 5 +++-- arch/x86/include/asm/nospec-branch.h | 10 ++++++---- arch/x86/kernel/cpu/bugs.c | 2 +- scripts/Makefile.build | 2 -- 5 files changed, 10 insertions(+), 13 deletions(-) (limited to 'arch') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 9d734f3c8234..b5286ad2a982 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -444,10 +444,6 @@ config RETPOLINE branches. Requires a compiler with -mindirect-branch=thunk-extern support for full protection. The kernel may run slower. - Without compiler support, at least indirect branches in assembler - code are eliminated. Since this includes the syscall entry path, - it is not entirely pointless. - config INTEL_RDT bool "Intel Resource Director Technology support" depends on X86 && CPU_SUP_INTEL diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 88398fdf8129..f5d7f4134524 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -220,9 +220,10 @@ KBUILD_CFLAGS += -fno-asynchronous-unwind-tables # Avoid indirect branches in kernel to deal with Spectre ifdef CONFIG_RETPOLINE -ifneq ($(RETPOLINE_CFLAGS),) - KBUILD_CFLAGS += $(RETPOLINE_CFLAGS) -DRETPOLINE +ifeq ($(RETPOLINE_CFLAGS),) + $(error You are building kernel with non-retpoline compiler, please update your compiler.) endif + KBUILD_CFLAGS += $(RETPOLINE_CFLAGS) endif archscripts: scripts_basic diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 80dc14422495..8b09cbb2d52c 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -162,11 +162,12 @@ _ASM_PTR " 999b\n\t" \ ".popsection\n\t" -#if defined(CONFIG_X86_64) && defined(RETPOLINE) +#ifdef CONFIG_RETPOLINE +#ifdef CONFIG_X86_64 /* - * Since the inline asm uses the %V modifier which is only in newer GCC, - * the 64-bit one is dependent on RETPOLINE not CONFIG_RETPOLINE. + * Inline asm uses the %V modifier which is only in newer GCC + * which is ensured when CONFIG_RETPOLINE is defined. */ # define CALL_NOSPEC \ ANNOTATE_NOSPEC_ALTERNATIVE \ @@ -181,7 +182,7 @@ X86_FEATURE_RETPOLINE_AMD) # define THUNK_TARGET(addr) [thunk_target] "r" (addr) -#elif defined(CONFIG_X86_32) && defined(CONFIG_RETPOLINE) +#else /* CONFIG_X86_32 */ /* * For i386 we use the original ret-equivalent retpoline, because * otherwise we'll run out of registers. 
We don't care about CET @@ -211,6 +212,7 @@ X86_FEATURE_RETPOLINE_AMD) # define THUNK_TARGET(addr) [thunk_target] "rm" (addr) +#endif #else /* No retpoline for C / inline asm */ # define CALL_NOSPEC "call *%[thunk_target]\n" # define THUNK_TARGET(addr) [thunk_target] "rm" (addr) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index c37e66e493bf..d0108fb6e4dd 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -252,7 +252,7 @@ static void __init spec2_print_if_secure(const char *reason) static inline bool retp_compiler(void) { - return __is_defined(RETPOLINE); + return __is_defined(CONFIG_RETPOLINE); } static inline bool match_option(const char *arg, int arglen, const char *opt) diff --git a/scripts/Makefile.build b/scripts/Makefile.build index a8e7ba9f73e8..6a6be9f440cf 100644 --- a/scripts/Makefile.build +++ b/scripts/Makefile.build @@ -236,10 +236,8 @@ ifdef CONFIG_GCOV_KERNEL objtool_args += --no-unreachable endif ifdef CONFIG_RETPOLINE -ifneq ($(RETPOLINE_CFLAGS),) objtool_args += --retpoline endif -endif ifdef CONFIG_MODVERSIONS -- cgit v1.2.3-70-g09d2 From ef014aae8f1cd2793e4e014bbb102bed53f852b7 Mon Sep 17 00:00:00 2001 From: Zhenzhong Duan Date: Fri, 2 Nov 2018 01:45:41 -0700 Subject: x86/retpoline: Remove minimal retpoline support Now that CONFIG_RETPOLINE hard depends on compiler support, there is no reason to keep the minimal retpoline support around which only provided basic protection in the assembly files. Suggested-by: Peter Zijlstra Signed-off-by: Zhenzhong Duan Signed-off-by: Thomas Gleixner Cc: David Woodhouse Cc: Borislav Petkov Cc: H. Peter Anvin Cc: Konrad Rzeszutek Wilk Cc: Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/f06f0a89-5587-45db-8ed2-0a9d6638d5c0@default --- arch/x86/include/asm/nospec-branch.h | 2 -- arch/x86/kernel/cpu/bugs.c | 13 ++----------- 2 files changed, 2 insertions(+), 13 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 8b09cbb2d52c..c202a64edd95 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -221,8 +221,6 @@ /* The Spectre V2 mitigation variants */ enum spectre_v2_mitigation { SPECTRE_V2_NONE, - SPECTRE_V2_RETPOLINE_MINIMAL, - SPECTRE_V2_RETPOLINE_MINIMAL_AMD, SPECTRE_V2_RETPOLINE_GENERIC, SPECTRE_V2_RETPOLINE_AMD, SPECTRE_V2_IBRS_ENHANCED, diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index d0108fb6e4dd..7f6d8159398e 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -135,8 +135,6 @@ enum spectre_v2_mitigation_cmd { static const char *spectre_v2_strings[] = { [SPECTRE_V2_NONE] = "Vulnerable", - [SPECTRE_V2_RETPOLINE_MINIMAL] = "Vulnerable: Minimal generic ASM retpoline", - [SPECTRE_V2_RETPOLINE_MINIMAL_AMD] = "Vulnerable: Minimal AMD ASM retpoline", [SPECTRE_V2_RETPOLINE_GENERIC] = "Mitigation: Full generic retpoline", [SPECTRE_V2_RETPOLINE_AMD] = "Mitigation: Full AMD retpoline", [SPECTRE_V2_IBRS_ENHANCED] = "Mitigation: Enhanced IBRS", @@ -250,11 +248,6 @@ static void __init spec2_print_if_secure(const char *reason) pr_info("%s selected on command line.\n", reason); } -static inline bool retp_compiler(void) -{ - return __is_defined(CONFIG_RETPOLINE); -} - static inline bool match_option(const char *arg, int arglen, const char *opt) { int len = strlen(opt); @@ -417,14 +410,12 @@ retpoline_auto: pr_err("Spectre mitigation: LFENCE not serializing, switching to generic retpoline\n"); goto retpoline_generic; } - mode = 
retp_compiler() ? SPECTRE_V2_RETPOLINE_AMD : - SPECTRE_V2_RETPOLINE_MINIMAL_AMD; + mode = SPECTRE_V2_RETPOLINE_AMD; setup_force_cpu_cap(X86_FEATURE_RETPOLINE_AMD); setup_force_cpu_cap(X86_FEATURE_RETPOLINE); } else { retpoline_generic: - mode = retp_compiler() ? SPECTRE_V2_RETPOLINE_GENERIC : - SPECTRE_V2_RETPOLINE_MINIMAL; + mode = SPECTRE_V2_RETPOLINE_GENERIC; setup_force_cpu_cap(X86_FEATURE_RETPOLINE); } -- cgit v1.2.3-70-g09d2 From 8eb729b77faf83ac4c1f363a9ad68d042415f24c Mon Sep 17 00:00:00 2001 From: Tim Chen Date: Sun, 25 Nov 2018 19:33:29 +0100 Subject: x86/speculation: Update the TIF_SSBD comment "Reduced Data Speculation" is an obsolete term. The correct new name is "Speculative store bypass disable" - which is abbreviated into SSBD. Signed-off-by: Tim Chen Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Jiri Kosina Cc: Tom Lendacky Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: David Woodhouse Cc: Andi Kleen Cc: Dave Hansen Cc: Casey Schaufler Cc: Asit Mallick Cc: Arjan van de Ven Cc: Jon Masters Cc: Waiman Long Cc: Greg KH Cc: Dave Stewart Cc: Kees Cook Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20181125185003.593893901@linutronix.de --- arch/x86/include/asm/thread_info.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 2ff2a30a264f..523c69efc38a 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -79,7 +79,7 @@ struct thread_info { #define TIF_SIGPENDING 2 /* signal pending */ #define TIF_NEED_RESCHED 3 /* rescheduling necessary */ #define TIF_SINGLESTEP 4 /* reenable singlestep on user return*/ -#define TIF_SSBD 5 /* Reduced data speculation */ +#define TIF_SSBD 5 /* Speculative store bypass disable */ #define TIF_SYSCALL_EMU 6 /* syscall emulation active */ #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */ #define TIF_SECCOMP 8 /* secure computing */ -- cgit v1.2.3-70-g09d2 From 24848509aa55eac39d524b587b051f4e86df3c12 Mon Sep 17 00:00:00 2001 From: Tim Chen Date: Sun, 25 Nov 2018 19:33:30 +0100 Subject: x86/speculation: Clean up spectre_v2_parse_cmdline() Remove the unnecessary 'else' statement in spectre_v2_parse_cmdline() to save an indentation level. 
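Reduced to its essence, the cleanup is the classic early-return pattern (generic sketch; cmdline_has() and the CMD_* values are made up):

enum { CMD_NONE, CMD_AUTO };

/* Before: the whole parser body hangs off an 'else'. */
static int parse_cmdline_before(void)
{
	if (cmdline_has("nospectre_v2"))
		return CMD_NONE;
	else {
		/* the entire parsing logic sits one level deeper */
		return CMD_AUTO;
	}
}

/* After: the early return makes the 'else' redundant. */
static int parse_cmdline_after(void)
{
	if (cmdline_has("nospectre_v2"))
		return CMD_NONE;

	/* parsing logic continues at the natural indentation */
	return CMD_AUTO;
}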
Signed-off-by: Tim Chen Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Jiri Kosina Cc: Tom Lendacky Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: David Woodhouse Cc: Andi Kleen Cc: Dave Hansen Cc: Casey Schaufler Cc: Asit Mallick Cc: Arjan van de Ven Cc: Jon Masters Cc: Waiman Long Cc: Greg KH Cc: Dave Stewart Cc: Kees Cook Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20181125185003.688010903@linutronix.de --- arch/x86/kernel/cpu/bugs.c | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 7f6d8159398e..839ab4103e89 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -276,22 +276,21 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void) if (cmdline_find_option_bool(boot_command_line, "nospectre_v2")) return SPECTRE_V2_CMD_NONE; - else { - ret = cmdline_find_option(boot_command_line, "spectre_v2", arg, sizeof(arg)); - if (ret < 0) - return SPECTRE_V2_CMD_AUTO; - for (i = 0; i < ARRAY_SIZE(mitigation_options); i++) { - if (!match_option(arg, ret, mitigation_options[i].option)) - continue; - cmd = mitigation_options[i].cmd; - break; - } + ret = cmdline_find_option(boot_command_line, "spectre_v2", arg, sizeof(arg)); + if (ret < 0) + return SPECTRE_V2_CMD_AUTO; - if (i >= ARRAY_SIZE(mitigation_options)) { - pr_err("unknown option (%s). Switching to AUTO select\n", arg); - return SPECTRE_V2_CMD_AUTO; - } + for (i = 0; i < ARRAY_SIZE(mitigation_options); i++) { + if (!match_option(arg, ret, mitigation_options[i].option)) + continue; + cmd = mitigation_options[i].cmd; + break; + } + + if (i >= ARRAY_SIZE(mitigation_options)) { + pr_err("unknown option (%s). 
Switching to AUTO select\n", arg); + return SPECTRE_V2_CMD_AUTO; } if ((cmd == SPECTRE_V2_CMD_RETPOLINE || -- cgit v1.2.3-70-g09d2 From b86bda0426853bfe8a3506c7d2a5b332760ae46b Mon Sep 17 00:00:00 2001 From: Tim Chen Date: Sun, 25 Nov 2018 19:33:31 +0100 Subject: x86/speculation: Remove unnecessary ret variable in cpu_show_common() Signed-off-by: Tim Chen Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Jiri Kosina Cc: Tom Lendacky Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: David Woodhouse Cc: Andi Kleen Cc: Dave Hansen Cc: Casey Schaufler Cc: Asit Mallick Cc: Arjan van de Ven Cc: Jon Masters Cc: Waiman Long Cc: Greg KH Cc: Dave Stewart Cc: Kees Cook Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20181125185003.783903657@linutronix.de --- arch/x86/kernel/cpu/bugs.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 839ab4103e89..b52a48966e01 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -847,8 +847,6 @@ static ssize_t l1tf_show_state(char *buf) static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr, char *buf, unsigned int bug) { - int ret; - if (!boot_cpu_has_bug(bug)) return sprintf(buf, "Not affected\n"); @@ -866,13 +864,12 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr return sprintf(buf, "Mitigation: __user pointer sanitization\n"); case X86_BUG_SPECTRE_V2: - ret = sprintf(buf, "%s%s%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled], + return sprintf(buf, "%s%s%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled], boot_cpu_has(X86_FEATURE_USE_IBPB) ? ", IBPB" : "", boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "", (x86_spec_ctrl_base & SPEC_CTRL_STIBP) ? ", STIBP" : "", boot_cpu_has(X86_FEATURE_RSB_CTXSW) ? ", RSB filling" : "", spectre_v2_module_string()); - return ret; case X86_BUG_SPEC_STORE_BYPASS: return sprintf(buf, "%s\n", ssb_strings[ssb_mode]); -- cgit v1.2.3-70-g09d2 From a8f76ae41cd633ac00be1b3019b1eb4741be3828 Mon Sep 17 00:00:00 2001 From: Tim Chen Date: Sun, 25 Nov 2018 19:33:32 +0100 Subject: x86/speculation: Move STIBP/IBPB string conditionals out of cpu_show_common() The Spectre V2 printout in cpu_show_common() handles conditionals for the various mitigation methods directly in the sprintf() argument list. That's hard to read and will become unreadable if more complex decisions need to be made for a particular method. Move the conditionals for STIBP and IBPB string selection into helper functions, so they can be extended later on.
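The refactoring pattern in isolation (sketch only; enhanced_ibrs_active() and stibp_enabled() are placeholders for the real checks, not bugs.c code):

static char *stibp_state(void)
{
	if (enhanced_ibrs_active())	/* later patches add cases like this */
		return "";
	return stibp_enabled() ? ", STIBP" : "";
}

The sprintf() call site then only passes stibp_state(), and any future condition stays a local change inside the helper.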
Signed-off-by: Tim Chen Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Jiri Kosina Cc: Tom Lendacky Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: David Woodhouse Cc: Andi Kleen Cc: Dave Hansen Cc: Casey Schaufler Cc: Asit Mallick Cc: Arjan van de Ven Cc: Jon Masters Cc: Waiman Long Cc: Greg KH Cc: Dave Stewart Cc: Kees Cook Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20181125185003.874479208@linutronix.de --- arch/x86/kernel/cpu/bugs.c | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index b52a48966e01..a1502bce9eb8 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -844,6 +844,22 @@ static ssize_t l1tf_show_state(char *buf) } #endif +static char *stibp_state(void) +{ + if (x86_spec_ctrl_base & SPEC_CTRL_STIBP) + return ", STIBP"; + else + return ""; +} + +static char *ibpb_state(void) +{ + if (boot_cpu_has(X86_FEATURE_USE_IBPB)) + return ", IBPB"; + else + return ""; +} + static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr, char *buf, unsigned int bug) { @@ -865,9 +881,9 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr case X86_BUG_SPECTRE_V2: return sprintf(buf, "%s%s%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled], - boot_cpu_has(X86_FEATURE_USE_IBPB) ? ", IBPB" : "", + ibpb_state(), boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "", - (x86_spec_ctrl_base & SPEC_CTRL_STIBP) ? ", STIBP" : "", + stibp_state(), boot_cpu_has(X86_FEATURE_RSB_CTXSW) ? ", RSB filling" : "", spectre_v2_module_string()); -- cgit v1.2.3-70-g09d2 From 34bce7c9690b1d897686aac89604ba7adc365556 Mon Sep 17 00:00:00 2001 From: Tim Chen Date: Sun, 25 Nov 2018 19:33:33 +0100 Subject: x86/speculation: Disable STIBP when enhanced IBRS is in use If enhanced IBRS is active, STIBP is redundant for mitigating Spectre v2 user space exploits from hyperthread sibling. Disable STIBP when enhanced IBRS is used. Signed-off-by: Tim Chen Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Jiri Kosina Cc: Tom Lendacky Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: David Woodhouse Cc: Andi Kleen Cc: Dave Hansen Cc: Casey Schaufler Cc: Asit Mallick Cc: Arjan van de Ven Cc: Jon Masters Cc: Waiman Long Cc: Greg KH Cc: Dave Stewart Cc: Kees Cook Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20181125185003.966801480@linutronix.de --- arch/x86/kernel/cpu/bugs.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index a1502bce9eb8..924cd06dd43b 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -321,6 +321,10 @@ static bool stibp_needed(void) if (spectre_v2_enabled == SPECTRE_V2_NONE) return false; + /* Enhanced IBRS makes using STIBP unnecessary. 
*/ + if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED) + return false; + if (!boot_cpu_has(X86_FEATURE_STIBP)) return false; @@ -846,6 +850,9 @@ static ssize_t l1tf_show_state(char *buf) static char *stibp_state(void) { + if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED) + return ""; + if (x86_spec_ctrl_base & SPEC_CTRL_STIBP) return ", STIBP"; else -- cgit v1.2.3-70-g09d2 From 26c4d75b234040c11728a8acb796b3a85ba7507c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 25 Nov 2018 19:33:34 +0100 Subject: x86/speculation: Rename SSBD update functions During context switch, the SSBD bit in SPEC_CTRL MSR is updated according to changes of the TIF_SSBD flag in the current and next running task. Currently, only the bit controlling speculative store bypass disable in SPEC_CTRL MSR is updated and the related update functions all have "speculative_store" or "ssb" in their names. For enhanced mitigation control other bits in SPEC_CTRL MSR need to be updated as well, which makes the SSB names inadequate. Rename the "speculative_store*" functions to a more generic name. No functional change. Signed-off-by: Tim Chen Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Jiri Kosina Cc: Tom Lendacky Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: David Woodhouse Cc: Andi Kleen Cc: Dave Hansen Cc: Casey Schaufler Cc: Asit Mallick Cc: Arjan van de Ven Cc: Jon Masters Cc: Waiman Long Cc: Greg KH Cc: Dave Stewart Cc: Kees Cook Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20181125185004.058866968@linutronix.de --- arch/x86/include/asm/spec-ctrl.h | 6 +++--- arch/x86/kernel/cpu/bugs.c | 4 ++-- arch/x86/kernel/process.c | 12 ++++++------ 3 files changed, 11 insertions(+), 11 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/spec-ctrl.h b/arch/x86/include/asm/spec-ctrl.h index ae7c2c5cd7f0..8e2f8411c7a7 100644 --- a/arch/x86/include/asm/spec-ctrl.h +++ b/arch/x86/include/asm/spec-ctrl.h @@ -70,11 +70,11 @@ extern void speculative_store_bypass_ht_init(void); static inline void speculative_store_bypass_ht_init(void) { } #endif -extern void speculative_store_bypass_update(unsigned long tif); +extern void speculation_ctrl_update(unsigned long tif); -static inline void speculative_store_bypass_update_current(void) +static inline void speculation_ctrl_update_current(void) { - speculative_store_bypass_update(current_thread_info()->flags); + speculation_ctrl_update(current_thread_info()->flags); } #endif diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 924cd06dd43b..a723af0c4400 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -200,7 +200,7 @@ x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest) tif = setguest ? ssbd_spec_ctrl_to_tif(guestval) : ssbd_spec_ctrl_to_tif(hostval); - speculative_store_bypass_update(tif); + speculation_ctrl_update(tif); } } EXPORT_SYMBOL_GPL(x86_virt_spec_ctrl); @@ -632,7 +632,7 @@ static int ssb_prctl_set(struct task_struct *task, unsigned long ctrl) * mitigation until it is next scheduled. 
*/ if (task == current && update) - speculative_store_bypass_update_current(); + speculation_ctrl_update_current(); return 0; } diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index c93fcfdf1673..8aa49604f9ae 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -395,27 +395,27 @@ static __always_inline void amd_set_ssb_virt_state(unsigned long tifn) wrmsrl(MSR_AMD64_VIRT_SPEC_CTRL, ssbd_tif_to_spec_ctrl(tifn)); } -static __always_inline void intel_set_ssb_state(unsigned long tifn) +static __always_inline void spec_ctrl_update_msr(unsigned long tifn) { u64 msr = x86_spec_ctrl_base | ssbd_tif_to_spec_ctrl(tifn); wrmsrl(MSR_IA32_SPEC_CTRL, msr); } -static __always_inline void __speculative_store_bypass_update(unsigned long tifn) +static __always_inline void __speculation_ctrl_update(unsigned long tifn) { if (static_cpu_has(X86_FEATURE_VIRT_SSBD)) amd_set_ssb_virt_state(tifn); else if (static_cpu_has(X86_FEATURE_LS_CFG_SSBD)) amd_set_core_ssb_state(tifn); else - intel_set_ssb_state(tifn); + spec_ctrl_update_msr(tifn); } -void speculative_store_bypass_update(unsigned long tif) +void speculation_ctrl_update(unsigned long tif) { preempt_disable(); - __speculative_store_bypass_update(tif); + __speculation_ctrl_update(tif); preempt_enable(); } @@ -452,7 +452,7 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, set_cpuid_faulting(!!(tifn & _TIF_NOCPUID)); if ((tifp ^ tifn) & _TIF_SSBD) - __speculative_store_bypass_update(tifn); + __speculation_ctrl_update(tifn); } /* -- cgit v1.2.3-70-g09d2 From 01daf56875ee0cd50ed496a09b20eb369b45dfa5 Mon Sep 17 00:00:00 2001 From: Tim Chen Date: Sun, 25 Nov 2018 19:33:35 +0100 Subject: x86/speculation: Reorganize speculation control MSRs update The logic to detect whether there's a change in the previous and next task's flag relevant to update speculation control MSRs is spread out across multiple functions. Consolidate all checks needed for updating speculation control MSRs into the new __speculation_ctrl_update() helper function. This makes it easy to pick the right speculation control MSR and the bits in MSR_IA32_SPEC_CTRL that need updating based on TIF flags changes. Originally-by: Thomas Lendacky Signed-off-by: Tim Chen Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Jiri Kosina Cc: Tom Lendacky Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: David Woodhouse Cc: Andi Kleen Cc: Dave Hansen Cc: Casey Schaufler Cc: Asit Mallick Cc: Arjan van de Ven Cc: Jon Masters Cc: Waiman Long Cc: Greg KH Cc: Dave Stewart Cc: Kees Cook Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20181125185004.151077005@linutronix.de --- arch/x86/kernel/process.c | 46 +++++++++++++++++++++++++++++----------------- 1 file changed, 29 insertions(+), 17 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 8aa49604f9ae..70e9832379e1 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -395,27 +395,40 @@ static __always_inline void amd_set_ssb_virt_state(unsigned long tifn) wrmsrl(MSR_AMD64_VIRT_SPEC_CTRL, ssbd_tif_to_spec_ctrl(tifn)); } -static __always_inline void spec_ctrl_update_msr(unsigned long tifn) -{ - u64 msr = x86_spec_ctrl_base | ssbd_tif_to_spec_ctrl(tifn); - - wrmsrl(MSR_IA32_SPEC_CTRL, msr); -} +/* + * Update the MSRs managing speculation control, during context switch. 
+ * + * tifp: Previous task's thread flags + * tifn: Next task's thread flags + */ +static __always_inline void __speculation_ctrl_update(unsigned long tifp, + unsigned long tifn) +{ + u64 msr = x86_spec_ctrl_base; + bool updmsr = false; + + /* If TIF_SSBD is different, select the proper mitigation method */ + if ((tifp ^ tifn) & _TIF_SSBD) { + if (static_cpu_has(X86_FEATURE_VIRT_SSBD)) { + amd_set_ssb_virt_state(tifn); + } else if (static_cpu_has(X86_FEATURE_LS_CFG_SSBD)) { + amd_set_core_ssb_state(tifn); + } else if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) || + static_cpu_has(X86_FEATURE_AMD_SSBD)) { + msr |= ssbd_tif_to_spec_ctrl(tifn); + updmsr = true; + } + } -static __always_inline void __speculation_ctrl_update(unsigned long tifn) -{ - if (static_cpu_has(X86_FEATURE_VIRT_SSBD)) - amd_set_ssb_virt_state(tifn); - else if (static_cpu_has(X86_FEATURE_LS_CFG_SSBD)) - amd_set_core_ssb_state(tifn); - else - spec_ctrl_update_msr(tifn); + if (updmsr) + wrmsrl(MSR_IA32_SPEC_CTRL, msr); } void speculation_ctrl_update(unsigned long tif) { + /* Forced update. Make sure all relevant TIF flags are different */ preempt_disable(); - __speculation_ctrl_update(tif); + __speculation_ctrl_update(~tif, tif); preempt_enable(); } @@ -451,8 +464,7 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, if ((tifp ^ tifn) & _TIF_NOCPUID) set_cpuid_faulting(!!(tifn & _TIF_NOCPUID)); - if ((tifp ^ tifn) & _TIF_SSBD) - __speculation_ctrl_update(tifn); + __speculation_ctrl_update(tifp, tifn); } /* -- cgit v1.2.3-70-g09d2 From dbe733642e01dd108f71436aaea7b328cb28fd87 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 25 Nov 2018 19:33:37 +0100 Subject: x86/Kconfig: Select SCHED_SMT if SMP enabled CONFIG_SCHED_SMT is enabled by all distros, so there is not a real point to have it configurable. The runtime overhead in the core scheduler code is minimal because the actual SMT scheduling parts are conditional on a static key. This allows to expose the scheduler's SMT state static key to the speculation control code. Alternatively the scheduler's static key could be made always available when CONFIG_SMP is enabled, but that's just adding an unused static key to every other architecture for nothing. Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Jiri Kosina Cc: Tom Lendacky Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: David Woodhouse Cc: Tim Chen Cc: Andi Kleen Cc: Dave Hansen Cc: Casey Schaufler Cc: Asit Mallick Cc: Arjan van de Ven Cc: Jon Masters Cc: Waiman Long Cc: Greg KH Cc: Dave Stewart Cc: Kees Cook Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20181125185004.337452245@linutronix.de --- arch/x86/Kconfig | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index b5286ad2a982..8689e794a43c 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1000,13 +1000,7 @@ config NR_CPUS to the kernel image. config SCHED_SMT - bool "SMT (Hyperthreading) scheduler support" - depends on SMP - ---help--- - SMT scheduler support improves the CPU scheduler's decision making - when dealing with Intel Pentium 4 chips with HyperThreading at a - cost of slightly increased overhead in some places. If unsure say - N here. 
+ def_bool y if SMP config SCHED_MC def_bool y -- cgit v1.2.3-70-g09d2 From a74cfffb03b73d41e08f84c2e5c87dec0ce3db9f Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 25 Nov 2018 19:33:39 +0100 Subject: x86/speculation: Rework SMT state change arch_smt_update() is only called when the sysfs SMT control knob is changed. This means that when SMT is enabled in the sysfs control knob the system is considered to have SMT active even if all siblings are offline. To allow finegrained control of the speculation mitigations, the actual SMT state is more interesting than the fact that siblings could be enabled. Rework the code, so arch_smt_update() is invoked from each individual CPU hotplug function, and simplify the update function while at it. Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Jiri Kosina Cc: Tom Lendacky Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: David Woodhouse Cc: Tim Chen Cc: Andi Kleen Cc: Dave Hansen Cc: Casey Schaufler Cc: Asit Mallick Cc: Arjan van de Ven Cc: Jon Masters Cc: Waiman Long Cc: Greg KH Cc: Dave Stewart Cc: Kees Cook Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20181125185004.521974984@linutronix.de --- arch/x86/kernel/cpu/bugs.c | 11 +++++------ include/linux/sched/smt.h | 2 ++ kernel/cpu.c | 15 +++++++++------ 3 files changed, 16 insertions(+), 12 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index a723af0c4400..5625b323ff32 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include @@ -344,16 +345,14 @@ void arch_smt_update(void) return; mutex_lock(&spec_ctrl_mutex); - mask = x86_spec_ctrl_base; - if (cpu_smt_control == CPU_SMT_ENABLED) + + mask = x86_spec_ctrl_base & ~SPEC_CTRL_STIBP; + if (sched_smt_active()) mask |= SPEC_CTRL_STIBP; - else - mask &= ~SPEC_CTRL_STIBP; if (mask != x86_spec_ctrl_base) { pr_info("Spectre v2 cross-process SMT mitigation: %s STIBP\n", - cpu_smt_control == CPU_SMT_ENABLED ? - "Enabling" : "Disabling"); + mask & SPEC_CTRL_STIBP ? "Enabling" : "Disabling"); x86_spec_ctrl_base = mask; on_each_cpu(update_stibp_msr, NULL, 1); } diff --git a/include/linux/sched/smt.h b/include/linux/sched/smt.h index c9e0be514110..59d3736c454c 100644 --- a/include/linux/sched/smt.h +++ b/include/linux/sched/smt.h @@ -15,4 +15,6 @@ static __always_inline bool sched_smt_active(void) static inline bool sched_smt_active(void) { return false; } #endif +void arch_smt_update(void); + #endif diff --git a/kernel/cpu.c b/kernel/cpu.c index 3c7f3b4c453c..91d5c38eb7e5 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -367,6 +368,12 @@ static void lockdep_release_cpus_lock(void) #endif /* CONFIG_HOTPLUG_CPU */ +/* + * Architectures that need SMT-specific errata handling during SMT hotplug + * should override this. + */ +void __weak arch_smt_update(void) { } + #ifdef CONFIG_HOTPLUG_SMT enum cpuhp_smt_control cpu_smt_control __read_mostly = CPU_SMT_ENABLED; EXPORT_SYMBOL_GPL(cpu_smt_control); @@ -1011,6 +1018,7 @@ out: * concurrent CPU hotplug via cpu_add_remove_lock. 
*/ lockup_detector_cleanup(); + arch_smt_update(); return ret; } @@ -1139,6 +1147,7 @@ static int _cpu_up(unsigned int cpu, int tasks_frozen, enum cpuhp_state target) ret = cpuhp_up_callbacks(cpu, st, target); out: cpus_write_unlock(); + arch_smt_update(); return ret; } @@ -2055,12 +2064,6 @@ static void cpuhp_online_cpu_device(unsigned int cpu) kobject_uevent(&dev->kobj, KOBJ_ONLINE); } -/* - * Architectures that need SMT-specific errata handling during SMT hotplug - * should override this. - */ -void __weak arch_smt_update(void) { }; - static int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval) { int cpu, ret = 0; -- cgit v1.2.3-70-g09d2 From 130d6f946f6f2a972ee3ec8540b7243ab99abe97 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 25 Nov 2018 19:33:40 +0100 Subject: x86/l1tf: Show actual SMT state Use the now exposed real SMT state, not the SMT sysfs control knob state. This reflects the state of the system when the mitigation status is queried. This does not change the warning in the VMX launch code. There the dependency on the control knob makes sense because siblings could be brought online anytime after launching the VM. Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Jiri Kosina Cc: Tom Lendacky Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: David Woodhouse Cc: Tim Chen Cc: Andi Kleen Cc: Dave Hansen Cc: Casey Schaufler Cc: Asit Mallick Cc: Arjan van de Ven Cc: Jon Masters Cc: Waiman Long Cc: Greg KH Cc: Dave Stewart Cc: Kees Cook Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20181125185004.613357354@linutronix.de --- arch/x86/kernel/cpu/bugs.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 5625b323ff32..2dc4ee2bedcb 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -832,13 +832,14 @@ static ssize_t l1tf_show_state(char *buf) if (l1tf_vmx_mitigation == VMENTER_L1D_FLUSH_EPT_DISABLED || (l1tf_vmx_mitigation == VMENTER_L1D_FLUSH_NEVER && - cpu_smt_control == CPU_SMT_ENABLED)) + sched_smt_active())) { return sprintf(buf, "%s; VMX: %s\n", L1TF_DEFAULT_MSG, l1tf_vmx_states[l1tf_vmx_mitigation]); + } return sprintf(buf, "%s; VMX: %s, SMT %s\n", L1TF_DEFAULT_MSG, l1tf_vmx_states[l1tf_vmx_mitigation], - cpu_smt_control == CPU_SMT_ENABLED ? "vulnerable" : "disabled"); + sched_smt_active() ? "vulnerable" : "disabled"); } #else static ssize_t l1tf_show_state(char *buf) -- cgit v1.2.3-70-g09d2 From 15d6b7aab0793b2de8a05d8a828777dd24db424e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 25 Nov 2018 19:33:41 +0100 Subject: x86/speculation: Reorder the spec_v2 code Reorder the code so it is better grouped. No functional change. 
Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Jiri Kosina Cc: Tom Lendacky Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: David Woodhouse Cc: Tim Chen Cc: Andi Kleen Cc: Dave Hansen Cc: Casey Schaufler Cc: Asit Mallick Cc: Arjan van de Ven Cc: Jon Masters Cc: Waiman Long Cc: Greg KH Cc: Dave Stewart Cc: Kees Cook Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20181125185004.707122879@linutronix.de --- arch/x86/kernel/cpu/bugs.c | 168 ++++++++++++++++++++++----------------------- 1 file changed, 84 insertions(+), 84 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 2dc4ee2bedcb..c9542b9fb329 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -124,29 +124,6 @@ void __init check_bugs(void) #endif } -/* The kernel command line selection */ -enum spectre_v2_mitigation_cmd { - SPECTRE_V2_CMD_NONE, - SPECTRE_V2_CMD_AUTO, - SPECTRE_V2_CMD_FORCE, - SPECTRE_V2_CMD_RETPOLINE, - SPECTRE_V2_CMD_RETPOLINE_GENERIC, - SPECTRE_V2_CMD_RETPOLINE_AMD, -}; - -static const char *spectre_v2_strings[] = { - [SPECTRE_V2_NONE] = "Vulnerable", - [SPECTRE_V2_RETPOLINE_GENERIC] = "Mitigation: Full generic retpoline", - [SPECTRE_V2_RETPOLINE_AMD] = "Mitigation: Full AMD retpoline", - [SPECTRE_V2_IBRS_ENHANCED] = "Mitigation: Enhanced IBRS", -}; - -#undef pr_fmt -#define pr_fmt(fmt) "Spectre V2 : " fmt - -static enum spectre_v2_mitigation spectre_v2_enabled __ro_after_init = - SPECTRE_V2_NONE; - void x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest) { @@ -216,6 +193,12 @@ static void x86_amd_ssb_disable(void) wrmsrl(MSR_AMD64_LS_CFG, msrval); } +#undef pr_fmt +#define pr_fmt(fmt) "Spectre V2 : " fmt + +static enum spectre_v2_mitigation spectre_v2_enabled __ro_after_init = + SPECTRE_V2_NONE; + #ifdef RETPOLINE static bool spectre_v2_bad_module; @@ -237,18 +220,6 @@ static inline const char *spectre_v2_module_string(void) static inline const char *spectre_v2_module_string(void) { return ""; } #endif -static void __init spec2_print_if_insecure(const char *reason) -{ - if (boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) - pr_info("%s selected on command line.\n", reason); -} - -static void __init spec2_print_if_secure(const char *reason) -{ - if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) - pr_info("%s selected on command line.\n", reason); -} - static inline bool match_option(const char *arg, int arglen, const char *opt) { int len = strlen(opt); @@ -256,24 +227,53 @@ static inline bool match_option(const char *arg, int arglen, const char *opt) return len == arglen && !strncmp(arg, opt, len); } +/* The kernel command line selection for spectre v2 */ +enum spectre_v2_mitigation_cmd { + SPECTRE_V2_CMD_NONE, + SPECTRE_V2_CMD_AUTO, + SPECTRE_V2_CMD_FORCE, + SPECTRE_V2_CMD_RETPOLINE, + SPECTRE_V2_CMD_RETPOLINE_GENERIC, + SPECTRE_V2_CMD_RETPOLINE_AMD, +}; + +static const char *spectre_v2_strings[] = { + [SPECTRE_V2_NONE] = "Vulnerable", + [SPECTRE_V2_RETPOLINE_GENERIC] = "Mitigation: Full generic retpoline", + [SPECTRE_V2_RETPOLINE_AMD] = "Mitigation: Full AMD retpoline", + [SPECTRE_V2_IBRS_ENHANCED] = "Mitigation: Enhanced IBRS", +}; + static const struct { const char *option; enum spectre_v2_mitigation_cmd cmd; bool secure; } mitigation_options[] = { - { "off", SPECTRE_V2_CMD_NONE, false }, - { "on", SPECTRE_V2_CMD_FORCE, true }, - { "retpoline", SPECTRE_V2_CMD_RETPOLINE, false }, - { "retpoline,amd", SPECTRE_V2_CMD_RETPOLINE_AMD, false }, - { 
"retpoline,generic", SPECTRE_V2_CMD_RETPOLINE_GENERIC, false }, - { "auto", SPECTRE_V2_CMD_AUTO, false }, + { "off", SPECTRE_V2_CMD_NONE, false }, + { "on", SPECTRE_V2_CMD_FORCE, true }, + { "retpoline", SPECTRE_V2_CMD_RETPOLINE, false }, + { "retpoline,amd", SPECTRE_V2_CMD_RETPOLINE_AMD, false }, + { "retpoline,generic", SPECTRE_V2_CMD_RETPOLINE_GENERIC, false }, + { "auto", SPECTRE_V2_CMD_AUTO, false }, }; +static void __init spec2_print_if_insecure(const char *reason) +{ + if (boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) + pr_info("%s selected on command line.\n", reason); +} + +static void __init spec2_print_if_secure(const char *reason) +{ + if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) + pr_info("%s selected on command line.\n", reason); +} + static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void) { + enum spectre_v2_mitigation_cmd cmd = SPECTRE_V2_CMD_AUTO; char arg[20]; int ret, i; - enum spectre_v2_mitigation_cmd cmd = SPECTRE_V2_CMD_AUTO; if (cmdline_find_option_bool(boot_command_line, "nospectre_v2")) return SPECTRE_V2_CMD_NONE; @@ -317,48 +317,6 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void) return cmd; } -static bool stibp_needed(void) -{ - if (spectre_v2_enabled == SPECTRE_V2_NONE) - return false; - - /* Enhanced IBRS makes using STIBP unnecessary. */ - if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED) - return false; - - if (!boot_cpu_has(X86_FEATURE_STIBP)) - return false; - - return true; -} - -static void update_stibp_msr(void *info) -{ - wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); -} - -void arch_smt_update(void) -{ - u64 mask; - - if (!stibp_needed()) - return; - - mutex_lock(&spec_ctrl_mutex); - - mask = x86_spec_ctrl_base & ~SPEC_CTRL_STIBP; - if (sched_smt_active()) - mask |= SPEC_CTRL_STIBP; - - if (mask != x86_spec_ctrl_base) { - pr_info("Spectre v2 cross-process SMT mitigation: %s STIBP\n", - mask & SPEC_CTRL_STIBP ? "Enabling" : "Disabling"); - x86_spec_ctrl_base = mask; - on_each_cpu(update_stibp_msr, NULL, 1); - } - mutex_unlock(&spec_ctrl_mutex); -} - static void __init spectre_v2_select_mitigation(void) { enum spectre_v2_mitigation_cmd cmd = spectre_v2_parse_cmdline(); @@ -462,6 +420,48 @@ specv2_set_mode: arch_smt_update(); } +static bool stibp_needed(void) +{ + if (spectre_v2_enabled == SPECTRE_V2_NONE) + return false; + + /* Enhanced IBRS makes using STIBP unnecessary. */ + if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED) + return false; + + if (!boot_cpu_has(X86_FEATURE_STIBP)) + return false; + + return true; +} + +static void update_stibp_msr(void *info) +{ + wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); +} + +void arch_smt_update(void) +{ + u64 mask; + + if (!stibp_needed()) + return; + + mutex_lock(&spec_ctrl_mutex); + + mask = x86_spec_ctrl_base & ~SPEC_CTRL_STIBP; + if (sched_smt_active()) + mask |= SPEC_CTRL_STIBP; + + if (mask != x86_spec_ctrl_base) { + pr_info("Spectre v2 cross-process SMT mitigation: %s STIBP\n", + mask & SPEC_CTRL_STIBP ? 
"Enabling" : "Disabling"); + x86_spec_ctrl_base = mask; + on_each_cpu(update_stibp_msr, NULL, 1); + } + mutex_unlock(&spec_ctrl_mutex); +} + #undef pr_fmt #define pr_fmt(fmt) "Speculative Store Bypass: " fmt -- cgit v1.2.3-70-g09d2 From 8770709f411763884535662744a3786a1806afd3 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 25 Nov 2018 19:33:42 +0100 Subject: x86/speculation: Mark string arrays const correctly checkpatch.pl muttered when reshuffling the code: WARNING: static const char * array should probably be static const char * const Fix up all the string arrays. Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Jiri Kosina Cc: Tom Lendacky Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: David Woodhouse Cc: Tim Chen Cc: Andi Kleen Cc: Dave Hansen Cc: Casey Schaufler Cc: Asit Mallick Cc: Arjan van de Ven Cc: Jon Masters Cc: Waiman Long Cc: Greg KH Cc: Dave Stewart Cc: Kees Cook Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20181125185004.800018931@linutronix.de --- arch/x86/kernel/cpu/bugs.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index c9542b9fb329..4fcbccb16200 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -237,7 +237,7 @@ enum spectre_v2_mitigation_cmd { SPECTRE_V2_CMD_RETPOLINE_AMD, }; -static const char *spectre_v2_strings[] = { +static const char * const spectre_v2_strings[] = { [SPECTRE_V2_NONE] = "Vulnerable", [SPECTRE_V2_RETPOLINE_GENERIC] = "Mitigation: Full generic retpoline", [SPECTRE_V2_RETPOLINE_AMD] = "Mitigation: Full AMD retpoline", @@ -476,7 +476,7 @@ enum ssb_mitigation_cmd { SPEC_STORE_BYPASS_CMD_SECCOMP, }; -static const char *ssb_strings[] = { +static const char * const ssb_strings[] = { [SPEC_STORE_BYPASS_NONE] = "Vulnerable", [SPEC_STORE_BYPASS_DISABLE] = "Mitigation: Speculative Store Bypass disabled", [SPEC_STORE_BYPASS_PRCTL] = "Mitigation: Speculative Store Bypass disabled via prctl", @@ -816,7 +816,7 @@ early_param("l1tf", l1tf_cmdline); #define L1TF_DEFAULT_MSG "Mitigation: PTE Inversion" #if IS_ENABLED(CONFIG_KVM_INTEL) -static const char *l1tf_vmx_states[] = { +static const char * const l1tf_vmx_states[] = { [VMENTER_L1D_FLUSH_AUTO] = "auto", [VMENTER_L1D_FLUSH_NEVER] = "vulnerable", [VMENTER_L1D_FLUSH_COND] = "conditional cache flushes", -- cgit v1.2.3-70-g09d2 From 30ba72a990f5096ae08f284de17986461efcc408 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 25 Nov 2018 19:33:43 +0100 Subject: x86/speculataion: Mark command line parser data __initdata No point to keep that around. 
Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Jiri Kosina Cc: Tom Lendacky Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: David Woodhouse Cc: Tim Chen Cc: Andi Kleen Cc: Dave Hansen Cc: Casey Schaufler Cc: Asit Mallick Cc: Arjan van de Ven Cc: Jon Masters Cc: Waiman Long Cc: Greg KH Cc: Dave Stewart Cc: Kees Cook Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20181125185004.893886356@linutronix.de --- arch/x86/kernel/cpu/bugs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 4fcbccb16200..9279cbabe16e 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -248,7 +248,7 @@ static const struct { const char *option; enum spectre_v2_mitigation_cmd cmd; bool secure; -} mitigation_options[] = { +} mitigation_options[] __initdata = { { "off", SPECTRE_V2_CMD_NONE, false }, { "on", SPECTRE_V2_CMD_FORCE, true }, { "retpoline", SPECTRE_V2_CMD_RETPOLINE, false }, @@ -486,7 +486,7 @@ static const char * const ssb_strings[] = { static const struct { const char *option; enum ssb_mitigation_cmd cmd; -} ssb_mitigation_options[] = { +} ssb_mitigation_options[] __initdata = { { "auto", SPEC_STORE_BYPASS_CMD_AUTO }, /* Platform decides */ { "on", SPEC_STORE_BYPASS_CMD_ON }, /* Disable Speculative Store Bypass */ { "off", SPEC_STORE_BYPASS_CMD_NONE }, /* Don't touch Speculative Store Bypass */ -- cgit v1.2.3-70-g09d2 From 495d470e9828500e0155027f230449ac5e29c025 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 25 Nov 2018 19:33:44 +0100 Subject: x86/speculation: Unify conditional spectre v2 print functions There is no point in having two functions and a conditional at the call site. 
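The trick that lets one function suffice is a single boolean comparison (sketch; bug_present() stands in for boot_cpu_has_bug(X86_BUG_SPECTRE_V2)):

static void spec_v2_print_cond(const char *reason, bool secure)
{
	/*
	 * One test covers both former helpers:
	 *  - insecure option on a vulnerable CPU:     true  != false -> print
	 *  - secure option on a non-vulnerable CPU:   false != true  -> print
	 */
	if (bug_present() != secure)
		pr_info("%s selected on command line.\n", reason);
}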
Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Jiri Kosina Cc: Tom Lendacky Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: David Woodhouse Cc: Tim Chen Cc: Andi Kleen Cc: Dave Hansen Cc: Casey Schaufler Cc: Asit Mallick Cc: Arjan van de Ven Cc: Jon Masters Cc: Waiman Long Cc: Greg KH Cc: Dave Stewart Cc: Kees Cook Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20181125185004.986890749@linutronix.de --- arch/x86/kernel/cpu/bugs.c | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 9279cbabe16e..4f5a6319dca6 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -257,15 +257,9 @@ static const struct { { "auto", SPECTRE_V2_CMD_AUTO, false }, }; -static void __init spec2_print_if_insecure(const char *reason) +static void __init spec_v2_print_cond(const char *reason, bool secure) { - if (boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) - pr_info("%s selected on command line.\n", reason); -} - -static void __init spec2_print_if_secure(const char *reason) -{ - if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) + if (boot_cpu_has_bug(X86_BUG_SPECTRE_V2) != secure) pr_info("%s selected on command line.\n", reason); } @@ -309,11 +303,8 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void) return SPECTRE_V2_CMD_AUTO; } - if (mitigation_options[i].secure) - spec2_print_if_secure(mitigation_options[i].option); - else - spec2_print_if_insecure(mitigation_options[i].option); - + spec_v2_print_cond(mitigation_options[i].option, + mitigation_options[i].secure); return cmd; } -- cgit v1.2.3-70-g09d2 From fa1202ef224391b6f5b26cdd44cc50495e8fab54 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 25 Nov 2018 19:33:45 +0100 Subject: x86/speculation: Add command line control for indirect branch speculation Add command line control for user space indirect branch speculation mitigations. The new option is: spectre_v2_user= The initial options are: - on: Unconditionally enabled - off: Unconditionally disabled -auto: Kernel selects mitigation (default off for now) When the spectre_v2= command line argument is either 'on' or 'off' this implies that the application to application control follows that state even if a contradicting spectre_v2_user= argument is supplied. Originally-by: Tim Chen Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Jiri Kosina Cc: Tom Lendacky Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: David Woodhouse Cc: Andi Kleen Cc: Dave Hansen Cc: Casey Schaufler Cc: Asit Mallick Cc: Arjan van de Ven Cc: Jon Masters Cc: Waiman Long Cc: Greg KH Cc: Dave Stewart Cc: Kees Cook Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20181125185005.082720373@linutronix.de --- Documentation/admin-guide/kernel-parameters.txt | 32 +++++- arch/x86/include/asm/nospec-branch.h | 10 ++ arch/x86/kernel/cpu/bugs.c | 133 +++++++++++++++++++++--- 3 files changed, 156 insertions(+), 19 deletions(-) (limited to 'arch') diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 19f4423e70d9..b6e5b33b9d75 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -4194,9 +4194,13 @@ spectre_v2= [X86] Control mitigation of Spectre variant 2 (indirect branch speculation) vulnerability. 
+ The default operation protects the kernel from + user space attacks. - on - unconditionally enable - off - unconditionally disable + on - unconditionally enable, implies + spectre_v2_user=on + off - unconditionally disable, implies + spectre_v2_user=off auto - kernel detects whether your CPU model is vulnerable @@ -4206,6 +4210,12 @@ CONFIG_RETPOLINE configuration option, and the compiler with which the kernel was built. + Selecting 'on' will also enable the mitigation + against user space to user space task attacks. + + Selecting 'off' will disable both the kernel and + the user space protections. + Specific mitigations can also be selected manually: retpoline - replace indirect branches @@ -4215,6 +4225,24 @@ Not specifying this option is equivalent to spectre_v2=auto. + spectre_v2_user= + [X86] Control mitigation of Spectre variant 2 + (indirect branch speculation) vulnerability between + user space tasks + + on - Unconditionally enable mitigations. Is + enforced by spectre_v2=on + + off - Unconditionally disable mitigations. Is + enforced by spectre_v2=off + + auto - Kernel selects the mitigation depending on + the available CPU features and vulnerability. + Default is off. + + Not specifying this option is equivalent to + spectre_v2_user=auto. + spec_store_bypass_disable= [HW] Control Speculative Store Bypass (SSB) Disable mitigation (Speculative Store Bypass vulnerability) diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index c202a64edd95..be0b0aa780e2 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -3,6 +3,8 @@ #ifndef _ASM_X86_NOSPEC_BRANCH_H_ #define _ASM_X86_NOSPEC_BRANCH_H_ +#include + #include #include #include @@ -226,6 +228,12 @@ enum spectre_v2_mitigation { SPECTRE_V2_IBRS_ENHANCED, }; +/* The indirect branch speculation control variants */ +enum spectre_v2_user_mitigation { + SPECTRE_V2_USER_NONE, + SPECTRE_V2_USER_STRICT, +}; + /* The Speculative Store Bypass disable variants */ enum ssb_mitigation { SPEC_STORE_BYPASS_NONE, @@ -303,6 +311,8 @@ do { \ preempt_enable(); \ } while (0) +DECLARE_STATIC_KEY_FALSE(switch_to_cond_stibp); + #endif /* __ASSEMBLY__ */ /* diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 4f5a6319dca6..3a223cce1fac 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -54,6 +54,9 @@ static u64 __ro_after_init x86_spec_ctrl_mask = SPEC_CTRL_IBRS; u64 __ro_after_init x86_amd_ls_cfg_base; u64 __ro_after_init x86_amd_ls_cfg_ssbd_mask; +/* Control conditional STIPB in switch_to() */ +DEFINE_STATIC_KEY_FALSE(switch_to_cond_stibp); + void __init check_bugs(void) { identify_boot_cpu(); @@ -199,6 +202,9 @@ static void x86_amd_ssb_disable(void) static enum spectre_v2_mitigation spectre_v2_enabled __ro_after_init = SPECTRE_V2_NONE; +static enum spectre_v2_user_mitigation spectre_v2_user __ro_after_init = + SPECTRE_V2_USER_NONE; + #ifdef RETPOLINE static bool spectre_v2_bad_module; @@ -237,6 +243,104 @@ enum spectre_v2_mitigation_cmd { SPECTRE_V2_CMD_RETPOLINE_AMD, }; +enum spectre_v2_user_cmd { + SPECTRE_V2_USER_CMD_NONE, + SPECTRE_V2_USER_CMD_AUTO, + SPECTRE_V2_USER_CMD_FORCE, +}; + +static const char * const spectre_v2_user_strings[] = { + [SPECTRE_V2_USER_NONE] = "User space: Vulnerable", + [SPECTRE_V2_USER_STRICT] = "User space: Mitigation: STIBP protection", +}; + +static const struct { + const char *option; + enum spectre_v2_user_cmd cmd; + bool secure; +} v2_user_options[] __initdata = { + { "auto", 
SPECTRE_V2_USER_CMD_AUTO, false }, + { "off", SPECTRE_V2_USER_CMD_NONE, false }, + { "on", SPECTRE_V2_USER_CMD_FORCE, true }, +}; + +static void __init spec_v2_user_print_cond(const char *reason, bool secure) +{ + if (boot_cpu_has_bug(X86_BUG_SPECTRE_V2) != secure) + pr_info("spectre_v2_user=%s forced on command line.\n", reason); +} + +static enum spectre_v2_user_cmd __init +spectre_v2_parse_user_cmdline(enum spectre_v2_mitigation_cmd v2_cmd) +{ + char arg[20]; + int ret, i; + + switch (v2_cmd) { + case SPECTRE_V2_CMD_NONE: + return SPECTRE_V2_USER_CMD_NONE; + case SPECTRE_V2_CMD_FORCE: + return SPECTRE_V2_USER_CMD_FORCE; + default: + break; + } + + ret = cmdline_find_option(boot_command_line, "spectre_v2_user", + arg, sizeof(arg)); + if (ret < 0) + return SPECTRE_V2_USER_CMD_AUTO; + + for (i = 0; i < ARRAY_SIZE(v2_user_options); i++) { + if (match_option(arg, ret, v2_user_options[i].option)) { + spec_v2_user_print_cond(v2_user_options[i].option, + v2_user_options[i].secure); + return v2_user_options[i].cmd; + } + } + + pr_err("Unknown user space protection option (%s). Switching to AUTO select\n", arg); + return SPECTRE_V2_USER_CMD_AUTO; +} + +static void __init +spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd) +{ + enum spectre_v2_user_mitigation mode = SPECTRE_V2_USER_NONE; + bool smt_possible = IS_ENABLED(CONFIG_SMP); + + if (!boot_cpu_has(X86_FEATURE_IBPB) && !boot_cpu_has(X86_FEATURE_STIBP)) + return; + + if (cpu_smt_control == CPU_SMT_FORCE_DISABLED || + cpu_smt_control == CPU_SMT_NOT_SUPPORTED) + smt_possible = false; + + switch (spectre_v2_parse_user_cmdline(v2_cmd)) { + case SPECTRE_V2_USER_CMD_AUTO: + case SPECTRE_V2_USER_CMD_NONE: + goto set_mode; + case SPECTRE_V2_USER_CMD_FORCE: + mode = SPECTRE_V2_USER_STRICT; + break; + } + + /* Initialize Indirect Branch Prediction Barrier */ + if (boot_cpu_has(X86_FEATURE_IBPB)) { + setup_force_cpu_cap(X86_FEATURE_USE_IBPB); + pr_info("Spectre v2 mitigation: Enabling Indirect Branch Prediction Barrier\n"); + } + + /* If enhanced IBRS is enabled no STIPB required */ + if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED) + return; + +set_mode: + spectre_v2_user = mode; + /* Only print the STIBP mode when SMT possible */ + if (smt_possible) + pr_info("%s\n", spectre_v2_user_strings[mode]); +} + static const char * const spectre_v2_strings[] = { [SPECTRE_V2_NONE] = "Vulnerable", [SPECTRE_V2_RETPOLINE_GENERIC] = "Mitigation: Full generic retpoline", @@ -385,12 +489,6 @@ specv2_set_mode: setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW); pr_info("Spectre v2 / SpectreRSB mitigation: Filling RSB on context switch\n"); - /* Initialize Indirect Branch Prediction Barrier if supported */ - if (boot_cpu_has(X86_FEATURE_IBPB)) { - setup_force_cpu_cap(X86_FEATURE_USE_IBPB); - pr_info("Spectre v2 mitigation: Enabling Indirect Branch Prediction Barrier\n"); - } - /* * Retpoline means the kernel is safe because it has no indirect * branches. Enhanced IBRS protects firmware too, so, enable restricted @@ -407,23 +505,21 @@ specv2_set_mode: pr_info("Enabling Restricted Speculation for firmware calls\n"); } + /* Set up IBPB and STIBP depending on the general spectre V2 command */ + spectre_v2_user_select_mitigation(cmd); + /* Enable STIBP if appropriate */ arch_smt_update(); } static bool stibp_needed(void) { - if (spectre_v2_enabled == SPECTRE_V2_NONE) - return false; - /* Enhanced IBRS makes using STIBP unnecessary. 
*/ if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED) return false; - if (!boot_cpu_has(X86_FEATURE_STIBP)) - return false; - - return true; + /* Check for strict user mitigation mode */ + return spectre_v2_user == SPECTRE_V2_USER_STRICT; } static void update_stibp_msr(void *info) @@ -844,10 +940,13 @@ static char *stibp_state(void) if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED) return ""; - if (x86_spec_ctrl_base & SPEC_CTRL_STIBP) - return ", STIBP"; - else - return ""; + switch (spectre_v2_user) { + case SPECTRE_V2_USER_NONE: + return ", STIBP: disabled"; + case SPECTRE_V2_USER_STRICT: + return ", STIBP: forced"; + } + return ""; } static char *ibpb_state(void) -- cgit v1.2.3-70-g09d2 From 5bfbe3ad5840d941b89bcac54b821ba14f50a0ba Mon Sep 17 00:00:00 2001 From: Tim Chen Date: Sun, 25 Nov 2018 19:33:46 +0100 Subject: x86/speculation: Prepare for per task indirect branch speculation control To avoid the overhead of STIBP always on, it's necessary to allow per task control of STIBP. Add a new task flag TIF_SPEC_IB and evaluate it during context switch if SMT is active and flag evaluation is enabled by the speculation control code. Add the conditional evaluation to x86_virt_spec_ctrl() as well so the guest/host switch works properly. This has no effect because TIF_SPEC_IB cannot be set yet and the static key which controls evaluation is off. Preparatory patch for adding the control code. [ tglx: Simplify the context switch logic and make the TIF evaluation depend on SMP=y and on the static key controlling the conditional update. Rename it to TIF_SPEC_IB because it controls both STIBP and IBPB ] Signed-off-by: Tim Chen Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Jiri Kosina Cc: Tom Lendacky Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: David Woodhouse Cc: Andi Kleen Cc: Dave Hansen Cc: Casey Schaufler Cc: Asit Mallick Cc: Arjan van de Ven Cc: Jon Masters Cc: Waiman Long Cc: Greg KH Cc: Dave Stewart Cc: Kees Cook Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20181125185005.176917199@linutronix.de --- arch/x86/include/asm/msr-index.h | 5 +++-- arch/x86/include/asm/spec-ctrl.h | 12 ++++++++++++ arch/x86/include/asm/thread_info.h | 5 ++++- arch/x86/kernel/cpu/bugs.c | 4 ++++ arch/x86/kernel/process.c | 20 ++++++++++++++++++-- 5 files changed, 41 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 80f4a4f38c79..c8f73efb4ece 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -41,9 +41,10 @@ #define MSR_IA32_SPEC_CTRL 0x00000048 /* Speculation Control */ #define SPEC_CTRL_IBRS (1 << 0) /* Indirect Branch Restricted Speculation */ -#define SPEC_CTRL_STIBP (1 << 1) /* Single Thread Indirect Branch Predictors */ +#define SPEC_CTRL_STIBP_SHIFT 1 /* Single Thread Indirect Branch Predictor (STIBP) bit */ +#define SPEC_CTRL_STIBP (1 << SPEC_CTRL_STIBP_SHIFT) /* STIBP mask */ #define SPEC_CTRL_SSBD_SHIFT 2 /* Speculative Store Bypass Disable bit */ -#define SPEC_CTRL_SSBD (1 << SPEC_CTRL_SSBD_SHIFT) /* Speculative Store Bypass Disable */ +#define SPEC_CTRL_SSBD (1 << SPEC_CTRL_SSBD_SHIFT) /* Speculative Store Bypass Disable */ #define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */ #define PRED_CMD_IBPB (1 << 0) /* Indirect Branch Prediction Barrier */ diff --git a/arch/x86/include/asm/spec-ctrl.h b/arch/x86/include/asm/spec-ctrl.h index 8e2f8411c7a7..27b0bce3933b 100644 --- 
a/arch/x86/include/asm/spec-ctrl.h +++ b/arch/x86/include/asm/spec-ctrl.h @@ -53,12 +53,24 @@ static inline u64 ssbd_tif_to_spec_ctrl(u64 tifn) return (tifn & _TIF_SSBD) >> (TIF_SSBD - SPEC_CTRL_SSBD_SHIFT); } +static inline u64 stibp_tif_to_spec_ctrl(u64 tifn) +{ + BUILD_BUG_ON(TIF_SPEC_IB < SPEC_CTRL_STIBP_SHIFT); + return (tifn & _TIF_SPEC_IB) >> (TIF_SPEC_IB - SPEC_CTRL_STIBP_SHIFT); +} + static inline unsigned long ssbd_spec_ctrl_to_tif(u64 spec_ctrl) { BUILD_BUG_ON(TIF_SSBD < SPEC_CTRL_SSBD_SHIFT); return (spec_ctrl & SPEC_CTRL_SSBD) << (TIF_SSBD - SPEC_CTRL_SSBD_SHIFT); } +static inline unsigned long stibp_spec_ctrl_to_tif(u64 spec_ctrl) +{ + BUILD_BUG_ON(TIF_SPEC_IB < SPEC_CTRL_STIBP_SHIFT); + return (spec_ctrl & SPEC_CTRL_STIBP) << (TIF_SPEC_IB - SPEC_CTRL_STIBP_SHIFT); +} + static inline u64 ssbd_tif_to_amd_ls_cfg(u64 tifn) { return (tifn & _TIF_SSBD) ? x86_amd_ls_cfg_ssbd_mask : 0ULL; diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 523c69efc38a..fa583ec99e3e 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -83,6 +83,7 @@ struct thread_info { #define TIF_SYSCALL_EMU 6 /* syscall emulation active */ #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */ #define TIF_SECCOMP 8 /* secure computing */ +#define TIF_SPEC_IB 9 /* Indirect branch speculation mitigation */ #define TIF_USER_RETURN_NOTIFY 11 /* notify kernel of userspace return */ #define TIF_UPROBE 12 /* breakpointed or singlestepping */ #define TIF_PATCH_PENDING 13 /* pending live patching update */ @@ -110,6 +111,7 @@ struct thread_info { #define _TIF_SYSCALL_EMU (1 << TIF_SYSCALL_EMU) #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) #define _TIF_SECCOMP (1 << TIF_SECCOMP) +#define _TIF_SPEC_IB (1 << TIF_SPEC_IB) #define _TIF_USER_RETURN_NOTIFY (1 << TIF_USER_RETURN_NOTIFY) #define _TIF_UPROBE (1 << TIF_UPROBE) #define _TIF_PATCH_PENDING (1 << TIF_PATCH_PENDING) @@ -146,7 +148,8 @@ struct thread_info { /* flags to check in __switch_to() */ #define _TIF_WORK_CTXSW \ - (_TIF_IO_BITMAP|_TIF_NOCPUID|_TIF_NOTSC|_TIF_BLOCKSTEP|_TIF_SSBD) + (_TIF_IO_BITMAP|_TIF_NOCPUID|_TIF_NOTSC|_TIF_BLOCKSTEP| \ + _TIF_SSBD|_TIF_SPEC_IB) #define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY) #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 3a223cce1fac..1e13dbfc0919 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -148,6 +148,10 @@ x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest) static_cpu_has(X86_FEATURE_AMD_SSBD)) hostval |= ssbd_tif_to_spec_ctrl(ti->flags); + /* Conditional STIBP enabled? */ + if (static_branch_unlikely(&switch_to_cond_stibp)) + hostval |= stibp_tif_to_spec_ctrl(ti->flags); + if (hostval != guestval) { msrval = setguest ? 
guestval : hostval; wrmsrl(MSR_IA32_SPEC_CTRL, msrval); diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 70e9832379e1..574b144d2b53 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -404,11 +404,17 @@ static __always_inline void amd_set_ssb_virt_state(unsigned long tifn) static __always_inline void __speculation_ctrl_update(unsigned long tifp, unsigned long tifn) { + unsigned long tif_diff = tifp ^ tifn; u64 msr = x86_spec_ctrl_base; bool updmsr = false; - /* If TIF_SSBD is different, select the proper mitigation method */ - if ((tifp ^ tifn) & _TIF_SSBD) { + /* + * If TIF_SSBD is different, select the proper mitigation + * method. Note that if SSBD mitigation is disabled or permanently + * enabled this branch can't be taken because nothing can set + * TIF_SSBD. + */ + if (tif_diff & _TIF_SSBD) { if (static_cpu_has(X86_FEATURE_VIRT_SSBD)) { amd_set_ssb_virt_state(tifn); } else if (static_cpu_has(X86_FEATURE_LS_CFG_SSBD)) { @@ -420,6 +426,16 @@ static __always_inline void __speculation_ctrl_update(unsigned long tifp, } } + /* + * Only evaluate TIF_SPEC_IB if conditional STIBP is enabled, + * otherwise avoid the MSR write. + */ + if (IS_ENABLED(CONFIG_SMP) && + static_branch_unlikely(&switch_to_cond_stibp)) { + updmsr |= !!(tif_diff & _TIF_SPEC_IB); + msr |= stibp_tif_to_spec_ctrl(tifn); + } + if (updmsr) wrmsrl(MSR_IA32_SPEC_CTRL, msr); } -- cgit v1.2.3-70-g09d2 From ff16701a29cba3aafa0bd1656d766813b2d0a811 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 25 Nov 2018 19:33:47 +0100 Subject: x86/process: Consolidate and simplify switch_to_xtra() code Move the conditional invocation of __switch_to_xtra() into an inline function so the logic can be shared between 32 and 64 bit. Remove the hand-through of the TSS pointer and retrieve the pointer directly in the bitmap handling function. Use this_cpu_ptr() instead of the per_cpu() indirection. This is a preparatory change so integration of conditional indirect branch speculation optimization happens only in one place. Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Jiri Kosina Cc: Tom Lendacky Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: David Woodhouse Cc: Tim Chen Cc: Andi Kleen Cc: Dave Hansen Cc: Casey Schaufler Cc: Asit Mallick Cc: Arjan van de Ven Cc: Jon Masters Cc: Waiman Long Cc: Greg KH Cc: Dave Stewart Cc: Kees Cook Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20181125185005.280855518@linutronix.de --- arch/x86/include/asm/switch_to.h | 3 --- arch/x86/kernel/process.c | 12 +++++++----- arch/x86/kernel/process.h | 24 ++++++++++++++++++++++++ arch/x86/kernel/process_32.c | 10 +++------- arch/x86/kernel/process_64.c | 10 +++------- 5 files changed, 37 insertions(+), 22 deletions(-) create mode 100644 arch/x86/kernel/process.h (limited to 'arch') diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h index 36bd243843d6..7cf1a270d891 100644 --- a/arch/x86/include/asm/switch_to.h +++ b/arch/x86/include/asm/switch_to.h @@ -11,9 +11,6 @@ struct task_struct *__switch_to_asm(struct task_struct *prev, __visible struct task_struct *__switch_to(struct task_struct *prev, struct task_struct *next); -struct tss_struct; -void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, - struct tss_struct *tss); /* This runs on the previous thread's stack. 
*/ static inline void prepare_switch_to(struct task_struct *next) diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 574b144d2b53..cdf8e6694f71 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -40,6 +40,8 @@ #include #include +#include "process.h" + /* * per-CPU TSS segments. Threads are completely 'soft' on Linux, * no more per-task TSS's. The TSS size is kept cacheline-aligned @@ -252,11 +254,12 @@ void arch_setup_new_exec(void) enable_cpuid(); } -static inline void switch_to_bitmap(struct tss_struct *tss, - struct thread_struct *prev, +static inline void switch_to_bitmap(struct thread_struct *prev, struct thread_struct *next, unsigned long tifp, unsigned long tifn) { + struct tss_struct *tss = this_cpu_ptr(&cpu_tss_rw); + if (tifn & _TIF_IO_BITMAP) { /* * Copy the relevant range of the IO bitmap. @@ -448,8 +451,7 @@ void speculation_ctrl_update(unsigned long tif) preempt_enable(); } -void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, - struct tss_struct *tss) +void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p) { struct thread_struct *prev, *next; unsigned long tifp, tifn; @@ -459,7 +461,7 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, tifn = READ_ONCE(task_thread_info(next_p)->flags); tifp = READ_ONCE(task_thread_info(prev_p)->flags); - switch_to_bitmap(tss, prev, next, tifp, tifn); + switch_to_bitmap(prev, next, tifp, tifn); propagate_user_return_notify(prev_p, next_p); diff --git a/arch/x86/kernel/process.h b/arch/x86/kernel/process.h new file mode 100644 index 000000000000..020fbfac3a27 --- /dev/null +++ b/arch/x86/kernel/process.h @@ -0,0 +1,24 @@ +// SPDX-License-Identifier: GPL-2.0 +// +// Code shared between 32 and 64 bit + +void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p); + +/* + * This needs to be inline to optimize for the common case where no extra + * work needs to be done. + */ +static inline void switch_to_extra(struct task_struct *prev, + struct task_struct *next) +{ + unsigned long next_tif = task_thread_info(next)->flags; + unsigned long prev_tif = task_thread_info(prev)->flags; + + /* + * __switch_to_xtra() handles debug registers, i/o bitmaps, + * speculation mitigations etc. + */ + if (unlikely(next_tif & _TIF_WORK_CTXSW_NEXT || + prev_tif & _TIF_WORK_CTXSW_PREV)) + __switch_to_xtra(prev, next); +} diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 5046a3c9dec2..d3e593eb189f 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -59,6 +59,8 @@ #include #include +#include "process.h" + void __show_regs(struct pt_regs *regs, enum show_regs_mode mode) { unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L; @@ -232,7 +234,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) struct fpu *prev_fpu = &prev->fpu; struct fpu *next_fpu = &next->fpu; int cpu = smp_processor_id(); - struct tss_struct *tss = &per_cpu(cpu_tss_rw, cpu); /* never put a printk in __switch_to... 
printk() calls wake_up*() indirectly */ @@ -264,12 +265,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) if (get_kernel_rpl() && unlikely(prev->iopl != next->iopl)) set_iopl_mask(next->iopl); - /* - * Now maybe handle debug registers and/or IO bitmaps - */ - if (unlikely(task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV || - task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT)) - __switch_to_xtra(prev_p, next_p, tss); + switch_to_extra(prev_p, next_p); /* * Leave lazy mode, flushing any hypercalls made here. diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 0e0b4288a4b2..bbfbf017065c 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -60,6 +60,8 @@ #include #endif +#include "process.h" + /* Prints also some state that isn't saved in the pt_regs */ void __show_regs(struct pt_regs *regs, enum show_regs_mode mode) { @@ -553,7 +555,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) struct fpu *prev_fpu = &prev->fpu; struct fpu *next_fpu = &next->fpu; int cpu = smp_processor_id(); - struct tss_struct *tss = &per_cpu(cpu_tss_rw, cpu); WARN_ON_ONCE(IS_ENABLED(CONFIG_DEBUG_ENTRY) && this_cpu_read(irq_count) != -1); @@ -617,12 +618,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) /* Reload sp0. */ update_task_stack(next_p); - /* - * Now maybe reload the debug registers and handle I/O bitmaps - */ - if (unlikely(task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT || - task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV)) - __switch_to_xtra(prev_p, next_p, tss); + switch_to_extra(prev_p, next_p); #ifdef CONFIG_XEN_PV /* -- cgit v1.2.3-70-g09d2 From 5635d99953f04b550738f6f4c1c532667c3fd872 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 25 Nov 2018 19:33:48 +0100 Subject: x86/speculation: Avoid __switch_to_xtra() calls The TIF_SPEC_IB bit does not need to be evaluated in the decision to invoke __switch_to_xtra() when: - CONFIG_SMP is disabled - The conditional STIBP mode is disabled The TIF_SPEC_IB bit still controls IBPB in both cases so the TIF work mask checks might invoke __switch_to_xtra() for nothing if TIF_SPEC_IB is the only set bit in the work masks. Optimize it out by masking the bit at compile time for CONFIG_SMP=n and at run time when the static key controlling the conditional STIBP mode is disabled. 
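
For illustration only, the effect of that masking can be modeled in a few lines of plain C. This is a sketch, not kernel code: the cond_stibp variable stands in for the switch_to_cond_stibp static key, the bit values are invented, and the real kernel tests separate _TIF_WORK_CTXSW_PREV/_TIF_WORK_CTXSW_NEXT masks.

    #include <stdbool.h>
    #include <stdio.h>

    /* Made-up stand-ins for the kernel's TIF bits and work mask. */
    #define TIF_SPEC_IB_BIT   (1UL << 9)
    #define TIF_IO_BITMAP_BIT (1UL << 2)
    #define WORK_MASK         (TIF_IO_BITMAP_BIT | TIF_SPEC_IB_BIT)

    /* Models static_branch_likely(&switch_to_cond_stibp). */
    static bool cond_stibp;

    /* True when the modeled __switch_to_xtra() slow path must run. */
    static bool needs_switch_to_xtra(unsigned long prev_tif,
                                     unsigned long next_tif)
    {
        if (!cond_stibp) {
            /* TIF_SPEC_IB alone must not force the slow path. */
            prev_tif &= ~TIF_SPEC_IB_BIT;
            next_tif &= ~TIF_SPEC_IB_BIT;
        }
        return (prev_tif | next_tif) & WORK_MASK;
    }

    int main(void)
    {
        cond_stibp = false;
        printf("%d\n", (int)needs_switch_to_xtra(TIF_SPEC_IB_BIT, 0)); /* 0 */
        cond_stibp = true;
        printf("%d\n", (int)needs_switch_to_xtra(TIF_SPEC_IB_BIT, 0)); /* 1 */
        return 0;
    }

With the stand-in key off, a switch where only the IB bit differs takes the fast path, which is exactly the call this patch avoids.
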
Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Jiri Kosina Cc: Tom Lendacky Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: David Woodhouse Cc: Tim Chen Cc: Andi Kleen Cc: Dave Hansen Cc: Casey Schaufler Cc: Asit Mallick Cc: Arjan van de Ven Cc: Jon Masters Cc: Waiman Long Cc: Greg KH Cc: Dave Stewart Cc: Kees Cook Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20181125185005.374062201@linutronix.de --- arch/x86/include/asm/thread_info.h | 13 +++++++++++-- arch/x86/kernel/process.h | 15 +++++++++++++++ 2 files changed, 26 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index fa583ec99e3e..6d201699c651 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -147,9 +147,18 @@ struct thread_info { _TIF_FSCHECK) /* flags to check in __switch_to() */ -#define _TIF_WORK_CTXSW \ +#define _TIF_WORK_CTXSW_BASE \ (_TIF_IO_BITMAP|_TIF_NOCPUID|_TIF_NOTSC|_TIF_BLOCKSTEP| \ - _TIF_SSBD|_TIF_SPEC_IB) + _TIF_SSBD) + +/* + * Avoid calls to __switch_to_xtra() on UP as STIBP is not evaluated. + */ +#ifdef CONFIG_SMP +# define _TIF_WORK_CTXSW (_TIF_WORK_CTXSW_BASE | _TIF_SPEC_IB) +#else +# define _TIF_WORK_CTXSW (_TIF_WORK_CTXSW_BASE) +#endif #define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY) #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW) diff --git a/arch/x86/kernel/process.h b/arch/x86/kernel/process.h index 020fbfac3a27..898e97cf6629 100644 --- a/arch/x86/kernel/process.h +++ b/arch/x86/kernel/process.h @@ -2,6 +2,8 @@ // // Code shared between 32 and 64 bit +#include + void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p); /* @@ -14,6 +16,19 @@ static inline void switch_to_extra(struct task_struct *prev, unsigned long next_tif = task_thread_info(next)->flags; unsigned long prev_tif = task_thread_info(prev)->flags; + if (IS_ENABLED(CONFIG_SMP)) { + /* + * Avoid __switch_to_xtra() invocation when conditional + * STIPB is disabled and the only different bit is + * TIF_SPEC_IB. For CONFIG_SMP=n TIF_SPEC_IB is not + * in the TIF_WORK_CTXSW masks. + */ + if (!static_branch_likely(&switch_to_cond_stibp)) { + prev_tif &= ~_TIF_SPEC_IB; + next_tif &= ~_TIF_SPEC_IB; + } + } + /* * __switch_to_xtra() handles debug registers, i/o bitmaps, * speculation mitigations etc. -- cgit v1.2.3-70-g09d2 From 4c71a2b6fd7e42814aa68a6dec88abf3b42ea573 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 25 Nov 2018 19:33:49 +0100 Subject: x86/speculation: Prepare for conditional IBPB in switch_mm() The IBPB speculation barrier is issued from switch_mm() when the kernel switches to a user space task with a different mm than the user space task which ran last on the same CPU. An additional optimization is to avoid IBPB when the incoming task can be ptraced by the outgoing task. This optimization only works when switching directly between two user space tasks. When switching from a kernel task to a user space task the optimization fails because the previous task cannot be accessed anymore. So for quite some scenarios the optimization is just adding overhead. The upcoming conditional IBPB support will issue IBPB only for user space tasks which have the TIF_SPEC_IB bit set. This requires to handle the following cases: 1) Switch from a user space task (potential attacker) which has TIF_SPEC_IB set to a user space task (potential victim) which has TIF_SPEC_IB not set. 
2) Switch from a user space task (potential attacker) which has TIF_SPEC_IB not set to a user space task (potential victim) which has TIF_SPEC_IB set. This needs to be optimized for the case where the IBPB can be avoided when only kernel threads ran in between user space tasks which belong to the same process. The current check whether two tasks belong to the same context is using the task's context id. While correct, it's simpler to use the mm pointer because it allows mangling the TIF_SPEC_IB bit into it. The context id based mechanism requires extra storage, which creates worse code. When a task is scheduled out its TIF_SPEC_IB bit is mangled as bit 0 into the per CPU storage which is used to track the last user space mm which was running on a CPU. This bit can be used together with the TIF_SPEC_IB bit of the incoming task to make the decision whether IBPB needs to be issued or not to cover the two cases above. As conditional IBPB is going to be the default, remove the dubious ptrace check for the IBPB always case and simply issue IBPB always when the process changes. Move the storage to a different place in the struct as the original one created a hole. Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Jiri Kosina Cc: Tom Lendacky Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: David Woodhouse Cc: Tim Chen Cc: Andi Kleen Cc: Dave Hansen Cc: Casey Schaufler Cc: Asit Mallick Cc: Arjan van de Ven Cc: Jon Masters Cc: Waiman Long Cc: Greg KH Cc: Dave Stewart Cc: Kees Cook Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20181125185005.466447057@linutronix.de --- arch/x86/include/asm/nospec-branch.h | 2 + arch/x86/include/asm/tlbflush.h | 8 ++- arch/x86/kernel/cpu/bugs.c | 29 +++++++-- arch/x86/mm/tlb.c | 115 ++++++++++++++++++++++++++--------- 4 files changed, 118 insertions(+), 36 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index be0b0aa780e2..d4d35baf0430 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -312,6 +312,8 @@ do { \ } while (0) DECLARE_STATIC_KEY_FALSE(switch_to_cond_stibp); +DECLARE_STATIC_KEY_FALSE(switch_mm_cond_ibpb); +DECLARE_STATIC_KEY_FALSE(switch_mm_always_ibpb); #endif /* __ASSEMBLY__ */ diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index d760611cfc35..f4204bf377fc 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -169,10 +169,14 @@ struct tlb_state { #define LOADED_MM_SWITCHING ((struct mm_struct *)1) + /* Last user mm for optimizing IBPB */ + union { + struct mm_struct *last_user_mm; + unsigned long last_user_mm_ibpb; + }; + u16 loaded_mm_asid; u16 next_asid; - /* last user mm's ctx id */ - u64 last_ctx_id; /* * We can be in one of several states: diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 1e13dbfc0919..7c946a9af947 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -56,6 +56,10 @@ u64 __ro_after_init x86_amd_ls_cfg_ssbd_mask; /* Control conditional STIBP in switch_to() */ DEFINE_STATIC_KEY_FALSE(switch_to_cond_stibp); +/* Control conditional IBPB in switch_mm() */ +DEFINE_STATIC_KEY_FALSE(switch_mm_cond_ibpb); +/* Control unconditional IBPB in switch_mm() */ +DEFINE_STATIC_KEY_FALSE(switch_mm_always_ibpb); void __init check_bugs(void) { @@ -331,7 +335,17 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd) /* Initialize 
Indirect Branch Prediction Barrier */ if (boot_cpu_has(X86_FEATURE_IBPB)) { setup_force_cpu_cap(X86_FEATURE_USE_IBPB); - pr_info("Spectre v2 mitigation: Enabling Indirect Branch Prediction Barrier\n"); + + switch (mode) { + case SPECTRE_V2_USER_STRICT: + static_branch_enable(&switch_mm_always_ibpb); + break; + default: + break; + } + + pr_info("mitigation: Enabling %s Indirect Branch Prediction Barrier\n", + mode == SPECTRE_V2_USER_STRICT ? "always-on" : "conditional"); } /* If enhanced IBRS is enabled no STIPB required */ @@ -955,10 +969,15 @@ static char *stibp_state(void) static char *ibpb_state(void) { - if (boot_cpu_has(X86_FEATURE_USE_IBPB)) - return ", IBPB"; - else - return ""; + if (boot_cpu_has(X86_FEATURE_IBPB)) { + switch (spectre_v2_user) { + case SPECTRE_V2_USER_NONE: + return ", IBPB: disabled"; + case SPECTRE_V2_USER_STRICT: + return ", IBPB: always-on"; + } + } + return ""; } static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr, diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index bddd6b3cee1d..03b6b4c2238d 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -7,7 +7,6 @@ #include #include #include -#include #include #include @@ -30,6 +29,12 @@ * Implement flush IPI by CALL_FUNCTION_VECTOR, Alex Shi */ +/* + * Use bit 0 to mangle the TIF_SPEC_IB state into the mm pointer which is + * stored in cpu_tlb_state.last_user_mm_ibpb. + */ +#define LAST_USER_MM_IBPB 0x1UL + /* * We get here when we do something requiring a TLB invalidation * but could not go invalidate all of the contexts. We do the @@ -181,17 +186,87 @@ static void sync_current_stack_to_mm(struct mm_struct *mm) } } -static bool ibpb_needed(struct task_struct *tsk, u64 last_ctx_id) +static inline unsigned long mm_mangle_tif_spec_ib(struct task_struct *next) +{ + unsigned long next_tif = task_thread_info(next)->flags; + unsigned long ibpb = (next_tif >> TIF_SPEC_IB) & LAST_USER_MM_IBPB; + + return (unsigned long)next->mm | ibpb; +} + +static void cond_ibpb(struct task_struct *next) { + if (!next || !next->mm) + return; + /* - * Check if the current (previous) task has access to the memory - * of the @tsk (next) task. If access is denied, make sure to - * issue a IBPB to stop user->user Spectre-v2 attacks. - * - * Note: __ptrace_may_access() returns 0 or -ERRNO. + * Both, the conditional and the always IBPB mode use the mm + * pointer to avoid the IBPB when switching between tasks of the + * same process. Using the mm pointer instead of mm->context.ctx_id + * opens a hypothetical hole vs. mm_struct reuse, which is more or + * less impossible to control by an attacker. Aside of that it + * would only affect the first schedule so the theoretically + * exposed data is not really interesting. */ - return (tsk && tsk->mm && tsk->mm->context.ctx_id != last_ctx_id && - ptrace_may_access_sched(tsk, PTRACE_MODE_SPEC_IBPB)); + if (static_branch_likely(&switch_mm_cond_ibpb)) { + unsigned long prev_mm, next_mm; + + /* + * This is a bit more complex than the always mode because + * it has to handle two cases: + * + * 1) Switch from a user space task (potential attacker) + * which has TIF_SPEC_IB set to a user space task + * (potential victim) which has TIF_SPEC_IB not set. + * + * 2) Switch from a user space task (potential attacker) + * which has TIF_SPEC_IB not set to a user space task + * (potential victim) which has TIF_SPEC_IB set. + * + * This could be done by unconditionally issuing IBPB when + * a task which has TIF_SPEC_IB set is either scheduled in + * or out. 
Though that results in two flushes when: + * + * - the same user space task is scheduled out and later + * scheduled in again and only a kernel thread ran in + * between. + * + * - a user space task belonging to the same process is + * scheduled in after a kernel thread ran in between + * + * - a user space task belonging to the same process is + * scheduled in immediately. + * + * Optimize this with reasonably small overhead for the + * above cases. Mangle the TIF_SPEC_IB bit into the mm + * pointer of the incoming task which is stored in + * cpu_tlbstate.last_user_mm_ibpb for comparison. + */ + next_mm = mm_mangle_tif_spec_ib(next); + prev_mm = this_cpu_read(cpu_tlbstate.last_user_mm_ibpb); + + /* + * Issue IBPB only if the mm's are different and one or + * both have the IBPB bit set. + */ + if (next_mm != prev_mm && + (next_mm | prev_mm) & LAST_USER_MM_IBPB) + indirect_branch_prediction_barrier(); + + this_cpu_write(cpu_tlbstate.last_user_mm_ibpb, next_mm); + } + + if (static_branch_unlikely(&switch_mm_always_ibpb)) { + /* + * Only flush when switching to a user space task with a + * different context than the user space task which ran + * last on this CPU. + */ + if (this_cpu_read(cpu_tlbstate.last_user_mm) != next->mm) { + indirect_branch_prediction_barrier(); + this_cpu_write(cpu_tlbstate.last_user_mm, next->mm); + } + } } void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, @@ -292,22 +367,12 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, new_asid = prev_asid; need_flush = true; } else { - u64 last_ctx_id = this_cpu_read(cpu_tlbstate.last_ctx_id); - /* * Avoid user/user BTB poisoning by flushing the branch * predictor when switching between processes. This stops * one process from doing Spectre-v2 attacks on another. - * - * As an optimization, flush indirect branches only when - * switching into a processes that can't be ptrace by the - * current one (as in such case, attacker has much more - * convenient way how to tamper with the next process than - * branch buffer poisoning). */ - if (static_cpu_has(X86_FEATURE_USE_IBPB) && - ibpb_needed(tsk, last_ctx_id)) - indirect_branch_prediction_barrier(); + cond_ibpb(tsk); if (IS_ENABLED(CONFIG_VMAP_STACK)) { /* @@ -365,14 +430,6 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, 0); } - /* - * Record last user mm's context id, so we can avoid - * flushing branch buffer with IBPB if we switch back - * to the same user. - */ - if (next != &init_mm) - this_cpu_write(cpu_tlbstate.last_ctx_id, next->context.ctx_id); - /* Make sure we write CR3 before loaded_mm. */ barrier(); @@ -441,7 +498,7 @@ void initialize_tlbstate_and_flush(void) write_cr3(build_cr3(mm->pgd, 0)); /* Reinitialize tlbstate. */ - this_cpu_write(cpu_tlbstate.last_ctx_id, mm->context.ctx_id); + this_cpu_write(cpu_tlbstate.last_user_mm_ibpb, LAST_USER_MM_IBPB); this_cpu_write(cpu_tlbstate.loaded_mm_asid, 0); this_cpu_write(cpu_tlbstate.next_asid, 1); this_cpu_write(cpu_tlbstate.ctxs[0].ctx_id, mm->context.ctx_id); -- cgit v1.2.3-70-g09d2 From e6da8bb6f9abb2628381904b24163c770e630bac Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 25 Nov 2018 19:33:51 +0100 Subject: x86/speculation: Split out TIF update The update of the TIF_SSBD flag and the conditional speculation control MSR update is done in the ssb_prctl_set() function directly. The upcoming prctl support for controlling indirect branch speculation via STIBP needs the same mechanism. 
Split the code out and make it reusable. Reword the comment about updates for other tasks. Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Jiri Kosina Cc: Tom Lendacky Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: David Woodhouse Cc: Tim Chen Cc: Andi Kleen Cc: Dave Hansen Cc: Casey Schaufler Cc: Asit Mallick Cc: Arjan van de Ven Cc: Jon Masters Cc: Waiman Long Cc: Greg KH Cc: Dave Stewart Cc: Kees Cook Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20181125185005.652305076@linutronix.de --- arch/x86/kernel/cpu/bugs.c | 35 +++++++++++++++++++++++------------ 1 file changed, 23 insertions(+), 12 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 7c946a9af947..3b65a53d2c33 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -702,10 +702,29 @@ static void ssb_select_mitigation(void) #undef pr_fmt #define pr_fmt(fmt) "Speculation prctl: " fmt -static int ssb_prctl_set(struct task_struct *task, unsigned long ctrl) +static void task_update_spec_tif(struct task_struct *tsk, int tifbit, bool on) { bool update; + if (on) + update = !test_and_set_tsk_thread_flag(tsk, tifbit); + else + update = test_and_clear_tsk_thread_flag(tsk, tifbit); + + /* + * Immediately update the speculation control MSRs for the current + * task, but for a non-current task delay setting the CPU + * mitigation until it is scheduled next. + * + * This can only happen for SECCOMP mitigation. For PRCTL it's + * always the current task. + */ + if (tsk == current && update) + speculation_ctrl_update_current(); +} + +static int ssb_prctl_set(struct task_struct *task, unsigned long ctrl) +{ if (ssb_mode != SPEC_STORE_BYPASS_PRCTL && ssb_mode != SPEC_STORE_BYPASS_SECCOMP) return -ENXIO; @@ -716,28 +735,20 @@ static int ssb_prctl_set(struct task_struct *task, unsigned long ctrl) if (task_spec_ssb_force_disable(task)) return -EPERM; task_clear_spec_ssb_disable(task); - update = test_and_clear_tsk_thread_flag(task, TIF_SSBD); + task_update_spec_tif(task, TIF_SSBD, false); break; case PR_SPEC_DISABLE: task_set_spec_ssb_disable(task); - update = !test_and_set_tsk_thread_flag(task, TIF_SSBD); + task_update_spec_tif(task, TIF_SSBD, true); break; case PR_SPEC_FORCE_DISABLE: task_set_spec_ssb_disable(task); task_set_spec_ssb_force_disable(task); - update = !test_and_set_tsk_thread_flag(task, TIF_SSBD); + task_update_spec_tif(task, TIF_SSBD, true); break; default: return -ERANGE; } - - /* - * If being set on non-current task, delay setting the CPU - * mitigation until it is next scheduled. - */ - if (task == current && update) - speculation_ctrl_update_current(); - return 0; } -- cgit v1.2.3-70-g09d2 From 6d991ba509ebcfcc908e009d1db51972a4f7a064 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 28 Nov 2018 10:56:57 +0100 Subject: x86/speculation: Prevent stale SPEC_CTRL msr content The seccomp speculation control operates on all tasks of a process, but only the current task of a process can update the MSR immediately. For the other threads the update is deferred to the next context switch. This creates the following situation with Process A and B: Process A task 2 and Process B task 1 are pinned on CPU1. Process A task 2 does not have the speculation control TIF bit set. Process B task 1 has the speculation control TIF bit set. 
CPU0                                    CPU1
                                        MSR bit is set
                                        ProcB.T1 schedules out
                                        ProcA.T2 schedules in
                                        MSR bit is cleared
ProcA.T1
  seccomp_update()
  set TIF bit on ProcA.T2
                                        ProcB.T1 schedules in
                                        MSR is not updated  <-- FAIL
This happens because the context switch code tries to avoid the MSR update if the speculation control TIF bits of the incoming and the outgoing task are the same. In the worst case ProcB.T1 and ProcA.T2 are the only tasks scheduling back and forth on CPU1, which keeps the MSR stale forever. In theory this could be remedied by IPIs, but chasing the remote task which could be migrated is complex and full of races. The straightforward solution is to avoid the asynchronous update of the TIF bit and defer it to the next context switch. The speculation control state is stored in task_struct::atomic_flags by the prctl and seccomp updates already. Add a new TIF_SPEC_FORCE_UPDATE bit and set this after updating the atomic_flags. Check the bit on context switch and force a synchronous update of the speculation control if set. Use the same mechanism for updating the current task. Reported-by: Tim Chen Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Jiri Kosina Cc: Tom Lendacky Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: David Woodhouse Cc: Tim Chen Cc: Andi Kleen Cc: Dave Hansen Cc: Casey Schaufler Cc: Asit Mallick Cc: Arjan van de Ven Cc: Jon Masters Cc: Waiman Long Cc: Greg KH Cc: Dave Stewart Cc: Kees Cook Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/alpine.DEB.2.21.1811272247140.1875@nanos.tec.linutronix.de --- arch/x86/include/asm/spec-ctrl.h | 6 +----- arch/x86/include/asm/thread_info.h | 4 +++- arch/x86/kernel/cpu/bugs.c | 18 +++++++----------- arch/x86/kernel/process.c | 30 +++++++++++++++++++++++++++++- 4 files changed, 40 insertions(+), 18 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/spec-ctrl.h b/arch/x86/include/asm/spec-ctrl.h index 27b0bce3933b..5393babc0598 100644 --- a/arch/x86/include/asm/spec-ctrl.h +++ b/arch/x86/include/asm/spec-ctrl.h @@ -83,10 +83,6 @@ static inline void speculative_store_bypass_ht_init(void) { } #endif extern void speculation_ctrl_update(unsigned long tif); - -static inline void speculation_ctrl_update_current(void) -{ - speculation_ctrl_update(current_thread_info()->flags); -} +extern void speculation_ctrl_update_current(void); #endif diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 6d201699c651..82b73b75d67c 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -84,6 +84,7 @@ struct thread_info { #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */ #define TIF_SECCOMP 8 /* secure computing */ #define TIF_SPEC_IB 9 /* Indirect branch speculation mitigation */ +#define TIF_SPEC_FORCE_UPDATE 10 /* Force speculation MSR update in context switch */ #define TIF_USER_RETURN_NOTIFY 11 /* notify kernel of userspace return */ #define TIF_UPROBE 12 /* breakpointed or singlestepping */ #define TIF_PATCH_PENDING 13 /* pending live patching update */ @@ -112,6 +113,7 @@ struct thread_info { #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) #define _TIF_SECCOMP (1 << TIF_SECCOMP) #define _TIF_SPEC_IB (1 << TIF_SPEC_IB) +#define _TIF_SPEC_FORCE_UPDATE (1 << TIF_SPEC_FORCE_UPDATE) #define _TIF_USER_RETURN_NOTIFY (1 << TIF_USER_RETURN_NOTIFY) #define _TIF_UPROBE (1 << TIF_UPROBE) #define _TIF_PATCH_PENDING (1 << TIF_PATCH_PENDING) @@ -149,7 +151,7 @@ struct thread_info { /* flags to check in __switch_to() */ #define 
_TIF_WORK_CTXSW_BASE \ (_TIF_IO_BITMAP|_TIF_NOCPUID|_TIF_NOTSC|_TIF_BLOCKSTEP| \ - _TIF_SSBD) + _TIF_SSBD | _TIF_SPEC_FORCE_UPDATE) /* * Avoid calls to __switch_to_xtra() on UP as STIBP is not evaluated. diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 3b65a53d2c33..29f40a92f5a8 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -702,14 +702,10 @@ static void ssb_select_mitigation(void) #undef pr_fmt #define pr_fmt(fmt) "Speculation prctl: " fmt -static void task_update_spec_tif(struct task_struct *tsk, int tifbit, bool on) +static void task_update_spec_tif(struct task_struct *tsk) { - bool update; - - if (on) - update = !test_and_set_tsk_thread_flag(tsk, tifbit); - else - update = test_and_clear_tsk_thread_flag(tsk, tifbit); + /* Force the update of the real TIF bits */ + set_tsk_thread_flag(tsk, TIF_SPEC_FORCE_UPDATE); /* * Immediately update the speculation control MSRs for the current @@ -719,7 +715,7 @@ static void task_update_spec_tif(struct task_struct *tsk, int tifbit, bool on) * This can only happen for SECCOMP mitigation. For PRCTL it's * always the current task. */ - if (tsk == current && update) + if (tsk == current) speculation_ctrl_update_current(); } @@ -735,16 +731,16 @@ static int ssb_prctl_set(struct task_struct *task, unsigned long ctrl) if (task_spec_ssb_force_disable(task)) return -EPERM; task_clear_spec_ssb_disable(task); - task_update_spec_tif(task, TIF_SSBD, false); + task_update_spec_tif(task); break; case PR_SPEC_DISABLE: task_set_spec_ssb_disable(task); - task_update_spec_tif(task, TIF_SSBD, true); + task_update_spec_tif(task); break; case PR_SPEC_FORCE_DISABLE: task_set_spec_ssb_disable(task); task_set_spec_ssb_force_disable(task); - task_update_spec_tif(task, TIF_SSBD, true); + task_update_spec_tif(task); break; default: return -ERANGE; diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index cdf8e6694f71..afbe2eb4a1c6 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -443,6 +443,18 @@ static __always_inline void __speculation_ctrl_update(unsigned long tifp, wrmsrl(MSR_IA32_SPEC_CTRL, msr); } +static unsigned long speculation_ctrl_update_tif(struct task_struct *tsk) +{ + if (test_and_clear_tsk_thread_flag(tsk, TIF_SPEC_FORCE_UPDATE)) { + if (task_spec_ssb_disable(tsk)) + set_tsk_thread_flag(tsk, TIF_SSBD); + else + clear_tsk_thread_flag(tsk, TIF_SSBD); + } + /* Return the updated threadinfo flags*/ + return task_thread_info(tsk)->flags; +} + void speculation_ctrl_update(unsigned long tif) { /* Forced update. 
Make sure all relevant TIF flags are different */ @@ -451,6 +463,14 @@ void speculation_ctrl_update(unsigned long tif) preempt_enable(); } +/* Called from seccomp/prctl update */ +void speculation_ctrl_update_current(void) +{ + preempt_disable(); + speculation_ctrl_update(speculation_ctrl_update_tif(current)); + preempt_enable(); +} + void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p) { struct thread_struct *prev, *next; @@ -482,7 +502,15 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p) if ((tifp ^ tifn) & _TIF_NOCPUID) set_cpuid_faulting(!!(tifn & _TIF_NOCPUID)); - __speculation_ctrl_update(tifp, tifn); + if (likely(!((tifp | tifn) & _TIF_SPEC_FORCE_UPDATE))) { + __speculation_ctrl_update(tifp, tifn); + } else { + speculation_ctrl_update_tif(prev_p); + tifn = speculation_ctrl_update_tif(next_p); + + /* Enforce MSR update to ensure consistent state */ + __speculation_ctrl_update(~tifn, tifn); + } } /* -- cgit v1.2.3-70-g09d2 From 6893a959d7fdebbab5f5aa112c277d5a44435ba1 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 25 Nov 2018 19:33:52 +0100 Subject: x86/speculation: Prepare arch_smt_update() for PRCTL mode The upcoming fine grained per task STIBP control needs to be updated on CPU hotplug as well. Split out the code which controls the strict mode so the prctl control code can be added later. Mark the SMP function call argument __unused while at it. Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Jiri Kosina Cc: Tom Lendacky Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: David Woodhouse Cc: Tim Chen Cc: Andi Kleen Cc: Dave Hansen Cc: Casey Schaufler Cc: Asit Mallick Cc: Arjan van de Ven Cc: Jon Masters Cc: Waiman Long Cc: Greg KH Cc: Dave Stewart Cc: Kees Cook Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20181125185005.759457117@linutronix.de --- arch/x86/kernel/cpu/bugs.c | 46 +++++++++++++++++++++++++--------------------- 1 file changed, 25 insertions(+), 21 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 29f40a92f5a8..9cab538e10f1 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -530,40 +530,44 @@ specv2_set_mode: arch_smt_update(); } -static bool stibp_needed(void) +static void update_stibp_msr(void * __unused) { - /* Enhanced IBRS makes using STIBP unnecessary. */ - if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED) - return false; - - /* Check for strict user mitigation mode */ - return spectre_v2_user == SPECTRE_V2_USER_STRICT; + wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); } -static void update_stibp_msr(void *info) +/* Update x86_spec_ctrl_base in case SMT state changed. */ +static void update_stibp_strict(void) { - wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); + u64 mask = x86_spec_ctrl_base & ~SPEC_CTRL_STIBP; + + if (sched_smt_active()) + mask |= SPEC_CTRL_STIBP; + + if (mask == x86_spec_ctrl_base) + return; + + pr_info("Update user space SMT mitigation: STIBP %s\n", + mask & SPEC_CTRL_STIBP ? "always-on" : "off"); + x86_spec_ctrl_base = mask; + on_each_cpu(update_stibp_msr, NULL, 1); } void arch_smt_update(void) { - u64 mask; - - if (!stibp_needed()) + /* Enhanced IBRS implies STIBP. No update required. 
*/ + if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED) return; mutex_lock(&spec_ctrl_mutex); - mask = x86_spec_ctrl_base & ~SPEC_CTRL_STIBP; - if (sched_smt_active()) - mask |= SPEC_CTRL_STIBP; - - if (mask != x86_spec_ctrl_base) { - pr_info("Spectre v2 cross-process SMT mitigation: %s STIBP\n", - mask & SPEC_CTRL_STIBP ? "Enabling" : "Disabling"); - x86_spec_ctrl_base = mask; - on_each_cpu(update_stibp_msr, NULL, 1); + switch (spectre_v2_user) { + case SPECTRE_V2_USER_NONE: + break; + case SPECTRE_V2_USER_STRICT: + update_stibp_strict(); + break; } + mutex_unlock(&spec_ctrl_mutex); } -- cgit v1.2.3-70-g09d2 From 9137bb27e60e554dab694eafa4cca241fa3a694f Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 25 Nov 2018 19:33:53 +0100 Subject: x86/speculation: Add prctl() control for indirect branch speculation Add the PR_SPEC_INDIRECT_BRANCH option for the PR_GET_SPECULATION_CTRL and PR_SET_SPECULATION_CTRL prctls to allow fine grained per task control of indirect branch speculation via STIBP and IBPB. Invocations: Check indirect branch speculation status with - prctl(PR_GET_SPECULATION_CTRL, PR_SPEC_INDIRECT_BRANCH, 0, 0, 0); Enable indirect branch speculation with - prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_INDIRECT_BRANCH, PR_SPEC_ENABLE, 0, 0); Disable indirect branch speculation with - prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_INDIRECT_BRANCH, PR_SPEC_DISABLE, 0, 0); Force disable indirect branch speculation with - prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_INDIRECT_BRANCH, PR_SPEC_FORCE_DISABLE, 0, 0); See Documentation/userspace-api/spec_ctrl.rst. Signed-off-by: Tim Chen Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Jiri Kosina Cc: Tom Lendacky Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: David Woodhouse Cc: Andi Kleen Cc: Dave Hansen Cc: Casey Schaufler Cc: Asit Mallick Cc: Arjan van de Ven Cc: Jon Masters Cc: Waiman Long Cc: Greg KH Cc: Dave Stewart Cc: Kees Cook Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20181125185005.866780996@linutronix.de --- Documentation/userspace-api/spec_ctrl.rst | 9 +++++ arch/x86/include/asm/nospec-branch.h | 1 + arch/x86/kernel/cpu/bugs.c | 67 +++++++++++++++++++++++++++++++ arch/x86/kernel/process.c | 5 +++ include/linux/sched.h | 9 +++++ include/uapi/linux/prctl.h | 1 + tools/include/uapi/linux/prctl.h | 1 + 7 files changed, 93 insertions(+) (limited to 'arch') diff --git a/Documentation/userspace-api/spec_ctrl.rst b/Documentation/userspace-api/spec_ctrl.rst index 32f3d55c54b7..c4dbe6f7cdae 100644 --- a/Documentation/userspace-api/spec_ctrl.rst +++ b/Documentation/userspace-api/spec_ctrl.rst @@ -92,3 +92,12 @@ Speculation misfeature controls * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, PR_SPEC_ENABLE, 0, 0); * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, PR_SPEC_DISABLE, 0, 0); * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, PR_SPEC_FORCE_DISABLE, 0, 0); + +- PR_SPEC_INDIR_BRANCH: Indirect Branch Speculation in User Processes + (Mitigate Spectre V2 style attacks against user processes) + + Invocations: + * prctl(PR_GET_SPECULATION_CTRL, PR_SPEC_INDIRECT_BRANCH, 0, 0, 0); + * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_INDIRECT_BRANCH, PR_SPEC_ENABLE, 0, 0); + * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_INDIRECT_BRANCH, PR_SPEC_DISABLE, 0, 0); + * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_INDIRECT_BRANCH, PR_SPEC_FORCE_DISABLE, 0, 0); diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index d4d35baf0430..2adbe7b047fa 
100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -232,6 +232,7 @@ enum spectre_v2_mitigation { enum spectre_v2_user_mitigation { SPECTRE_V2_USER_NONE, SPECTRE_V2_USER_STRICT, + SPECTRE_V2_USER_PRCTL, }; /* The Speculative Store Bypass disable variants */ diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 9cab538e10f1..74359fff87fd 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -566,6 +566,8 @@ void arch_smt_update(void) case SPECTRE_V2_USER_STRICT: update_stibp_strict(); break; + case SPECTRE_V2_USER_PRCTL: + break; } mutex_unlock(&spec_ctrl_mutex); @@ -752,12 +754,50 @@ static int ssb_prctl_set(struct task_struct *task, unsigned long ctrl) return 0; } +static int ib_prctl_set(struct task_struct *task, unsigned long ctrl) +{ + switch (ctrl) { + case PR_SPEC_ENABLE: + if (spectre_v2_user == SPECTRE_V2_USER_NONE) + return 0; + /* + * Indirect branch speculation is always disabled in strict + * mode. + */ + if (spectre_v2_user == SPECTRE_V2_USER_STRICT) + return -EPERM; + task_clear_spec_ib_disable(task); + task_update_spec_tif(task); + break; + case PR_SPEC_DISABLE: + case PR_SPEC_FORCE_DISABLE: + /* + * Indirect branch speculation is always allowed when + * mitigation is force disabled. + */ + if (spectre_v2_user == SPECTRE_V2_USER_NONE) + return -EPERM; + if (spectre_v2_user == SPECTRE_V2_USER_STRICT) + return 0; + task_set_spec_ib_disable(task); + if (ctrl == PR_SPEC_FORCE_DISABLE) + task_set_spec_ib_force_disable(task); + task_update_spec_tif(task); + break; + default: + return -ERANGE; + } + return 0; +} + int arch_prctl_spec_ctrl_set(struct task_struct *task, unsigned long which, unsigned long ctrl) { switch (which) { case PR_SPEC_STORE_BYPASS: return ssb_prctl_set(task, ctrl); + case PR_SPEC_INDIRECT_BRANCH: + return ib_prctl_set(task, ctrl); default: return -ENODEV; } @@ -790,11 +830,34 @@ static int ssb_prctl_get(struct task_struct *task) } } +static int ib_prctl_get(struct task_struct *task) +{ + if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) + return PR_SPEC_NOT_AFFECTED; + + switch (spectre_v2_user) { + case SPECTRE_V2_USER_NONE: + return PR_SPEC_ENABLE; + case SPECTRE_V2_USER_PRCTL: + if (task_spec_ib_force_disable(task)) + return PR_SPEC_PRCTL | PR_SPEC_FORCE_DISABLE; + if (task_spec_ib_disable(task)) + return PR_SPEC_PRCTL | PR_SPEC_DISABLE; + return PR_SPEC_PRCTL | PR_SPEC_ENABLE; + case SPECTRE_V2_USER_STRICT: + return PR_SPEC_DISABLE; + default: + return PR_SPEC_NOT_AFFECTED; + } +} + int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which) { switch (which) { case PR_SPEC_STORE_BYPASS: return ssb_prctl_get(task); + case PR_SPEC_INDIRECT_BRANCH: + return ib_prctl_get(task); default: return -ENODEV; } @@ -974,6 +1037,8 @@ static char *stibp_state(void) return ", STIBP: disabled"; case SPECTRE_V2_USER_STRICT: return ", STIBP: forced"; + case SPECTRE_V2_USER_PRCTL: + return ""; } return ""; } @@ -986,6 +1051,8 @@ static char *ibpb_state(void) return ", IBPB: disabled"; case SPECTRE_V2_USER_STRICT: return ", IBPB: always-on"; + case SPECTRE_V2_USER_PRCTL: + return ""; } } return ""; diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index afbe2eb4a1c6..7d31192296a8 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -450,6 +450,11 @@ static unsigned long speculation_ctrl_update_tif(struct task_struct *tsk) set_tsk_thread_flag(tsk, TIF_SSBD); else clear_tsk_thread_flag(tsk, TIF_SSBD); + + if (task_spec_ib_disable(tsk)) + 
set_tsk_thread_flag(tsk, TIF_SPEC_IB); + else + clear_tsk_thread_flag(tsk, TIF_SPEC_IB); } /* Return the updated threadinfo flags*/ return task_thread_info(tsk)->flags; diff --git a/include/linux/sched.h b/include/linux/sched.h index a51c13c2b1a0..d607db5fcc6a 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1453,6 +1453,8 @@ static inline bool is_percpu_thread(void) #define PFA_SPREAD_SLAB 2 /* Spread some slab caches over cpuset */ #define PFA_SPEC_SSB_DISABLE 3 /* Speculative Store Bypass disabled */ #define PFA_SPEC_SSB_FORCE_DISABLE 4 /* Speculative Store Bypass force disabled*/ +#define PFA_SPEC_IB_DISABLE 5 /* Indirect branch speculation restricted */ +#define PFA_SPEC_IB_FORCE_DISABLE 6 /* Indirect branch speculation permanently restricted */ #define TASK_PFA_TEST(name, func) \ static inline bool task_##func(struct task_struct *p) \ @@ -1484,6 +1486,13 @@ TASK_PFA_CLEAR(SPEC_SSB_DISABLE, spec_ssb_disable) TASK_PFA_TEST(SPEC_SSB_FORCE_DISABLE, spec_ssb_force_disable) TASK_PFA_SET(SPEC_SSB_FORCE_DISABLE, spec_ssb_force_disable) +TASK_PFA_TEST(SPEC_IB_DISABLE, spec_ib_disable) +TASK_PFA_SET(SPEC_IB_DISABLE, spec_ib_disable) +TASK_PFA_CLEAR(SPEC_IB_DISABLE, spec_ib_disable) + +TASK_PFA_TEST(SPEC_IB_FORCE_DISABLE, spec_ib_force_disable) +TASK_PFA_SET(SPEC_IB_FORCE_DISABLE, spec_ib_force_disable) + static inline void current_restore_flags(unsigned long orig_flags, unsigned long flags) { diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h index c0d7ea0bf5b6..b17201edfa09 100644 --- a/include/uapi/linux/prctl.h +++ b/include/uapi/linux/prctl.h @@ -212,6 +212,7 @@ struct prctl_mm_map { #define PR_SET_SPECULATION_CTRL 53 /* Speculation control variants */ # define PR_SPEC_STORE_BYPASS 0 +# define PR_SPEC_INDIRECT_BRANCH 1 /* Return and control values for PR_SET/GET_SPECULATION_CTRL */ # define PR_SPEC_NOT_AFFECTED 0 # define PR_SPEC_PRCTL (1UL << 0) diff --git a/tools/include/uapi/linux/prctl.h b/tools/include/uapi/linux/prctl.h index c0d7ea0bf5b6..b17201edfa09 100644 --- a/tools/include/uapi/linux/prctl.h +++ b/tools/include/uapi/linux/prctl.h @@ -212,6 +212,7 @@ struct prctl_mm_map { #define PR_SET_SPECULATION_CTRL 53 /* Speculation control variants */ # define PR_SPEC_STORE_BYPASS 0 +# define PR_SPEC_INDIRECT_BRANCH 1 /* Return and control values for PR_SET/GET_SPECULATION_CTRL */ # define PR_SPEC_NOT_AFFECTED 0 # define PR_SPEC_PRCTL (1UL << 0) -- cgit v1.2.3-70-g09d2 From 7cc765a67d8e04ef7d772425ca5a2a1e2b894c15 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 25 Nov 2018 19:33:54 +0100 Subject: x86/speculation: Enable prctl mode for spectre_v2_user Now that all prerequisites are in place: - Add the prctl command line option - Default the 'auto' mode to 'prctl' - When SMT state changes, update the static key which controls the conditional STIBP evaluation on context switch. - At init update the static key which controls the conditional IBPB evaluation on context switch. 
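
From user space the prctl mode is exercised through the speculation control prctl added earlier in this series. A minimal sketch of a task restricting its own indirect branch speculation, assuming a kernel with this series applied; the fallback defines only cover uapi headers that predate it:

    /* Build: cc -o ib_off ib_off.c */
    #include <stdio.h>
    #include <sys/prctl.h>

    /* Fallbacks in case <linux/prctl.h> predates this series. */
    #ifndef PR_SET_SPECULATION_CTRL
    # define PR_GET_SPECULATION_CTRL 52
    # define PR_SET_SPECULATION_CTRL 53
    #endif
    #ifndef PR_SPEC_INDIRECT_BRANCH
    # define PR_SPEC_INDIRECT_BRANCH 1
    #endif
    #ifndef PR_SPEC_DISABLE
    # define PR_SPEC_DISABLE (1UL << 2)
    #endif

    int main(void)
    {
        /* Restrict indirect branch speculation for this task. */
        if (prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_INDIRECT_BRANCH,
                  PR_SPEC_DISABLE, 0, 0))
            perror("PR_SET_SPECULATION_CTRL");

        /* Read back the state; the kernel returns the PR_SPEC_* bits. */
        int st = prctl(PR_GET_SPECULATION_CTRL, PR_SPEC_INDIRECT_BRANCH,
                       0, 0, 0);
        printf("ib speculation state: 0x%x\n", st);
        return 0;
    }

The FORCE_DISABLE variant works the same way and additionally sets the per-task force flag, which PR_GET_SPECULATION_CTRL reports as PR_SPEC_FORCE_DISABLE.
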
Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Jiri Kosina Cc: Tom Lendacky Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: David Woodhouse Cc: Tim Chen Cc: Andi Kleen Cc: Dave Hansen Cc: Casey Schaufler Cc: Asit Mallick Cc: Arjan van de Ven Cc: Jon Masters Cc: Waiman Long Cc: Greg KH Cc: Dave Stewart Cc: Kees Cook Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20181125185005.958421388@linutronix.de --- Documentation/admin-guide/kernel-parameters.txt | 7 ++++- arch/x86/kernel/cpu/bugs.c | 41 +++++++++++++++++++------ 2 files changed, 38 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index b6e5b33b9d75..a9b98a4e8789 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -4236,9 +4236,14 @@ off - Unconditionally disable mitigations. Is enforced by spectre_v2=off + prctl - Indirect branch speculation is enabled, + but mitigation can be enabled via prctl + per thread. The mitigation control state + is inherited on fork. + auto - Kernel selects the mitigation depending on the available CPU features and vulnerability. - Default is off. + Default is prctl. Not specifying this option is equivalent to spectre_v2_user=auto. diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 74359fff87fd..d0137d10f9a6 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -255,11 +255,13 @@ enum spectre_v2_user_cmd { SPECTRE_V2_USER_CMD_NONE, SPECTRE_V2_USER_CMD_AUTO, SPECTRE_V2_USER_CMD_FORCE, + SPECTRE_V2_USER_CMD_PRCTL, }; static const char * const spectre_v2_user_strings[] = { [SPECTRE_V2_USER_NONE] = "User space: Vulnerable", [SPECTRE_V2_USER_STRICT] = "User space: Mitigation: STIBP protection", + [SPECTRE_V2_USER_PRCTL] = "User space: Mitigation: STIBP via prctl", }; static const struct { @@ -270,6 +272,7 @@ static const struct { { "auto", SPECTRE_V2_USER_CMD_AUTO, false }, { "off", SPECTRE_V2_USER_CMD_NONE, false }, { "on", SPECTRE_V2_USER_CMD_FORCE, true }, + { "prctl", SPECTRE_V2_USER_CMD_PRCTL, false }, }; static void __init spec_v2_user_print_cond(const char *reason, bool secure) @@ -324,12 +327,15 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd) smt_possible = false; switch (spectre_v2_parse_user_cmdline(v2_cmd)) { - case SPECTRE_V2_USER_CMD_AUTO: case SPECTRE_V2_USER_CMD_NONE: goto set_mode; case SPECTRE_V2_USER_CMD_FORCE: mode = SPECTRE_V2_USER_STRICT; break; + case SPECTRE_V2_USER_CMD_AUTO: + case SPECTRE_V2_USER_CMD_PRCTL: + mode = SPECTRE_V2_USER_PRCTL; + break; } /* Initialize Indirect Branch Prediction Barrier */ @@ -340,6 +346,9 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd) case SPECTRE_V2_USER_STRICT: static_branch_enable(&switch_mm_always_ibpb); break; + case SPECTRE_V2_USER_PRCTL: + static_branch_enable(&switch_mm_cond_ibpb); + break; default: break; } @@ -352,6 +361,12 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd) if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED) return; + /* + * If SMT is not possible or STIBP is not available clear the STIPB + * mode. 
+ */ + if (!smt_possible || !boot_cpu_has(X86_FEATURE_STIBP)) + mode = SPECTRE_V2_USER_NONE; set_mode: spectre_v2_user = mode; /* Only print the STIBP mode when SMT possible */ @@ -552,6 +567,15 @@ static void update_stibp_strict(void) on_each_cpu(update_stibp_msr, NULL, 1); } +/* Update the static key controlling the evaluation of TIF_SPEC_IB */ +static void update_indir_branch_cond(void) +{ + if (sched_smt_active()) + static_branch_enable(&switch_to_cond_stibp); + else + static_branch_disable(&switch_to_cond_stibp); +} + void arch_smt_update(void) { /* Enhanced IBRS implies STIBP. No update required. */ @@ -567,6 +591,7 @@ void arch_smt_update(void) update_stibp_strict(); break; case SPECTRE_V2_USER_PRCTL: + update_indir_branch_cond(); break; } @@ -1038,7 +1063,8 @@ static char *stibp_state(void) case SPECTRE_V2_USER_STRICT: return ", STIBP: forced"; case SPECTRE_V2_USER_PRCTL: - return ""; + if (static_key_enabled(&switch_to_cond_stibp)) + return ", STIBP: conditional"; } return ""; } @@ -1046,14 +1072,11 @@ static char *stibp_state(void) static char *ibpb_state(void) { if (boot_cpu_has(X86_FEATURE_IBPB)) { - switch (spectre_v2_user) { - case SPECTRE_V2_USER_NONE: - return ", IBPB: disabled"; - case SPECTRE_V2_USER_STRICT: + if (static_key_enabled(&switch_mm_always_ibpb)) return ", IBPB: always-on"; - case SPECTRE_V2_USER_PRCTL: - return ""; - } + if (static_key_enabled(&switch_mm_cond_ibpb)) + return ", IBPB: conditional"; + return ", IBPB: disabled"; } return ""; } -- cgit v1.2.3-70-g09d2 From 6b3e64c237c072797a9ec918654a60e3a46488e2 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 25 Nov 2018 19:33:55 +0100 Subject: x86/speculation: Add seccomp Spectre v2 user space protection mode If 'prctl' mode of user space protection from spectre v2 is selected on the kernel command-line, STIBP and IBPB are applied on tasks which restrict their indirect branch speculation via prctl. SECCOMP enables the SSBD mitigation for sandboxed tasks already, so it makes sense to prevent spectre v2 user space to user space attacks as well. The Intel mitigation guide documents how STIBP works: Setting bit 1 (STIBP) of the IA32_SPEC_CTRL MSR on a logical processor prevents the predicted targets of indirect branches on any logical processor of that core from being controlled by software that executes (or executed previously) on another logical processor of the same core. Ergo setting STIBP protects the task itself from being attacked from a task running on a different hyper-thread and protects the tasks running on different hyper-threads from being attacked. While the document suggests that the branch predictors are shielded between the logical processors, the observed performance regressions suggest that STIBP simply disables the branch predictor more or less completely. Of course the document wording is vague, but the fact that there is also no requirement for issuing IBPB when STIBP is used points clearly in that direction. The kernel still issues IBPB even when STIBP is used until Intel clarifies the whole mechanism. IBPB is issued when the task switches out, so malicious sandbox code cannot mistrain the branch predictor for the next user space task on the same logical processor. 
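
Whether a task has been force-disabled this way can be observed from user space. A small sketch, assuming the PR_SPEC_* bit values from include/uapi/linux/prctl.h; the hard-coded numbers are fallbacks for older headers:

    /* Decode the indirect-branch speculation state of the current task. */
    #include <stdio.h>
    #include <sys/prctl.h>

    #ifndef PR_GET_SPECULATION_CTRL
    # define PR_GET_SPECULATION_CTRL 52
    #endif
    #ifndef PR_SPEC_INDIRECT_BRANCH
    # define PR_SPEC_INDIRECT_BRANCH 1
    #endif

    int main(void)
    {
        int st = prctl(PR_GET_SPECULATION_CTRL, PR_SPEC_INDIRECT_BRANCH,
                       0, 0, 0);

        if (st < 0) {
            perror("prctl");        /* pre-series kernel */
            return 1;
        }
        if (st & (1 << 3))          /* PR_SPEC_FORCE_DISABLE */
            puts("force disabled (e.g. set via seccomp)");
        else if (st & (1 << 2))     /* PR_SPEC_DISABLE */
            puts("disabled");
        else if (st & (1 << 1))     /* PR_SPEC_ENABLE */
            puts("enabled");
        else                        /* PR_SPEC_NOT_AFFECTED */
            puts("not affected");
        return 0;
    }
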
Signed-off-by: Jiri Kosina Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Tom Lendacky Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: David Woodhouse Cc: Tim Chen Cc: Andi Kleen Cc: Dave Hansen Cc: Casey Schaufler Cc: Asit Mallick Cc: Arjan van de Ven Cc: Jon Masters Cc: Waiman Long Cc: Greg KH Cc: Dave Stewart Cc: Kees Cook Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20181125185006.051663132@linutronix.de --- Documentation/admin-guide/kernel-parameters.txt | 9 ++++++++- arch/x86/include/asm/nospec-branch.h | 1 + arch/x86/kernel/cpu/bugs.c | 17 ++++++++++++++++- 3 files changed, 25 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index a9b98a4e8789..f405281bb202 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -4241,9 +4241,16 @@ per thread. The mitigation control state is inherited on fork. + seccomp + - Same as "prctl" above, but all seccomp + threads will enable the mitigation unless + they explicitly opt out. + auto - Kernel selects the mitigation depending on the available CPU features and vulnerability. - Default is prctl. + + Default mitigation: + If CONFIG_SECCOMP=y then "seccomp", otherwise "prctl" Not specifying this option is equivalent to spectre_v2_user=auto. diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 2adbe7b047fa..032b6009baab 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -233,6 +233,7 @@ enum spectre_v2_user_mitigation { SPECTRE_V2_USER_NONE, SPECTRE_V2_USER_STRICT, SPECTRE_V2_USER_PRCTL, + SPECTRE_V2_USER_SECCOMP, }; /* The Speculative Store Bypass disable variants */ diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index d0137d10f9a6..c9e304960534 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -256,12 +256,14 @@ enum spectre_v2_user_cmd { SPECTRE_V2_USER_CMD_AUTO, SPECTRE_V2_USER_CMD_FORCE, SPECTRE_V2_USER_CMD_PRCTL, + SPECTRE_V2_USER_CMD_SECCOMP, }; static const char * const spectre_v2_user_strings[] = { [SPECTRE_V2_USER_NONE] = "User space: Vulnerable", [SPECTRE_V2_USER_STRICT] = "User space: Mitigation: STIBP protection", [SPECTRE_V2_USER_PRCTL] = "User space: Mitigation: STIBP via prctl", + [SPECTRE_V2_USER_SECCOMP] = "User space: Mitigation: STIBP via seccomp and prctl", }; static const struct { @@ -273,6 +275,7 @@ static const struct { { "off", SPECTRE_V2_USER_CMD_NONE, false }, { "on", SPECTRE_V2_USER_CMD_FORCE, true }, { "prctl", SPECTRE_V2_USER_CMD_PRCTL, false }, + { "seccomp", SPECTRE_V2_USER_CMD_SECCOMP, false }, }; static void __init spec_v2_user_print_cond(const char *reason, bool secure) @@ -332,10 +335,16 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd) case SPECTRE_V2_USER_CMD_FORCE: mode = SPECTRE_V2_USER_STRICT; break; - case SPECTRE_V2_USER_CMD_AUTO: case SPECTRE_V2_USER_CMD_PRCTL: mode = SPECTRE_V2_USER_PRCTL; break; + case SPECTRE_V2_USER_CMD_AUTO: + case SPECTRE_V2_USER_CMD_SECCOMP: + if (IS_ENABLED(CONFIG_SECCOMP)) + mode = SPECTRE_V2_USER_SECCOMP; + else + mode = SPECTRE_V2_USER_PRCTL; + break; } /* Initialize Indirect Branch Prediction Barrier */ @@ -347,6 +356,7 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd) static_branch_enable(&switch_mm_always_ibpb); break; case 
SPECTRE_V2_USER_PRCTL:
+	case SPECTRE_V2_USER_SECCOMP:
 		static_branch_enable(&switch_mm_cond_ibpb);
 		break;
 	default:
@@ -591,6 +601,7 @@ void arch_smt_update(void)
 		update_stibp_strict();
 		break;
 	case SPECTRE_V2_USER_PRCTL:
+	case SPECTRE_V2_USER_SECCOMP:
 		update_indir_branch_cond();
 		break;
 	}
 
@@ -833,6 +844,8 @@ void arch_seccomp_spec_mitigate(struct task_struct *task)
 {
 	if (ssb_mode == SPEC_STORE_BYPASS_SECCOMP)
 		ssb_prctl_set(task, PR_SPEC_FORCE_DISABLE);
+	if (spectre_v2_user == SPECTRE_V2_USER_SECCOMP)
+		ib_prctl_set(task, PR_SPEC_FORCE_DISABLE);
 }
 #endif
 
@@ -864,6 +877,7 @@ static int ib_prctl_get(struct task_struct *task)
 	case SPECTRE_V2_USER_NONE:
 		return PR_SPEC_ENABLE;
 	case SPECTRE_V2_USER_PRCTL:
+	case SPECTRE_V2_USER_SECCOMP:
 		if (task_spec_ib_force_disable(task))
 			return PR_SPEC_PRCTL | PR_SPEC_FORCE_DISABLE;
 		if (task_spec_ib_disable(task))
@@ -1063,6 +1077,7 @@ static char *stibp_state(void)
 	case SPECTRE_V2_USER_STRICT:
 		return ", STIBP: forced";
 	case SPECTRE_V2_USER_PRCTL:
+	case SPECTRE_V2_USER_SECCOMP:
 		if (static_key_enabled(&switch_to_cond_stibp))
 			return ", STIBP: conditional";
 	}
-- cgit v1.2.3-70-g09d2

From 55a974021ec952ee460dc31ca08722158639de72 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner
Date: Sun, 25 Nov 2018 19:33:56 +0100
Subject: x86/speculation: Provide IBPB always command line options

Provide the possibility to enable IBPB always in combination with
'prctl' and 'seccomp'.

Add the extra command line options and rework the IBPB selection to
evaluate the command instead of the mode selected by the STIBP switch
case.

Signed-off-by: Thomas Gleixner
Reviewed-by: Ingo Molnar
Cc: Peter Zijlstra
Cc: Andy Lutomirski
Cc: Linus Torvalds
Cc: Jiri Kosina
Cc: Tom Lendacky
Cc: Josh Poimboeuf
Cc: Andrea Arcangeli
Cc: David Woodhouse
Cc: Tim Chen
Cc: Andi Kleen
Cc: Dave Hansen
Cc: Casey Schaufler
Cc: Asit Mallick
Cc: Arjan van de Ven
Cc: Jon Masters
Cc: Waiman Long
Cc: Greg KH
Cc: Dave Stewart
Cc: Kees Cook
Cc: stable@vger.kernel.org
Link: https://lkml.kernel.org/r/20181125185006.144047038@linutronix.de
---
 Documentation/admin-guide/kernel-parameters.txt | 12 +++++++++
 arch/x86/kernel/cpu/bugs.c | 34 +++++++++++++++++--------
 2 files changed, 35 insertions(+), 11 deletions(-)

(limited to 'arch')

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index f405281bb202..05a252e5178d 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -4241,11 +4241,23 @@
 			per thread. The mitigation control state
 			is inherited on fork.
 
+			prctl,ibpb
+			- Like "prctl" above, but only STIBP is
+			  controlled per thread. IBPB is issued
+			  always when switching between different user
+			  space processes.
+
 			seccomp
 			- Same as "prctl" above, but all seccomp
 			  threads will enable the mitigation unless
 			  they explicitly opt out.
 
+			seccomp,ibpb
+			- Like "seccomp" above, but only STIBP is
+			  controlled per thread. IBPB is issued
+			  always when switching between different
+			  user space processes.
+
 			auto
 			- Kernel selects the mitigation depending on
 			  the available CPU features and vulnerability.
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index c9e304960534..500278f5308e 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -256,7 +256,9 @@ enum spectre_v2_user_cmd { SPECTRE_V2_USER_CMD_AUTO, SPECTRE_V2_USER_CMD_FORCE, SPECTRE_V2_USER_CMD_PRCTL, + SPECTRE_V2_USER_CMD_PRCTL_IBPB, SPECTRE_V2_USER_CMD_SECCOMP, + SPECTRE_V2_USER_CMD_SECCOMP_IBPB, }; static const char * const spectre_v2_user_strings[] = { @@ -271,11 +273,13 @@ static const struct { enum spectre_v2_user_cmd cmd; bool secure; } v2_user_options[] __initdata = { - { "auto", SPECTRE_V2_USER_CMD_AUTO, false }, - { "off", SPECTRE_V2_USER_CMD_NONE, false }, - { "on", SPECTRE_V2_USER_CMD_FORCE, true }, - { "prctl", SPECTRE_V2_USER_CMD_PRCTL, false }, - { "seccomp", SPECTRE_V2_USER_CMD_SECCOMP, false }, + { "auto", SPECTRE_V2_USER_CMD_AUTO, false }, + { "off", SPECTRE_V2_USER_CMD_NONE, false }, + { "on", SPECTRE_V2_USER_CMD_FORCE, true }, + { "prctl", SPECTRE_V2_USER_CMD_PRCTL, false }, + { "prctl,ibpb", SPECTRE_V2_USER_CMD_PRCTL_IBPB, false }, + { "seccomp", SPECTRE_V2_USER_CMD_SECCOMP, false }, + { "seccomp,ibpb", SPECTRE_V2_USER_CMD_SECCOMP_IBPB, false }, }; static void __init spec_v2_user_print_cond(const char *reason, bool secure) @@ -321,6 +325,7 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd) { enum spectre_v2_user_mitigation mode = SPECTRE_V2_USER_NONE; bool smt_possible = IS_ENABLED(CONFIG_SMP); + enum spectre_v2_user_cmd cmd; if (!boot_cpu_has(X86_FEATURE_IBPB) && !boot_cpu_has(X86_FEATURE_STIBP)) return; @@ -329,17 +334,20 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd) cpu_smt_control == CPU_SMT_NOT_SUPPORTED) smt_possible = false; - switch (spectre_v2_parse_user_cmdline(v2_cmd)) { + cmd = spectre_v2_parse_user_cmdline(v2_cmd); + switch (cmd) { case SPECTRE_V2_USER_CMD_NONE: goto set_mode; case SPECTRE_V2_USER_CMD_FORCE: mode = SPECTRE_V2_USER_STRICT; break; case SPECTRE_V2_USER_CMD_PRCTL: + case SPECTRE_V2_USER_CMD_PRCTL_IBPB: mode = SPECTRE_V2_USER_PRCTL; break; case SPECTRE_V2_USER_CMD_AUTO: case SPECTRE_V2_USER_CMD_SECCOMP: + case SPECTRE_V2_USER_CMD_SECCOMP_IBPB: if (IS_ENABLED(CONFIG_SECCOMP)) mode = SPECTRE_V2_USER_SECCOMP; else @@ -351,12 +359,15 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd) if (boot_cpu_has(X86_FEATURE_IBPB)) { setup_force_cpu_cap(X86_FEATURE_USE_IBPB); - switch (mode) { - case SPECTRE_V2_USER_STRICT: + switch (cmd) { + case SPECTRE_V2_USER_CMD_FORCE: + case SPECTRE_V2_USER_CMD_PRCTL_IBPB: + case SPECTRE_V2_USER_CMD_SECCOMP_IBPB: static_branch_enable(&switch_mm_always_ibpb); break; - case SPECTRE_V2_USER_PRCTL: - case SPECTRE_V2_USER_SECCOMP: + case SPECTRE_V2_USER_CMD_PRCTL: + case SPECTRE_V2_USER_CMD_AUTO: + case SPECTRE_V2_USER_CMD_SECCOMP: static_branch_enable(&switch_mm_cond_ibpb); break; default: @@ -364,7 +375,8 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd) } pr_info("mitigation: Enabling %s Indirect Branch Prediction Barrier\n", - mode == SPECTRE_V2_USER_STRICT ? "always-on" : "conditional"); + static_key_enabled(&switch_mm_always_ibpb) ? 
+ "always-on" : "conditional"); } /* If enhanced IBRS is enabled no STIPB required */ -- cgit v1.2.3-70-g09d2 From ce8c80c536dac9f325a051b30bf7730ee505eddc Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Mon, 19 Nov 2018 11:27:28 +0000 Subject: arm64: Add workaround for Cortex-A76 erratum 1286807 On the affected Cortex-A76 cores (r0p0 to r3p0), if a virtual address for a cacheable mapping of a location is being accessed by a core while another core is remapping the virtual address to a new physical page using the recommended break-before-make sequence, then under very rare circumstances TLBI+DSB completes before a read using the translation being invalidated has been observed by other observers. The workaround repeats the TLBI+DSB operation and is shared with the Qualcomm Falkor erratum 1009 Reviewed-by: Suzuki K Poulose Signed-off-by: Catalin Marinas --- Documentation/arm64/silicon-errata.txt | 1 + arch/arm64/Kconfig | 25 +++++++++++++++++++++++++ arch/arm64/include/asm/tlbflush.h | 4 ++-- arch/arm64/kernel/cpu_errata.c | 20 +++++++++++++++++--- 4 files changed, 45 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/Documentation/arm64/silicon-errata.txt b/Documentation/arm64/silicon-errata.txt index 76ccded8b74c..8f9577621144 100644 --- a/Documentation/arm64/silicon-errata.txt +++ b/Documentation/arm64/silicon-errata.txt @@ -57,6 +57,7 @@ stable kernels. | ARM | Cortex-A73 | #858921 | ARM64_ERRATUM_858921 | | ARM | Cortex-A55 | #1024718 | ARM64_ERRATUM_1024718 | | ARM | Cortex-A76 | #1188873 | ARM64_ERRATUM_1188873 | +| ARM | Cortex-A76 | #1286807 | ARM64_ERRATUM_1286807 | | ARM | MMU-500 | #841119,#826419 | N/A | | | | | | | Cavium | ThunderX ITS | #22375, #24313 | CAVIUM_ERRATUM_22375 | diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 787d7850e064..ea2ab0330e3a 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -497,6 +497,24 @@ config ARM64_ERRATUM_1188873 If unsure, say Y. +config ARM64_ERRATUM_1286807 + bool "Cortex-A76: Modification of the translation table for a virtual address might lead to read-after-read ordering violation" + default y + select ARM64_WORKAROUND_REPEAT_TLBI + help + This option adds workaround for ARM Cortex-A76 erratum 1286807 + + On the affected Cortex-A76 cores (r0p0 to r3p0), if a virtual + address for a cacheable mapping of a location is being + accessed by a core while another core is remapping the virtual + address to a new physical page using the recommended + break-before-make sequence, then under very rare circumstances + TLBI+DSB completes before a read using the translation being + invalidated has been observed by other observers. The + workaround repeats the TLBI+DSB operation. + + If unsure, say Y. + config CAVIUM_ERRATUM_22375 bool "Cavium erratum 22375, 24313" default y @@ -566,9 +584,16 @@ config QCOM_FALKOR_ERRATUM_1003 is unchanged. Work around the erratum by invalidating the walk cache entries for the trampoline before entering the kernel proper. +config ARM64_WORKAROUND_REPEAT_TLBI + bool + help + Enable the repeat TLBI workaround for Falkor erratum 1009 and + Cortex-A76 erratum 1286807. + config QCOM_FALKOR_ERRATUM_1009 bool "Falkor E1009: Prematurely complete a DSB after a TLBI" default y + select ARM64_WORKAROUND_REPEAT_TLBI help On Falkor v1, the CPU may prematurely complete a DSB following a TLBI xxIS invalidate maintenance operation. 
Repeat the TLBI operation diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h index c3c0387aee18..5dfd23897dea 100644 --- a/arch/arm64/include/asm/tlbflush.h +++ b/arch/arm64/include/asm/tlbflush.h @@ -41,14 +41,14 @@ ALTERNATIVE("nop\n nop", \ "dsb ish\n tlbi " #op, \ ARM64_WORKAROUND_REPEAT_TLBI, \ - CONFIG_QCOM_FALKOR_ERRATUM_1009) \ + CONFIG_ARM64_WORKAROUND_REPEAT_TLBI) \ : : ) #define __TLBI_1(op, arg) asm ("tlbi " #op ", %0\n" \ ALTERNATIVE("nop\n nop", \ "dsb ish\n tlbi " #op ", %0", \ ARM64_WORKAROUND_REPEAT_TLBI, \ - CONFIG_QCOM_FALKOR_ERRATUM_1009) \ + CONFIG_ARM64_WORKAROUND_REPEAT_TLBI) \ : : "r" (arg)) #define __TLBI_N(op, arg, n, ...) __TLBI_##n(op, arg) diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index a509e35132d2..6ad715d67df8 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -570,6 +570,20 @@ static const struct midr_range arm64_harden_el2_vectors[] = { #endif +#ifdef CONFIG_ARM64_WORKAROUND_REPEAT_TLBI + +static const struct midr_range arm64_repeat_tlbi_cpus[] = { +#ifdef CONFIG_QCOM_FALKOR_ERRATUM_1009 + MIDR_RANGE(MIDR_QCOM_FALKOR_V1, 0, 0, 0, 0), +#endif +#ifdef CONFIG_ARM64_ERRATUM_1286807 + MIDR_RANGE(MIDR_CORTEX_A76, 0, 0, 3, 0), +#endif + {}, +}; + +#endif + const struct arm64_cpu_capabilities arm64_errata[] = { #if defined(CONFIG_ARM64_ERRATUM_826319) || \ defined(CONFIG_ARM64_ERRATUM_827319) || \ @@ -695,11 +709,11 @@ const struct arm64_cpu_capabilities arm64_errata[] = { .matches = is_kryo_midr, }, #endif -#ifdef CONFIG_QCOM_FALKOR_ERRATUM_1009 +#ifdef CONFIG_ARM64_WORKAROUND_REPEAT_TLBI { - .desc = "Qualcomm Technologies Falkor erratum 1009", + .desc = "Qualcomm erratum 1009, ARM erratum 1286807", .capability = ARM64_WORKAROUND_REPEAT_TLBI, - ERRATA_MIDR_REV(MIDR_QCOM_FALKOR_V1, 0, 0), + ERRATA_MIDR_RANGE_LIST(arm64_repeat_tlbi_cpus), }, #endif #ifdef CONFIG_ARM64_ERRATUM_858921 -- cgit v1.2.3-70-g09d2 From 874bfc6e5422d2421f7e4d5ea318d30e91679dfe Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 29 Nov 2018 14:39:33 +0900 Subject: arm64: ftrace: Fix to enable syscall events on arm64 Since commit 4378a7d4be30 ("arm64: implement syscall wrappers") introduced "__arm64_" prefix to all syscall wrapper symbols in sys_call_table, syscall tracer can not find corresponding metadata from syscall name. In the result, we have no syscall ftrace events on arm64 kernel, and some bpf testcases are failed on arm64. To fix this issue, this introduces custom arch_syscall_match_sym_name() which skips first 8 bytes when comparing the syscall and symbol names. Fixes: 4378a7d4be30 ("arm64: implement syscall wrappers") Reported-by: Naresh Kamboju Signed-off-by: Masami Hiramatsu Acked-by: Will Deacon Tested-by: Naresh Kamboju Cc: stable@vger.kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/ftrace.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'arch') diff --git a/arch/arm64/include/asm/ftrace.h b/arch/arm64/include/asm/ftrace.h index caa955f10e19..fac54fb050d0 100644 --- a/arch/arm64/include/asm/ftrace.h +++ b/arch/arm64/include/asm/ftrace.h @@ -56,6 +56,19 @@ static inline bool arch_trace_is_compat_syscall(struct pt_regs *regs) { return is_compat_task(); } + +#define ARCH_HAS_SYSCALL_MATCH_SYM_NAME + +static inline bool arch_syscall_match_sym_name(const char *sym, + const char *name) +{ + /* + * Since all syscall functions have __arm64_ prefix, we must skip it. 
+ * However, as we described above, we decided to ignore compat + * syscalls, so we don't care about __arm64_compat_ prefix here. + */ + return !strcmp(sym + 8, name); +} #endif /* ifndef __ASSEMBLY__ */ #endif /* __ASM_FTRACE_H */ -- cgit v1.2.3-70-g09d2 From a7b403104e17209ea71eea59d4a71bf9e0d8cb83 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Fri, 23 Nov 2018 17:24:51 +0100 Subject: xen/x86: add diagnostic printout to xen_mc_flush() in case of error Failure of an element of a Xen multicall is signalled via a WARN() only if the kernel is compiled with MC_DEBUG. It is impossible to know which element failed and why it did so. Change that by printing the related information even without MC_DEBUG, even if maybe in some limited form (e.g. without information which caller produced the failing element). Move the printing out of the switch statement in order to have the same information for a single call. Signed-off-by: Juergen Gross Reviewed-by: Boris Ostrovsky Signed-off-by: Juergen Gross --- arch/x86/xen/multicalls.c | 35 ++++++++++++++++++++--------------- 1 file changed, 20 insertions(+), 15 deletions(-) (limited to 'arch') diff --git a/arch/x86/xen/multicalls.c b/arch/x86/xen/multicalls.c index 2bce7958ce8b..0766a08bdf45 100644 --- a/arch/x86/xen/multicalls.c +++ b/arch/x86/xen/multicalls.c @@ -69,6 +69,11 @@ void xen_mc_flush(void) trace_xen_mc_flush(b->mcidx, b->argidx, b->cbidx); +#if MC_DEBUG + memcpy(b->debug, b->entries, + b->mcidx * sizeof(struct multicall_entry)); +#endif + switch (b->mcidx) { case 0: /* no-op */ @@ -87,32 +92,34 @@ void xen_mc_flush(void) break; default: -#if MC_DEBUG - memcpy(b->debug, b->entries, - b->mcidx * sizeof(struct multicall_entry)); -#endif - if (HYPERVISOR_multicall(b->entries, b->mcidx) != 0) BUG(); for (i = 0; i < b->mcidx; i++) if (b->entries[i].result < 0) ret++; + } + if (WARN_ON(ret)) { + pr_err("%d of %d multicall(s) failed: cpu %d\n", + ret, b->mcidx, smp_processor_id()); + for (i = 0; i < b->mcidx; i++) { + if (b->entries[i].result < 0) { #if MC_DEBUG - if (ret) { - printk(KERN_ERR "%d multicall(s) failed: cpu %d\n", - ret, smp_processor_id()); - dump_stack(); - for (i = 0; i < b->mcidx; i++) { - printk(KERN_DEBUG " call %2d/%d: op=%lu arg=[%lx] result=%ld\t%pF\n", - i+1, b->mcidx, + pr_err(" call %2d: op=%lu arg=[%lx] result=%ld\t%pF\n", + i + 1, b->debug[i].op, b->debug[i].args[0], b->entries[i].result, b->caller[i]); +#else + pr_err(" call %2d: op=%lu arg=[%lx] result=%ld\n", + i + 1, + b->entries[i].op, + b->entries[i].args[0], + b->entries[i].result); +#endif } } -#endif } b->mcidx = 0; @@ -126,8 +133,6 @@ void xen_mc_flush(void) b->cbidx = 0; local_irq_restore(flags); - - WARN_ON(ret); } struct multicall_space __xen_mc_entry(size_t args) -- cgit v1.2.3-70-g09d2 From 123664101aa2156d05251704fc63f9bcbf77741a Mon Sep 17 00:00:00 2001 From: Igor Druzhinin Date: Tue, 27 Nov 2018 20:58:21 +0000 Subject: Revert "xen/balloon: Mark unallocated host memory as UNUSABLE" This reverts commit b3cf8528bb21febb650a7ecbf080d0647be40b9f. That commit unintentionally broke Xen balloon memory hotplug with "hotplug_unpopulated" set to 1. As long as "System RAM" resource got assigned under a new "Unusable memory" resource in IO/Mem tree any attempt to online this memory would fail due to general kernel restrictions on having "System RAM" resources as 1st level only. 
The original issue that commit has tried to workaround fa564ad96366 ("x86/PCI: Enable a 64bit BAR on AMD Family 15h (Models 00-1f, 30-3f, 60-7f)") also got amended by the following 03a551734 ("x86/PCI: Move and shrink AMD 64-bit window to avoid conflict") which made the original fix to Xen ballooning unnecessary. Signed-off-by: Igor Druzhinin Reviewed-by: Boris Ostrovsky Signed-off-by: Juergen Gross --- arch/x86/xen/enlighten.c | 78 ------------------------------------------------ arch/x86/xen/setup.c | 6 ++-- drivers/xen/balloon.c | 65 ++++++---------------------------------- include/xen/balloon.h | 5 ---- 4 files changed, 13 insertions(+), 141 deletions(-) (limited to 'arch') diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 67b2f31a1265..aa1cc483bd2a 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -10,7 +10,6 @@ #include #include #include -#include #include #include @@ -346,80 +345,3 @@ void xen_arch_unregister_cpu(int num) } EXPORT_SYMBOL(xen_arch_unregister_cpu); #endif - -#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG -void __init arch_xen_balloon_init(struct resource *hostmem_resource) -{ - struct xen_memory_map memmap; - int rc; - unsigned int i, last_guest_ram; - phys_addr_t max_addr = PFN_PHYS(max_pfn); - struct e820_table *xen_e820_table; - const struct e820_entry *entry; - struct resource *res; - - if (!xen_initial_domain()) - return; - - xen_e820_table = kmalloc(sizeof(*xen_e820_table), GFP_KERNEL); - if (!xen_e820_table) - return; - - memmap.nr_entries = ARRAY_SIZE(xen_e820_table->entries); - set_xen_guest_handle(memmap.buffer, xen_e820_table->entries); - rc = HYPERVISOR_memory_op(XENMEM_machine_memory_map, &memmap); - if (rc) { - pr_warn("%s: Can't read host e820 (%d)\n", __func__, rc); - goto out; - } - - last_guest_ram = 0; - for (i = 0; i < memmap.nr_entries; i++) { - if (xen_e820_table->entries[i].addr >= max_addr) - break; - if (xen_e820_table->entries[i].type == E820_TYPE_RAM) - last_guest_ram = i; - } - - entry = &xen_e820_table->entries[last_guest_ram]; - if (max_addr >= entry->addr + entry->size) - goto out; /* No unallocated host RAM. */ - - hostmem_resource->start = max_addr; - hostmem_resource->end = entry->addr + entry->size; - - /* - * Mark non-RAM regions between the end of dom0 RAM and end of host RAM - * as unavailable. The rest of that region can be used for hotplug-based - * ballooning. - */ - for (; i < memmap.nr_entries; i++) { - entry = &xen_e820_table->entries[i]; - - if (entry->type == E820_TYPE_RAM) - continue; - - if (entry->addr >= hostmem_resource->end) - break; - - res = kzalloc(sizeof(*res), GFP_KERNEL); - if (!res) - goto out; - - res->name = "Unavailable host RAM"; - res->start = entry->addr; - res->end = (entry->addr + entry->size < hostmem_resource->end) ? 
- entry->addr + entry->size : hostmem_resource->end; - rc = insert_resource(hostmem_resource, res); - if (rc) { - pr_warn("%s: Can't insert [%llx - %llx) (%d)\n", - __func__, res->start, res->end, rc); - kfree(res); - goto out; - } - } - - out: - kfree(xen_e820_table); -} -#endif /* CONFIG_XEN_BALLOON_MEMORY_HOTPLUG */ diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index 1163e33121fb..075ed47993bb 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -808,6 +808,7 @@ char * __init xen_memory_setup(void) addr = xen_e820_table.entries[0].addr; size = xen_e820_table.entries[0].size; while (i < xen_e820_table.nr_entries) { + bool discard = false; chunk_size = size; type = xen_e820_table.entries[i].type; @@ -823,10 +824,11 @@ char * __init xen_memory_setup(void) xen_add_extra_mem(pfn_s, n_pfns); xen_max_p2m_pfn = pfn_s + n_pfns; } else - type = E820_TYPE_UNUSABLE; + discard = true; } - xen_align_and_add_e820_region(addr, chunk_size, type); + if (!discard) + xen_align_and_add_e820_region(addr, chunk_size, type); addr += chunk_size; size -= chunk_size; diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c index e12bb256036f..7ab6caef599c 100644 --- a/drivers/xen/balloon.c +++ b/drivers/xen/balloon.c @@ -251,25 +251,10 @@ static void release_memory_resource(struct resource *resource) kfree(resource); } -/* - * Host memory not allocated to dom0. We can use this range for hotplug-based - * ballooning. - * - * It's a type-less resource. Setting IORESOURCE_MEM will make resource - * management algorithms (arch_remove_reservations()) look into guest e820, - * which we don't want. - */ -static struct resource hostmem_resource = { - .name = "Host RAM", -}; - -void __attribute__((weak)) __init arch_xen_balloon_init(struct resource *res) -{} - static struct resource *additional_memory_resource(phys_addr_t size) { - struct resource *res, *res_hostmem; - int ret = -ENOMEM; + struct resource *res; + int ret; res = kzalloc(sizeof(*res), GFP_KERNEL); if (!res) @@ -278,42 +263,13 @@ static struct resource *additional_memory_resource(phys_addr_t size) res->name = "System RAM"; res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; - res_hostmem = kzalloc(sizeof(*res), GFP_KERNEL); - if (res_hostmem) { - /* Try to grab a range from hostmem */ - res_hostmem->name = "Host memory"; - ret = allocate_resource(&hostmem_resource, res_hostmem, - size, 0, -1, - PAGES_PER_SECTION * PAGE_SIZE, NULL, NULL); - } - - if (!ret) { - /* - * Insert this resource into iomem. Because hostmem_resource - * tracks portion of guest e820 marked as UNUSABLE noone else - * should try to use it. 
- */ - res->start = res_hostmem->start; - res->end = res_hostmem->end; - ret = insert_resource(&iomem_resource, res); - if (ret < 0) { - pr_err("Can't insert iomem_resource [%llx - %llx]\n", - res->start, res->end); - release_memory_resource(res_hostmem); - res_hostmem = NULL; - res->start = res->end = 0; - } - } - - if (ret) { - ret = allocate_resource(&iomem_resource, res, - size, 0, -1, - PAGES_PER_SECTION * PAGE_SIZE, NULL, NULL); - if (ret < 0) { - pr_err("Cannot allocate new System RAM resource\n"); - kfree(res); - return NULL; - } + ret = allocate_resource(&iomem_resource, res, + size, 0, -1, + PAGES_PER_SECTION * PAGE_SIZE, NULL, NULL); + if (ret < 0) { + pr_err("Cannot allocate new System RAM resource\n"); + kfree(res); + return NULL; } #ifdef CONFIG_SPARSEMEM @@ -325,7 +281,6 @@ static struct resource *additional_memory_resource(phys_addr_t size) pr_err("New System RAM resource outside addressable RAM (%lu > %lu)\n", pfn, limit); release_memory_resource(res); - release_memory_resource(res_hostmem); return NULL; } } @@ -747,8 +702,6 @@ static int __init balloon_init(void) set_online_page_callback(&xen_online_page); register_memory_notifier(&xen_memory_nb); register_sysctl_table(xen_root); - - arch_xen_balloon_init(&hostmem_resource); #endif #ifdef CONFIG_XEN_PV diff --git a/include/xen/balloon.h b/include/xen/balloon.h index 61f410fd74e4..4914b93a23f2 100644 --- a/include/xen/balloon.h +++ b/include/xen/balloon.h @@ -44,8 +44,3 @@ static inline void xen_balloon_init(void) { } #endif - -#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG -struct resource; -void arch_xen_balloon_init(struct resource *hostmem_resource); -#endif -- cgit v1.2.3-70-g09d2 From b84a64fad40637b1c9fa4f4dbf847a23e29e672b Mon Sep 17 00:00:00 2001 From: Eric Snowberg Date: Thu, 29 Nov 2018 18:12:20 +0100 Subject: x86/efi: Allocate e820 buffer before calling efi_exit_boot_service The following commit: d64934019f6c ("x86/efi: Use efi_exit_boot_services()") introduced a regression on systems with large memory maps causing them to hang on boot. The first "goto get_map" that was removed from exit_boot() ensured there was enough room for the memory map when efi_call_early(exit_boot_services) was called. This happens when (nr_desc > ARRAY_SIZE(params->e820_table). Chain of events: exit_boot() efi_exit_boot_services() efi_get_memory_map <- at this point the mm can't grow over 8 desc priv_func() exit_boot_func() allocate_e820ext() <- new mm grows over 8 desc from e820 alloc efi_call_early(exit_boot_services) <- mm key doesn't match so retry efi_call_early(get_memory_map) <- not enough room for new mm system hangs This patch allocates the e820 buffer before calling efi_exit_boot_services() and fixes the regression. [ mingo: minor cleanliness edits. 
] Signed-off-by: Eric Snowberg Signed-off-by: Ard Biesheuvel Cc: Cc: Andy Lutomirski Cc: Arend van Spriel Cc: Bhupesh Sharma Cc: Borislav Petkov Cc: Dave Hansen Cc: Hans de Goede Cc: Joe Perches Cc: Jon Hunter Cc: Julien Thierry Cc: Linus Torvalds Cc: Marc Zyngier Cc: Matt Fleming Cc: Nathan Chancellor Cc: Peter Zijlstra Cc: Sai Praneeth Prakhya Cc: Sedat Dilek Cc: Thomas Gleixner Cc: YiFei Zhu Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/20181129171230.18699-2-ard.biesheuvel@linaro.org Signed-off-by: Ingo Molnar --- arch/x86/boot/compressed/eboot.c | 65 +++++++++++++++++++++++++--------------- 1 file changed, 41 insertions(+), 24 deletions(-) (limited to 'arch') diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c index 8b4c5e001157..544ac4fafd11 100644 --- a/arch/x86/boot/compressed/eboot.c +++ b/arch/x86/boot/compressed/eboot.c @@ -1,3 +1,4 @@ + /* ----------------------------------------------------------------------- * * Copyright 2011 Intel Corporation; author Matt Fleming @@ -634,37 +635,54 @@ static efi_status_t alloc_e820ext(u32 nr_desc, struct setup_data **e820ext, return status; } +static efi_status_t allocate_e820(struct boot_params *params, + struct setup_data **e820ext, + u32 *e820ext_size) +{ + unsigned long map_size, desc_size, buff_size; + struct efi_boot_memmap boot_map; + efi_memory_desc_t *map; + efi_status_t status; + __u32 nr_desc; + + boot_map.map = ↦ + boot_map.map_size = &map_size; + boot_map.desc_size = &desc_size; + boot_map.desc_ver = NULL; + boot_map.key_ptr = NULL; + boot_map.buff_size = &buff_size; + + status = efi_get_memory_map(sys_table, &boot_map); + if (status != EFI_SUCCESS) + return status; + + nr_desc = buff_size / desc_size; + + if (nr_desc > ARRAY_SIZE(params->e820_table)) { + u32 nr_e820ext = nr_desc - ARRAY_SIZE(params->e820_table); + + status = alloc_e820ext(nr_e820ext, e820ext, e820ext_size); + if (status != EFI_SUCCESS) + return status; + } + + return EFI_SUCCESS; +} + struct exit_boot_struct { struct boot_params *boot_params; struct efi_info *efi; - struct setup_data *e820ext; - __u32 e820ext_size; }; static efi_status_t exit_boot_func(efi_system_table_t *sys_table_arg, struct efi_boot_memmap *map, void *priv) { - static bool first = true; const char *signature; __u32 nr_desc; efi_status_t status; struct exit_boot_struct *p = priv; - if (first) { - nr_desc = *map->buff_size / *map->desc_size; - if (nr_desc > ARRAY_SIZE(p->boot_params->e820_table)) { - u32 nr_e820ext = nr_desc - - ARRAY_SIZE(p->boot_params->e820_table); - - status = alloc_e820ext(nr_e820ext, &p->e820ext, - &p->e820ext_size); - if (status != EFI_SUCCESS) - return status; - } - first = false; - } - signature = efi_is_64bit() ? 
EFI64_LOADER_SIGNATURE : EFI32_LOADER_SIGNATURE; memcpy(&p->efi->efi_loader_signature, signature, sizeof(__u32)); @@ -687,8 +705,8 @@ static efi_status_t exit_boot(struct boot_params *boot_params, void *handle) { unsigned long map_sz, key, desc_size, buff_size; efi_memory_desc_t *mem_map; - struct setup_data *e820ext; - __u32 e820ext_size; + struct setup_data *e820ext = NULL; + __u32 e820ext_size = 0; efi_status_t status; __u32 desc_version; struct efi_boot_memmap map; @@ -702,8 +720,10 @@ static efi_status_t exit_boot(struct boot_params *boot_params, void *handle) map.buff_size = &buff_size; priv.boot_params = boot_params; priv.efi = &boot_params->efi_info; - priv.e820ext = NULL; - priv.e820ext_size = 0; + + status = allocate_e820(boot_params, &e820ext, &e820ext_size); + if (status != EFI_SUCCESS) + return status; /* Might as well exit boot services now */ status = efi_exit_boot_services(sys_table, handle, &map, &priv, @@ -711,9 +731,6 @@ static efi_status_t exit_boot(struct boot_params *boot_params, void *handle) if (status != EFI_SUCCESS) return status; - e820ext = priv.e820ext; - e820ext_size = priv.e820ext_size; - /* Historic? */ boot_params->alt_mem_k = 32 * 1024; -- cgit v1.2.3-70-g09d2 From 79c2206d369b87b19ac29cb47601059b6bf5c291 Mon Sep 17 00:00:00 2001 From: YiFei Zhu Date: Thu, 29 Nov 2018 18:12:30 +0100 Subject: x86/earlyprintk/efi: Fix infinite loop on some screen widths An affected screen resolution is 1366 x 768, which width is not divisible by 8, the default font width. On such screens, when longer lines are earlyprintk'ed, overflow-to-next-line can never trigger, due to the left-most x-coordinate of the next character always less than the screen width. Earlyprintk will infinite loop in trying to print the rest of the string but unable to, due to the line being full. This patch makes the trigger consider the right-most x-coordinate, instead of left-most, as the value to compare against the screen width threshold. Signed-off-by: YiFei Zhu Signed-off-by: Ard Biesheuvel Cc: Andy Lutomirski Cc: Arend van Spriel Cc: Bhupesh Sharma Cc: Borislav Petkov Cc: Dave Hansen Cc: Eric Snowberg Cc: Hans de Goede Cc: Joe Perches Cc: Jon Hunter Cc: Julien Thierry Cc: Linus Torvalds Cc: Marc Zyngier Cc: Matt Fleming Cc: Nathan Chancellor Cc: Peter Zijlstra Cc: Sai Praneeth Prakhya Cc: Sedat Dilek Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/20181129171230.18699-12-ard.biesheuvel@linaro.org Signed-off-by: Ingo Molnar --- arch/x86/platform/efi/early_printk.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/platform/efi/early_printk.c b/arch/x86/platform/efi/early_printk.c index 7476b3b097e1..7138bc7a265c 100644 --- a/arch/x86/platform/efi/early_printk.c +++ b/arch/x86/platform/efi/early_printk.c @@ -183,7 +183,7 @@ early_efi_write(struct console *con, const char *str, unsigned int num) num--; } - if (efi_x >= si->lfb_width) { + if (efi_x + font->width > si->lfb_width) { efi_x = 0; efi_y += font->height; } -- cgit v1.2.3-70-g09d2 From b7cc40c32a8bfa6f2581a71747f6a7d491fe43ba Mon Sep 17 00:00:00 2001 From: Kevin Hilman Date: Fri, 30 Nov 2018 15:51:56 +0300 Subject: ARC: change defconfig defaults to ARCv2 Change the default defconfig (used with 'make defconfig') to the ARCv2 nsim_hs_defconfig, and also switch the default Kconfig ISA selection to ARCv2. This allows several default defconfigs (e.g. make defconfig, make allnoconfig, make tinyconfig) to all work with ARCv2 by default. 
Note since we change default architecture from ARCompact to ARCv2 it's required to explicitly mention architecture type in ARCompact defconfigs otherwise ARCv2 will be implied and binaries will be generated for ARCv2. Cc: # 4.4.x Signed-off-by: Kevin Hilman Signed-off-by: Alexey Brodkin Signed-off-by: Vineet Gupta --- arch/arc/Kconfig | 2 +- arch/arc/Makefile | 2 +- arch/arc/configs/axs101_defconfig | 1 + arch/arc/configs/nps_defconfig | 1 + arch/arc/configs/nsim_700_defconfig | 1 + arch/arc/configs/nsimosci_defconfig | 1 + arch/arc/configs/tb10x_defconfig | 1 + 7 files changed, 7 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index 5fcbda6b37cc..6dd783557330 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -109,7 +109,7 @@ endmenu choice prompt "ARC Instruction Set" - default ISA_ARCOMPACT + default ISA_ARCV2 config ISA_ARCOMPACT bool "ARCompact ISA" diff --git a/arch/arc/Makefile b/arch/arc/Makefile index c64c505d966c..df00578c279d 100644 --- a/arch/arc/Makefile +++ b/arch/arc/Makefile @@ -6,7 +6,7 @@ # published by the Free Software Foundation. # -KBUILD_DEFCONFIG := nsim_700_defconfig +KBUILD_DEFCONFIG := nsim_hs_defconfig cflags-y += -fno-common -pipe -fno-builtin -mmedium-calls -D__linux__ cflags-$(CONFIG_ISA_ARCOMPACT) += -mA7 diff --git a/arch/arc/configs/axs101_defconfig b/arch/arc/configs/axs101_defconfig index 8c23bd086cd0..020d4493edfd 100644 --- a/arch/arc/configs/axs101_defconfig +++ b/arch/arc/configs/axs101_defconfig @@ -14,6 +14,7 @@ CONFIG_PERF_EVENTS=y # CONFIG_VM_EVENT_COUNTERS is not set # CONFIG_SLUB_DEBUG is not set # CONFIG_COMPAT_BRK is not set +CONFIG_ISA_ARCOMPACT=y CONFIG_MODULES=y CONFIG_MODULE_FORCE_LOAD=y CONFIG_MODULE_UNLOAD=y diff --git a/arch/arc/configs/nps_defconfig b/arch/arc/configs/nps_defconfig index ae7a0d8be98d..6e84060e7c90 100644 --- a/arch/arc/configs/nps_defconfig +++ b/arch/arc/configs/nps_defconfig @@ -15,6 +15,7 @@ CONFIG_SYSCTL_SYSCALL=y CONFIG_EMBEDDED=y CONFIG_PERF_EVENTS=y # CONFIG_COMPAT_BRK is not set +CONFIG_ISA_ARCOMPACT=y CONFIG_KPROBES=y CONFIG_MODULES=y CONFIG_MODULE_FORCE_LOAD=y diff --git a/arch/arc/configs/nsim_700_defconfig b/arch/arc/configs/nsim_700_defconfig index 8e0b8b134cd9..219c2a65294b 100644 --- a/arch/arc/configs/nsim_700_defconfig +++ b/arch/arc/configs/nsim_700_defconfig @@ -15,6 +15,7 @@ CONFIG_EMBEDDED=y CONFIG_PERF_EVENTS=y # CONFIG_SLUB_DEBUG is not set # CONFIG_COMPAT_BRK is not set +CONFIG_ISA_ARCOMPACT=y CONFIG_KPROBES=y CONFIG_MODULES=y # CONFIG_LBDAF is not set diff --git a/arch/arc/configs/nsimosci_defconfig b/arch/arc/configs/nsimosci_defconfig index ad77f20e5aa6..35dfc6491a09 100644 --- a/arch/arc/configs/nsimosci_defconfig +++ b/arch/arc/configs/nsimosci_defconfig @@ -15,6 +15,7 @@ CONFIG_EMBEDDED=y CONFIG_PERF_EVENTS=y # CONFIG_SLUB_DEBUG is not set # CONFIG_COMPAT_BRK is not set +CONFIG_ISA_ARCOMPACT=y CONFIG_KPROBES=y CONFIG_MODULES=y # CONFIG_LBDAF is not set diff --git a/arch/arc/configs/tb10x_defconfig b/arch/arc/configs/tb10x_defconfig index a7f65313f84a..e71ade3cf9c8 100644 --- a/arch/arc/configs/tb10x_defconfig +++ b/arch/arc/configs/tb10x_defconfig @@ -19,6 +19,7 @@ CONFIG_KALLSYMS_ALL=y # CONFIG_AIO is not set CONFIG_EMBEDDED=y # CONFIG_COMPAT_BRK is not set +CONFIG_ISA_ARCOMPACT=y CONFIG_SLAB=y CONFIG_MODULES=y CONFIG_MODULE_FORCE_LOAD=y -- cgit v1.2.3-70-g09d2 From 10d443431dc2bb733cf7add99b453e3fb9047a2e Mon Sep 17 00:00:00 2001 From: Jose Abreu Date: Fri, 30 Nov 2018 09:47:31 +0000 Subject: ARC: io.h: Implement 
reads{x}()/writes{x}()

Some ARC CPUs do not support unaligned loads/stores. Currently, the
generic implementation of reads{b/w/l}()/writes{b/w/l}() is being used
with ARC. This can lead to malfunction of some drivers, as the generic
functions do a plain dereference of a pointer that can be unaligned.

Let's use the {get/put}_unaligned() helpers instead of a plain pointer
dereference to fix this. The helpers allow getting and storing data
at an unaligned address whilst preserving the CPU internal alignment.
According to [1], the use of these helpers is costly in terms of
performance, so we added an initial check for an already aligned buffer
so that the use of the helpers can be avoided, when possible.

[1] Documentation/unaligned-memory-access.txt

Cc: Alexey Brodkin
Cc: Joao Pinto
Cc: David Laight
Tested-by: Vitor Soares
Signed-off-by: Jose Abreu
Signed-off-by: Vineet Gupta
---
 arch/arc/include/asm/io.h | 72 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 72 insertions(+)

(limited to 'arch')

diff --git a/arch/arc/include/asm/io.h b/arch/arc/include/asm/io.h
index c22b181e8206..2f39d9b3886e 100644
--- a/arch/arc/include/asm/io.h
+++ b/arch/arc/include/asm/io.h
@@ -12,6 +12,7 @@
 #include
 #include
 #include
+#include <asm/unaligned.h>
 
 #ifdef CONFIG_ISA_ARCV2
 #include
@@ -94,6 +95,42 @@ static inline u32 __raw_readl(const volatile void __iomem *addr)
 	return w;
 }
 
+/*
+ * {read,write}s{b,w,l}() repeatedly access the same IO address in
+ * native endianness in 8-, 16-, 32-bit chunks {into,from} memory,
+ * @count times
+ */
+#define __raw_readsx(t,f) \
+static inline void __raw_reads##f(const volatile void __iomem *addr, \
+				  void *ptr, unsigned int count) \
+{ \
+	bool is_aligned = ((unsigned long)ptr % ((t) / 8)) == 0; \
+	u##t *buf = ptr; \
+\
+	if (!count) \
+		return; \
+\
+	/* Some ARC CPU's don't support unaligned accesses */ \
+	if (is_aligned) { \
+		do { \
+			u##t x = __raw_read##f(addr); \
+			*buf++ = x; \
+		} while (--count); \
+	} else { \
+		do { \
+			u##t x = __raw_read##f(addr); \
+			put_unaligned(x, buf++); \
+		} while (--count); \
+	} \
+}
+
+#define __raw_readsb __raw_readsb
+__raw_readsx(8, b)
+#define __raw_readsw __raw_readsw
+__raw_readsx(16, w)
+#define __raw_readsl __raw_readsl
+__raw_readsx(32, l)
+
 #define __raw_writeb __raw_writeb
 static inline void __raw_writeb(u8 b, volatile void __iomem *addr)
 {
@@ -126,6 +163,35 @@ static inline void __raw_writel(u32 w, volatile void __iomem *addr)
 
 }
 
+#define __raw_writesx(t,f) \
+static inline void __raw_writes##f(volatile void __iomem *addr, \
+				   const void *ptr, unsigned int count) \
+{ \
+	bool is_aligned = ((unsigned long)ptr % ((t) / 8)) == 0; \
+	const u##t *buf = ptr; \
+\
+	if (!count) \
+		return; \
+\
+	/* Some ARC CPU's don't support unaligned accesses */ \
+	if (is_aligned) { \
+		do { \
+			__raw_write##f(*buf++, addr); \
+		} while (--count); \
+	} else { \
+		do { \
+			__raw_write##f(get_unaligned(buf++), addr); \
+		} while (--count); \
+	} \
+}
+
+#define __raw_writesb __raw_writesb
+__raw_writesx(8, b)
+#define __raw_writesw __raw_writesw
+__raw_writesx(16, w)
+#define __raw_writesl __raw_writesl
+__raw_writesx(32, l)
+
 /*
  * MMIO can also get buffered/optimized in micro-arch, so barriers needed
  * Based on ARM model for the typical use case
@@ -141,10 +207,16 @@ static inline void __raw_writel(u32 w, volatile void __iomem *addr)
 #define readb(c)		({ u8  __v = readb_relaxed(c); __iormb(); __v; })
 #define readw(c)		({ u16 __v = readw_relaxed(c); __iormb(); __v; })
 #define readl(c)		({ u32 __v = readl_relaxed(c); __iormb(); __v; })
+#define
readsb(p,d,l) ({ __raw_readsb(p,d,l); __iormb(); }) +#define readsw(p,d,l) ({ __raw_readsw(p,d,l); __iormb(); }) +#define readsl(p,d,l) ({ __raw_readsl(p,d,l); __iormb(); }) #define writeb(v,c) ({ __iowmb(); writeb_relaxed(v,c); }) #define writew(v,c) ({ __iowmb(); writew_relaxed(v,c); }) #define writel(v,c) ({ __iowmb(); writel_relaxed(v,c); }) +#define writesb(p,d,l) ({ __iowmb(); __raw_writesb(p,d,l); }) +#define writesw(p,d,l) ({ __iowmb(); __raw_writesw(p,d,l); }) +#define writesl(p,d,l) ({ __iowmb(); __raw_writesl(p,d,l); }) /* * Relaxed API for drivers which can handle barrier ordering themselves -- cgit v1.2.3-70-g09d2 From 1e8249b8a4e960018e4baca6b523b8a4500af600 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Thu, 29 Nov 2018 17:05:47 +0100 Subject: parisc: Enable -ffunction-sections for modules on 32-bit kernel Frank Schreiner reported, that since kernel 4.18 he faces sysfs-warnings when loading modules on a 32-bit kernel. Here is one such example: sysfs: cannot create duplicate filename '/module/nfs/sections/.text' CPU: 0 PID: 98 Comm: modprobe Not tainted 4.18.0-2-parisc #1 Debian 4.18.10-2 Backtrace: [<1017ce2c>] show_stack+0x3c/0x50 [<107a7210>] dump_stack+0x28/0x38 [<103f900c>] sysfs_warn_dup+0x88/0xac [<103f8b1c>] sysfs_add_file_mode_ns+0x164/0x1d0 [<103f9e70>] internal_create_group+0x11c/0x304 [<103fa0a0>] sysfs_create_group+0x48/0x60 [<1022abe8>] load_module.constprop.35+0x1f9c/0x23b8 [<1022b278>] sys_finit_module+0xd0/0x11c [<101831dc>] syscall_exit+0x0/0x14 This warning gets triggered by the fact, that due to commit 24b6c22504a2 ("parisc: Build kernel without -ffunction-sections") we now get multiple .text sections in the kernel modules for which sysfs_create_group() can't create multiple virtual files. This patch works around the problem by re-enabling the -ffunction-sections compiler option for modules, while keeping it disabled for the non-module kernel code. Reported-by: Frank Scheiner Fixes: 24b6c22504a2 ("parisc: Build kernel without -ffunction-sections") Cc: # v4.18+ Signed-off-by: Helge Deller --- arch/parisc/Makefile | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'arch') diff --git a/arch/parisc/Makefile b/arch/parisc/Makefile index d047a09d660f..1085385e1f06 100644 --- a/arch/parisc/Makefile +++ b/arch/parisc/Makefile @@ -71,6 +71,13 @@ ifdef CONFIG_MLONGCALLS KBUILD_CFLAGS_KERNEL += -mlong-calls endif +# Without this, "ld -r" results in .text sections that are too big (> 0x40000) +# for branches to reach stubs. And multiple .text sections trigger a warning +# when creating the sysfs module information section. +ifndef CONFIG_64BIT +KBUILD_CFLAGS_MODULE += -ffunction-sections +endif + # select which processor to optimise for cflags-$(CONFIG_PA7000) += -march=1.1 -mschedule=7100 cflags-$(CONFIG_PA7200) += -march=1.1 -mschedule=7200 -- cgit v1.2.3-70-g09d2 From 63e19c8216bb03a1b4d10f6637d1324ae7a2b612 Mon Sep 17 00:00:00 2001 From: Guo Ren Date: Tue, 20 Nov 2018 16:06:57 +0800 Subject: csky: bugfix tlb_get_pgd error. It's wrong to mask/unmask highest bit in addr to translate the vaddr to paddr. We should use PAGE_OFFSET and PHYS_OFFSET. Wrong implement: return ((get_pgd()|(1<<31)) - PHYS_OFFSET) & ~1; When PHYS_OFFSET=0xc0000000 and get_pgd() return 0xe0000000, it'll return 0x60000000. It's wrong and should be 0xa0000000. 
Now correct it to: return ((get_pgd() - PHYS_OFFSET) & ~1) + PAGE_OFFSET; Signed-off-by: Guo Ren --- arch/csky/include/asm/mmu_context.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/csky/include/asm/mmu_context.h b/arch/csky/include/asm/mmu_context.h index c410aa4fff1a..b2905c0485a7 100644 --- a/arch/csky/include/asm/mmu_context.h +++ b/arch/csky/include/asm/mmu_context.h @@ -16,7 +16,7 @@ static inline void tlbmiss_handler_setup_pgd(unsigned long pgd, bool kernel) { - pgd &= ~(1<<31); + pgd -= PAGE_OFFSET; pgd += PHYS_OFFSET; pgd |= 1; setup_pgd(pgd, kernel); @@ -29,7 +29,7 @@ static inline void tlbmiss_handler_setup_pgd(unsigned long pgd, bool kernel) static inline unsigned long tlb_get_pgd(void) { - return ((get_pgd()|(1<<31)) - PHYS_OFFSET) & ~1; + return ((get_pgd() - PHYS_OFFSET) & ~1) + PAGE_OFFSET; } #define cpu_context(cpu, mm) ((mm)->context.asid[cpu]) -- cgit v1.2.3-70-g09d2 From 182ddd16194cd082f25fa1b063dae3c7c5cce384 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Mon, 3 Dec 2018 11:38:11 +0100 Subject: x86/boot: Clear RSDP address in boot_params for broken loaders Gunnar Krueger reported a systemd-boot failure and bisected it down to: e6e094e053af75 ("x86/acpi, x86/boot: Take RSDP address from boot params if available") In case a broken boot loader doesn't clear its 'struct boot_params', clear rsdp_addr in sanitize_boot_params(). Reported-by: Gunnar Krueger Tested-by: Gunnar Krueger Signed-off-by: Juergen Gross Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: bp@alien8.de Cc: sstabellini@kernel.org Fixes: e6e094e053af75 ("x86/acpi, x86/boot: Take RSDP address from boot params if available") Link: http://lkml.kernel.org/r/20181203103811.17056-1-jgross@suse.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/bootparam_utils.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/x86/include/asm/bootparam_utils.h b/arch/x86/include/asm/bootparam_utils.h index a07ffd23e4dd..f6f6ef436599 100644 --- a/arch/x86/include/asm/bootparam_utils.h +++ b/arch/x86/include/asm/bootparam_utils.h @@ -36,6 +36,7 @@ static void sanitize_boot_params(struct boot_params *boot_params) */ if (boot_params->sentinel) { /* fields in boot_params are left uninitialized, clear them */ + boot_params->acpi_rsdp_addr = 0; memset(&boot_params->ext_ramdisk_image, 0, (char *)&boot_params->efi_info - (char *)&boot_params->ext_ramdisk_image); -- cgit v1.2.3-70-g09d2 From 025dceb0fab31c912c41b8f32577432231d83e6b Mon Sep 17 00:00:00 2001 From: Sandipan Das Date: Mon, 3 Dec 2018 17:51:04 +0530 Subject: bpf: powerpc64: optimize JIT passes for bpf function calls Once the JITed images for each function in a multi-function program are generated after the first three JIT passes, we only need to fix the target address for the branch instruction corresponding to each bpf-to-bpf function call. This introduces the following optimizations for reducing the work done by the JIT compiler when handling multi-function programs: [1] Instead of doing two extra passes to fix the bpf function calls, do just one as that would be sufficient. [2] During the extra pass, only overwrite the instruction sequences for the bpf-to-bpf function calls as everything else would still remain exactly the same. This also reduces the number of writes to the JITed image. [3] Do not regenerate the prologue and the epilogue during the extra pass as that would be redundant. 
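[ note: a schematic sketch (not from this patch) of the extra-pass
  idea — walk the program once and re-emit only the bpf-to-bpf call
  sites, whose NOP-padded sequences have a fixed length; the helper
  names here are hypothetical:

	/* addrs[i] = byte offset of BPF insn i inside the JITed image */
	for (i = 0; i < fp->len; i++) {
		if (!is_bpf_pseudo_call(&fp->insnsi[i]))	/* hypothetical */
			continue;
		/* Overwrite the fixed-length call stub in place with
		 * the now-known target; the image size is unchanged. */
		emit_call_at(image, addrs[i], final_target(fp, i));
	}
]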
Signed-off-by: Sandipan Das Signed-off-by: Daniel Borkmann --- arch/powerpc/net/bpf_jit_comp64.c | 66 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) (limited to 'arch') diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c index 17482f5de3e2..9393e231cbc2 100644 --- a/arch/powerpc/net/bpf_jit_comp64.c +++ b/arch/powerpc/net/bpf_jit_comp64.c @@ -891,6 +891,55 @@ cond_branch: return 0; } +/* Fix the branch target addresses for subprog calls */ +static int bpf_jit_fixup_subprog_calls(struct bpf_prog *fp, u32 *image, + struct codegen_context *ctx, u32 *addrs) +{ + const struct bpf_insn *insn = fp->insnsi; + bool func_addr_fixed; + u64 func_addr; + u32 tmp_idx; + int i, ret; + + for (i = 0; i < fp->len; i++) { + /* + * During the extra pass, only the branch target addresses for + * the subprog calls need to be fixed. All other instructions + * can left untouched. + * + * The JITed image length does not change because we already + * ensure that the JITed instruction sequence for these calls + * are of fixed length by padding them with NOPs. + */ + if (insn[i].code == (BPF_JMP | BPF_CALL) && + insn[i].src_reg == BPF_PSEUDO_CALL) { + ret = bpf_jit_get_func_addr(fp, &insn[i], true, + &func_addr, + &func_addr_fixed); + if (ret < 0) + return ret; + + /* + * Save ctx->idx as this would currently point to the + * end of the JITed image and set it to the offset of + * the instruction sequence corresponding to the + * subprog call temporarily. + */ + tmp_idx = ctx->idx; + ctx->idx = addrs[i] / 4; + bpf_jit_emit_func_call_rel(image, ctx, func_addr); + + /* + * Restore ctx->idx here. This is safe as the length + * of the JITed sequence remains unchanged. + */ + ctx->idx = tmp_idx; + } + } + + return 0; +} + struct powerpc64_jit_data { struct bpf_binary_header *header; u32 *addrs; @@ -989,6 +1038,22 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) skip_init_ctx: code_base = (u32 *)(image + FUNCTION_DESCR_SIZE); + if (extra_pass) { + /* + * Do not touch the prologue and epilogue as they will remain + * unchanged. Only fix the branch target address for subprog + * calls in the body. + * + * This does not change the offsets and lengths of the subprog + * call instruction sequences and hence, the size of the JITed + * image as well. + */ + bpf_jit_fixup_subprog_calls(fp, code_base, &cgctx, addrs); + + /* There is no need to perform the usual passes. */ + goto skip_codegen_passes; + } + /* Code generation passes 1-2 */ for (pass = 1; pass < 3; pass++) { /* Now build the prologue, body code & epilogue for real. */ @@ -1002,6 +1067,7 @@ skip_init_ctx: proglen - (cgctx.idx * 4), cgctx.seen); } +skip_codegen_passes: if (bpf_jit_enable > 1) /* * Note that we output the base address of the code_base -- cgit v1.2.3-70-g09d2 From 43a1b0cb4cd6dbfd3cd9c10da663368394d299d8 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Fri, 24 Aug 2018 02:16:12 +0900 Subject: kprobes/x86: Fix instruction patching corruption when copying more than one RIP-relative instruction After copy_optimized_instructions() copies several instructions to the working buffer it tries to fix up the real RIP address, but it adjusts the RIP-relative instruction with an incorrect RIP address for the 2nd and subsequent instructions due to a bug in the logic. This will break the kernel pretty badly (with likely outcomes such as a kernel freeze, a crash, or worse) because probed instructions can refer to the wrong data. For example putting kprobes on cpumask_next() typically hits this bug. 
cpumask_next() is normally like below if CONFIG_CPUMASK_OFFSTACK=y (in this case nr_cpumask_bits is an alias of nr_cpu_ids): : 48 89 f0 mov %rsi,%rax 8b 35 7b fb e2 00 mov 0xe2fb7b(%rip),%esi # ffffffff82db9e64 55 push %rbp ... If we put a kprobe on it and it gets jump-optimized, it gets patched by the kprobes code like this: : e9 95 7d 07 1e jmpq 0xffffffffa000207a 7b fb jnp 0xffffffff81f8a2e2 e2 00 loop 0xffffffff81f8a2e9 55 push %rbp This shows that the first two MOV instructions were copied to a trampoline buffer at 0xffffffffa000207a. Here is the disassembled result of the trampoline, skipping the optprobe template instructions: # Dump of assembly code from 0xffffffffa000207a to 0xffffffffa00020ea: 54 push %rsp ... 48 83 c4 08 add $0x8,%rsp 9d popfq 48 89 f0 mov %rsi,%rax 8b 35 82 7d db e2 mov -0x1d24827e(%rip),%esi # 0xffffffff82db9e67 This dump shows that the second MOV accesses *(nr_cpu_ids+3) instead of the original *nr_cpu_ids. This leads to a kernel freeze because cpumask_next() always returns 0 and for_each_cpu() never ends. Fix this by adding 'len' correctly to the real RIP address while copying. [ mingo: Improved the changelog. ] Reported-by: Michael Rodin Signed-off-by: Masami Hiramatsu Reviewed-by: Steven Rostedt (VMware) Cc: Arnaldo Carvalho de Melo Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Steven Rostedt Cc: Thomas Gleixner Cc: stable@vger.kernel.org # v4.15+ Fixes: 63fef14fc98a ("kprobes/x86: Make insn buffer always ROX and use text_poke()") Link: http://lkml.kernel.org/r/153504457253.22602.1314289671019919596.stgit@devbox Signed-off-by: Ingo Molnar --- arch/x86/kernel/kprobes/opt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c index 40b16b270656..6adf6e6c2933 100644 --- a/arch/x86/kernel/kprobes/opt.c +++ b/arch/x86/kernel/kprobes/opt.c @@ -189,7 +189,7 @@ static int copy_optimized_instructions(u8 *dest, u8 *src, u8 *real) int len = 0, ret; while (len < RELATIVEJUMP_SIZE) { - ret = __copy_instruction(dest + len, src + len, real, &insn); + ret = __copy_instruction(dest + len, src + len, real + len, &insn); if (!ret || !can_boost(&insn, src + len)) return -EINVAL; len += ret; -- cgit v1.2.3-70-g09d2 From a1208f6a822ac29933e772ef1f637c5d67838da9 Mon Sep 17 00:00:00 2001 From: Chris Cole Date: Fri, 23 Nov 2018 12:20:45 +0100 Subject: ARM: 8814/1: mm: improve/fix ARM v7_dma_inv_range() unaligned address handling This patch addresses possible memory corruption when v7_dma_inv_range(start_address, end_address) address parameters are not aligned to whole cache lines. This function issues "invalidate" cache management operations to all cache lines from start_address (inclusive) to end_address (exclusive). When start_address and/or end_address are not aligned, the start and/or end cache lines are first issued "clean & invalidate" operation. The assumption is this is done to ensure that any dirty data addresses outside the address range (but part of the first or last cache lines) are cleaned/flushed so that data is not lost, which could happen if just an invalidate is issued. The problem is that these first/last partial cache lines are issued "clean & invalidate" and then "invalidate". This second "invalidate" is not required and worse can cause "lost" writes to addresses outside the address range but part of the cache line. 
If another component writes to its part of the cache line between the
"clean & invalidate" and "invalidate" operations, the write can get
lost.

The fix is to remove the extra "invalidate" operation when unaligned
addresses are used.

A kernel module is available that has a stress test to reproduce the
issue and a unit test of the updated v7_dma_inv_range(). It can be
downloaded from
http://ftp.sageembedded.com/outgoing/linux/cache-test-20181107.tgz.

v7_dma_inv_range() is called by dmac_[un]map_area(addr, len, direction)
when the direction is DMA_FROM_DEVICE. One can (I believe) successfully
argue that DMA from a device to main memory should use buffers aligned
to cache line size, because the "clean & invalidate" might overwrite
data that the device just wrote using DMA. But if a driver does use
unaligned buffers, at least this fix will prevent memory corruption
outside the buffer.

Signed-off-by: Chris Cole
Signed-off-by: Russell King
---
 arch/arm/mm/cache-v7.S | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

(limited to 'arch')

diff --git a/arch/arm/mm/cache-v7.S b/arch/arm/mm/cache-v7.S
index 215df435bfb9..2149b47a0c5a 100644
--- a/arch/arm/mm/cache-v7.S
+++ b/arch/arm/mm/cache-v7.S
@@ -360,14 +360,16 @@ v7_dma_inv_range:
 	ALT_UP(W(nop))
 #endif
 	mcrne	p15, 0, r0, c7, c14, 1		@ clean & invalidate D / U line
+	addne	r0, r0, r2
 
 	tst	r1, r3
 	bic	r1, r1, r3
 	mcrne	p15, 0, r1, c7, c14, 1		@ clean & invalidate D / U line
-1:
-	mcr	p15, 0, r0, c7, c6, 1		@ invalidate D / U line
-	add	r0, r0, r2
 	cmp	r0, r1
+1:
+	mcrlo	p15, 0, r0, c7, c6, 1		@ invalidate D / U line
+	addlo	r0, r0, r2
+	cmplo	r0, r1
 	blo	1b
 	dsb	st
 	ret	lr
-- cgit v1.2.3-70-g09d2

From 3d0358d0ba048c5afb1385787aaec8fa5ad78fcc Mon Sep 17 00:00:00 2001
From: Vladimir Murzin
Date: Fri, 23 Nov 2018 12:25:21 +0100
Subject: ARM: 8815/1: V7M: align v7m_dma_inv_range() with v7 counterpart

Chris has discovered and reported that v7_dma_inv_range() may corrupt
memory if the address range is not aligned to the cache line size.

Since the whole of cache-v7m.S was lifted from cache-v7.S, the same
observation applies to v7m_dma_inv_range(). So the fix just mirrors
what has been done for v7, with minor M-class specifics.
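[ note: as the 8814/1 discussion above suggests, drivers can sidestep
  the partial-line hazard entirely by cache-line-aligning
  DMA-from-device buffers. A minimal in-kernel sketch, assuming
  ARCH_DMA_MINALIGN reflects the CPU's cache line size:

	#include <linux/cache.h>	/* ARCH_DMA_MINALIGN */
	#include <linux/slab.h>

	/* kmalloc() memory is already ARCH_DMA_MINALIGN aligned;
	 * rounding the length up keeps the buffer tail from sharing
	 * a cache line with unrelated data. */
	size_t len = ALIGN(payload_len, ARCH_DMA_MINALIGN);
	void *buf = kmalloc(len, GFP_KERNEL);

  payload_len stands in for the driver's real transfer size. ]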
Cc: Chris Cole Signed-off-by: Vladimir Murzin Signed-off-by: Russell King --- arch/arm/mm/cache-v7m.S | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/arm/mm/cache-v7m.S b/arch/arm/mm/cache-v7m.S index 788486e830d3..32aa2a2aa260 100644 --- a/arch/arm/mm/cache-v7m.S +++ b/arch/arm/mm/cache-v7m.S @@ -73,9 +73,11 @@ /* * dcimvac: Invalidate data cache line by MVA to PoC */ -.macro dcimvac, rt, tmp - v7m_cacheop \rt, \tmp, V7M_SCB_DCIMVAC +.irp c,,eq,ne,cs,cc,mi,pl,vs,vc,hi,ls,ge,lt,gt,le,hs,lo +.macro dcimvac\c, rt, tmp + v7m_cacheop \rt, \tmp, V7M_SCB_DCIMVAC, \c .endm +.endr /* * dccmvau: Clean data cache line by MVA to PoU @@ -369,14 +371,16 @@ v7m_dma_inv_range: tst r0, r3 bic r0, r0, r3 dccimvacne r0, r3 + addne r0, r0, r2 subne r3, r2, #1 @ restore r3, corrupted by v7m's dccimvac tst r1, r3 bic r1, r1, r3 dccimvacne r1, r3 -1: - dcimvac r0, r3 - add r0, r0, r2 cmp r0, r1 +1: + dcimvaclo r0, r3 + addlo r0, r0, r2 + cmplo r0, r1 blo 1b dsb st ret lr -- cgit v1.2.3-70-g09d2 From c2a3831df6dc164af66d8d86cf356a90c021b86f Mon Sep 17 00:00:00 2001 From: Nathan Jones Date: Tue, 4 Dec 2018 10:05:32 +0100 Subject: ARM: 8816/1: dma-mapping: fix potential uninitialized return While trying to use the dma_mmap_*() interface, it was noticed that this interface returns strange values when passed an incorrect length. If neither of the if() statements fire then the return value is uninitialized. In the worst case it returns 0 which means the caller will think the function succeeded. Fixes: 1655cf8829d8 ("ARM: dma-mapping: Remove traces of NOMMU code") Signed-off-by: Nathan Jones Reviewed-by: Robin Murphy Acked-by: Vladimir Murzin Signed-off-by: Russell King --- arch/arm/mm/dma-mapping.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index 661fe48ab78d..78de138aa66d 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -829,7 +829,7 @@ static int __arm_dma_mmap(struct device *dev, struct vm_area_struct *vma, void *cpu_addr, dma_addr_t dma_addr, size_t size, unsigned long attrs) { - int ret; + int ret = -ENXIO; unsigned long nr_vma_pages = vma_pages(vma); unsigned long nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT; unsigned long pfn = dma_to_pfn(dev, dma_addr); -- cgit v1.2.3-70-g09d2 From 25896d073d8a0403b07e6dec56f58e6c33678207 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 5 Dec 2018 15:27:19 +0900 Subject: x86/build: Fix compiler support check for CONFIG_RETPOLINE It is troublesome to add a diagnostic like this to the Makefile parse stage because the top-level Makefile could be parsed with a stale include/config/auto.conf. Once you are hit by the error about non-retpoline compiler, the compilation still breaks even after disabling CONFIG_RETPOLINE. 
The easiest fix is to move this check to the "archprepare" like this commit did: 829fe4aa9ac1 ("x86: Allow generating user-space headers without a compiler") Reported-by: Meelis Roos Tested-by: Meelis Roos Signed-off-by: Masahiro Yamada Acked-by: Zhenzhong Duan Cc: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Zhenzhong Duan Fixes: 4cd24de3a098 ("x86/retpoline: Make CONFIG_RETPOLINE depend on compiler support") Link: http://lkml.kernel.org/r/1543991239-18476-1-git-send-email-yamada.masahiro@socionext.com Link: https://lkml.org/lkml/2018/12/4/206 Signed-off-by: Ingo Molnar --- arch/x86/Makefile | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/Makefile b/arch/x86/Makefile index f5d7f4134524..75ef499a66e2 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -220,9 +220,6 @@ KBUILD_CFLAGS += -fno-asynchronous-unwind-tables # Avoid indirect branches in kernel to deal with Spectre ifdef CONFIG_RETPOLINE -ifeq ($(RETPOLINE_CFLAGS),) - $(error You are building kernel with non-retpoline compiler, please update your compiler.) -endif KBUILD_CFLAGS += $(RETPOLINE_CFLAGS) endif @@ -307,6 +304,13 @@ ifndef CC_HAVE_ASM_GOTO @echo Compiler lacks asm-goto support. @exit 1 endif +ifdef CONFIG_RETPOLINE +ifeq ($(RETPOLINE_CFLAGS),) + @echo "You are building kernel with non-retpoline compiler." >&2 + @echo "Please update your compiler." >&2 + @false +endif +endif archclean: $(Q)rm -rf $(objtree)/arch/i386 -- cgit v1.2.3-70-g09d2 From a50480cb6d61d5c5fc13308479407b628b6bc1c5 Mon Sep 17 00:00:00 2001 From: Andrea Righi Date: Thu, 6 Dec 2018 10:56:48 +0100 Subject: kprobes/x86: Blacklist non-attachable interrupt functions These interrupt functions are already non-attachable by kprobes. Blacklist them explicitly so that they can show up in /sys/kernel/debug/kprobes/blacklist and tools like BCC can use this additional information. Signed-off-by: Andrea Righi Cc: Andy Lutomirski Cc: Anil S Keshavamurthy Cc: Borislav Petkov Cc: David S. Miller Cc: Linus Torvalds Cc: Masami Hiramatsu Cc: Naveen N. Rao Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Yonghong Song Link: http://lkml.kernel.org/r/20181206095648.GA8249@Dell Signed-off-by: Ingo Molnar --- arch/x86/entry/entry_64.S | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch') diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index ce25d84023c0..1f0efdb7b629 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -566,6 +566,7 @@ ENTRY(interrupt_entry) ret END(interrupt_entry) +_ASM_NOKPROBE(interrupt_entry) /* Interrupt entry/exit. */ @@ -766,6 +767,7 @@ native_irq_return_ldt: jmp native_irq_return_iret #endif END(common_interrupt) +_ASM_NOKPROBE(common_interrupt) /* * APIC interrupts. @@ -780,6 +782,7 @@ ENTRY(\sym) call \do_sym /* rdi points to pt_regs */ jmp ret_from_intr END(\sym) +_ASM_NOKPROBE(\sym) .endm /* Make sure APIC interrupt handlers end up in the irqentry section: */ @@ -960,6 +963,7 @@ ENTRY(\sym) jmp error_exit .endif +_ASM_NOKPROBE(\sym) END(\sym) .endm -- cgit v1.2.3-70-g09d2 From 3a4d0c2172bcf15b7a3d9d498b2b355f9864286b Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 6 Dec 2018 16:36:38 +0000 Subject: ARM: ensure that processor vtables is not lost after boot Marek Szyprowski reported problems with CPU hotplug in current kernels. 
From 3a4d0c2172bcf15b7a3d9d498b2b355f9864286b Mon Sep 17 00:00:00 2001
From: Russell King
Date: Thu, 6 Dec 2018 16:36:38 +0000
Subject: ARM: ensure that processor vtables are not lost after boot

Marek Szyprowski reported problems with CPU hotplug in current kernels.
This was tracked down to the processor vtables being located in an init
section, and therefore discarded after kernel boot, despite being
required after boot to properly initialise the non-boot CPUs.

Arrange for these tables to end up in .rodata when required.

Reported-by: Marek Szyprowski
Tested-by: Krzysztof Kozlowski
Fixes: 383fb3ee8024 ("ARM: spectre-v2: per-CPU vtables to work around big.Little systems")
Signed-off-by: Russell King
---
 arch/arm/mm/proc-macros.S | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'arch')

diff --git a/arch/arm/mm/proc-macros.S b/arch/arm/mm/proc-macros.S
index 81d0efb055c6..19516fbc2c55 100644
--- a/arch/arm/mm/proc-macros.S
+++ b/arch/arm/mm/proc-macros.S
@@ -274,6 +274,13 @@
         .endm

 .macro define_processor_functions name:req, dabort:req, pabort:req, nommu=0, suspend=0, bugs=0
+/*
+ * If we are building for big.Little with branch predictor hardening,
+ * we need the processor function tables to remain available after boot.
+ */
+#if 1 // defined(CONFIG_BIG_LITTLE) && defined(CONFIG_HARDEN_BRANCH_PREDICTOR)
+        .section ".rodata"
+#endif
         .type \name\()_processor_functions, #object
         .align 2
 ENTRY(\name\()_processor_functions)
@@ -309,6 +316,9 @@ ENTRY(\name\()_processor_functions)
         .endif

         .size \name\()_processor_functions, . - \name\()_processor_functions
+#if 1 // defined(CONFIG_BIG_LITTLE) && defined(CONFIG_HARDEN_BRANCH_PREDICTOR)
+        .previous
+#endif
 .endm

 .macro define_cache_functions name:req
-- cgit v1.2.3-70-g09d2
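The same lifetime rule trips C code: anything placed in an init section is
freed once boot completes, so any table consulted during CPU hotplug must
live in a permanent section such as .rodata. A hypothetical C illustration
of the distinction (a sketch, not code from this patch):

#include <linux/init.h>

struct cpu_ops {
        void (*cpu_init)(void);
};

static void my_cpu_init(void) { }

/*
 * BROKEN for hotplug: __initconst data is discarded after boot, so a
 * CPU brought online later would chase a dangling pointer -- the C
 * equivalent of leaving the processor vtables in an init section.
 */
static const struct cpu_ops early_ops __initconst = {
        .cpu_init = my_cpu_init,
};

/*
 * SAFE: plain const data is placed in .rodata and stays resident,
 * which is what the .section ".rodata" directive above arranges for
 * the assembly-defined processor_functions tables.
 */
static const struct cpu_ops resident_ops = {
        .cpu_init = my_cpu_init,
};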
From b4aecf78083d8c6424657c1746c7c3de6e61669f Mon Sep 17 00:00:00 2001
From: Will Deacon
Date: Fri, 7 Dec 2018 12:47:10 +0000
Subject: arm64: hibernate: Avoid sending cross-calls with interrupts disabled

Since commit 3b8c9f1cdfc50 ("arm64: IPI each CPU after invalidating the
I-cache for kernel mappings"), a call to flush_icache_range() will use
an IPI to cross-call other online CPUs so that any stale instructions
are flushed from their pipelines. This triggers a WARN during the
hibernation resume path, where flush_icache_range() is called with
interrupts disabled and is therefore prone to deadlock:

| Disabling non-boot CPUs ...
| CPU1: shutdown
| psci: CPU1 killed.
| CPU2: shutdown
| psci: CPU2 killed.
| CPU3: shutdown
| psci: CPU3 killed.
| WARNING: CPU: 0 PID: 1 at ../kernel/smp.c:416 smp_call_function_many+0xd4/0x350
| Modules linked in:
| CPU: 0 PID: 1 Comm: swapper/0 Not tainted 4.20.0-rc4 #1

Since all secondary CPUs have been taken offline prior to invalidating
the I-cache, there's actually no need for an IPI and we can simply call
__flush_icache_range() instead.

Cc:
Fixes: 3b8c9f1cdfc50 ("arm64: IPI each CPU after invalidating the I-cache for kernel mappings")
Reported-by: Kunihiko Hayashi
Tested-by: Kunihiko Hayashi
Tested-by: James Morse
Signed-off-by: Will Deacon
Signed-off-by: Catalin Marinas
---
 arch/arm64/kernel/hibernate.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c
index 6b2686d54411..29cdc99688f3 100644
--- a/arch/arm64/kernel/hibernate.c
+++ b/arch/arm64/kernel/hibernate.c
@@ -214,7 +214,7 @@ static int create_safe_exec_page(void *src_start, size_t length,
         }

         memcpy((void *)dst, src_start, length);
-        flush_icache_range(dst, dst + length);
+        __flush_icache_range(dst, dst + length);

         pgdp = pgd_offset_raw(allocator(mask), dst_addr);
         if (pgd_none(READ_ONCE(*pgdp))) {
-- cgit v1.2.3-70-g09d2

From ac3e233d29f7f77f28243af0132057d378d3ea58 Mon Sep 17 00:00:00 2001
From: Nick Desaulniers
Date: Thu, 6 Dec 2018 11:12:31 -0800
Subject: x86/vdso: Drop implicit common-page-size linker flag

The default value of the GNU linker's -z common-page-size option is
based on the target architecture. arch/x86/entry/vdso/Makefile sets it
to the architecture default, which is implicit and redundant. Drop it.

Fixes: 2aae950b21e4 ("x86_64: Add vDSO for x86-64 with gettimeofday/clock_gettime/getcpu")
Reported-by: Dmitry Golovin
Reported-by: Bill Wendling
Suggested-by: Dmitry Golovin
Suggested-by: Rui Ueyama
Signed-off-by: Nick Desaulniers
Signed-off-by: Borislav Petkov
Acked-by: Andy Lutomirski
Cc: Andi Kleen
Cc: Fangrui Song
Cc: "H. Peter Anvin"
Cc: Ingo Molnar
Cc: Thomas Gleixner
Cc: x86-ml
Link: https://lkml.kernel.org/r/20181206191231.192355-1-ndesaulniers@google.com
Link: https://bugs.llvm.org/show_bug.cgi?id=38774
Link: https://github.com/ClangBuiltLinux/linux/issues/31
---
 arch/x86/entry/vdso/Makefile | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile
index 141d415a8c80..0624bf2266fd 100644
--- a/arch/x86/entry/vdso/Makefile
+++ b/arch/x86/entry/vdso/Makefile
@@ -47,7 +47,7 @@ targets += $(vdso_img_sodbg) $(vdso_img-y:%=vdso%.so)
 CPPFLAGS_vdso.lds += -P -C

 VDSO_LDFLAGS_vdso.lds = -m elf_x86_64 -soname linux-vdso.so.1 --no-undefined \
-                        -z max-page-size=4096 -z common-page-size=4096
+                        -z max-page-size=4096

 $(obj)/vdso64.so.dbg: $(obj)/vdso.lds $(vobjs) FORCE
         $(call if_changed,vdso)
@@ -98,7 +98,7 @@ CFLAGS_REMOVE_vvar.o = -pg
 CPPFLAGS_vdsox32.lds = $(CPPFLAGS_vdso.lds)
 VDSO_LDFLAGS_vdsox32.lds = -m elf32_x86_64 -soname linux-vdso.so.1 \
-                           -z max-page-size=4096 -z common-page-size=4096
+                           -z max-page-size=4096

 # x32-rebranded versions
 vobjx32s-y := $(vobjs-y:.o=-x32.o)
-- cgit v1.2.3-70-g09d2
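One way to confirm the flag was redundant: -z max-page-size is what
determines p_align on the PT_LOAD segments, so the segment alignment of the
built vDSO should be unchanged with -z common-page-size gone. A small
hypothetical userspace checker (illustrative sketch, not part of the kernel
build; the path in the usage comment is an assumption):

#include <elf.h>
#include <stdio.h>

/* Usage (hypothetical): ./palign arch/x86/entry/vdso/vdso64.so.dbg */
int main(int argc, char **argv)
{
        Elf64_Ehdr eh;
        Elf64_Phdr ph;
        FILE *f;
        int i;

        if (argc != 2 || !(f = fopen(argv[1], "rb")))
                return 1;
        if (fread(&eh, sizeof(eh), 1, f) != 1)
                return 1;
        if (fseek(f, (long)eh.e_phoff, SEEK_SET))
                return 1;
        for (i = 0; i < eh.e_phnum; i++) {
                if (fread(&ph, sizeof(ph), 1, f) != 1)
                        return 1;
                if (ph.p_type == PT_LOAD)       /* expect 4096 before and after */
                        printf("PT_LOAD p_align = %llu\n",
                               (unsigned long long)ph.p_align);
        }
        fclose(f);
        return 0;
}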