From fbc81ec5b85d43a4b22e49ec0e643fa7dec2ea40 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel <ardb@kernel.org> Date: Sat, 3 Oct 2020 17:28:27 +0200 Subject: efi/arm: set HSCTLR Thumb2 bit correctly for HVC calls from HYP Commit db227c19e68db353 ("ARM: 8985/1: efi/decompressor: deal with HYP mode boot gracefully") updated the EFI entry code to permit firmware to invoke the EFI stub loader in HYP mode, with the MMU either enabled or disabled, neither of which is permitted by the EFI spec, but which does happen in the field. In the MMU on case, we remain in HYP mode as configured by the firmware, and rely on the fact that any HVC instruction issued in this mode will be dispatched via the SVC slot in the HYP vector table. However, this slot will point to a Thumb2 symbol if the kernel is built in Thumb2 mode, and so we have to configure HSCTLR to ensure that the exception handlers are invoked in Thumb2 mode as well. Fixes: db227c19e68db353 ("ARM: 8985/1: efi/decompressor: deal with HYP mode boot gracefully") Signed-off-by: Ard Biesheuvel <ardb@kernel.org> --- arch/arm/boot/compressed/head.S | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch') diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S index 2e04ec5b5446..caa27322a0ab 100644 --- a/arch/arm/boot/compressed/head.S +++ b/arch/arm/boot/compressed/head.S @@ -1472,6 +1472,9 @@ ENTRY(efi_enter_kernel) @ issued from HYP mode take us to the correct handler code. We @ will disable the MMU before jumping to the kernel proper. @ + ARM( bic r1, r1, #(1 << 30) ) @ clear HSCTLR.TE + THUMB( orr r1, r1, #(1 << 30) ) @ set HSCTLR.TE + mcr p15, 4, r1, c1, c0, 0 adr r0, __hyp_reentry_vectors mcr p15, 4, r0, c12, c0, 0 @ set HYP vector base (HVBAR) isb -- cgit v1.2.3-70-g09d2 From c20782ad4eb9dfa7f41cb2d85f218d0940f7cef1 Mon Sep 17 00:00:00 2001 From: Tony Lindgren <tony@atomide.com> Date: Mon, 26 Oct 2020 10:08:47 +0200 Subject: ARM: OMAP2+: Fix location for select PM_GENERIC_DOMAINS I accidentally misplaced select PM_GENERIC_DOMAINS, it should be selected for all the SoCs instead. Fixes: 58cbff023bfa ("soc: ti: omap-prm: Add basic power domain support") Signed-off-by: Tony Lindgren <tony@atomide.com> --- arch/arm/mach-omap2/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/mach-omap2/Kconfig b/arch/arm/mach-omap2/Kconfig index 3ee7bdff86b2..68ba5adfaa23 100644 --- a/arch/arm/mach-omap2/Kconfig +++ b/arch/arm/mach-omap2/Kconfig @@ -7,7 +7,6 @@ config ARCH_OMAP2 depends on ARCH_MULTI_V6 select ARCH_OMAP2PLUS select CPU_V6 - select PM_GENERIC_DOMAINS if PM select SOC_HAS_OMAP2_SDRC config ARCH_OMAP3 @@ -106,6 +105,7 @@ config ARCH_OMAP2PLUS select OMAP_DM_TIMER select OMAP_GPMC select PINCTRL + select PM_GENERIC_DOMAINS if PM select RESET_CONTROLLER select SOC_BUS select TI_SYSC -- cgit v1.2.3-70-g09d2 From b69fd00120f8e3348273323099669cb058668263 Mon Sep 17 00:00:00 2001 From: Tony Lindgren <tony@atomide.com> Date: Mon, 26 Oct 2020 10:08:47 +0200 Subject: ARM: OMAP2+: Fix missing select PM_GENERIC_DOMAINS_OF We should select also PM_GENERIC_DOMAINS_OF in addition to PM_GENERIC_DOMAINS to have device tree based PM domain providers enabled. Fixes: 58cbff023bfa ("soc: ti: omap-prm: Add basic power domain support") Signed-off-by: Tony Lindgren <tony@atomide.com> --- arch/arm/mach-omap2/Kconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/arm/mach-omap2/Kconfig b/arch/arm/mach-omap2/Kconfig index 68ba5adfaa23..3f62a0c9450d 100644 --- a/arch/arm/mach-omap2/Kconfig +++ b/arch/arm/mach-omap2/Kconfig @@ -106,6 +106,7 @@ config ARCH_OMAP2PLUS select OMAP_GPMC select PINCTRL select PM_GENERIC_DOMAINS if PM + select PM_GENERIC_DOMAINS_OF if PM select RESET_CONTROLLER select SOC_BUS select TI_SYSC -- cgit v1.2.3-70-g09d2 From a2089ac7f8dc682ef52ed74b52997d36cde76d05 Mon Sep 17 00:00:00 2001 From: Clément Péron <peron.clem@gmail.com> Date: Sun, 11 Oct 2020 23:15:14 +0200 Subject: arm64: dts: allwinner: pinetab: Drop unnecessary address/size-cells information MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit make dtbs_check warm about unknown address/size-cells property in the pinetab device-tree. This is because these information are not necessary. Drop them. Signed-off-by: Clément Péron <peron.clem@gmail.com> Signed-off-by: Maxime Ripard <maxime@cerno.tech> Link: https://lore.kernel.org/r/20201011211514.155266-1-peron.clem@gmail.com --- arch/arm64/boot/dts/allwinner/sun50i-a64-pinetab.dts | 3 --- 1 file changed, 3 deletions(-) (limited to 'arch') diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-pinetab.dts b/arch/arm64/boot/dts/allwinner/sun50i-a64-pinetab.dts index 3ab0f0347bc9..0494bfaf2ffa 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-a64-pinetab.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-pinetab.dts @@ -122,9 +122,6 @@ status = "okay"; port { - #address-cells = <1>; - #size-cells = <0>; - csi_ep: endpoint { remote-endpoint = <&ov5640_ep>; bus-width = <8>; -- cgit v1.2.3-70-g09d2 From 97a38c1c213b162aa577299de698f39c18ba696b Mon Sep 17 00:00:00 2001 From: Clément Péron <peron.clem@gmail.com> Date: Sun, 18 Oct 2020 19:24:09 +0200 Subject: arm64: dts: allwinner: beelink-gs1: Enable both RGMII RX/TX delay MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Before the commit bbc4d71d6354 ("net: phy: realtek: fix rtl8211e rx/tx delay config"), the software overwrite for RX/TX delays of the RTL8211e were not working properly and the Beelink GS1 had both RX/TX delay of RGMII interface set using pull-up on the TXDLY and RXDLY pins. Now that these delays are working properly they overwrite the HW config and set this to 'rgmii' meaning no delay on both RX/TX. This makes the ethernet of this board not working anymore. Set the phy-mode to 'rgmii-id' meaning RGMII with RX/TX delays in the device-tree to keep the correct configuration. Fixes: 089bee8dd119 ("arm64: dts: allwinner: h6: Introduce Beelink GS1 board") Signed-off-by: Clément Péron <peron.clem@gmail.com> Signed-off-by: Maxime Ripard <maxime@cerno.tech> Acked-by: Chen-Yu Tsai <wens@csie.org> Link: https://lore.kernel.org/r/20201018172409.1754775-1-peron.clem@gmail.com --- arch/arm64/boot/dts/allwinner/sun50i-h6-beelink-gs1.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h6-beelink-gs1.dts b/arch/arm64/boot/dts/allwinner/sun50i-h6-beelink-gs1.dts index 3f7ceeb1a767..7c9dbde645b5 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-h6-beelink-gs1.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-h6-beelink-gs1.dts @@ -97,7 +97,7 @@ &emac { pinctrl-names = "default"; pinctrl-0 = <&ext_rgmii_pins>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; phy-handle = <&ext_rgmii_phy>; phy-supply = <®_aldo2>; status = "okay"; -- cgit v1.2.3-70-g09d2 From 419c65f5000a6c25597ea52488528d75b287cbd0 Mon Sep 17 00:00:00 2001 From: Corentin Labbe <clabbe@baylibre.com> Date: Mon, 19 Oct 2020 06:34:49 +0000 Subject: arm64: dts: allwinner: Pine H64: Enable both RGMII RX/TX delay Since commit bbc4d71d6354 ("net: phy: realtek: fix rtl8211e rx/tx delay config"), the network is unusable on PineH64 model A. This is due to phy-mode incorrectly set to rgmii instead of rgmii-id. Fixes: 729e1ffcf47e ("arm64: allwinner: h6: add support for the Ethernet on Pine H64") Signed-off-by: Corentin Labbe <clabbe@baylibre.com> Signed-off-by: Maxime Ripard <maxime@cerno.tech> Link: https://lore.kernel.org/r/20201019063449.33316-1-clabbe@baylibre.com --- arch/arm64/boot/dts/allwinner/sun50i-h6-pine-h64.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h6-pine-h64.dts b/arch/arm64/boot/dts/allwinner/sun50i-h6-pine-h64.dts index af85b2074867..961732c52aa0 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-h6-pine-h64.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-h6-pine-h64.dts @@ -100,7 +100,7 @@ &emac { pinctrl-names = "default"; pinctrl-0 = <&ext_rgmii_pins>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; phy-handle = <&ext_rgmii_phy>; phy-supply = <®_gmac_3v3>; allwinner,rx-delay-ps = <200>; -- cgit v1.2.3-70-g09d2 From d7cdff444579e6659459b2fe04340ebb27628d5e Mon Sep 17 00:00:00 2001 From: Jernej Skrabec <jernej.skrabec@siol.net> Date: Thu, 22 Oct 2020 20:58:39 +0200 Subject: arm64: dts: allwinner: a64: OrangePi Win: Fix ethernet node RX/TX delay on OrangePi Win board is set on PHY. Reflect that in ethernet node. Fixes: 93d6a27cfcc0 ("arm64: dts: allwinner: a64: Orange Pi Win: Add Ethernet node") Signed-off-by: Jernej Skrabec <jernej.skrabec@siol.net> Signed-off-by: Maxime Ripard <maxime@cerno.tech> Link: https://lore.kernel.org/r/20201022185839.2779245-1-jernej.skrabec@siol.net --- arch/arm64/boot/dts/allwinner/sun50i-a64-orangepi-win.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-orangepi-win.dts b/arch/arm64/boot/dts/allwinner/sun50i-a64-orangepi-win.dts index d894ec5fa8a1..70e31743f0ba 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-a64-orangepi-win.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-orangepi-win.dts @@ -120,7 +120,7 @@ &emac { pinctrl-names = "default"; pinctrl-0 = <&rgmii_pins>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; phy-handle = <&ext_rgmii_phy>; phy-supply = <®_gmac_3v3>; status = "okay"; -- cgit v1.2.3-70-g09d2 From 927f42fcc1b4f7d04a2ac5cf02f25612aa8923a4 Mon Sep 17 00:00:00 2001 From: Jernej Skrabec <jernej.skrabec@siol.net> Date: Thu, 22 Oct 2020 23:13:01 +0200 Subject: arm64: dts: allwinner: a64: Pine64 Plus: Fix ethernet node According to board schematic, PHY provides both, RX and TX delays. However, according to "fix" Realtek provided for this board, only TX delay should be provided by PHY. Tests show that both variants work but TX only PHY delay works slightly better. Update ethernet node to reflect the fact that PHY provides TX delay. Fixes: 94f442886711 ("arm64: dts: allwinner: A64: Restore EMAC changes") Signed-off-by: Jernej Skrabec <jernej.skrabec@siol.net> Signed-off-by: Maxime Ripard <maxime@cerno.tech> Link: https://lore.kernel.org/r/20201022211301.3548422-1-jernej.skrabec@siol.net --- arch/arm64/boot/dts/allwinner/sun50i-a64-pine64-plus.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64-plus.dts b/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64-plus.dts index b26181cf9095..b54099b654c8 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64-plus.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64-plus.dts @@ -13,7 +13,7 @@ &emac { pinctrl-names = "default"; pinctrl-0 = <&rgmii_pins>; - phy-mode = "rgmii"; + phy-mode = "rgmii-txid"; phy-handle = <&ext_rgmii_phy>; status = "okay"; }; -- cgit v1.2.3-70-g09d2 From b34bf9f6a623ddb82600a5ed5c644224122395e1 Mon Sep 17 00:00:00 2001 From: Jernej Skrabec <jernej.skrabec@siol.net> Date: Fri, 23 Oct 2020 20:48:58 +0200 Subject: arm64: dts: allwinner: h5: OrangePi PC2: Fix ethernet node RX and TX delay are provided by ethernet PHY. Reflect that in ethernet node. Fixes: 44a94c7ef989 ("arm64: dts: allwinner: H5: Restore EMAC changes") Signed-off-by: Jernej Skrabec <jernej.skrabec@siol.net> Signed-off-by: Maxime Ripard <maxime@cerno.tech> Link: https://lore.kernel.org/r/20201023184858.3272918-1-jernej.skrabec@siol.net --- arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-pc2.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-pc2.dts b/arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-pc2.dts index 7d7aad18f078..8bf2db9dcbda 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-pc2.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-pc2.dts @@ -123,7 +123,7 @@ pinctrl-0 = <&emac_rgmii_pins>; phy-supply = <®_gmac_3v3>; phy-handle = <&ext_rgmii_phy>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; status = "okay"; }; -- cgit v1.2.3-70-g09d2 From b3eec3212e66ece33f69be0de98d54e67834e798 Mon Sep 17 00:00:00 2001 From: Jernej Skrabec <jernej.skrabec@siol.net> Date: Sun, 25 Oct 2020 09:19:49 +0100 Subject: ARM: dts: sun8i: r40: bananapi-m2-ultra: Fix ethernet node Ethernet PHY on BananaPi M2 Ultra provides RX and TX delays. Fix ethernet node to reflect that fact. Fixes: c36fd5a48bd2 ("ARM: dts: sun8i: r40: bananapi-m2-ultra: Enable GMAC ethernet controller") Signed-off-by: Jernej Skrabec <jernej.skrabec@siol.net> Signed-off-by: Maxime Ripard <maxime@cerno.tech> Link: https://lore.kernel.org/r/20201025081949.783443-1-jernej.skrabec@siol.net --- arch/arm/boot/dts/sun8i-r40-bananapi-m2-ultra.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/sun8i-r40-bananapi-m2-ultra.dts b/arch/arm/boot/dts/sun8i-r40-bananapi-m2-ultra.dts index 2fc62ef0cb3e..a6a1087a0c9b 100644 --- a/arch/arm/boot/dts/sun8i-r40-bananapi-m2-ultra.dts +++ b/arch/arm/boot/dts/sun8i-r40-bananapi-m2-ultra.dts @@ -129,7 +129,7 @@ pinctrl-names = "default"; pinctrl-0 = <&gmac_rgmii_pins>; phy-handle = <&phy1>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; phy-supply = <®_dc1sw>; status = "okay"; }; -- cgit v1.2.3-70-g09d2 From 8d80e2f00a42ef10b54e1b2d9e97314f8fd046c0 Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai <wens@csie.org> Date: Sun, 25 Oct 2020 00:25:06 +0800 Subject: Revert "arm: sun8i: orangepi-pc-plus: Set EMAC activity LEDs to active high" This reverts commit 75ee680cbd2e4d0156b94f9fec50076361ab12f2. Turns out the activity and link LEDs on the RJ45 port are active low, just like on the Orange Pi PC. Revert the commit that says otherwise. Fixes: 75ee680cbd2e ("arm: sun8i: orangepi-pc-plus: Set EMAC activity LEDs to active high") Fixes: 4904337fe34f ("ARM: dts: sunxi: Restore EMAC changes (boards)") Signed-off-by: Chen-Yu Tsai <wens@csie.org> Signed-off-by: Maxime Ripard <maxime@cerno.tech> Tested-by: Jernej Skrabec <jernej.skrabec@siol.net> Acked-by: Jernej Skrabec <jernej.skrabec@siol.net> Link: https://lore.kernel.org/r/20201024162515.30032-1-wens@kernel.org --- arch/arm/boot/dts/sun8i-h3-orangepi-pc-plus.dts | 5 ----- 1 file changed, 5 deletions(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/sun8i-h3-orangepi-pc-plus.dts b/arch/arm/boot/dts/sun8i-h3-orangepi-pc-plus.dts index 71fb73208939..babf4cf1b2f6 100644 --- a/arch/arm/boot/dts/sun8i-h3-orangepi-pc-plus.dts +++ b/arch/arm/boot/dts/sun8i-h3-orangepi-pc-plus.dts @@ -53,11 +53,6 @@ }; }; -&emac { - /* LEDs changed to active high on the plus */ - /delete-property/ allwinner,leds-active-low; -}; - &mmc1 { vmmc-supply = <®_vcc3v3>; bus-width = <4>; -- cgit v1.2.3-70-g09d2 From e76724153f5b4539802cc21b2c6131058668a1c6 Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai <wens@csie.org> Date: Sun, 25 Oct 2020 00:25:07 +0800 Subject: ARM: dts: sun6i: a31-hummingbird: Enable RGMII RX/TX delay on Ethernet PHY The Ethernet PHY on the A31 Hummingbird has the RX and TX delays enabled on the PHY, using pull-ups on the RXDLY and TXDLY pins. Fix the phy-mode description to correct reflect this so that the implementation doesn't reconfigure the delays incorrectly. This happened with commit bbc4d71d6354 ("net: phy: realtek: fix rtl8211e rx/tx delay config"). Fixes: c220aec2bb79 ("ARM: dts: sun6i: Add Merrii A31 Hummingbird support") Signed-off-by: Chen-Yu Tsai <wens@csie.org> Signed-off-by: Maxime Ripard <maxime@cerno.tech> Acked-by: Jernej Skrabec <jernej.skrabec@siol.net> Link: https://lore.kernel.org/r/20201024162515.30032-2-wens@kernel.org --- arch/arm/boot/dts/sun6i-a31-hummingbird.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/sun6i-a31-hummingbird.dts b/arch/arm/boot/dts/sun6i-a31-hummingbird.dts index 049e6ab3cf56..73de34ae37fd 100644 --- a/arch/arm/boot/dts/sun6i-a31-hummingbird.dts +++ b/arch/arm/boot/dts/sun6i-a31-hummingbird.dts @@ -154,7 +154,7 @@ pinctrl-names = "default"; pinctrl-0 = <&gmac_rgmii_pins>; phy-handle = <&phy1>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; status = "okay"; }; -- cgit v1.2.3-70-g09d2 From 353c3de1303fc93032164402c0eb8550ecd6f154 Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai <wens@csie.org> Date: Sun, 25 Oct 2020 00:25:08 +0800 Subject: ARM: dts: sun7i: cubietruck: Enable RGMII RX/TX delay on Ethernet PHY MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Ethernet PHY on the Cubietruck has the RX and TX delays enabled on the PHY, using pull-ups on the RXDLY and TXDLY pins. Fix the phy-mode description to correct reflect this so that the implementation doesn't reconfigure the delays incorrectly. This happened with commit bbc4d71d6354 ("net: phy: realtek: fix rtl8211e rx/tx delay config"). Fixes: 67073d97672d ("ARM: dts: sun7i: cubietruck: Enable the GMAC") Signed-off-by: Chen-Yu Tsai <wens@csie.org> Signed-off-by: Maxime Ripard <maxime@cerno.tech> Tested-by: Emilio López <emilio@elopez.com.ar> Reviewed-by: Emilio López <emilio@elopez.com.ar> Link: https://lore.kernel.org/r/20201024162515.30032-3-wens@kernel.org --- arch/arm/boot/dts/sun7i-a20-cubietruck.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/sun7i-a20-cubietruck.dts b/arch/arm/boot/dts/sun7i-a20-cubietruck.dts index 8c8dee6ea461..9109ca0919ad 100644 --- a/arch/arm/boot/dts/sun7i-a20-cubietruck.dts +++ b/arch/arm/boot/dts/sun7i-a20-cubietruck.dts @@ -151,7 +151,7 @@ pinctrl-names = "default"; pinctrl-0 = <&gmac_rgmii_pins>; phy-handle = <&phy1>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; status = "okay"; }; -- cgit v1.2.3-70-g09d2 From f94f78bd93f567c022f594589dbeecdf59931365 Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai <wens@csie.org> Date: Sun, 25 Oct 2020 00:25:09 +0800 Subject: ARM: dts: sun7i: bananapi-m1-plus: Enable RGMII RX/TX delay on Ethernet PHY The Ethernet PHY on the Bananapi M1+ has the RX and TX delays enabled on the PHY, using pull-ups on the RXDLY and TXDLY pins. Fix the phy-mode description to correct reflect this so that the implementation doesn't reconfigure the delays incorrectly. This happened with commit bbc4d71d6354 ("net: phy: realtek: fix rtl8211e rx/tx delay config"). Fixes: 04c85ecad32a ("ARM: dts: sun7i: Add dts file for Bananapi M1 Plus board") Signed-off-by: Chen-Yu Tsai <wens@csie.org> Signed-off-by: Maxime Ripard <maxime@cerno.tech> Acked-by: Jernej Skrabec <jernej.skrabec@siol.net> Link: https://lore.kernel.org/r/20201024162515.30032-4-wens@kernel.org --- arch/arm/boot/dts/sun7i-a20-bananapi-m1-plus.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/sun7i-a20-bananapi-m1-plus.dts b/arch/arm/boot/dts/sun7i-a20-bananapi-m1-plus.dts index 32d5d45a35c0..8945dbb114a2 100644 --- a/arch/arm/boot/dts/sun7i-a20-bananapi-m1-plus.dts +++ b/arch/arm/boot/dts/sun7i-a20-bananapi-m1-plus.dts @@ -130,7 +130,7 @@ pinctrl-names = "default"; pinctrl-0 = <&gmac_rgmii_pins>; phy-handle = <&phy1>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; phy-supply = <®_gmac_3v3>; status = "okay"; }; -- cgit v1.2.3-70-g09d2 From e080ab31a0aa126b0a7e4f67f2b01b371b852c88 Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai <wens@csie.org> Date: Sun, 25 Oct 2020 00:25:10 +0800 Subject: ARM: dts: sun8i: h3: orangepi-plus2e: Enable RGMII RX/TX delay on Ethernet PHY The Ethernet PHY on the Orange Pi Plus 2E has the RX and TX delays enabled on the PHY, using pull-ups on the RXDLY and TXDLY pins. Fix the phy-mode description to correct reflect this so that the implementation doesn't reconfigure the delays incorrectly. This happened with commit bbc4d71d6354 ("net: phy: realtek: fix rtl8211e rx/tx delay config"). Fixes: 4904337fe34f ("ARM: dts: sunxi: Restore EMAC changes (boards)") Fixes: 7a78ef92cdc5 ("ARM: sun8i: h3: Enable EMAC with external PHY on Orange Pi Plus 2E") Signed-off-by: Chen-Yu Tsai <wens@csie.org> Signed-off-by: Maxime Ripard <maxime@cerno.tech> Tested-by: Jernej Skrabec <jernej.skrabec@siol.net> Acked-by: Jernej Skrabec <jernej.skrabec@siol.net> Link: https://lore.kernel.org/r/20201024162515.30032-5-wens@kernel.org --- arch/arm/boot/dts/sun8i-h3-orangepi-plus2e.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/sun8i-h3-orangepi-plus2e.dts b/arch/arm/boot/dts/sun8i-h3-orangepi-plus2e.dts index 6dbf7b2e0c13..b6ca45d18e51 100644 --- a/arch/arm/boot/dts/sun8i-h3-orangepi-plus2e.dts +++ b/arch/arm/boot/dts/sun8i-h3-orangepi-plus2e.dts @@ -67,7 +67,7 @@ pinctrl-0 = <&emac_rgmii_pins>; phy-supply = <®_gmac_3v3>; phy-handle = <&ext_rgmii_phy>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; status = "okay"; }; -- cgit v1.2.3-70-g09d2 From 57dbe558457bf4042169bc1f334e3b53a8480a1c Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai <wens@csie.org> Date: Sun, 25 Oct 2020 00:25:11 +0800 Subject: ARM: dts: sun8i: a83t: Enable both RGMII RX/TX delay on Ethernet PHY The Ethernet PHY on the Bananapi M3 and Cubietruck Plus have the RX and TX delays enabled on the PHY, using pull-ups on the RXDLY and TXDLY pins. Fix the phy-mode description to correct reflect this so that the implementation doesn't reconfigure the delays incorrectly. This happened with commit bbc4d71d6354 ("net: phy: realtek: fix rtl8211e rx/tx delay config"). Fixes: 039359948a4b ("ARM: dts: sun8i: a83t: Enable Ethernet on two boards") Signed-off-by: Chen-Yu Tsai <wens@csie.org> Signed-off-by: Maxime Ripard <maxime@cerno.tech> Acked-by: Jernej Skrabec <jernej.skrabec@siol.net> Link: https://lore.kernel.org/r/20201024162515.30032-6-wens@kernel.org --- arch/arm/boot/dts/sun8i-a83t-bananapi-m3.dts | 2 +- arch/arm/boot/dts/sun8i-a83t-cubietruck-plus.dts | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/sun8i-a83t-bananapi-m3.dts b/arch/arm/boot/dts/sun8i-a83t-bananapi-m3.dts index 9d34eabba121..431f70234d36 100644 --- a/arch/arm/boot/dts/sun8i-a83t-bananapi-m3.dts +++ b/arch/arm/boot/dts/sun8i-a83t-bananapi-m3.dts @@ -131,7 +131,7 @@ pinctrl-0 = <&emac_rgmii_pins>; phy-supply = <®_sw>; phy-handle = <&rgmii_phy>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; allwinner,rx-delay-ps = <700>; allwinner,tx-delay-ps = <700>; status = "okay"; diff --git a/arch/arm/boot/dts/sun8i-a83t-cubietruck-plus.dts b/arch/arm/boot/dts/sun8i-a83t-cubietruck-plus.dts index d9be511f054f..d8326a5c681d 100644 --- a/arch/arm/boot/dts/sun8i-a83t-cubietruck-plus.dts +++ b/arch/arm/boot/dts/sun8i-a83t-cubietruck-plus.dts @@ -183,7 +183,7 @@ pinctrl-0 = <&emac_rgmii_pins>; phy-supply = <®_dldo4>; phy-handle = <&rgmii_phy>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; status = "okay"; }; -- cgit v1.2.3-70-g09d2 From b1064037e8ecf09d587b7b4966eebe0c362908e5 Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai <wens@csie.org> Date: Sun, 25 Oct 2020 00:25:12 +0800 Subject: ARM: dts: sun9i: Enable both RGMII RX/TX delay on Ethernet PHY The Ethernet PHY on the Cubieboard 4 and A80 Optimus have the RX and TX delays enabled on the PHY, using pull-ups on the RXDLY and TXDLY pins. Fix the phy-mode description to correct reflect this so that the implementation doesn't reconfigure the delays incorrectly. This happened with commit bbc4d71d6354 ("net: phy: realtek: fix rtl8211e rx/tx delay config"). Fixes: 98048143b7f8 ("ARM: dts: sun9i: cubieboard4: Enable GMAC") Fixes: bc9bd03a44f9 ("ARM: dts: sun9i: a80-optimus: Enable GMAC") Signed-off-by: Chen-Yu Tsai <wens@csie.org> Signed-off-by: Maxime Ripard <maxime@cerno.tech> Acked-by: Jernej Skrabec <jernej.skrabec@siol.net> Link: https://lore.kernel.org/r/20201024162515.30032-7-wens@kernel.org --- arch/arm/boot/dts/sun9i-a80-cubieboard4.dts | 2 +- arch/arm/boot/dts/sun9i-a80-optimus.dts | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/sun9i-a80-cubieboard4.dts b/arch/arm/boot/dts/sun9i-a80-cubieboard4.dts index d3b337b043a1..484b93df20cb 100644 --- a/arch/arm/boot/dts/sun9i-a80-cubieboard4.dts +++ b/arch/arm/boot/dts/sun9i-a80-cubieboard4.dts @@ -129,7 +129,7 @@ pinctrl-names = "default"; pinctrl-0 = <&gmac_rgmii_pins>; phy-handle = <&phy1>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; phy-supply = <®_cldo1>; status = "okay"; }; diff --git a/arch/arm/boot/dts/sun9i-a80-optimus.dts b/arch/arm/boot/dts/sun9i-a80-optimus.dts index bbc6335e5631..5c3580d712e4 100644 --- a/arch/arm/boot/dts/sun9i-a80-optimus.dts +++ b/arch/arm/boot/dts/sun9i-a80-optimus.dts @@ -124,7 +124,7 @@ pinctrl-names = "default"; pinctrl-0 = <&gmac_rgmii_pins>; phy-handle = <&phy1>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; phy-supply = <®_cldo1>; status = "okay"; }; -- cgit v1.2.3-70-g09d2 From 3914160ffc0bf762d6d605d4b27036b7b89367ea Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai <wens@csie.org> Date: Sun, 25 Oct 2020 00:25:13 +0800 Subject: ARM: dts: sunxi: bananapi-m2-plus: Enable RGMII RX/TX delay on Ethernet PHY The Ethernet PHY on the Bananapi M2+ has the RX and TX delays enabled on the PHY, using pull-ups on the RXDLY and TXDLY pins. Fix the phy-mode description to correct reflect this so that the implementation doesn't reconfigure the delays incorrectly. This happened with commit bbc4d71d6354 ("net: phy: realtek: fix rtl8211e rx/tx delay config"). Fixes: 8c7ba536e709 ("ARM: sun8i: bananapi-m2-plus: Enable dwmac-sun8i") Fixes: 4904337fe34f ("ARM: dts: sunxi: Restore EMAC changes (boards)") Fixes: aa8fee415f46 ("ARM: dts: sun8i: h3: Split out non-SoC-specific parts of Bananapi M2 Plus") Signed-off-by: Chen-Yu Tsai <wens@csie.org> Signed-off-by: Maxime Ripard <maxime@cerno.tech> Tested-by: Jernej Skrabec <jernej.skrabec@siol.net> Acked-by: Jernej Skrabec <jernej.skrabec@siol.net> Link: https://lore.kernel.org/r/20201024162515.30032-8-wens@kernel.org --- arch/arm/boot/dts/sunxi-bananapi-m2-plus.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/sunxi-bananapi-m2-plus.dtsi b/arch/arm/boot/dts/sunxi-bananapi-m2-plus.dtsi index 39263e74fbb5..8e5cb3b3fd68 100644 --- a/arch/arm/boot/dts/sunxi-bananapi-m2-plus.dtsi +++ b/arch/arm/boot/dts/sunxi-bananapi-m2-plus.dtsi @@ -126,7 +126,7 @@ pinctrl-0 = <&emac_rgmii_pins>; phy-supply = <®_gmac_3v3>; phy-handle = <&ext_rgmii_phy>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; status = "okay"; }; -- cgit v1.2.3-70-g09d2 From 2bd8570d20c88909b8be3251727a26476b02652c Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai <wens@csie.org> Date: Sun, 25 Oct 2020 00:25:14 +0800 Subject: arm64: dts: allwinner: h5: libretech-all-h5-cc: Enable RGMII RX/TX delay on PHY The Ethernet PHY on the Libre Computer ALL-H5-CC has the RX and TX delays enabled on the PHY, using pull-ups on the RXDLY and TXDLY pins. Fix the phy-mode description to correct reflect this so that the implementation doesn't reconfigure the delays incorrectly. This happened with commit bbc4d71d6354 ("net: phy: realtek: fix rtl8211e rx/tx delay config"). Fixes: 60d0426d7603 ("arm64: dts: allwinner: h5: Add Libre Computer ALL-H5-CC H5 board") Signed-off-by: Chen-Yu Tsai <wens@csie.org> Signed-off-by: Maxime Ripard <maxime@cerno.tech> Acked-by: Jernej Skrabec <jernej.skrabec@siol.net> Link: https://lore.kernel.org/r/20201024162515.30032-9-wens@kernel.org --- arch/arm64/boot/dts/allwinner/sun50i-h5-libretech-all-h5-cc.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h5-libretech-all-h5-cc.dts b/arch/arm64/boot/dts/allwinner/sun50i-h5-libretech-all-h5-cc.dts index df1b9263ad0e..6e30a564c87f 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-h5-libretech-all-h5-cc.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-h5-libretech-all-h5-cc.dts @@ -36,7 +36,7 @@ pinctrl-0 = <&emac_rgmii_pins>; phy-supply = <®_gmac_3v3>; phy-handle = <&ext_rgmii_phy>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; /delete-property/ allwinner,leds-active-low; status = "okay"; }; -- cgit v1.2.3-70-g09d2 From 1a9a8910b2153cd3c4f3f2f8defcb853ead3b1fd Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai <wens@csie.org> Date: Sun, 25 Oct 2020 00:25:15 +0800 Subject: arm64: dts: allwinner: a64: bananapi-m64: Enable RGMII RX/TX delay on PHY The Ethernet PHY on the Bananapi M64 has the RX and TX delays enabled on the PHY, using pull-ups on the RXDLY and TXDLY pins. Fix the phy-mode description to correct reflect this so that the implementation doesn't reconfigure the delays incorrectly. This happened with commit bbc4d71d6354 ("net: phy: realtek: fix rtl8211e rx/tx delay config"). Fixes: e7295499903d ("arm64: allwinner: bananapi-m64: Enable dwmac-sun8i") Fixes: 94f442886711 ("arm64: dts: allwinner: A64: Restore EMAC changes") Signed-off-by: Chen-Yu Tsai <wens@csie.org> Signed-off-by: Maxime Ripard <maxime@cerno.tech> Tested-by: Corentin Labbe <clabbe.montjoie@gmail.com> Acked-by: Jernej Skrabec <jernej.skrabec@siol.net> Link: https://lore.kernel.org/r/20201024162515.30032-10-wens@kernel.org --- arch/arm64/boot/dts/allwinner/sun50i-a64-bananapi-m64.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-bananapi-m64.dts b/arch/arm64/boot/dts/allwinner/sun50i-a64-bananapi-m64.dts index 3ea5182ca489..e5e840b9fbb4 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-a64-bananapi-m64.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-bananapi-m64.dts @@ -105,7 +105,7 @@ &emac { pinctrl-names = "default"; pinctrl-0 = <&rgmii_pins>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; phy-handle = <&ext_rgmii_phy>; phy-supply = <®_dc1sw>; status = "okay"; -- cgit v1.2.3-70-g09d2 From 294a3317bef52b189139c813b50dd14d344fa9ec Mon Sep 17 00:00:00 2001 From: Tony Lindgren <tony@atomide.com> Date: Wed, 28 Oct 2020 08:03:17 +0200 Subject: ARM: OMAP2+: Manage MPU state properly for omap_enter_idle_coupled() Based on more testing, commit 8ca5ee624b4c ("ARM: OMAP2+: Restore MPU power domain if cpu_cluster_pm_enter() fails") is a poor fix for handling cpu_cluster_pm_enter() returned errors. We should not override the cpuidle states with a hardcoded PWRDM_POWER_ON value. Instead, we should use a configured idle state that does not cause the context to be lost. Otherwise we end up configuring a potentially improper state for the MPUSS. We also want to update the returned state index for the selected state. Let's just select the highest power idle state C1 to ensure no context loss is allowed on cpu_cluster_pm_enter() errors. With these changes we can now unconditionally call omap4_enter_lowpower() for WFI like we did earlier before commit 55be2f50336f ("ARM: OMAP2+: Handle errors for cpu_pm"). And we can return the selected state index. Fixes: 8f04aea048d5 ("ARM: OMAP2+: Restore MPU power domain if cpu_cluster_pm_enter() fails") Fixes: 55be2f50336f ("ARM: OMAP2+: Handle errors for cpu_pm") Signed-off-by: Tony Lindgren <tony@atomide.com> --- arch/arm/mach-omap2/cpuidle44xx.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/arm/mach-omap2/cpuidle44xx.c b/arch/arm/mach-omap2/cpuidle44xx.c index a92d277f81a0..c8d317fafe2e 100644 --- a/arch/arm/mach-omap2/cpuidle44xx.c +++ b/arch/arm/mach-omap2/cpuidle44xx.c @@ -175,8 +175,11 @@ static int omap_enter_idle_coupled(struct cpuidle_device *dev, if (mpuss_can_lose_context) { error = cpu_cluster_pm_enter(); if (error) { - omap_set_pwrdm_state(mpu_pd, PWRDM_POWER_ON); - goto cpu_cluster_pm_out; + index = 0; + cx = state_ptr + index; + pwrdm_set_logic_retst(mpu_pd, cx->mpu_logic_state); + omap_set_pwrdm_state(mpu_pd, cx->mpu_state); + mpuss_can_lose_context = 0; } } } @@ -184,7 +187,6 @@ static int omap_enter_idle_coupled(struct cpuidle_device *dev, omap4_enter_lowpower(dev->cpu, cx->cpu_state); cpu_done[dev->cpu] = true; -cpu_cluster_pm_out: /* Wakeup CPU1 only if it is not offlined */ if (dev->cpu == 0 && cpumask_test_cpu(1, cpu_online_mask)) { -- cgit v1.2.3-70-g09d2 From 31b4d8e172f614adc53ddecb4b6b2f6411a49b84 Mon Sep 17 00:00:00 2001 From: Randy Dunlap <rdunlap@infradead.org> Date: Fri, 23 Oct 2020 12:44:40 -0700 Subject: MIPS: export has_transparent_hugepage() for modules MIPS should export its local version of "has_transparent_hugepage" so that loadable modules (dax) can use it. Fixes this build error: ERROR: modpost: "has_transparent_hugepage" [drivers/dax/dax.ko] undefined! Fixes: fd8cfd300019 ("arch: fix has_transparent_hugepage()") Reported-by: kernel test robot <lkp@intel.com> Signed-off-by: Randy Dunlap <rdunlap@infradead.org> Cc: Thomas Bogendoerfer <tsbogend@alpha.franken.de> Cc: linux-mips@vger.kernel.org Cc: Dan Williams <dan.j.williams@intel.com> Cc: Vishal Verma <vishal.l.verma@intel.com> Cc: Dave Jiang <dave.jiang@intel.com> Cc: linux-nvdimm@lists.01.org Cc: Hugh Dickins <hughd@google.com> Cc: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Thomas Bogendoerfer <tsbogend@alpha.franken.de> --- arch/mips/mm/tlb-r4k.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/mips/mm/tlb-r4k.c b/arch/mips/mm/tlb-r4k.c index 38e2894d5fa3..1b939abbe4ca 100644 --- a/arch/mips/mm/tlb-r4k.c +++ b/arch/mips/mm/tlb-r4k.c @@ -438,6 +438,7 @@ int has_transparent_hugepage(void) } return mask == PM_HUGE_MASK; } +EXPORT_SYMBOL(has_transparent_hugepage); #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ -- cgit v1.2.3-70-g09d2 From 107954afc5df667da438644aa4982606663f9b17 Mon Sep 17 00:00:00 2001 From: Nenad Peric <nperic@gmail.com> Date: Wed, 28 Oct 2020 12:58:17 +0100 Subject: arm64: dts: allwinner: h5: OrangePi Prime: Fix ethernet node RX and TX delay are provided by ethernet PHY. Reflect that in ethernet node. Fixes: 44a94c7ef989 ("arm64: dts: allwinner: H5: Restore EMAC changes") Signed-off-by: Nenad Peric <nperic@gmail.com> Signed-off-by: Maxime Ripard <maxime@cerno.tech> Acked-by: Jernej Skrabec <jernej.skrabec@siol.net> Link: https://lore.kernel.org/r/20201028115817.68113-1-nperic@gmail.com --- arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-prime.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-prime.dts b/arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-prime.dts index cb44bfa5981f..33ab44072e6d 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-prime.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-prime.dts @@ -124,7 +124,7 @@ pinctrl-0 = <&emac_rgmii_pins>; phy-supply = <®_gmac_3v3>; phy-handle = <&ext_rgmii_phy>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; status = "okay"; }; -- cgit v1.2.3-70-g09d2 From 00203737867c8b63ca247e71ada1b32bb0b0dd3d Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski <krzk@kernel.org> Date: Sun, 27 Sep 2020 18:59:42 +0200 Subject: arm64: dts: imx8mm-var-som: fix missing PMIC's interrupt line pull-up The PMIC's interrupt is level low and should be pulled up. The PMIC's device node had pinctrl-0 property but it lacked pinctrl-names which is required to apply the pin configuration. The actual problem in DTS was pointed out by Felix Radensky from Variscite. Reported-by: Felix Radensky <felix.r@variscite.com> Fixes: 5f67317bd967 ("arm64: dts: imx8mm: correct interrupt flags") Signed-off-by: Krzysztof Kozlowski <krzk@kernel.org> Reviewed-by: Robin Gong <yibin.gong@nxp.com> Signed-off-by: Shawn Guo <shawnguo@kernel.org> --- arch/arm64/boot/dts/freescale/imx8mm-var-som.dtsi | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/arm64/boot/dts/freescale/imx8mm-var-som.dtsi b/arch/arm64/boot/dts/freescale/imx8mm-var-som.dtsi index 4107fe914d08..49082529764f 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm-var-som.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mm-var-som.dtsi @@ -135,13 +135,10 @@ pmic@4b { compatible = "rohm,bd71847"; reg = <0x4b>; + pinctrl-names = "default"; pinctrl-0 = <&pinctrl_pmic>; interrupt-parent = <&gpio2>; - /* - * The interrupt is not correct. It should be level low, - * however with internal pull up this causes IRQ storm. - */ - interrupts = <8 IRQ_TYPE_EDGE_RISING>; + interrupts = <8 IRQ_TYPE_LEVEL_LOW>; rohm,reset-snvs-powered; #clock-cells = <0>; @@ -398,7 +395,7 @@ pinctrl_pmic: pmicirqgrp { fsl,pins = < - MX8MM_IOMUXC_SD1_DATA6_GPIO2_IO8 0x41 + MX8MM_IOMUXC_SD1_DATA6_GPIO2_IO8 0x141 >; }; -- cgit v1.2.3-70-g09d2 From 0710e4385c9c978952333393396061ed1672d145 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski <krzk@kernel.org> Date: Sun, 27 Sep 2020 18:59:43 +0200 Subject: arm64: dts: imx8mm-beacon-som: fix missing PMIC's interrupt line pull-up The PMIC's interrupt is level low and should be pulled up. The PMIC's device node had pinctrl-0 property but it lacked pinctrl-names which is required to apply the pin configuration. Fixes: 5f67317bd967 ("arm64: dts: imx8mm: correct interrupt flags") Fixes: 593816fa2f35 ("arm64: dts: imx: Add Beacon i.MX8m-Mini development kit") Signed-off-by: Krzysztof Kozlowski <krzk@kernel.org> Tested-by: Adam Ford <aford173@gmail.com> Reviewed-by: Robin Gong <yibin.gong@nxp.com> Signed-off-by: Shawn Guo <shawnguo@kernel.org> --- arch/arm64/boot/dts/freescale/imx8mm-beacon-som.dtsi | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/arm64/boot/dts/freescale/imx8mm-beacon-som.dtsi b/arch/arm64/boot/dts/freescale/imx8mm-beacon-som.dtsi index 6de86a4f0ec4..55b36bddd513 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm-beacon-som.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mm-beacon-som.dtsi @@ -72,6 +72,7 @@ pmic@4b { compatible = "rohm,bd71847"; reg = <0x4b>; + pinctrl-names = "default"; pinctrl-0 = <&pinctrl_pmic>; interrupt-parent = <&gpio1>; interrupts = <3 IRQ_TYPE_LEVEL_LOW>; -- cgit v1.2.3-70-g09d2 From ce6fc31f388d45b9f7135169f911cd27f4d21126 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski <krzk@kernel.org> Date: Sun, 27 Sep 2020 18:59:44 +0200 Subject: arm64: dts: imx8mm-evk: fix missing PMIC's interrupt line pull-up The PMIC's interrupt is level low and should be pulled up. The PMIC's device node had pinctrl-0 property but it lacked pinctrl-names which is required to apply the pin configuration. Fixes: 5f67317bd967 ("arm64: dts: imx8mm: correct interrupt flags") Fixes: aa71d0648318 ("arm64: dts: imx8mm: Split the imx8mm evk board dts to a common dtsi") Signed-off-by: Krzysztof Kozlowski <krzk@kernel.org> Reviewed-by: Robin Gong <yibin.gong@nxp.com> Signed-off-by: Shawn Guo <shawnguo@kernel.org> --- arch/arm64/boot/dts/freescale/imx8mm-evk.dtsi | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/arm64/boot/dts/freescale/imx8mm-evk.dtsi b/arch/arm64/boot/dts/freescale/imx8mm-evk.dtsi index f305a530ff6f..521eb3a5a12e 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm-evk.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mm-evk.dtsi @@ -121,6 +121,7 @@ pmic@4b { compatible = "rohm,bd71847"; reg = <0x4b>; + pinctrl-names = "default"; pinctrl-0 = <&pinctrl_pmic>; interrupt-parent = <&gpio1>; interrupts = <3 IRQ_TYPE_LEVEL_LOW>; -- cgit v1.2.3-70-g09d2 From 34a1c5e39b670fd7a324b5620c9ad4ac80c2f018 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski <krzk@kernel.org> Date: Sun, 27 Sep 2020 18:59:45 +0200 Subject: arm64: dts: imx8mn-var-som: fix missing PMIC's interrupt line pull-up The PMIC's interrupt is level low and should be pulled up. The PMIC's device node had pinctrl-0 property but it lacked pinctrl-names which is required to apply the pin configuration. The actual problem in DTS was pointed out by Felix Radensky from Variscite. Reported-by: Felix Radensky <felix.r@variscite.com> Fixes: ade0176dd8a0 ("arm64: dts: imx8mn-var-som: Add Variscite VAR-SOM-MX8MN System on Module") Signed-off-by: Krzysztof Kozlowski <krzk@kernel.org> Reviewed-by: Robin Gong <yibin.gong@nxp.com> Signed-off-by: Shawn Guo <shawnguo@kernel.org> --- arch/arm64/boot/dts/freescale/imx8mn-var-som.dtsi | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/arm64/boot/dts/freescale/imx8mn-var-som.dtsi b/arch/arm64/boot/dts/freescale/imx8mn-var-som.dtsi index a2d0190921e4..7f356edf9f91 100644 --- a/arch/arm64/boot/dts/freescale/imx8mn-var-som.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mn-var-som.dtsi @@ -116,13 +116,10 @@ pmic@4b { compatible = "rohm,bd71847"; reg = <0x4b>; + pinctrl-names = "default"; pinctrl-0 = <&pinctrl_pmic>; interrupt-parent = <&gpio2>; - /* - * The interrupt is not correct. It should be level low, - * however with internal pull up this causes IRQ storm. - */ - interrupts = <8 IRQ_TYPE_EDGE_RISING>; + interrupts = <8 IRQ_TYPE_LEVEL_LOW>; rohm,reset-snvs-powered; regulators { @@ -388,7 +385,7 @@ pinctrl_pmic: pmicirqgrp { fsl,pins = < - MX8MN_IOMUXC_SD1_DATA6_GPIO2_IO8 0x101 + MX8MN_IOMUXC_SD1_DATA6_GPIO2_IO8 0x141 >; }; -- cgit v1.2.3-70-g09d2 From 4d20fa1dac2e3cf5aa0cd317b3436f4fda680b04 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski <krzk@kernel.org> Date: Sun, 27 Sep 2020 18:59:46 +0200 Subject: arm64: dts: imx8mn-ddr4-evk: fix missing PMIC's interrupt line pull-up The PMIC's interrupt is level low and should be pulled up. The PMIC's device node had pinctrl-0 property but it lacked pinctrl-names which is required to apply the pin configuration. Fixes: 4153f7811a9b ("arm64: dts: imx8mn: correct interrupt flags") Fixes: 3e44dd09736d ("arm64: dts: imx8mn-ddr4-evk: Add rohm,bd71847 PMIC support") Signed-off-by: Krzysztof Kozlowski <krzk@kernel.org> Reviewed-by: Robin Gong <yibin.gong@nxp.com> Signed-off-by: Shawn Guo <shawnguo@kernel.org> --- arch/arm64/boot/dts/freescale/imx8mn-ddr4-evk.dts | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/arm64/boot/dts/freescale/imx8mn-ddr4-evk.dts b/arch/arm64/boot/dts/freescale/imx8mn-ddr4-evk.dts index 46e76cf32b2f..7dfee715a2c4 100644 --- a/arch/arm64/boot/dts/freescale/imx8mn-ddr4-evk.dts +++ b/arch/arm64/boot/dts/freescale/imx8mn-ddr4-evk.dts @@ -53,6 +53,7 @@ pmic@4b { compatible = "rohm,bd71847"; reg = <0x4b>; + pinctrl-names = "default"; pinctrl-0 = <&pinctrl_pmic>; interrupt-parent = <&gpio1>; interrupts = <3 IRQ_TYPE_LEVEL_LOW>; -- cgit v1.2.3-70-g09d2 From 6efb099a1da4e954409e241b47257a637120e5c2 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski <krzk@kernel.org> Date: Sun, 27 Sep 2020 18:59:47 +0200 Subject: arm64: dts: imx8mn-evk: fix missing PMIC's interrupt line pull-up The PMIC's interrupt is level low and should be pulled up. The PMIC's device node had pinctrl-0 property but it lacked pinctrl-names which is required to apply the pin configuration. Fixes: 4153f7811a9b ("arm64: dts: imx8mn: correct interrupt flags") Fixes: 6386156eb279 ("arm64: dts: imx8mn-evk: add pca9450 for i.mx8mn-evk board") Signed-off-by: Krzysztof Kozlowski <krzk@kernel.org> Reviewed-by: Robin Gong <yibin.gong@nxp.com> Signed-off-by: Shawn Guo <shawnguo@kernel.org> --- arch/arm64/boot/dts/freescale/imx8mn-evk.dts | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/arm64/boot/dts/freescale/imx8mn-evk.dts b/arch/arm64/boot/dts/freescale/imx8mn-evk.dts index 707d8486b4d8..8311b95dee49 100644 --- a/arch/arm64/boot/dts/freescale/imx8mn-evk.dts +++ b/arch/arm64/boot/dts/freescale/imx8mn-evk.dts @@ -18,6 +18,7 @@ pmic: pmic@25 { compatible = "nxp,pca9450b"; reg = <0x25>; + pinctrl-names = "default"; pinctrl-0 = <&pinctrl_pmic>; interrupt-parent = <&gpio1>; interrupts = <3 IRQ_TYPE_LEVEL_LOW>; -- cgit v1.2.3-70-g09d2 From d92454287ee25d78f1caac3734a1864f8a5a5275 Mon Sep 17 00:00:00 2001 From: Biwen Li <biwen.li@nxp.com> Date: Tue, 29 Sep 2020 09:30:21 +0800 Subject: arm64: dts: fsl: fix endianness issue of rcpm MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add little-endian property to RCPM node (for ls1028a,ls1088a,ls208xa), otherwise RCPM driver will program hardware with incorrect setting, causing system (such as LS1028ARDB) failed to be waked by wakeup source. Fixes: 791c88ca5713 (“arm64: dts: ls1028a: Add ftm_alarm0 DT node”) Fixes: f4fe3a8665495 (“arm64: dts: layerscape: add ftm_alarm0 node”) Signed-off-by: Biwen Li <biwen.li@nxp.com> Signed-off-by: Ran Wang <ran.wang_1@nxp.com> Acked-by: Li Yang <leoyang.li@nxp.com> Signed-off-by: Shawn Guo <shawnguo@kernel.org> --- arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi | 1 + arch/arm64/boot/dts/freescale/fsl-ls1088a.dtsi | 1 + arch/arm64/boot/dts/freescale/fsl-ls208xa.dtsi | 1 + 3 files changed, 3 insertions(+) (limited to 'arch') diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi index 73e4f9466887..7a6fb7e1fb82 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi +++ b/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi @@ -1012,6 +1012,7 @@ compatible = "fsl,ls1028a-rcpm", "fsl,qoriq-rcpm-2.1+"; reg = <0x0 0x1e34040 0x0 0x1c>; #fsl,rcpm-wakeup-cells = <7>; + little-endian; }; ftm_alarm0: timer@2800000 { diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1088a.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls1088a.dtsi index ff5805206a28..692d8f4a206d 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls1088a.dtsi +++ b/arch/arm64/boot/dts/freescale/fsl-ls1088a.dtsi @@ -805,6 +805,7 @@ compatible = "fsl,ls1088a-rcpm", "fsl,qoriq-rcpm-2.1+"; reg = <0x0 0x1e34040 0x0 0x18>; #fsl,rcpm-wakeup-cells = <6>; + little-endian; }; ftm_alarm0: timer@2800000 { diff --git a/arch/arm64/boot/dts/freescale/fsl-ls208xa.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls208xa.dtsi index bf72918fe545..e7abb74bd816 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls208xa.dtsi +++ b/arch/arm64/boot/dts/freescale/fsl-ls208xa.dtsi @@ -892,6 +892,7 @@ compatible = "fsl,ls208xa-rcpm", "fsl,qoriq-rcpm-2.1+"; reg = <0x0 0x1e34040 0x0 0x18>; #fsl,rcpm-wakeup-cells = <6>; + little-endian; }; ftm_alarm0: timer@2800000 { -- cgit v1.2.3-70-g09d2 From 054b5d97448714ae4a0bcd6f36b0515ac7aed21e Mon Sep 17 00:00:00 2001 From: Madalin Bucur <madalin.bucur@oss.nxp.com> Date: Mon, 5 Oct 2020 15:46:39 +0300 Subject: arm64: dts: fsl: DPAA FMan DMA operations are coherent Although the DPAA 1 FMan operations are coherent, the device tree node for the FMan does not indicate that, resulting in a needless loss of performance. Adding the missing dma-coherent property. Fixes: 1ffbecdd8321 ("arm64: dts: add DPAA FMan nodes") Signed-off-by: Madalin Bucur <madalin.bucur@oss.nxp.com> Tested-by: Camelia Groza <camelia.groza@oss.nxp.com> Acked-by: Li Yang <leoyang.li@nxp.com> Signed-off-by: Shawn Guo <shawnguo@kernel.org> --- arch/arm64/boot/dts/freescale/qoriq-fman3-0.dtsi | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/arm64/boot/dts/freescale/qoriq-fman3-0.dtsi b/arch/arm64/boot/dts/freescale/qoriq-fman3-0.dtsi index 8bc6caa9167d..4338db14c5da 100644 --- a/arch/arm64/boot/dts/freescale/qoriq-fman3-0.dtsi +++ b/arch/arm64/boot/dts/freescale/qoriq-fman3-0.dtsi @@ -19,6 +19,7 @@ fman0: fman@1a00000 { clock-names = "fmanclk"; fsl,qman-channel-range = <0x800 0x10>; ptimer-handle = <&ptp_timer0>; + dma-coherent; muram@0 { compatible = "fsl,fman-muram"; -- cgit v1.2.3-70-g09d2 From 587258edd94c305077923ec458e04c032fca83e6 Mon Sep 17 00:00:00 2001 From: Adam Ford <aford173@gmail.com> Date: Wed, 7 Oct 2020 08:02:37 -0500 Subject: arm64: dts: imx8mm-beacon-som: Fix Choppy BT audio When streaming bluetooth audio, the sound is choppy due to the fact that the default baud rate of the HCI interface is too slow to handle 16-bit stereo at 48KHz. The Bluetooth chip is capable of up to 4M baud on the serial port, so this patch sets the max-speed to 4000000 in order to properly stream audio over the Bluetooth. Fixes: 593816fa2f35 ("arm64: dts: imx: Add Beacon i.MX8m-Mini development kit") Signed-off-by: Adam Ford <aford173@gmail.com> Signed-off-by: Shawn Guo <shawnguo@kernel.org> --- arch/arm64/boot/dts/freescale/imx8mm-beacon-som.dtsi | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/arm64/boot/dts/freescale/imx8mm-beacon-som.dtsi b/arch/arm64/boot/dts/freescale/imx8mm-beacon-som.dtsi index 55b36bddd513..b88c3c99b007 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm-beacon-som.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mm-beacon-som.dtsi @@ -211,6 +211,7 @@ host-wakeup-gpios = <&gpio2 8 GPIO_ACTIVE_HIGH>; device-wakeup-gpios = <&gpio2 7 GPIO_ACTIVE_HIGH>; clocks = <&osc_32k>; + max-speed = <4000000>; clock-names = "extclk"; }; }; -- cgit v1.2.3-70-g09d2 From cf5abb0132193767c07c83e06f91b777d22ba495 Mon Sep 17 00:00:00 2001 From: Adam Ford <aford173@gmail.com> Date: Thu, 8 Oct 2020 13:33:00 -0500 Subject: arm64: dts imx8mn: Remove non-existent USB OTG2 According to the i.MX8MN TRM, there is only one OTG port. The address for OTG2 is reserved on Nano. This patch removes the non-existent OTG2, usbphynop2, and the usbmisc2 nodes. Fixes: 6c3debcbae47 ("arm64: dts: freescale: Add i.MX8MN dtsi support") Signed-off-by: Adam Ford <aford173@gmail.com> Reviewed-by: Krzysztof Kozlowski <krzk@kernel.org> Signed-off-by: Shawn Guo <shawnguo@kernel.org> --- arch/arm64/boot/dts/freescale/imx8mn.dtsi | 30 ------------------------------ 1 file changed, 30 deletions(-) (limited to 'arch') diff --git a/arch/arm64/boot/dts/freescale/imx8mn.dtsi b/arch/arm64/boot/dts/freescale/imx8mn.dtsi index 746faf1cf2fb..16c7202885d7 100644 --- a/arch/arm64/boot/dts/freescale/imx8mn.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mn.dtsi @@ -790,28 +790,6 @@ #index-cells = <1>; reg = <0x32e40200 0x200>; }; - - usbotg2: usb@32e50000 { - compatible = "fsl,imx8mn-usb", "fsl,imx7d-usb"; - reg = <0x32e50000 0x200>; - interrupts = <GIC_SPI 41 IRQ_TYPE_LEVEL_HIGH>; - clocks = <&clk IMX8MN_CLK_USB1_CTRL_ROOT>; - clock-names = "usb1_ctrl_root_clk"; - assigned-clocks = <&clk IMX8MN_CLK_USB_BUS>, - <&clk IMX8MN_CLK_USB_CORE_REF>; - assigned-clock-parents = <&clk IMX8MN_SYS_PLL2_500M>, - <&clk IMX8MN_SYS_PLL1_100M>; - fsl,usbphy = <&usbphynop2>; - fsl,usbmisc = <&usbmisc2 0>; - status = "disabled"; - }; - - usbmisc2: usbmisc@32e50200 { - compatible = "fsl,imx8mn-usbmisc", "fsl,imx7d-usbmisc"; - #index-cells = <1>; - reg = <0x32e50200 0x200>; - }; - }; dma_apbh: dma-controller@33000000 { @@ -876,12 +854,4 @@ assigned-clock-parents = <&clk IMX8MN_SYS_PLL1_100M>; clock-names = "main_clk"; }; - - usbphynop2: usbphynop2 { - compatible = "usb-nop-xceiv"; - clocks = <&clk IMX8MN_CLK_USB_PHY_REF>; - assigned-clocks = <&clk IMX8MN_CLK_USB_PHY_REF>; - assigned-clock-parents = <&clk IMX8MN_SYS_PLL1_100M>; - clock-names = "main_clk"; - }; }; -- cgit v1.2.3-70-g09d2 From b0c0aa7aa4b919e02e0a24aa3a46dfbf2bbc34dc Mon Sep 17 00:00:00 2001 From: David Bauer <mail@david-bauer.net> Date: Mon, 26 Oct 2020 17:27:21 +0100 Subject: arm64: dts: rockchip: fix NanoPi R2S GMAC clock name This commit fixes the name for the GMAC clock to gmac_clkin, as this is the name of the clock provided by the rk3328-clk driver. Without this commit, the GMAC will not work in TX direction. Fixes: f1ec83f880db ("arm64: dts: rockchip: Add support for FriendlyARM NanoPi R2S") Suggested-by: Tobias Waldvogel <tobias.waldvogel@gmail.com> Signed-off-by: David Bauer <mail@david-bauer.net> Link: https://lore.kernel.org/r/20201026162721.70672-1-mail@david-bauer.net Signed-off-by: Heiko Stuebner <heiko@sntech.de> --- arch/arm64/boot/dts/rockchip/rk3328-nanopi-r2s.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/boot/dts/rockchip/rk3328-nanopi-r2s.dts b/arch/arm64/boot/dts/rockchip/rk3328-nanopi-r2s.dts index be7a31d81632..2ee07d15a6e3 100644 --- a/arch/arm64/boot/dts/rockchip/rk3328-nanopi-r2s.dts +++ b/arch/arm64/boot/dts/rockchip/rk3328-nanopi-r2s.dts @@ -20,7 +20,7 @@ gmac_clk: gmac-clock { compatible = "fixed-clock"; clock-frequency = <125000000>; - clock-output-names = "gmac_clk"; + clock-output-names = "gmac_clkin"; #clock-cells = <0>; }; -- cgit v1.2.3-70-g09d2 From 01fe332800d0d2f94337b45c1973f4cf28ae6195 Mon Sep 17 00:00:00 2001 From: Maciej Matuszczyk <maccraft123mc@gmail.com> Date: Fri, 23 Oct 2020 20:16:29 +0200 Subject: arm64: dts: rockchip: Remove system-power-controller from pmic on Odroid Go Advance This fixes a poweroff issue when this is supposed to happen via PSCI. Signed-off-by: Maciej Matuszczyk <maccraft123mc@gmail.com> Link: https://lore.kernel.org/r/20201023181629.119727-1-maccraft123mc@gmail.com Signed-off-by: Heiko Stuebner <heiko@sntech.de> --- arch/arm64/boot/dts/rockchip/rk3326-odroid-go2.dts | 1 - 1 file changed, 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/boot/dts/rockchip/rk3326-odroid-go2.dts b/arch/arm64/boot/dts/rockchip/rk3326-odroid-go2.dts index 35bd6b904b9c..337681038519 100644 --- a/arch/arm64/boot/dts/rockchip/rk3326-odroid-go2.dts +++ b/arch/arm64/boot/dts/rockchip/rk3326-odroid-go2.dts @@ -243,7 +243,6 @@ interrupts = <RK_PB2 IRQ_TYPE_LEVEL_LOW>; pinctrl-names = "default"; pinctrl-0 = <&pmic_int>; - rockchip,system-power-controller; wakeup-source; #clock-cells = <1>; clock-output-names = "rk808-clkout1", "xin32k"; -- cgit v1.2.3-70-g09d2 From 7dd8f0ba88fce98e2953267a66af74c6f4792a56 Mon Sep 17 00:00:00 2001 From: Sergey Matyukevich <geomatsi@gmail.com> Date: Sat, 24 Oct 2020 23:11:20 +0300 Subject: arm: dts: imx6qdl-udoo: fix rgmii phy-mode for ksz9031 phy Commit bcf3440c6dd7 ("net: phy: micrel: add phy-mode support for the KSZ9031 PHY") fixed micrel phy driver adding proper support for phy modes. Adapt imx6q-udoo board phy settings : explicitly set required delay configuration using "rgmii-id". Fixes: cbd54fe0b2bc ("ARM: dts: imx6dl-udoo: Add board support based off imx6q-udoo") Signed-off-by: Sergey Matyukevich <geomatsi@gmail.com> Reviewed-by: Fabio Estevam <festevam@gmail.com> Signed-off-by: Shawn Guo <shawnguo@kernel.org> --- arch/arm/boot/dts/imx6qdl-udoo.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/imx6qdl-udoo.dtsi b/arch/arm/boot/dts/imx6qdl-udoo.dtsi index 828dd20cd27d..d07d8f83456d 100644 --- a/arch/arm/boot/dts/imx6qdl-udoo.dtsi +++ b/arch/arm/boot/dts/imx6qdl-udoo.dtsi @@ -98,7 +98,7 @@ &fec { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_enet>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; status = "okay"; }; -- cgit v1.2.3-70-g09d2 From f8b5a33707c9a19ec905d2826be0acd151997a09 Mon Sep 17 00:00:00 2001 From: Andrew Lunn <andrew@lunn.ch> Date: Fri, 30 Oct 2020 01:55:28 +0100 Subject: ARM: dts: vf610-zii-dev-rev-b: Fix MDIO over clocking The ZII devel B board has two generations of Marvell Switches. The mv88e6352 supports an MDIO clock of 12MHz. However the older 88e6185 does not like 12MHz, and often fails to probe. Reduce the clock speed to 5MHz, which seems to work reliably. Cc: Chris Healy <cphealy@gmail.com> Fixes: b955387667ec ("ARM: dts: ZII: update MDIO speed and preamble") Signed-off-by: Andrew Lunn <andrew@lunn.ch> Reviewed-by: Chris Healy <cphealy@gmail.com> Signed-off-by: Shawn Guo <shawnguo@kernel.org> --- arch/arm/boot/dts/vf610-zii-dev-rev-b.dts | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch') diff --git a/arch/arm/boot/dts/vf610-zii-dev-rev-b.dts b/arch/arm/boot/dts/vf610-zii-dev-rev-b.dts index e500911ce0a5..6f1e0f0d4f0a 100644 --- a/arch/arm/boot/dts/vf610-zii-dev-rev-b.dts +++ b/arch/arm/boot/dts/vf610-zii-dev-rev-b.dts @@ -406,6 +406,9 @@ }; }; +&mdio1 { + clock-frequency = <5000000>; +}; &iomuxc { pinctrl_gpio_e6185_eeprom_sel: pinctrl-gpio-e6185-eeprom-spi0 { -- cgit v1.2.3-70-g09d2 From e402599e5e5e0b2758d7766fd9f6d7953d4ccd85 Mon Sep 17 00:00:00 2001 From: Oleksij Rempel <o.rempel@pengutronix.de> Date: Mon, 12 Oct 2020 09:18:16 +0200 Subject: ARM: dts: imx6q-prti6q: fix PHY address Due to bug in the bootloader, the PHY has floating address and may randomly change on each PHY reset. To avoid it, the updated bootloader with the following patch[0] should be used: | ARM: protonic: disable on-die termination to fix PHY bootstrapping | | If on-die termination is enabled, the RXC pin of iMX6 will be pulled | high. Since we already have an 10K pull-down on board, the RXC level on | PHY reset will be ~800mV, which is mostly interpreted as 1. On some | reboots we get 0 instead and kernel can't detect the PHY properly. | | Since the default 0x020e07ac value is 0, it is sufficient to remove this | entry from the affected imxcfg files. | | Since we get stable 0 on pin PHYADDR[2], the PHY address is changed from | 4 to 0. With latest bootloader update, the PHY address will be fixed to "0". [0] https://git.pengutronix.de/cgit/barebox/commit/?id=93f7dcf631edfcda19e7757b28d66017ea274b81 Fixes: 0d446a505592 ("ARM: dts: add Protonic PRTI6Q board") Signed-off-by: Oleksij Rempel <o.rempel@pengutronix.de> Signed-off-by: Shawn Guo <shawnguo@kernel.org> --- arch/arm/boot/dts/imx6q-prti6q.dts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/imx6q-prti6q.dts b/arch/arm/boot/dts/imx6q-prti6q.dts index d112b50f8c5d..b4605edfd2ab 100644 --- a/arch/arm/boot/dts/imx6q-prti6q.dts +++ b/arch/arm/boot/dts/imx6q-prti6q.dts @@ -213,8 +213,8 @@ #size-cells = <0>; /* Microchip KSZ9031RNX PHY */ - rgmii_phy: ethernet-phy@4 { - reg = <4>; + rgmii_phy: ethernet-phy@0 { + reg = <0>; interrupts-extended = <&gpio1 28 IRQ_TYPE_LEVEL_LOW>; reset-gpios = <&gpio1 25 GPIO_ACTIVE_LOW>; reset-assert-us = <10000>; -- cgit v1.2.3-70-g09d2 From 544cc3f8573bf9a82e8f348741f2f68d2a8376fb Mon Sep 17 00:00:00 2001 From: Jernej Skrabec <jernej.skrabec@siol.net> Date: Sun, 1 Nov 2020 08:26:09 +0100 Subject: arm64: dts: allwinner: h6: orangepi-one-plus: Fix ethernet RX/TX delay on OrangePi One Plus board is set on PHY. Reflect that in ethernet node. Fixes: 7ee32a17e0d6 ("arm64: dts: allwinner: h6: orangepi-one-plus: Enable ethernet") Signed-off-by: Jernej Skrabec <jernej.skrabec@siol.net> Signed-off-by: Maxime Ripard <maxime@cerno.tech> Tested-by: Marcus Cooper <codekipper@gmail.com> Link: https://lore.kernel.org/r/20201101072609.1681891-1-jernej.skrabec@siol.net --- arch/arm64/boot/dts/allwinner/sun50i-h6-orangepi-one-plus.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h6-orangepi-one-plus.dts b/arch/arm64/boot/dts/allwinner/sun50i-h6-orangepi-one-plus.dts index fceb298bfd53..29a081e72a9b 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-h6-orangepi-one-plus.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-h6-orangepi-one-plus.dts @@ -27,7 +27,7 @@ &emac { pinctrl-names = "default"; pinctrl-0 = <&ext_rgmii_pins>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; phy-handle = <&ext_rgmii_phy>; phy-supply = <®_gmac_3v3>; allwinner,rx-delay-ps = <200>; -- cgit v1.2.3-70-g09d2 From ad2091f893bd5dfe2824f0d6819600d120698e9f Mon Sep 17 00:00:00 2001 From: Paul Kocialkowski <contact@paulk.fr> Date: Sat, 31 Oct 2020 19:21:29 +0100 Subject: ARM: sunxi: Add machine match for the Allwinner V3 SoC The Allwinner V3 SoC shares the same base as the V3s but comes with extra pins and features available. As a result, it has its dedicated compatible string (already used in device trees), which is added here. Signed-off-by: Paul Kocialkowski <contact@paulk.fr> Signed-off-by: Maxime Ripard <maxime@cerno.tech> Link: https://lore.kernel.org/r/20201031182137.1879521-2-contact@paulk.fr --- arch/arm/mach-sunxi/sunxi.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/arm/mach-sunxi/sunxi.c b/arch/arm/mach-sunxi/sunxi.c index 06da2747a90b..19635721013d 100644 --- a/arch/arm/mach-sunxi/sunxi.c +++ b/arch/arm/mach-sunxi/sunxi.c @@ -66,6 +66,7 @@ static const char * const sun8i_board_dt_compat[] = { "allwinner,sun8i-h2-plus", "allwinner,sun8i-h3", "allwinner,sun8i-r40", + "allwinner,sun8i-v3", "allwinner,sun8i-v3s", NULL, }; -- cgit v1.2.3-70-g09d2 From 6ab48105aae79b9d8062e9bc922baaeff80918d7 Mon Sep 17 00:00:00 2001 From: Matteo Scordino <matteo.scordino@gmail.com> Date: Fri, 30 Oct 2020 23:43:25 +0000 Subject: ARM: dts: s3: pinecube: align compatible property to other S3 boards The compatible string in the Pine64 Pinecube dts diverges from the ones used in other S3 based boards, like the LicheePi and the Elimo Impetus and Initium. Discussion on LKML decided the PineCube should align to the others. Signed-off-by: Matteo Scordino <matteo.scordino@gmail.com> Signed-off-by: Maxime Ripard <maxime@cerno.tech> Link: https://lore.kernel.org/r/20201030234325.5865-7-matteo.scordino@gmail.com --- arch/arm/boot/dts/sun8i-s3-pinecube.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/sun8i-s3-pinecube.dts b/arch/arm/boot/dts/sun8i-s3-pinecube.dts index 9bab6b7f4014..4aa0ee897a0a 100644 --- a/arch/arm/boot/dts/sun8i-s3-pinecube.dts +++ b/arch/arm/boot/dts/sun8i-s3-pinecube.dts @@ -10,7 +10,7 @@ / { model = "PineCube IP Camera"; - compatible = "pine64,pinecube", "allwinner,sun8i-s3"; + compatible = "pine64,pinecube", "sochip,s3", "allwinner,sun8i-v3"; aliases { serial0 = &uart2; -- cgit v1.2.3-70-g09d2 From 8c9cb4094ccf242eddd140efba13872c55f68a87 Mon Sep 17 00:00:00 2001 From: Pablo Greco <pgreco@centosproject.org> Date: Mon, 2 Nov 2020 11:16:40 -0300 Subject: ARM: dts: sun7i: bananapi: Enable RGMII RX/TX delay on Ethernet PHY The Ethernet PHY on the Bananapi M1 has the RX and TX delays enabled on the PHY, using pull-ups on the RXDLY and TXDLY pins. Fix the phy-mode description to correct reflect this so that the implementation doesn't reconfigure the delays incorrectly. This happened with commit bbc4d71d6354 ("net: phy: realtek: fix rtl8211e rx/tx delay config"). Fixes: 8a5b272fbf44 ("ARM: dts: sun7i: Add Banana Pi board") Signed-off-by: Pablo Greco <pgreco@centosproject.org> Signed-off-by: Maxime Ripard <maxime@cerno.tech> Link: https://lore.kernel.org/r/1604326600-39544-1-git-send-email-pgreco@centosproject.org --- arch/arm/boot/dts/sun7i-a20-bananapi.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/sun7i-a20-bananapi.dts b/arch/arm/boot/dts/sun7i-a20-bananapi.dts index bb3987e101c2..0b3d9ae75650 100644 --- a/arch/arm/boot/dts/sun7i-a20-bananapi.dts +++ b/arch/arm/boot/dts/sun7i-a20-bananapi.dts @@ -132,7 +132,7 @@ pinctrl-names = "default"; pinctrl-0 = <&gmac_rgmii_pins>; phy-handle = <&phy1>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; phy-supply = <®_gmac_3v3>; status = "okay"; }; -- cgit v1.2.3-70-g09d2 From bd5cdcdc66e1f7179ff6d172d1e5f55e43403aa8 Mon Sep 17 00:00:00 2001 From: Pablo Greco <pgreco@centosproject.org> Date: Mon, 2 Nov 2020 11:19:14 -0300 Subject: ARM: dts: sun8i: r40: bananapi-m2-berry: Fix dcdc1 regulator DCDC1 regulator powers many different subsystems. While some of them can work at 3.0 V, some of them can not. For example, VCC-HDMI can only work between 3.24 V and 3.36 V. According to OS images provided by the board manufacturer this regulator should be set to 3.3 V. Set DCDC1 and DCDC1SW to 3.3 V in order to fix this. Fixes: 23edc168bd98 ("ARM: dts: sun8i: Add board dts file for Banana Pi M2 Berry") Fixes: 27e81e1970a8 ("ARM: dts: sun8i: v40: bananapi-m2-berry: Enable GMAC ethernet controller") Signed-off-by: Pablo Greco <pgreco@centosproject.org> Signed-off-by: Maxime Ripard <maxime@cerno.tech> Link: https://lore.kernel.org/r/1604326755-39742-1-git-send-email-pgreco@centosproject.org --- arch/arm/boot/dts/sun8i-v40-bananapi-m2-berry.dts | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/sun8i-v40-bananapi-m2-berry.dts b/arch/arm/boot/dts/sun8i-v40-bananapi-m2-berry.dts index 15c22b06fc4b..84eb08295718 100644 --- a/arch/arm/boot/dts/sun8i-v40-bananapi-m2-berry.dts +++ b/arch/arm/boot/dts/sun8i-v40-bananapi-m2-berry.dts @@ -198,16 +198,16 @@ }; ®_dc1sw { - regulator-min-microvolt = <3000000>; - regulator-max-microvolt = <3000000>; + regulator-min-microvolt = <3300000>; + regulator-max-microvolt = <3300000>; regulator-name = "vcc-gmac-phy"; }; ®_dcdc1 { regulator-always-on; - regulator-min-microvolt = <3000000>; - regulator-max-microvolt = <3000000>; - regulator-name = "vcc-3v0"; + regulator-min-microvolt = <3300000>; + regulator-max-microvolt = <3300000>; + regulator-name = "vcc-3v3"; }; ®_dcdc2 { -- cgit v1.2.3-70-g09d2 From 8a82d91fa275aaea49be06d7f5b1407ce1c0dd4b Mon Sep 17 00:00:00 2001 From: Pablo Greco <pgreco@centosproject.org> Date: Mon, 2 Nov 2020 11:19:29 -0300 Subject: ARM: dts: sun8i: v40: bananapi-m2-berry: Fix ethernet node Ethernet PHY on BananaPi M2 Berry provides RX and TX delays. Fix ethernet node to reflect that fact. Fixes: 27e81e1970a8 ("ARM: dts: sun8i: v40: bananapi-m2-berry: Enable GMAC ethernet controller") Signed-off-by: Pablo Greco <pgreco@centosproject.org> Signed-off-by: Maxime Ripard <maxime@cerno.tech> Link: https://lore.kernel.org/r/1604326769-39802-1-git-send-email-pgreco@centosproject.org --- arch/arm/boot/dts/sun8i-v40-bananapi-m2-berry.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/sun8i-v40-bananapi-m2-berry.dts b/arch/arm/boot/dts/sun8i-v40-bananapi-m2-berry.dts index 84eb08295718..47954551f573 100644 --- a/arch/arm/boot/dts/sun8i-v40-bananapi-m2-berry.dts +++ b/arch/arm/boot/dts/sun8i-v40-bananapi-m2-berry.dts @@ -120,7 +120,7 @@ pinctrl-names = "default"; pinctrl-0 = <&gmac_rgmii_pins>; phy-handle = <&phy1>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; phy-supply = <®_dc1sw>; status = "okay"; }; -- cgit v1.2.3-70-g09d2 From f126b6702e7354d6247a36f20b9172457af5c15a Mon Sep 17 00:00:00 2001 From: Dinh Nguyen <dinguyen@kernel.org> Date: Sun, 1 Nov 2020 14:02:56 -0600 Subject: arm64: dts: agilex/stratix10: Fix qspi node compatible The QSPI flash node needs to have the required "jedec,spi-nor" in the compatible string. Fixes: 0cb140d07fc7 ("arm64: dts: stratix10: Add QSPI support for Stratix10") Cc: stable@vger.kernel.org Suggested-by: Vignesh Raghavendra <vigneshr@ti.com> Signed-off-by: Dinh Nguyen <dinguyen@kernel.org> --- arch/arm64/boot/dts/altera/socfpga_stratix10_socdk.dts | 2 +- arch/arm64/boot/dts/altera/socfpga_stratix10_socdk_nand.dts | 2 +- arch/arm64/boot/dts/intel/socfpga_agilex_socdk.dts | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/arm64/boot/dts/altera/socfpga_stratix10_socdk.dts b/arch/arm64/boot/dts/altera/socfpga_stratix10_socdk.dts index feadd21bc0dc..46e558ab7729 100644 --- a/arch/arm64/boot/dts/altera/socfpga_stratix10_socdk.dts +++ b/arch/arm64/boot/dts/altera/socfpga_stratix10_socdk.dts @@ -159,7 +159,7 @@ flash@0 { #address-cells = <1>; #size-cells = <1>; - compatible = "n25q00a"; + compatible = "micron,mt25qu02g", "jedec,spi-nor"; reg = <0>; spi-max-frequency = <100000000>; diff --git a/arch/arm64/boot/dts/altera/socfpga_stratix10_socdk_nand.dts b/arch/arm64/boot/dts/altera/socfpga_stratix10_socdk_nand.dts index c07966740e14..f9b4a39683cf 100644 --- a/arch/arm64/boot/dts/altera/socfpga_stratix10_socdk_nand.dts +++ b/arch/arm64/boot/dts/altera/socfpga_stratix10_socdk_nand.dts @@ -192,7 +192,7 @@ flash@0 { #address-cells = <1>; #size-cells = <1>; - compatible = "n25q00a"; + compatible = "micron,mt25qu02g", "jedec,spi-nor"; reg = <0>; spi-max-frequency = <100000000>; diff --git a/arch/arm64/boot/dts/intel/socfpga_agilex_socdk.dts b/arch/arm64/boot/dts/intel/socfpga_agilex_socdk.dts index 96c50d48289d..a7a83f29f00b 100644 --- a/arch/arm64/boot/dts/intel/socfpga_agilex_socdk.dts +++ b/arch/arm64/boot/dts/intel/socfpga_agilex_socdk.dts @@ -110,7 +110,7 @@ flash@0 { #address-cells = <1>; #size-cells = <1>; - compatible = "mt25qu02g"; + compatible = "micron,mt25qu02g", "jedec,spi-nor"; reg = <0>; spi-max-frequency = <100000000>; -- cgit v1.2.3-70-g09d2 From 0011c6d182774fc781fb9e115ebe8baa356029ae Mon Sep 17 00:00:00 2001 From: Markus Reichl <m.reichl@fivetechno.de> Date: Wed, 4 Nov 2020 17:23:55 +0100 Subject: arm64: dts: rockchip: Assign a fixed index to mmc devices on rk3399 boards. Recently introduced async probe on mmc devices can shuffle block IDs. Pin them to fixed values to ease booting in environments where UUIDs are not practical. Use newly introduced aliases for mmcblk devices from [1]. [1] https://patchwork.kernel.org/patch/11747669/ Signed-off-by: Markus Reichl <m.reichl@fivetechno.de> Reviewed-by: Douglas Anderson <dianders@chromium.org> Link: https://lore.kernel.org/r/20201104162356.1251-1-m.reichl@fivetechno.de Signed-off-by: Heiko Stuebner <heiko@sntech.de> --- arch/arm64/boot/dts/rockchip/rk3399.dtsi | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch') diff --git a/arch/arm64/boot/dts/rockchip/rk3399.dtsi b/arch/arm64/boot/dts/rockchip/rk3399.dtsi index ada724b12f01..7a9a7aca86c6 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399.dtsi @@ -29,6 +29,9 @@ i2c6 = &i2c6; i2c7 = &i2c7; i2c8 = &i2c8; + mmc0 = &sdio0; + mmc1 = &sdmmc; + mmc2 = &sdhci; serial0 = &uart0; serial1 = &uart1; serial2 = &uart2; -- cgit v1.2.3-70-g09d2 From 7327c8b98e2e14c47021eea14d1ab268086a6408 Mon Sep 17 00:00:00 2001 From: Markus Reichl <m.reichl@fivetechno.de> Date: Wed, 4 Nov 2020 20:29:31 +0100 Subject: arm64: dts: rockchip: Reorder LED triggers from mmc devices on rk3399-roc-pc. After patch [1] SD-card becomes mmc1 and eMMC becomes mmc2. Correct trigger of LEDs accordingly. [1] https://patchwork.kernel.org/patch/11881427 Signed-off-by: Markus Reichl <m.reichl@fivetechno.de> Link: https://lore.kernel.org/r/20201104192933.1001-1-m.reichl@fivetechno.de Signed-off-by: Heiko Stuebner <heiko@sntech.de> --- arch/arm64/boot/dts/rockchip/rk3399-roc-pc.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm64/boot/dts/rockchip/rk3399-roc-pc.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-roc-pc.dtsi index e7a459fa4322..20309076dbac 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-roc-pc.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399-roc-pc.dtsi @@ -74,14 +74,14 @@ label = "red:diy"; gpios = <&gpio0 RK_PB5 GPIO_ACTIVE_HIGH>; default-state = "off"; - linux,default-trigger = "mmc1"; + linux,default-trigger = "mmc2"; }; yellow_led: led-2 { label = "yellow:yellow-led"; gpios = <&gpio0 RK_PA2 GPIO_ACTIVE_HIGH>; default-state = "off"; - linux,default-trigger = "mmc0"; + linux,default-trigger = "mmc1"; }; }; -- cgit v1.2.3-70-g09d2 From 01776f070ffcbf336be3bf1672bd3c589548d6c4 Mon Sep 17 00:00:00 2001 From: Christophe Leroy <christophe.leroy@csgroup.eu> Date: Sat, 7 Nov 2020 09:07:40 +0000 Subject: powerpc/32s: Use relocation offset when setting early hash table When calling early_hash_table(), the kernel hasn't been yet relocated to its linking address, so data must be addressed with relocation offset. Add relocation offset to write into Hash in early_hash_table(). Fixes: 69a1593abdbc ("powerpc/32s: Setup the early hash table at all time.") Reported-by: Erhard Furtner <erhard_f@mailbox.org> Reported-by: Andreas Schwab <schwab@linux-m68k.org> Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu> Tested-by: Serge Belyshev <belyshev@depni.sinp.msu.ru> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/9e225a856a8b22e0e77587ee22ab7a2f5bca8753.1604740029.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/head_book3s_32.S | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/head_book3s_32.S b/arch/powerpc/kernel/head_book3s_32.S index 2aa16d5368e1..a0dda2a1f2df 100644 --- a/arch/powerpc/kernel/head_book3s_32.S +++ b/arch/powerpc/kernel/head_book3s_32.S @@ -156,6 +156,7 @@ __after_mmu_off: bl initial_bats bl load_segment_registers BEGIN_MMU_FTR_SECTION + bl reloc_offset bl early_hash_table END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE) #if defined(CONFIG_BOOTX_TEXT) @@ -920,7 +921,7 @@ early_hash_table: ori r6, r6, 3 /* 256kB table */ mtspr SPRN_SDR1, r6 lis r6, early_hash@h - lis r3, Hash@ha + addis r3, r3, Hash@ha stw r6, Hash@l(r3) blr -- cgit v1.2.3-70-g09d2 From ce9dfafe29bed86fe3cda330ac6072ce84e1ff81 Mon Sep 17 00:00:00 2001 From: Heiko Carstens <hca@linux.ibm.com> Date: Tue, 3 Nov 2020 16:55:43 +0100 Subject: s390: fix system call exit path The system call exit path is running with interrupts enabled while checking for TIF/PIF/CIF bits which require special handling. If all bits have been checked interrupts are disabled and the kernel exits to user space. The problem is that after checking all bits and before interrupts are disabled bits can be set already again, due to interrupt handling. This means that the kernel can exit to user space with some TIF/PIF/CIF bits set, which should never happen. E.g. TIF_NEED_RESCHED might be set, which might lead to additional latencies, since that bit will only be recognized with next exit to user space. Fix this by checking the corresponding bits only when interrupts are disabled. Fixes: 0b0ed657fe00 ("s390: remove critical section cleanup from entry.S") Cc: <stable@vger.kernel.org> # 5.8 Acked-by: Sven Schnelle <svens@linux.ibm.com> Signed-off-by: Heiko Carstens <hca@linux.ibm.com> --- arch/s390/kernel/entry.S | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 86235919c2d1..5346545b9860 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -422,6 +422,7 @@ ENTRY(system_call) #endif LOCKDEP_SYS_EXIT .Lsysc_tif: + DISABLE_INTS TSTMSK __PT_FLAGS(%r11),_PIF_WORK jnz .Lsysc_work TSTMSK __TI_flags(%r12),_TIF_WORK @@ -444,6 +445,7 @@ ENTRY(system_call) # One of the work bits is on. Find out which one. # .Lsysc_work: + ENABLE_INTS TSTMSK __TI_flags(%r12),_TIF_NEED_RESCHED jo .Lsysc_reschedule TSTMSK __PT_FLAGS(%r11),_PIF_SYSCALL_RESTART -- cgit v1.2.3-70-g09d2 From 7de8bfaa095fcbc2db2952d4b561be102a41c2a6 Mon Sep 17 00:00:00 2001 From: Lad Prabhakar <prabhakar.mahadev-lad.rj@bp.renesas.com> Date: Wed, 4 Nov 2020 10:55:08 +0000 Subject: arm64: dts: renesas: r8a774e1: Add missing audio_clk_b Add audio_clk_b configured as 0 Hz, this will be overridden by the boards providing the audio clock. Fixes: 8183a7938cfec ("arm64: dts: renesas: r8a774e1: Add audio support") Reported-by: Nobuhiro Iwamatsu <nobuhiro1.iwamatsu@toshiba.co.jp> Signed-off-by: Lad Prabhakar <prabhakar.mahadev-lad.rj@bp.renesas.com> Link: https://lore.kernel.org/r/20201104105508.21197-1-prabhakar.mahadev-lad.rj@bp.renesas.com Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be> --- arch/arm64/boot/dts/renesas/r8a774e1.dtsi | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch') diff --git a/arch/arm64/boot/dts/renesas/r8a774e1.dtsi b/arch/arm64/boot/dts/renesas/r8a774e1.dtsi index 9cbf963aa068..c29643442e91 100644 --- a/arch/arm64/boot/dts/renesas/r8a774e1.dtsi +++ b/arch/arm64/boot/dts/renesas/r8a774e1.dtsi @@ -28,6 +28,12 @@ clock-frequency = <0>; }; + audio_clk_b: audio_clk_b { + compatible = "fixed-clock"; + #clock-cells = <0>; + clock-frequency = <0>; + }; + audio_clk_c: audio_clk_c { compatible = "fixed-clock"; #clock-cells = <0>; -- cgit v1.2.3-70-g09d2 From 52d9edbe6efc5042cf57fae6a25d07572ddf398b Mon Sep 17 00:00:00 2001 From: Marek Vasut <marex@denx.de> Date: Thu, 8 Oct 2020 21:35:38 +0200 Subject: ARM: dts: stm32: Fix TA3-GPIO-C key on STM32MP1 DHCOM PDK2 On the prototype DHCOM, the TA3-GPIO-C button was connected to pin PI11 of the STM32MP15xx, however on the production SoM this was changed to pin PG0 to free up the IRQ line 11 for LAN8710i PHY IRQ. Update the connection in the DT. Since the IRQ line 0 is used for PMIC as well and cannot be shared with the button, make the button polled. Fixes: 87cabf9405cb ("ARM: dts: stm32: Add GPIO keys for STM32MP1 DHCOM PDK2") Signed-off-by: Marek Vasut <marex@denx.de> Cc: Alexandre Torgue <alexandre.torgue@st.com> Cc: Maxime Coquelin <mcoquelin.stm32@gmail.com> Cc: Patrice Chotard <patrice.chotard@st.com> Cc: Patrick Delaunay <patrick.delaunay@st.com> Cc: linux-stm32@st-md-mailman.stormreply.com To: linux-arm-kernel@lists.infradead.org Signed-off-by: Alexandre Torgue <alexandre.torgue@st.com> --- arch/arm/boot/dts/stm32mp15xx-dhcom-pdk2.dtsi | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/stm32mp15xx-dhcom-pdk2.dtsi b/arch/arm/boot/dts/stm32mp15xx-dhcom-pdk2.dtsi index 5dff24e39af8..9a0a59678097 100644 --- a/arch/arm/boot/dts/stm32mp15xx-dhcom-pdk2.dtsi +++ b/arch/arm/boot/dts/stm32mp15xx-dhcom-pdk2.dtsi @@ -46,6 +46,16 @@ linux,code = <KEY_A>; gpios = <&gpiof 3 GPIO_ACTIVE_LOW>; }; + + /* + * The EXTi IRQ line 0 is shared with PMIC, + * so mark this as polled GPIO key. + */ + button-2 { + label = "TA3-GPIO-C"; + linux,code = <KEY_C>; + gpios = <&gpiog 0 GPIO_ACTIVE_LOW>; + }; }; gpio-keys { @@ -59,13 +69,6 @@ wakeup-source; }; - button-2 { - label = "TA3-GPIO-C"; - linux,code = <KEY_C>; - gpios = <&gpioi 11 GPIO_ACTIVE_LOW>; - wakeup-source; - }; - button-3 { label = "TA4-GPIO-D"; linux,code = <KEY_D>; -- cgit v1.2.3-70-g09d2 From 7e5f3155dcbb4d724386b30cc232002d9b9d81f5 Mon Sep 17 00:00:00 2001 From: Marek Vasut <marex@denx.de> Date: Wed, 28 Oct 2020 21:46:17 +0100 Subject: ARM: dts: stm32: Fix LED5 on STM32MP1 DHCOM PDK2 On the prototype DHCOM, the LED5 was connected to pin PG2 of the STM32MP15xx, however on the production SoM this was changed to pin PC6. Update the connection in the DT. Fixes: 81d5fc719798 ("ARM: dts: stm32: Add GPIO LEDs for STM32MP1 DHCOM PDK2") Signed-off-by: Marek Vasut <marex@denx.de> Cc: Alexandre Torgue <alexandre.torgue@st.com> Cc: Maxime Coquelin <mcoquelin.stm32@gmail.com> Cc: Patrice Chotard <patrice.chotard@st.com> Cc: Patrick Delaunay <patrick.delaunay@st.com> Cc: linux-stm32@st-md-mailman.stormreply.com To: linux-arm-kernel@lists.infradead.org Signed-off-by: Alexandre Torgue <alexandre.torgue@st.com> --- arch/arm/boot/dts/stm32mp15xx-dhcom-pdk2.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/stm32mp15xx-dhcom-pdk2.dtsi b/arch/arm/boot/dts/stm32mp15xx-dhcom-pdk2.dtsi index 9a0a59678097..8456f172d4b1 100644 --- a/arch/arm/boot/dts/stm32mp15xx-dhcom-pdk2.dtsi +++ b/arch/arm/boot/dts/stm32mp15xx-dhcom-pdk2.dtsi @@ -82,7 +82,7 @@ led-0 { label = "green:led5"; - gpios = <&gpiog 2 GPIO_ACTIVE_HIGH>; + gpios = <&gpioc 6 GPIO_ACTIVE_HIGH>; default-state = "off"; }; -- cgit v1.2.3-70-g09d2 From 1f3d7fc279b1a299bb8b1b225d80309a2062ab8a Mon Sep 17 00:00:00 2001 From: Marek Vasut <marex@denx.de> Date: Thu, 29 Oct 2020 20:46:17 +0100 Subject: ARM: dts: stm32: Define VIO regulator supply on DHCOM The VIO regulator is supplied by PMIC Buck3, describe this in the DT. Fixes: 34e0c7847dcf ("ARM: dts: stm32: Add DH Electronics DHCOM STM32MP1 SoM and PDK2 board") Signed-off-by: Marek Vasut <marex@denx.de> Cc: Alexandre Torgue <alexandre.torgue@st.com> Cc: Maxime Coquelin <mcoquelin.stm32@gmail.com> Cc: Patrice Chotard <patrice.chotard@st.com> Cc: Patrick Delaunay <patrick.delaunay@st.com> Cc: linux-stm32@st-md-mailman.stormreply.com To: linux-arm-kernel@lists.infradead.org Signed-off-by: Alexandre Torgue <alexandre.torgue@st.com> --- arch/arm/boot/dts/stm32mp15xx-dhcom-som.dtsi | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/arm/boot/dts/stm32mp15xx-dhcom-som.dtsi b/arch/arm/boot/dts/stm32mp15xx-dhcom-som.dtsi index b4b52cf634af..c89ebfaa115d 100644 --- a/arch/arm/boot/dts/stm32mp15xx-dhcom-som.dtsi +++ b/arch/arm/boot/dts/stm32mp15xx-dhcom-som.dtsi @@ -68,6 +68,7 @@ gpio = <&gpiog 3 GPIO_ACTIVE_LOW>; regulator-always-on; regulator-boot-on; + vin-supply = <&vdd>; }; }; -- cgit v1.2.3-70-g09d2 From e5ace7f62695656ef8a66ad5a4c3edd055894876 Mon Sep 17 00:00:00 2001 From: Marek Vasut <marex@denx.de> Date: Thu, 24 Sep 2020 01:25:35 +0200 Subject: ARM: dts: stm32: Enable thermal sensor support on stm32mp15xx-dhcor Enable STM32 Digital Thermal Sensor driver for stm32mp15xx-dhcor SoMs. Fixes: 94cafe1b6482 ("ARM: dts: stm32: Add Avenger96 devicetree support based on STM32MP157A") Signed-off-by: Marek Vasut <marex@denx.de> Cc: Alexandre Torgue <alexandre.torgue@st.com> Cc: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org> Cc: linux-stm32@st-md-mailman.stormreply.com To: linux-arm-kernel@lists.infradead.org Reviewed-by: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org> Signed-off-by: Alexandre Torgue <alexandre.torgue@st.com> --- arch/arm/boot/dts/stm32mp15xx-dhcor-som.dtsi | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch') diff --git a/arch/arm/boot/dts/stm32mp15xx-dhcor-som.dtsi b/arch/arm/boot/dts/stm32mp15xx-dhcor-som.dtsi index 04fbb324a541..803eb8bc9c85 100644 --- a/arch/arm/boot/dts/stm32mp15xx-dhcor-som.dtsi +++ b/arch/arm/boot/dts/stm32mp15xx-dhcor-som.dtsi @@ -21,6 +21,10 @@ }; }; +&dts { + status = "okay"; +}; + &i2c4 { pinctrl-names = "default"; pinctrl-0 = <&i2c4_pins_a>; -- cgit v1.2.3-70-g09d2 From f4c7fa39415da6db1fa0bc26162ac23a0fbae8bb Mon Sep 17 00:00:00 2001 From: Marek Vasut <marex@denx.de> Date: Thu, 29 Oct 2020 20:46:52 +0100 Subject: ARM: dts: stm32: Keep VDDA LDO1 always on on DHCOM The VDDA LDO1 PMIC output supplies the analog VDDA input of the STM32MP1 on DHCOM, keep it always on, otherwise there could be leakage through the SoC. Fixes: 34e0c7847dcf ("ARM: dts: stm32: Add DH Electronics DHCOM STM32MP1 SoM and PDK2 board") Signed-off-by: Marek Vasut <marex@denx.de> Cc: Alexandre Torgue <alexandre.torgue@st.com> Cc: Maxime Coquelin <mcoquelin.stm32@gmail.com> Cc: Patrice Chotard <patrice.chotard@st.com> Cc: Patrick Delaunay <patrick.delaunay@st.com> Cc: linux-stm32@st-md-mailman.stormreply.com To: linux-arm-kernel@lists.infradead.org Signed-off-by: Alexandre Torgue <alexandre.torgue@st.com> --- arch/arm/boot/dts/stm32mp15xx-dhcom-som.dtsi | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/arm/boot/dts/stm32mp15xx-dhcom-som.dtsi b/arch/arm/boot/dts/stm32mp15xx-dhcom-som.dtsi index c89ebfaa115d..f796a6150313 100644 --- a/arch/arm/boot/dts/stm32mp15xx-dhcom-som.dtsi +++ b/arch/arm/boot/dts/stm32mp15xx-dhcom-som.dtsi @@ -203,6 +203,7 @@ vdda: ldo1 { regulator-name = "vdda"; + regulator-always-on; regulator-min-microvolt = <2900000>; regulator-max-microvolt = <2900000>; interrupts = <IT_CURLIM_LDO1 0>; -- cgit v1.2.3-70-g09d2 From 65cae18882f943215d0505ddc7e70495877308e6 Mon Sep 17 00:00:00 2001 From: Brian Masney <bmasney@redhat.com> Date: Fri, 6 Nov 2020 20:11:19 -0500 Subject: x86/xen: don't unbind uninitialized lock_kicker_irq When booting a hyperthreaded system with the kernel parameter 'mitigations=auto,nosmt', the following warning occurs: WARNING: CPU: 0 PID: 1 at drivers/xen/events/events_base.c:1112 unbind_from_irqhandler+0x4e/0x60 ... Hardware name: Xen HVM domU, BIOS 4.2.amazon 08/24/2006 ... Call Trace: xen_uninit_lock_cpu+0x28/0x62 xen_hvm_cpu_die+0x21/0x30 takedown_cpu+0x9c/0xe0 ? trace_suspend_resume+0x60/0x60 cpuhp_invoke_callback+0x9a/0x530 _cpu_up+0x11a/0x130 cpu_up+0x7e/0xc0 bringup_nonboot_cpus+0x48/0x50 smp_init+0x26/0x79 kernel_init_freeable+0xea/0x229 ? rest_init+0xaa/0xaa kernel_init+0xa/0x106 ret_from_fork+0x35/0x40 The secondary CPUs are not activated with the nosmt mitigations and only the primary thread on each CPU core is used. In this situation, xen_hvm_smp_prepare_cpus(), and more importantly xen_init_lock_cpu(), is not called, so the lock_kicker_irq is not initialized for the secondary CPUs. Let's fix this by exiting early in xen_uninit_lock_cpu() if the irq is not set to avoid the warning from above for each secondary CPU. Signed-off-by: Brian Masney <bmasney@redhat.com> Link: https://lore.kernel.org/r/20201107011119.631442-1-bmasney@redhat.com Reviewed-by: Juergen Gross <jgross@suse.com> Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com> --- arch/x86/xen/spinlock.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c index 799f4eba0a62..043c73dfd2c9 100644 --- a/arch/x86/xen/spinlock.c +++ b/arch/x86/xen/spinlock.c @@ -93,10 +93,20 @@ void xen_init_lock_cpu(int cpu) void xen_uninit_lock_cpu(int cpu) { + int irq; + if (!xen_pvspin) return; - unbind_from_irqhandler(per_cpu(lock_kicker_irq, cpu), NULL); + /* + * When booting the kernel with 'mitigations=auto,nosmt', the secondary + * CPUs are not activated, and lock_kicker_irq is not initialized. + */ + irq = per_cpu(lock_kicker_irq, cpu); + if (irq == -1) + return; + + unbind_from_irqhandler(irq, NULL); per_cpu(lock_kicker_irq, cpu) = -1; kfree(per_cpu(irq_name, cpu)); per_cpu(irq_name, cpu) = NULL; -- cgit v1.2.3-70-g09d2 From d19d2152ca055baf20339cfacbf039c2cfb8d936 Mon Sep 17 00:00:00 2001 From: Lucas Stach <l.stach@pengutronix.de> Date: Thu, 5 Nov 2020 18:06:12 +0100 Subject: arm64: dts: imx8mm: fix voltage for 1.6GHz CPU operating point The datasheet for both the industrial and consumer variant of the SoC lists a typical voltage of 0.95V for the 1.6GHz CPU operating point. Fixes: e85c9d0faa75 (arm64: dts: imx8mm: Add cpufreq properties) Signed-off-by: Lucas Stach <l.stach@pengutronix.de> Reviewed-by: Fabio Estevam <festevam@gmail.com> Signed-off-by: Shawn Guo <shawnguo@kernel.org> --- arch/arm64/boot/dts/freescale/imx8mm.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/boot/dts/freescale/imx8mm.dtsi b/arch/arm64/boot/dts/freescale/imx8mm.dtsi index b83f400def8b..05ee062548e4 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mm.dtsi @@ -129,7 +129,7 @@ opp-1600000000 { opp-hz = /bits/ 64 <1600000000>; - opp-microvolt = <900000>; + opp-microvolt = <950000>; opp-supported-hw = <0xc>, <0x7>; clock-latency-ns = <150000>; opp-suspend; -- cgit v1.2.3-70-g09d2 From 33d0d843872c5ddbe28457a92fc6f2487315fb9f Mon Sep 17 00:00:00 2001 From: Fabio Estevam <festevam@gmail.com> Date: Thu, 5 Nov 2020 18:13:20 -0300 Subject: ARM: dts: imx50-evk: Fix the chip select 1 IOMUX The SPI chip selects are represented as: cs-gpios = <&gpio4 11 GPIO_ACTIVE_LOW>, <&gpio4 13 GPIO_ACTIVE_LOW>; , which means that they are used in GPIO function instead of native SPI mode. Fix the IOMUX for the chip select 1 to use GPIO4_13 instead of the native CSPI_SSI function. Fixes: c605cbf5e135 ("ARM: dts: imx: add device tree support for Freescale imx50evk board") Signed-off-by: Fabio Estevam <festevam@gmail.com> Signed-off-by: Shawn Guo <shawnguo@kernel.org> --- arch/arm/boot/dts/imx50-evk.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/imx50-evk.dts b/arch/arm/boot/dts/imx50-evk.dts index 878e89c20190..4ea5c23f181b 100644 --- a/arch/arm/boot/dts/imx50-evk.dts +++ b/arch/arm/boot/dts/imx50-evk.dts @@ -59,7 +59,7 @@ MX50_PAD_CSPI_MISO__CSPI_MISO 0x00 MX50_PAD_CSPI_MOSI__CSPI_MOSI 0x00 MX50_PAD_CSPI_SS0__GPIO4_11 0xc4 - MX50_PAD_ECSPI1_MOSI__CSPI_SS1 0xf4 + MX50_PAD_ECSPI1_MOSI__GPIO4_13 0x84 >; }; -- cgit v1.2.3-70-g09d2 From c2fe61d8be491ff8188edaf22e838f819999146b Mon Sep 17 00:00:00 2001 From: Arvind Sankar <nivedita@alum.mit.edu> Date: Tue, 10 Nov 2020 11:39:19 -0500 Subject: efi/x86: Free efi_pgd with free_pages() Commit d9e9a6418065 ("x86/mm/pti: Allocate a separate user PGD") changed the PGD allocation to allocate PGD_ALLOCATION_ORDER pages, so in the error path it should be freed using free_pages() rather than free_page(). Commit 06ace26f4e6f ("x86/efi: Free efi_pgd with free_pages()") fixed one instance of this, but missed another. Move the freeing out-of-line to avoid code duplication and fix this bug. Fixes: d9e9a6418065 ("x86/mm/pti: Allocate a separate user PGD") Link: https://lore.kernel.org/r/20201110163919.1134431-1-nivedita@alum.mit.edu Signed-off-by: Arvind Sankar <nivedita@alum.mit.edu> Signed-off-by: Ard Biesheuvel <ardb@kernel.org> --- arch/x86/platform/efi/efi_64.c | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) (limited to 'arch') diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index 8f5759df7776..e1e8d4e3a213 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -78,28 +78,30 @@ int __init efi_alloc_page_tables(void) gfp_mask = GFP_KERNEL | __GFP_ZERO; efi_pgd = (pgd_t *)__get_free_pages(gfp_mask, PGD_ALLOCATION_ORDER); if (!efi_pgd) - return -ENOMEM; + goto fail; pgd = efi_pgd + pgd_index(EFI_VA_END); p4d = p4d_alloc(&init_mm, pgd, EFI_VA_END); - if (!p4d) { - free_page((unsigned long)efi_pgd); - return -ENOMEM; - } + if (!p4d) + goto free_pgd; pud = pud_alloc(&init_mm, p4d, EFI_VA_END); - if (!pud) { - if (pgtable_l5_enabled()) - free_page((unsigned long) pgd_page_vaddr(*pgd)); - free_pages((unsigned long)efi_pgd, PGD_ALLOCATION_ORDER); - return -ENOMEM; - } + if (!pud) + goto free_p4d; efi_mm.pgd = efi_pgd; mm_init_cpumask(&efi_mm); init_new_context(NULL, &efi_mm); return 0; + +free_p4d: + if (pgtable_l5_enabled()) + free_page((unsigned long)pgd_page_vaddr(*pgd)); +free_pgd: + free_pages((unsigned long)efi_pgd, PGD_ALLOCATION_ORDER); +fail: + return -ENOMEM; } /* -- cgit v1.2.3-70-g09d2 From 99fba3205cd499255a36fd87f1d6064adc622a5b Mon Sep 17 00:00:00 2001 From: Grygorii Strashko <grygorii.strashko@ti.com> Date: Thu, 1 Oct 2020 22:20:23 +0300 Subject: ARM: dts: am437x-l4: fix compatible for cpsw switch dt node Fix compatible the new CPSW switchdev DT node to avoid probing of legacy CPSW driver which fails: [ 2.781009] cpsw 4a100000.switch: invalid resource Fixes: 7bf8f37aea82 ("ARM: dts: am437x-l4: add dt node for new cpsw switchdev driver") Signed-off-by: Grygorii Strashko <grygorii.strashko@ti.com> Signed-off-by: Tony Lindgren <tony@atomide.com> --- arch/arm/boot/dts/am437x-l4.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/am437x-l4.dtsi b/arch/arm/boot/dts/am437x-l4.dtsi index c220dc3c4e0f..243e35f7a56c 100644 --- a/arch/arm/boot/dts/am437x-l4.dtsi +++ b/arch/arm/boot/dts/am437x-l4.dtsi @@ -521,7 +521,7 @@ ranges = <0x0 0x100000 0x8000>; mac_sw: switch@0 { - compatible = "ti,am4372-cpsw","ti,cpsw-switch"; + compatible = "ti,am4372-cpsw-switch", "ti,cpsw-switch"; reg = <0x0 0x4000>; ranges = <0 0 0x4000>; clocks = <&cpsw_125mhz_gclk>, <&dpll_clksel_mac_clk>; -- cgit v1.2.3-70-g09d2 From 1ed576a20cd5c93295f57d6b7400357bd8d01b21 Mon Sep 17 00:00:00 2001 From: Janosch Frank <frankja@linux.ibm.com> Date: Tue, 20 Oct 2020 06:12:07 -0400 Subject: KVM: s390: pv: Mark mm as protected after the set secure parameters and improve cleanup We can only have protected guest pages after a successful set secure parameters call as only then the UV allows imports and unpacks. By moving the test we can now also check for it in s390_reset_acc() and do an early return if it is 0. Signed-off-by: Janosch Frank <frankja@linux.ibm.com> Fixes: 29b40f105ec8 ("KVM: s390: protvirt: Add initial vm and cpu lifecycle handling") Reviewed-by: Cornelia Huck <cohuck@redhat.com> Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com> --- arch/s390/kvm/kvm-s390.c | 2 +- arch/s390/kvm/pv.c | 3 ++- arch/s390/mm/gmap.c | 2 ++ 3 files changed, 5 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 6b74b92c1a58..08ea6c4735cd 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -2312,7 +2312,7 @@ static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd) struct kvm_s390_pv_unp unp = {}; r = -EINVAL; - if (!kvm_s390_pv_is_protected(kvm)) + if (!kvm_s390_pv_is_protected(kvm) || !mm_is_protected(kvm->mm)) break; r = -EFAULT; diff --git a/arch/s390/kvm/pv.c b/arch/s390/kvm/pv.c index eb99e2f95ebe..f5847f9dec7c 100644 --- a/arch/s390/kvm/pv.c +++ b/arch/s390/kvm/pv.c @@ -208,7 +208,6 @@ int kvm_s390_pv_init_vm(struct kvm *kvm, u16 *rc, u16 *rrc) return -EIO; } kvm->arch.gmap->guest_handle = uvcb.guest_handle; - atomic_set(&kvm->mm->context.is_protected, 1); return 0; } @@ -228,6 +227,8 @@ int kvm_s390_pv_set_sec_parms(struct kvm *kvm, void *hdr, u64 length, u16 *rc, *rrc = uvcb.header.rrc; KVM_UV_EVENT(kvm, 3, "PROTVIRT VM SET PARMS: rc %x rrc %x", *rc, *rrc); + if (!cc) + atomic_set(&kvm->mm->context.is_protected, 1); return cc ? -EINVAL : 0; } diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c index cfb0017f33a7..64795d034926 100644 --- a/arch/s390/mm/gmap.c +++ b/arch/s390/mm/gmap.c @@ -2690,6 +2690,8 @@ static const struct mm_walk_ops reset_acc_walk_ops = { #include <linux/sched/mm.h> void s390_reset_acc(struct mm_struct *mm) { + if (!mm_is_protected(mm)) + return; /* * we might be called during * reset: we walk the pages and clear -- cgit v1.2.3-70-g09d2 From 6cbf1e960fa52e4c63a6dfa4cda8736375b34ccc Mon Sep 17 00:00:00 2001 From: Collin Walling <walling@linux.ibm.com> Date: Wed, 4 Nov 2020 13:10:32 -0500 Subject: KVM: s390: remove diag318 reset code The diag318 data must be set to 0 by VM-wide reset events triggered by diag308. As such, KVM should not handle resetting this data via the VCPU ioctls. Fixes: 23a60f834406 ("s390/kvm: diagnose 0x318 sync and reset") Signed-off-by: Collin Walling <walling@linux.ibm.com> Reviewed-by: Christian Borntraeger <borntraeger@de.ibm.com> Reviewed-by: Janosch Frank <frankja@linux.ibm.com> Acked-by: Cornelia Huck <cohuck@redhat.com> Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com> Link: https://lore.kernel.org/r/20201104181032.109800-1-walling@linux.ibm.com --- arch/s390/kvm/kvm-s390.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch') diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 08ea6c4735cd..425d3d75320b 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -3564,7 +3564,6 @@ static void kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu) vcpu->arch.sie_block->pp = 0; vcpu->arch.sie_block->fpf &= ~FPF_BPBC; vcpu->arch.sie_block->todpr = 0; - vcpu->arch.sie_block->cpnc = 0; } } @@ -3582,7 +3581,6 @@ static void kvm_arch_vcpu_ioctl_clear_reset(struct kvm_vcpu *vcpu) regs->etoken = 0; regs->etoken_extension = 0; - regs->diag318 = 0; } int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) -- cgit v1.2.3-70-g09d2 From 966e7ea434484a006700c144bca629a14f93530c Mon Sep 17 00:00:00 2001 From: Heiko Carstens <hca@linux.ibm.com> Date: Tue, 10 Nov 2020 14:46:55 +0100 Subject: s390: update defconfigs Signed-off-by: Heiko Carstens <hca@linux.ibm.com> --- arch/s390/configs/debug_defconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index a4d3c578fbd8..fe6f529ac82c 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -1,3 +1,4 @@ +CONFIG_UAPI_HEADER_TEST=y CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y CONFIG_WATCH_QUEUE=y -- cgit v1.2.3-70-g09d2 From 78d732e1f326f74f240d416af9484928303d9951 Mon Sep 17 00:00:00 2001 From: Thomas Richter <tmricht@linux.ibm.com> Date: Wed, 11 Nov 2020 16:26:25 +0100 Subject: s390/cpum_sf.c: fix file permission for cpum_sfb_size This file is installed by the s390 CPU Measurement sampling facility device driver to export supported minimum and maximum sample buffer sizes. This file is read by lscpumf tool to display the details of the device driver capabilities. The lscpumf tool might be invoked by a non-root user. In this case it does not print anything because the file contents can not be read. Fix this by allowing read access for all users. Reading the file contents is ok, changing the file contents is left to the root user only. For further reference and details see: [1] https://github.com/ibm-s390-tools/s390-tools/issues/97 Fixes: 69f239ed335a ("s390/cpum_sf: Dynamically extend the sampling buffer if overflows occur") Cc: <stable@vger.kernel.org> # 3.14 Signed-off-by: Thomas Richter <tmricht@linux.ibm.com> Acked-by: Sumanth Korikkar <sumanthk@linux.ibm.com> Signed-off-by: Heiko Carstens <hca@linux.ibm.com> --- arch/s390/kernel/perf_cpum_sf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index 4f9e4626df55..f100c9209743 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -2228,4 +2228,4 @@ out: } arch_initcall(init_cpum_sampling_pmu); -core_param(cpum_sfb_size, CPUM_SF_MAX_SDB, sfb_size, 0640); +core_param(cpum_sfb_size, CPUM_SF_MAX_SDB, sfb_size, 0644); -- cgit v1.2.3-70-g09d2 From 53bf2776e31376f0b6a1fd7c9e1abc61241825a2 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski <krzk@kernel.org> Date: Thu, 5 Nov 2020 22:47:02 +0100 Subject: ARM: dts: exynos: revert "add input clock to CMU in Exynos4412 Odroid" This reverts commit eaf2d2f6895d676dda6c95a652b58594f2887720. The commit eaf2d2f6895d ("ARM: dts: exynos: add input clock to CMU in Exynos4412 Odroid") breaks probing of usb3503 USB hub on Odroid U3. It changes the order of clock drivers probe: the clkout (Exynos PMU) driver is probed before the main clk-exynos4 driver. The clkout driver on Exynos4412 depends on clk-exynos4 but it does not support deferred probe, therefore this dependency and changed probe order causes probe failure. The usb3503 USB hub on Odroid U3 on the other hand requires clkout clock. This can be seen in logs: [ 5.007442] usb3503 0-0008: unable to request refclk (-517) Reported-by: Marek Szyprowski <m.szyprowski@samsung.com> Signed-off-by: Krzysztof Kozlowski <krzk@kernel.org> Link: https://lore.kernel.org/r/20200921174818.15525-1-krzk@kernel.org Signed-off-by: Arnd Bergmann <arnd@arndb.de> --- arch/arm/boot/dts/exynos4412-odroid-common.dtsi | 1 - 1 file changed, 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/exynos4412-odroid-common.dtsi b/arch/arm/boot/dts/exynos4412-odroid-common.dtsi index ab291cec650a..2983e91bc7dd 100644 --- a/arch/arm/boot/dts/exynos4412-odroid-common.dtsi +++ b/arch/arm/boot/dts/exynos4412-odroid-common.dtsi @@ -122,7 +122,6 @@ }; &clock { - clocks = <&clock CLK_XUSBXTI>; assigned-clocks = <&clock CLK_FOUT_EPLL>; assigned-clock-rates = <45158401>; }; -- cgit v1.2.3-70-g09d2 From 575cba20c421ecb6b563ae352e4e0468e4ca8b3c Mon Sep 17 00:00:00 2001 From: Nicholas Piggin <npiggin@gmail.com> Date: Sat, 14 Nov 2020 21:47:43 +1000 Subject: powerpc/64s: Fix KVM system reset handling when CONFIG_PPC_PSERIES=y pseries guest kernels have a FWNMI handler for SRESET and MCE NMIs, which is basically the same as the regular handlers for those interrupts. The system reset FWNMI handler did not have a KVM guest test in it, although it probably should have because the guest can itself run guests. Commit 4f50541f6703b ("powerpc/64s/exception: Move all interrupt handlers to new style code gen macros") convert the handler faithfully to avoid a KVM test with a "clever" trick to modify the IKVM_REAL setting to 0 when the fwnmi handler is to be generated (PPC_PSERIES=y). This worked when the KVM test was generated in the interrupt entry handlers, but a later patch moved the KVM test to the common handler, and the common handler macro is expanded below the fwnmi entry. This prevents the KVM test from being generated even for the 0x100 entry point as well. The result is NMI IPIs in the host kernel when a guest is running will use gest registers. This goes particularly badly when an HPT guest is running and the MMU is set to guest mode. Remove this trickery and just generate the test always. Fixes: 9600f261acaa ("powerpc/64s/exception: Move KVM test to common code") Cc: stable@vger.kernel.org # v5.7+ Signed-off-by: Nicholas Piggin <npiggin@gmail.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/20201114114743.3306283-1-npiggin@gmail.com --- arch/powerpc/kernel/exceptions-64s.S | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index f7d748b88705..07d64883c0b5 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -1000,8 +1000,6 @@ TRAMP_REAL_BEGIN(system_reset_idle_wake) * Vectors for the FWNMI option. Share common code. */ TRAMP_REAL_BEGIN(system_reset_fwnmi) - /* XXX: fwnmi guest could run a nested/PR guest, so why no test? */ - __IKVM_REAL(system_reset)=0 GEN_INT_ENTRY system_reset, virt=0 #endif /* CONFIG_PPC_PSERIES */ -- cgit v1.2.3-70-g09d2 From 7bab16a6075b7b94999666355ab532c3dabb94f9 Mon Sep 17 00:00:00 2001 From: Jamie Iles <jamie@nuviainc.com> Date: Fri, 13 Nov 2020 15:04:06 +0000 Subject: KVM: arm64: Correctly align nVHE percpu data The nVHE percpu data is partially linked but the nVHE linker script did not align the percpu section. The PERCPU_INPUT macro would then align the data to a page boundary: #define PERCPU_INPUT(cacheline) \ __per_cpu_start = .; \ *(.data..percpu..first) \ . = ALIGN(PAGE_SIZE); \ *(.data..percpu..page_aligned) \ . = ALIGN(cacheline); \ *(.data..percpu..read_mostly) \ . = ALIGN(cacheline); \ *(.data..percpu) \ *(.data..percpu..shared_aligned) \ PERCPU_DECRYPTED_SECTION \ __per_cpu_end = .; but then when the final vmlinux linking happens the hypervisor percpu data is included after page alignment and so the offsets potentially don't match. On my build I saw that the .hyp.data..percpu section was at address 0x20 and then the percpu data would begin at 0x1000 (because of the page alignment in PERCPU_INPUT), but when linked into vmlinux, everything would be shifted down by 0x20 bytes. This manifests as one of the CPUs getting lost when running kvm-unit-tests or starting any VM and subsequent soft lockup on a Cortex A72 device. Fixes: 30c953911c43 ("kvm: arm64: Set up hyp percpu data for nVHE") Signed-off-by: Jamie Iles <jamie@nuviainc.com> Signed-off-by: Marc Zyngier <maz@kernel.org> Acked-by: David Brazdil <dbrazdil@google.com> Cc: David Brazdil <dbrazdil@google.com> Cc: Marc Zyngier <maz@kernel.org> Cc: Will Deacon <will@kernel.org> Link: https://lore.kernel.org/r/20201113150406.14314-1-jamie@nuviainc.com --- arch/arm64/kvm/hyp/nvhe/hyp.lds.S | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'arch') diff --git a/arch/arm64/kvm/hyp/nvhe/hyp.lds.S b/arch/arm64/kvm/hyp/nvhe/hyp.lds.S index bb2d986ff696..a797abace13f 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp.lds.S +++ b/arch/arm64/kvm/hyp/nvhe/hyp.lds.S @@ -13,6 +13,11 @@ SECTIONS { HYP_SECTION(.text) + /* + * .hyp..data..percpu needs to be page aligned to maintain the same + * alignment for when linking into vmlinux. + */ + . = ALIGN(PAGE_SIZE); HYP_SECTION_NAME(.data..percpu) : { PERCPU_INPUT(L1_CACHE_BYTES) } -- cgit v1.2.3-70-g09d2 From 481535c5b41d191b22775a6873de5ec0e1cdced1 Mon Sep 17 00:00:00 2001 From: Max Filippov <jcmvbkbc@gmail.com> Date: Mon, 16 Nov 2020 01:25:56 -0800 Subject: xtensa: fix TLBTEMP area placement fast_second_level_miss handler for the TLBTEMP area has an assumption that page table directory entry for the TLBTEMP address range is 0. For it to be true the TLBTEMP area must be aligned to 4MB boundary and not share its 4MB region with anything that may use a page table. This is not true currently: TLBTEMP shares space with vmalloc space which results in the following kinds of runtime errors when fast_second_level_miss loads page table directory entry for the vmalloc space instead of fixing up the TLBTEMP area: Unable to handle kernel paging request at virtual address c7ff0e00 pc = d0009275, ra = 90009478 Oops: sig: 9 [#1] PREEMPT CPU: 1 PID: 61 Comm: kworker/u9:2 Not tainted 5.10.0-rc3-next-20201110-00007-g1fe4962fa983-dirty #58 Workqueue: xprtiod xs_stream_data_receive_workfn a00: 90009478 d11e1dc0 c7ff0e00 00000020 c7ff0000 00000001 7f8b8107 00000000 a08: 900c5992 d11e1d90 d0cc88b8 5506e97c 00000000 5506e97c d06c8074 d11e1d90 pc: d0009275, ps: 00060310, depc: 00000014, excvaddr: c7ff0e00 lbeg: d0009275, lend: d0009287 lcount: 00000003, sar: 00000010 Call Trace: xs_stream_data_receive_workfn+0x43c/0x770 process_one_work+0x1a1/0x324 worker_thread+0x1cc/0x3c0 kthread+0x10d/0x124 ret_from_kernel_thread+0xc/0x18 Cc: stable@vger.kernel.org Signed-off-by: Max Filippov <jcmvbkbc@gmail.com> --- Documentation/xtensa/mmu.rst | 9 ++++++--- arch/xtensa/include/asm/pgtable.h | 2 +- 2 files changed, 7 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/Documentation/xtensa/mmu.rst b/Documentation/xtensa/mmu.rst index e52a12960fdc..450573afa31a 100644 --- a/Documentation/xtensa/mmu.rst +++ b/Documentation/xtensa/mmu.rst @@ -82,7 +82,8 @@ Default MMUv2-compatible layout:: +------------------+ | VMALLOC area | VMALLOC_START 0xc0000000 128MB - 64KB +------------------+ VMALLOC_END - | Cache aliasing | TLBTEMP_BASE_1 0xc7ff0000 DCACHE_WAY_SIZE + +------------------+ + | Cache aliasing | TLBTEMP_BASE_1 0xc8000000 DCACHE_WAY_SIZE | remap area 1 | +------------------+ | Cache aliasing | TLBTEMP_BASE_2 DCACHE_WAY_SIZE @@ -124,7 +125,8 @@ Default MMUv2-compatible layout:: +------------------+ | VMALLOC area | VMALLOC_START 0xa0000000 128MB - 64KB +------------------+ VMALLOC_END - | Cache aliasing | TLBTEMP_BASE_1 0xa7ff0000 DCACHE_WAY_SIZE + +------------------+ + | Cache aliasing | TLBTEMP_BASE_1 0xa8000000 DCACHE_WAY_SIZE | remap area 1 | +------------------+ | Cache aliasing | TLBTEMP_BASE_2 DCACHE_WAY_SIZE @@ -167,7 +169,8 @@ Default MMUv2-compatible layout:: +------------------+ | VMALLOC area | VMALLOC_START 0x90000000 128MB - 64KB +------------------+ VMALLOC_END - | Cache aliasing | TLBTEMP_BASE_1 0x97ff0000 DCACHE_WAY_SIZE + +------------------+ + | Cache aliasing | TLBTEMP_BASE_1 0x98000000 DCACHE_WAY_SIZE | remap area 1 | +------------------+ | Cache aliasing | TLBTEMP_BASE_2 DCACHE_WAY_SIZE diff --git a/arch/xtensa/include/asm/pgtable.h b/arch/xtensa/include/asm/pgtable.h index fa054a1772e1..4dc04e6c01d7 100644 --- a/arch/xtensa/include/asm/pgtable.h +++ b/arch/xtensa/include/asm/pgtable.h @@ -69,7 +69,7 @@ */ #define VMALLOC_START (XCHAL_KSEG_CACHED_VADDR - 0x10000000) #define VMALLOC_END (VMALLOC_START + 0x07FEFFFF) -#define TLBTEMP_BASE_1 (VMALLOC_END + 1) +#define TLBTEMP_BASE_1 (VMALLOC_START + 0x08000000) #define TLBTEMP_BASE_2 (TLBTEMP_BASE_1 + DCACHE_WAY_SIZE) #if 2 * DCACHE_WAY_SIZE > ICACHE_WAY_SIZE #define TLBTEMP_SIZE (2 * DCACHE_WAY_SIZE) -- cgit v1.2.3-70-g09d2 From 3a860d165eb5f4d7cf0bf81ef6a5b5c5e1754422 Mon Sep 17 00:00:00 2001 From: Max Filippov <jcmvbkbc@gmail.com> Date: Mon, 16 Nov 2020 01:38:59 -0800 Subject: xtensa: disable preemption around cache alias management calls Although cache alias management calls set up and tear down TLB entries and fast_second_level_miss is able to restore TLB entry should it be evicted they absolutely cannot preempt each other because they use the same TLBTEMP area for different purposes. Disable preemption around all cache alias management calls to enforce that. Cc: stable@vger.kernel.org Signed-off-by: Max Filippov <jcmvbkbc@gmail.com> --- arch/xtensa/mm/cache.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'arch') diff --git a/arch/xtensa/mm/cache.c b/arch/xtensa/mm/cache.c index 5835406b3cec..085b8c77b9d9 100644 --- a/arch/xtensa/mm/cache.c +++ b/arch/xtensa/mm/cache.c @@ -70,8 +70,10 @@ static inline void kmap_invalidate_coherent(struct page *page, kvaddr = TLBTEMP_BASE_1 + (page_to_phys(page) & DCACHE_ALIAS_MASK); + preempt_disable(); __invalidate_dcache_page_alias(kvaddr, page_to_phys(page)); + preempt_enable(); } } } @@ -156,6 +158,7 @@ void flush_dcache_page(struct page *page) if (!alias && !mapping) return; + preempt_disable(); virt = TLBTEMP_BASE_1 + (phys & DCACHE_ALIAS_MASK); __flush_invalidate_dcache_page_alias(virt, phys); @@ -166,6 +169,7 @@ void flush_dcache_page(struct page *page) if (mapping) __invalidate_icache_page_alias(virt, phys); + preempt_enable(); } /* There shouldn't be an entry in the cache for this page anymore. */ @@ -199,8 +203,10 @@ void local_flush_cache_page(struct vm_area_struct *vma, unsigned long address, unsigned long phys = page_to_phys(pfn_to_page(pfn)); unsigned long virt = TLBTEMP_BASE_1 + (address & DCACHE_ALIAS_MASK); + preempt_disable(); __flush_invalidate_dcache_page_alias(virt, phys); __invalidate_icache_page_alias(virt, phys); + preempt_enable(); } EXPORT_SYMBOL(local_flush_cache_page); @@ -227,11 +233,13 @@ update_mmu_cache(struct vm_area_struct * vma, unsigned long addr, pte_t *ptep) unsigned long phys = page_to_phys(page); unsigned long tmp; + preempt_disable(); tmp = TLBTEMP_BASE_1 + (phys & DCACHE_ALIAS_MASK); __flush_invalidate_dcache_page_alias(tmp, phys); tmp = TLBTEMP_BASE_1 + (addr & DCACHE_ALIAS_MASK); __flush_invalidate_dcache_page_alias(tmp, phys); __invalidate_icache_page_alias(tmp, phys); + preempt_enable(); clear_bit(PG_arch_1, &page->flags); } @@ -265,7 +273,9 @@ void copy_to_user_page(struct vm_area_struct *vma, struct page *page, if (alias) { unsigned long t = TLBTEMP_BASE_1 + (vaddr & DCACHE_ALIAS_MASK); + preempt_disable(); __flush_invalidate_dcache_page_alias(t, phys); + preempt_enable(); } /* Copy data */ @@ -280,9 +290,11 @@ void copy_to_user_page(struct vm_area_struct *vma, struct page *page, if (alias) { unsigned long t = TLBTEMP_BASE_1 + (vaddr & DCACHE_ALIAS_MASK); + preempt_disable(); __flush_invalidate_dcache_range((unsigned long) dst, len); if ((vma->vm_flags & VM_EXEC) != 0) __invalidate_icache_page_alias(t, phys); + preempt_enable(); } else if ((vma->vm_flags & VM_EXEC) != 0) { __flush_dcache_range((unsigned long)dst,len); @@ -304,7 +316,9 @@ extern void copy_from_user_page(struct vm_area_struct *vma, struct page *page, if (alias) { unsigned long t = TLBTEMP_BASE_1 + (vaddr & DCACHE_ALIAS_MASK); + preempt_disable(); __flush_invalidate_dcache_page_alias(t, phys); + preempt_enable(); } memcpy(dst, src, len); -- cgit v1.2.3-70-g09d2 From 75b49620267c700f0a07fec7f27f69852db70e46 Mon Sep 17 00:00:00 2001 From: Cédric Le Goater <clg@kaod.org> Date: Thu, 5 Nov 2020 14:47:13 +0100 Subject: KVM: PPC: Book3S HV: XIVE: Fix possible oops when accessing ESB page MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When accessing the ESB page of a source interrupt, the fault handler will retrieve the page address from the XIVE interrupt 'xive_irq_data' structure. If the associated KVM XIVE interrupt is not valid, that is not allocated at the HW level for some reason, the fault handler will dereference a NULL pointer leading to the oops below : WARNING: CPU: 40 PID: 59101 at arch/powerpc/kvm/book3s_xive_native.c:259 xive_native_esb_fault+0xe4/0x240 [kvm] CPU: 40 PID: 59101 Comm: qemu-system-ppc Kdump: loaded Tainted: G W --------- - - 4.18.0-240.el8.ppc64le #1 NIP: c00800000e949fac LR: c00000000044b164 CTR: c00800000e949ec8 REGS: c000001f69617840 TRAP: 0700 Tainted: G W --------- - - (4.18.0-240.el8.ppc64le) MSR: 9000000000029033 <SF,HV,EE,ME,IR,DR,RI,LE> CR: 44044282 XER: 00000000 CFAR: c00000000044b160 IRQMASK: 0 GPR00: c00000000044b164 c000001f69617ac0 c00800000e96e000 c000001f69617c10 GPR04: 05faa2b21e000080 0000000000000000 0000000000000005 ffffffffffffffff GPR08: 0000000000000000 0000000000000001 0000000000000000 0000000000000001 GPR12: c00800000e949ec8 c000001ffffd3400 0000000000000000 0000000000000000 GPR16: 0000000000000000 0000000000000000 0000000000000000 0000000000000000 GPR20: 0000000000000000 0000000000000000 c000001f5c065160 c000000001c76f90 GPR24: c000001f06f20000 c000001f5c065100 0000000000000008 c000001f0eb98c78 GPR28: c000001dcab40000 c000001dcab403d8 c000001f69617c10 0000000000000011 NIP [c00800000e949fac] xive_native_esb_fault+0xe4/0x240 [kvm] LR [c00000000044b164] __do_fault+0x64/0x220 Call Trace: [c000001f69617ac0] [0000000137a5dc20] 0x137a5dc20 (unreliable) [c000001f69617b50] [c00000000044b164] __do_fault+0x64/0x220 [c000001f69617b90] [c000000000453838] do_fault+0x218/0x930 [c000001f69617bf0] [c000000000456f50] __handle_mm_fault+0x350/0xdf0 [c000001f69617cd0] [c000000000457b1c] handle_mm_fault+0x12c/0x310 [c000001f69617d10] [c00000000007ef44] __do_page_fault+0x264/0xbb0 [c000001f69617df0] [c00000000007f8c8] do_page_fault+0x38/0xd0 [c000001f69617e30] [c00000000000a714] handle_page_fault+0x18/0x38 Instruction dump: 40c2fff0 7c2004ac 2fa90000 409e0118 73e90001 41820080 e8bd0008 7c2004ac 7ca90074 39400000 915c0000 7929d182 <0b090000> 2fa50000 419e0080 e89e0018 ---[ end trace 66c6ff034c53f64f ]--- xive-kvm: xive_native_esb_fault: accessing invalid ESB page for source 8 ! Fix that by checking the validity of the KVM XIVE interrupt structure. Fixes: 6520ca64cde7 ("KVM: PPC: Book3S HV: XIVE: Add a mapping for the source ESB pages") Cc: stable@vger.kernel.org # v5.2+ Reported-by: Greg Kurz <groug@kaod.org> Signed-off-by: Cédric Le Goater <clg@kaod.org> Tested-by: Greg Kurz <groug@kaod.org> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/20201105134713.656160-1-clg@kaod.org --- arch/powerpc/kvm/book3s_xive_native.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'arch') diff --git a/arch/powerpc/kvm/book3s_xive_native.c b/arch/powerpc/kvm/book3s_xive_native.c index d0c2db0e07fa..a59a94f02733 100644 --- a/arch/powerpc/kvm/book3s_xive_native.c +++ b/arch/powerpc/kvm/book3s_xive_native.c @@ -251,6 +251,13 @@ static vm_fault_t xive_native_esb_fault(struct vm_fault *vmf) } state = &sb->irq_state[src]; + + /* Some sanity checking */ + if (!state->valid) { + pr_devel("%s: source %lx invalid !\n", __func__, irq); + return VM_FAULT_SIGBUS; + } + kvmppc_xive_select_irq(state, &hw_num, &xd); arch_spin_lock(&sb->lock); -- cgit v1.2.3-70-g09d2 From cef397038167ac15d085914493d6c86385773709 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann <arnd@arndb.de> Date: Wed, 11 Nov 2020 17:52:58 +0100 Subject: arch: pgtable: define MAX_POSSIBLE_PHYSMEM_BITS where needed Stefan Agner reported a bug when using zsram on 32-bit Arm machines with RAM above the 4GB address boundary: Unable to handle kernel NULL pointer dereference at virtual address 00000000 pgd = a27bd01c [00000000] *pgd=236a0003, *pmd=1ffa64003 Internal error: Oops: 207 [#1] SMP ARM Modules linked in: mdio_bcm_unimac(+) brcmfmac cfg80211 brcmutil raspberrypi_hwmon hci_uart crc32_arm_ce bcm2711_thermal phy_generic genet CPU: 0 PID: 123 Comm: mkfs.ext4 Not tainted 5.9.6 #1 Hardware name: BCM2711 PC is at zs_map_object+0x94/0x338 LR is at zram_bvec_rw.constprop.0+0x330/0xa64 pc : [<c0602b38>] lr : [<c0bda6a0>] psr: 60000013 sp : e376bbe0 ip : 00000000 fp : c1e2921c r10: 00000002 r9 : c1dda730 r8 : 00000000 r7 : e8ff7a00 r6 : 00000000 r5 : 02f9ffa0 r4 : e3710000 r3 : 000fdffe r2 : c1e0ce80 r1 : ebf979a0 r0 : 00000000 Flags: nZCv IRQs on FIQs on Mode SVC_32 ISA ARM Segment user Control: 30c5383d Table: 235c2a80 DAC: fffffffd Process mkfs.ext4 (pid: 123, stack limit = 0x495a22e6) Stack: (0xe376bbe0 to 0xe376c000) As it turns out, zsram needs to know the maximum memory size, which is defined in MAX_PHYSMEM_BITS when CONFIG_SPARSEMEM is set, or in MAX_POSSIBLE_PHYSMEM_BITS on the x86 architecture. The same problem will be hit on all 32-bit architectures that have a physical address space larger than 4GB and happen to not enable sparsemem and include asm/sparsemem.h from asm/pgtable.h. After the initial discussion, I suggested just always defining MAX_POSSIBLE_PHYSMEM_BITS whenever CONFIG_PHYS_ADDR_T_64BIT is set, or provoking a build error otherwise. This addresses all configurations that can currently have this runtime bug, but leaves all other configurations unchanged. I looked up the possible number of bits in source code and datasheets, here is what I found: - on ARC, CONFIG_ARC_HAS_PAE40 controls whether 32 or 40 bits are used - on ARM, CONFIG_LPAE enables 40 bit addressing, without it we never support more than 32 bits, even though supersections in theory allow up to 40 bits as well. - on MIPS, some MIPS32r1 or later chips support 36 bits, and MIPS32r5 XPA supports up to 60 bits in theory, but 40 bits are more than anyone will ever ship - On PowerPC, there are three different implementations of 36 bit addressing, but 32-bit is used without CONFIG_PTE_64BIT - On RISC-V, the normal page table format can support 34 bit addressing. There is no highmem support on RISC-V, so anything above 2GB is unused, but it might be useful to eventually support CONFIG_ZRAM for high pages. Fixes: 61989a80fb3a ("staging: zsmalloc: zsmalloc memory allocation library") Fixes: 02390b87a945 ("mm/zsmalloc: Prepare to variable MAX_PHYSMEM_BITS") Acked-by: Thomas Bogendoerfer <tsbogend@alpha.franken.de> Reviewed-by: Stefan Agner <stefan@agner.ch> Tested-by: Stefan Agner <stefan@agner.ch> Acked-by: Mike Rapoport <rppt@linux.ibm.com> Link: https://lore.kernel.org/linux-mm/bdfa44bf1c570b05d6c70898e2bbb0acf234ecdf.1604762181.git.stefan@agner.ch/ Signed-off-by: Arnd Bergmann <arnd@arndb.de> --- arch/arc/include/asm/pgtable.h | 2 ++ arch/arm/include/asm/pgtable-2level.h | 2 ++ arch/arm/include/asm/pgtable-3level.h | 2 ++ arch/mips/include/asm/pgtable-32.h | 3 +++ arch/powerpc/include/asm/book3s/32/pgtable.h | 2 ++ arch/powerpc/include/asm/nohash/32/pgtable.h | 2 ++ arch/riscv/include/asm/pgtable-32.h | 2 ++ include/linux/pgtable.h | 13 +++++++++++++ 8 files changed, 28 insertions(+) (limited to 'arch') diff --git a/arch/arc/include/asm/pgtable.h b/arch/arc/include/asm/pgtable.h index f1ed17edb085..163641726a2b 100644 --- a/arch/arc/include/asm/pgtable.h +++ b/arch/arc/include/asm/pgtable.h @@ -134,8 +134,10 @@ #ifdef CONFIG_ARC_HAS_PAE40 #define PTE_BITS_NON_RWX_IN_PD1 (0xff00000000 | PAGE_MASK | _PAGE_CACHEABLE) +#define MAX_POSSIBLE_PHYSMEM_BITS 40 #else #define PTE_BITS_NON_RWX_IN_PD1 (PAGE_MASK | _PAGE_CACHEABLE) +#define MAX_POSSIBLE_PHYSMEM_BITS 32 #endif /************************************************************************** diff --git a/arch/arm/include/asm/pgtable-2level.h b/arch/arm/include/asm/pgtable-2level.h index 3502c2f746ca..baf7d0204eb5 100644 --- a/arch/arm/include/asm/pgtable-2level.h +++ b/arch/arm/include/asm/pgtable-2level.h @@ -75,6 +75,8 @@ #define PTE_HWTABLE_OFF (PTE_HWTABLE_PTRS * sizeof(pte_t)) #define PTE_HWTABLE_SIZE (PTRS_PER_PTE * sizeof(u32)) +#define MAX_POSSIBLE_PHYSMEM_BITS 32 + /* * PMD_SHIFT determines the size of the area a second-level page table can map * PGDIR_SHIFT determines what a third-level page table entry can map diff --git a/arch/arm/include/asm/pgtable-3level.h b/arch/arm/include/asm/pgtable-3level.h index fbb6693c3352..2b85d175e999 100644 --- a/arch/arm/include/asm/pgtable-3level.h +++ b/arch/arm/include/asm/pgtable-3level.h @@ -25,6 +25,8 @@ #define PTE_HWTABLE_OFF (0) #define PTE_HWTABLE_SIZE (PTRS_PER_PTE * sizeof(u64)) +#define MAX_POSSIBLE_PHYSMEM_BITS 40 + /* * PGDIR_SHIFT determines the size a top-level page table entry can map. */ diff --git a/arch/mips/include/asm/pgtable-32.h b/arch/mips/include/asm/pgtable-32.h index a950fc1ddb4d..6c0532d7b211 100644 --- a/arch/mips/include/asm/pgtable-32.h +++ b/arch/mips/include/asm/pgtable-32.h @@ -154,6 +154,7 @@ static inline void pmd_clear(pmd_t *pmdp) #if defined(CONFIG_XPA) +#define MAX_POSSIBLE_PHYSMEM_BITS 40 #define pte_pfn(x) (((unsigned long)((x).pte_high >> _PFN_SHIFT)) | (unsigned long)((x).pte_low << _PAGE_PRESENT_SHIFT)) static inline pte_t pfn_pte(unsigned long pfn, pgprot_t prot) @@ -169,6 +170,7 @@ pfn_pte(unsigned long pfn, pgprot_t prot) #elif defined(CONFIG_PHYS_ADDR_T_64BIT) && defined(CONFIG_CPU_MIPS32) +#define MAX_POSSIBLE_PHYSMEM_BITS 36 #define pte_pfn(x) ((unsigned long)((x).pte_high >> 6)) static inline pte_t pfn_pte(unsigned long pfn, pgprot_t prot) @@ -183,6 +185,7 @@ static inline pte_t pfn_pte(unsigned long pfn, pgprot_t prot) #else +#define MAX_POSSIBLE_PHYSMEM_BITS 32 #ifdef CONFIG_CPU_VR41XX #define pte_pfn(x) ((unsigned long)((x).pte >> (PAGE_SHIFT + 2))) #define pfn_pte(pfn, prot) __pte(((pfn) << (PAGE_SHIFT + 2)) | pgprot_val(prot)) diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h index 36443cda8dcf..1376be95e975 100644 --- a/arch/powerpc/include/asm/book3s/32/pgtable.h +++ b/arch/powerpc/include/asm/book3s/32/pgtable.h @@ -36,8 +36,10 @@ static inline bool pte_user(pte_t pte) */ #ifdef CONFIG_PTE_64BIT #define PTE_RPN_MASK (~((1ULL << PTE_RPN_SHIFT) - 1)) +#define MAX_POSSIBLE_PHYSMEM_BITS 36 #else #define PTE_RPN_MASK (~((1UL << PTE_RPN_SHIFT) - 1)) +#define MAX_POSSIBLE_PHYSMEM_BITS 32 #endif /* diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h index ee2243ba96cf..96522f7f0618 100644 --- a/arch/powerpc/include/asm/nohash/32/pgtable.h +++ b/arch/powerpc/include/asm/nohash/32/pgtable.h @@ -153,8 +153,10 @@ int map_kernel_page(unsigned long va, phys_addr_t pa, pgprot_t prot); */ #if defined(CONFIG_PPC32) && defined(CONFIG_PTE_64BIT) #define PTE_RPN_MASK (~((1ULL << PTE_RPN_SHIFT) - 1)) +#define MAX_POSSIBLE_PHYSMEM_BITS 36 #else #define PTE_RPN_MASK (~((1UL << PTE_RPN_SHIFT) - 1)) +#define MAX_POSSIBLE_PHYSMEM_BITS 32 #endif /* diff --git a/arch/riscv/include/asm/pgtable-32.h b/arch/riscv/include/asm/pgtable-32.h index b0ab66e5fdb1..5b2e79e5bfa5 100644 --- a/arch/riscv/include/asm/pgtable-32.h +++ b/arch/riscv/include/asm/pgtable-32.h @@ -14,4 +14,6 @@ #define PGDIR_SIZE (_AC(1, UL) << PGDIR_SHIFT) #define PGDIR_MASK (~(PGDIR_SIZE - 1)) +#define MAX_POSSIBLE_PHYSMEM_BITS 34 + #endif /* _ASM_RISCV_PGTABLE_32_H */ diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index 71125a4676c4..e237004d498d 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -1427,6 +1427,19 @@ typedef unsigned int pgtbl_mod_mask; #endif /* !__ASSEMBLY__ */ +#if !defined(MAX_POSSIBLE_PHYSMEM_BITS) && !defined(CONFIG_64BIT) +#ifdef CONFIG_PHYS_ADDR_T_64BIT +/* + * ZSMALLOC needs to know the highest PFN on 32-bit architectures + * with physical address space extension, but falls back to + * BITS_PER_LONG otherwise. + */ +#error Missing MAX_POSSIBLE_PHYSMEM_BITS definition +#else +#define MAX_POSSIBLE_PHYSMEM_BITS 32 +#endif +#endif + #ifndef has_transparent_hugepage #ifdef CONFIG_TRANSPARENT_HUGEPAGE #define has_transparent_hugepage() 1 -- cgit v1.2.3-70-g09d2 From 854c57f02bc718b0653bc467073b4541b8155a36 Mon Sep 17 00:00:00 2001 From: Ashish Kalra <ashish.kalra@amd.com> Date: Tue, 10 Nov 2020 22:42:05 +0000 Subject: KVM: SVM: Fix offset computation bug in __sev_dbg_decrypt(). Fix offset computation in __sev_dbg_decrypt() to include the source paddr before it is rounded down to be aligned to 16 bytes as required by SEV API. This fixes incorrect guest memory dumps observed when using qemu monitor. Signed-off-by: Ashish Kalra <ashish.kalra@amd.com> Message-Id: <20201110224205.29444-1-Ashish.Kalra@amd.com> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> --- arch/x86/kvm/svm/sev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index c0b14106258a..566f4d18185b 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -642,8 +642,8 @@ static int __sev_dbg_decrypt(struct kvm *kvm, unsigned long src_paddr, * Its safe to read more than we are asked, caller should ensure that * destination has enough space. */ - src_paddr = round_down(src_paddr, 16); offset = src_paddr & 15; + src_paddr = round_down(src_paddr, 16); sz = round_up(sz + offset, 16); return __sev_issue_dbg_cmd(kvm, src_paddr, dst_paddr, sz, err, false); -- cgit v1.2.3-70-g09d2 From 054409ab253d9f31bec5760105144166b4b71e22 Mon Sep 17 00:00:00 2001 From: Chen Zhou <chenzhou10@huawei.com> Date: Tue, 17 Nov 2020 10:54:26 +0800 Subject: KVM: SVM: fix error return code in svm_create_vcpu() Fix to return a negative error code from the error handling case instead of 0 in function svm_create_vcpu(), as done elsewhere in this function. Fixes: f4c847a95654 ("KVM: SVM: refactor msr permission bitmap allocation") Reported-by: Hulk Robot <hulkci@huawei.com> Signed-off-by: Chen Zhou <chenzhou10@huawei.com> Message-Id: <20201117025426.167824-1-chenzhou10@huawei.com> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> --- arch/x86/kvm/svm/svm.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 1e81cfebd491..79b3a564f1c9 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -1309,8 +1309,10 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu) svm->avic_is_running = true; svm->msrpm = svm_vcpu_alloc_msrpm(); - if (!svm->msrpm) + if (!svm->msrpm) { + err = -ENOMEM; goto error_free_vmcb_page; + } svm_vcpu_init_msrpm(vcpu, svm->msrpm); -- cgit v1.2.3-70-g09d2 From 61a2f1aecf6052f7bcf900829ca2b9d74437ec07 Mon Sep 17 00:00:00 2001 From: Thomas Bogendoerfer <tsbogend@alpha.franken.de> Date: Mon, 16 Nov 2020 18:45:15 +0100 Subject: MIPS: kernel: Fix for_each_memblock conversion The loop over all memblocks works with PFNs and not physical addresses, so we need for_each_mem_pfn_range(). Fixes: b10d6bca8720 ("arch, drivers: replace for_each_membock() with for_each_mem_range()") Signed-off-by: Thomas Bogendoerfer <tsbogend@alpha.franken.de> Reviewed-by: Mike Rapoport <rppt@linux.ibm.com> Reviewed-by: Serge Semin <fancer.lancer@gmail.com> --- arch/mips/kernel/setup.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c index 0d4253208bde..ca579deef939 100644 --- a/arch/mips/kernel/setup.c +++ b/arch/mips/kernel/setup.c @@ -262,8 +262,8 @@ static void __init bootmem_init(void) static void __init bootmem_init(void) { phys_addr_t ramstart, ramend; - phys_addr_t start, end; - u64 i; + unsigned long start, end; + int i; ramstart = memblock_start_of_DRAM(); ramend = memblock_end_of_DRAM(); @@ -300,7 +300,7 @@ static void __init bootmem_init(void) min_low_pfn = ARCH_PFN_OFFSET; max_pfn = PFN_DOWN(ramend); - for_each_mem_range(i, &start, &end) { + for_each_mem_pfn_range(i, MAX_NUMNODES, &start, &end, NULL) { /* * Skip highmem here so we get an accurate max_low_pfn if low * memory stops short of high memory. -- cgit v1.2.3-70-g09d2 From 1a371e67dc77125736cc56d3a0893f06b75855b6 Mon Sep 17 00:00:00 2001 From: Chen Yu <yu.c.chen@intel.com> Date: Fri, 13 Nov 2020 09:59:23 +0800 Subject: x86/microcode/intel: Check patch signature before saving microcode for early loading Currently, scan_microcode() leverages microcode_matches() to check if the microcode matches the CPU by comparing the family and model. However, the processor stepping and flags of the microcode signature should also be considered when saving a microcode patch for early update. Use find_matching_signature() in scan_microcode() and get rid of the now-unused microcode_matches() which is a good cleanup in itself. Complete the verification of the patch being saved for early loading in save_microcode_patch() directly. This needs to be done there too because save_mc_for_early() will call save_microcode_patch() too. The second reason why this needs to be done is because the loader still tries to support, at least hypothetically, mixed-steppings systems and thus adds all patches to the cache that belong to the same CPU model albeit with different steppings. For example: microcode: CPU: sig=0x906ec, pf=0x2, rev=0xd6 microcode: mc_saved[0]: sig=0x906e9, pf=0x2a, rev=0xd6, total size=0x19400, date = 2020-04-23 microcode: mc_saved[1]: sig=0x906ea, pf=0x22, rev=0xd6, total size=0x19000, date = 2020-04-27 microcode: mc_saved[2]: sig=0x906eb, pf=0x2, rev=0xd6, total size=0x19400, date = 2020-04-23 microcode: mc_saved[3]: sig=0x906ec, pf=0x22, rev=0xd6, total size=0x19000, date = 2020-04-27 microcode: mc_saved[4]: sig=0x906ed, pf=0x22, rev=0xd6, total size=0x19400, date = 2020-04-23 The patch which is being saved for early loading, however, can only be the one which fits the CPU this runs on so do the signature verification before saving. [ bp: Do signature verification in save_microcode_patch() and rewrite commit message. ] Fixes: ec400ddeff20 ("x86/microcode_intel_early.c: Early update ucode on Intel's CPU") Signed-off-by: Chen Yu <yu.c.chen@intel.com> Signed-off-by: Borislav Petkov <bp@suse.de> Cc: stable@vger.kernel.org Link: https://bugzilla.kernel.org/show_bug.cgi?id=208535 Link: https://lkml.kernel.org/r/20201113015923.13960-1-yu.c.chen@intel.com --- arch/x86/kernel/cpu/microcode/intel.c | 63 ++++++----------------------------- 1 file changed, 10 insertions(+), 53 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c index 6a99535d7f37..7e8e07bddd5f 100644 --- a/arch/x86/kernel/cpu/microcode/intel.c +++ b/arch/x86/kernel/cpu/microcode/intel.c @@ -100,53 +100,6 @@ static int has_newer_microcode(void *mc, unsigned int csig, int cpf, int new_rev return find_matching_signature(mc, csig, cpf); } -/* - * Given CPU signature and a microcode patch, this function finds if the - * microcode patch has matching family and model with the CPU. - * - * %true - if there's a match - * %false - otherwise - */ -static bool microcode_matches(struct microcode_header_intel *mc_header, - unsigned long sig) -{ - unsigned long total_size = get_totalsize(mc_header); - unsigned long data_size = get_datasize(mc_header); - struct extended_sigtable *ext_header; - unsigned int fam_ucode, model_ucode; - struct extended_signature *ext_sig; - unsigned int fam, model; - int ext_sigcount, i; - - fam = x86_family(sig); - model = x86_model(sig); - - fam_ucode = x86_family(mc_header->sig); - model_ucode = x86_model(mc_header->sig); - - if (fam == fam_ucode && model == model_ucode) - return true; - - /* Look for ext. headers: */ - if (total_size <= data_size + MC_HEADER_SIZE) - return false; - - ext_header = (void *) mc_header + data_size + MC_HEADER_SIZE; - ext_sig = (void *)ext_header + EXT_HEADER_SIZE; - ext_sigcount = ext_header->count; - - for (i = 0; i < ext_sigcount; i++) { - fam_ucode = x86_family(ext_sig->sig); - model_ucode = x86_model(ext_sig->sig); - - if (fam == fam_ucode && model == model_ucode) - return true; - - ext_sig++; - } - return false; -} - static struct ucode_patch *memdup_patch(void *data, unsigned int size) { struct ucode_patch *p; @@ -164,7 +117,7 @@ static struct ucode_patch *memdup_patch(void *data, unsigned int size) return p; } -static void save_microcode_patch(void *data, unsigned int size) +static void save_microcode_patch(struct ucode_cpu_info *uci, void *data, unsigned int size) { struct microcode_header_intel *mc_hdr, *mc_saved_hdr; struct ucode_patch *iter, *tmp, *p = NULL; @@ -210,6 +163,9 @@ static void save_microcode_patch(void *data, unsigned int size) if (!p) return; + if (!find_matching_signature(p->data, uci->cpu_sig.sig, uci->cpu_sig.pf)) + return; + /* * Save for early loading. On 32-bit, that needs to be a physical * address as the APs are running from physical addresses, before @@ -344,13 +300,14 @@ scan_microcode(void *data, size_t size, struct ucode_cpu_info *uci, bool save) size -= mc_size; - if (!microcode_matches(mc_header, uci->cpu_sig.sig)) { + if (!find_matching_signature(data, uci->cpu_sig.sig, + uci->cpu_sig.pf)) { data += mc_size; continue; } if (save) { - save_microcode_patch(data, mc_size); + save_microcode_patch(uci, data, mc_size); goto next; } @@ -483,14 +440,14 @@ static void show_saved_mc(void) * Save this microcode patch. It will be loaded early when a CPU is * hot-added or resumes. */ -static void save_mc_for_early(u8 *mc, unsigned int size) +static void save_mc_for_early(struct ucode_cpu_info *uci, u8 *mc, unsigned int size) { /* Synchronization during CPU hotplug. */ static DEFINE_MUTEX(x86_cpu_microcode_mutex); mutex_lock(&x86_cpu_microcode_mutex); - save_microcode_patch(mc, size); + save_microcode_patch(uci, mc, size); show_saved_mc(); mutex_unlock(&x86_cpu_microcode_mutex); @@ -935,7 +892,7 @@ static enum ucode_state generic_load_microcode(int cpu, struct iov_iter *iter) * permanent memory. So it will be loaded early when a CPU is hot added * or resumes. */ - save_mc_for_early(new_mc, new_mc_size); + save_mc_for_early(uci, new_mc, new_mc_size); pr_debug("CPU%d found a matching microcode update with version 0x%x (current=0x%x)\n", cpu, new_rev, uci->cpu_sig.rev); -- cgit v1.2.3-70-g09d2 From e02152ba2810f7c88cb54e71cda096268dfa9241 Mon Sep 17 00:00:00 2001 From: Michael Ellerman <mpe@ellerman.id.au> Date: Mon, 16 Nov 2020 23:09:13 +1100 Subject: powerpc: Drop -me200 addition to build flags MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently a build with CONFIG_E200=y will fail with: Error: invalid switch -me200 Error: unrecognized option -me200 Upstream binutils has never supported an -me200 option. Presumably it was supported at some point by either a fork or Freescale internal binutils. We can't support code that we can't even build test, so drop the addition of -me200 to the build flags, so we can at least build with CONFIG_E200=y. Reported-by: Németh Márton <nm127@freemail.hu> Reported-by: kernel test robot <lkp@intel.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Reviewed-by: Nick Desaulniers <ndesaulniers@google.com> Acked-by: Scott Wood <oss@buserror.net> Link: https://lore.kernel.org/r/20201116120913.165317-1-mpe@ellerman.id.au --- arch/powerpc/Makefile | 1 - 1 file changed, 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index a4d56f0a41d9..16b8336f91dd 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -248,7 +248,6 @@ KBUILD_CFLAGS += $(call cc-option,-mno-string) cpu-as-$(CONFIG_40x) += -Wa,-m405 cpu-as-$(CONFIG_44x) += -Wa,-m440 cpu-as-$(CONFIG_ALTIVEC) += $(call as-option,-Wa$(comma)-maltivec) -cpu-as-$(CONFIG_E200) += -Wa,-me200 cpu-as-$(CONFIG_E500) += -Wa,-me500 # When using '-many -mpower4' gas will first try and find a matching power4 -- cgit v1.2.3-70-g09d2 From ac3b57adf87ad9bac7e33ca26bbbb13fae1ed62b Mon Sep 17 00:00:00 2001 From: Zhang Qilong <zhangqilong3@huawei.com> Date: Fri, 13 Nov 2020 21:18:56 +0800 Subject: MIPS: Alchemy: Fix memleak in alchemy_clk_setup_cpu If the clk_register fails, we should free h before function returns to prevent memleak. Fixes: 474402291a0ad ("MIPS: Alchemy: clock framework integration of onchip clocks") Reported-by: Hulk Robot <hulkci@huawei.com> Signed-off-by: Zhang Qilong <zhangqilong3@huawei.com> Signed-off-by: Thomas Bogendoerfer <tsbogend@alpha.franken.de> --- arch/mips/alchemy/common/clock.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/mips/alchemy/common/clock.c b/arch/mips/alchemy/common/clock.c index a95a894aceaf..f0c830337104 100644 --- a/arch/mips/alchemy/common/clock.c +++ b/arch/mips/alchemy/common/clock.c @@ -152,6 +152,7 @@ static struct clk __init *alchemy_clk_setup_cpu(const char *parent_name, { struct clk_init_data id; struct clk_hw *h; + struct clk *clk; h = kzalloc(sizeof(*h), GFP_KERNEL); if (!h) @@ -164,7 +165,13 @@ static struct clk __init *alchemy_clk_setup_cpu(const char *parent_name, id.ops = &alchemy_clkops_cpu; h->init = &id; - return clk_register(NULL, h); + clk = clk_register(NULL, h); + if (IS_ERR(clk)) { + pr_err("failed to register clock\n"); + kfree(h); + } + + return clk; } /* AUXPLLs ************************************************************/ -- cgit v1.2.3-70-g09d2 From ebd19fc372e3e78bf165f230e7c084e304441c08 Mon Sep 17 00:00:00 2001 From: Sami Tolvanen <samitolvanen@google.com> Date: Fri, 13 Nov 2020 10:31:26 -0800 Subject: perf/x86: fix sysfs type mismatches This change switches rapl to use PMU_FORMAT_ATTR, and fixes two other macros to use device_attribute instead of kobj_attribute to avoid callback type mismatches that trip indirect call checking with Clang's Control-Flow Integrity (CFI). Reported-by: Sedat Dilek <sedat.dilek@gmail.com> Signed-off-by: Sami Tolvanen <samitolvanen@google.com> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Reviewed-by: Kees Cook <keescook@chromium.org> Link: https://lkml.kernel.org/r/20201113183126.1239404-1-samitolvanen@google.com --- arch/x86/events/intel/cstate.c | 6 +++--- arch/x86/events/intel/uncore.c | 4 ++-- arch/x86/events/intel/uncore.h | 12 ++++++------ arch/x86/events/rapl.c | 14 +------------- 4 files changed, 12 insertions(+), 24 deletions(-) (limited to 'arch') diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c index 442e1ed4acd4..4eb7ee5fed72 100644 --- a/arch/x86/events/intel/cstate.c +++ b/arch/x86/events/intel/cstate.c @@ -107,14 +107,14 @@ MODULE_LICENSE("GPL"); #define DEFINE_CSTATE_FORMAT_ATTR(_var, _name, _format) \ -static ssize_t __cstate_##_var##_show(struct kobject *kobj, \ - struct kobj_attribute *attr, \ +static ssize_t __cstate_##_var##_show(struct device *dev, \ + struct device_attribute *attr, \ char *page) \ { \ BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE); \ return sprintf(page, _format "\n"); \ } \ -static struct kobj_attribute format_attr_##_var = \ +static struct device_attribute format_attr_##_var = \ __ATTR(_name, 0444, __cstate_##_var##_show, NULL) static ssize_t cstate_get_attr_cpumask(struct device *dev, diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c index 86d012b3e0b4..80d52cbe2fde 100644 --- a/arch/x86/events/intel/uncore.c +++ b/arch/x86/events/intel/uncore.c @@ -94,8 +94,8 @@ end: return map; } -ssize_t uncore_event_show(struct kobject *kobj, - struct kobj_attribute *attr, char *buf) +ssize_t uncore_event_show(struct device *dev, + struct device_attribute *attr, char *buf) { struct uncore_event_desc *event = container_of(attr, struct uncore_event_desc, attr); diff --git a/arch/x86/events/intel/uncore.h b/arch/x86/events/intel/uncore.h index 83d2a7d490e0..9efea154349d 100644 --- a/arch/x86/events/intel/uncore.h +++ b/arch/x86/events/intel/uncore.h @@ -157,7 +157,7 @@ struct intel_uncore_box { #define UNCORE_BOX_FLAG_CFL8_CBOX_MSR_OFFS 2 struct uncore_event_desc { - struct kobj_attribute attr; + struct device_attribute attr; const char *config; }; @@ -179,8 +179,8 @@ struct pci2phy_map { struct pci2phy_map *__find_pci2phy_map(int segment); int uncore_pcibus_to_physid(struct pci_bus *bus); -ssize_t uncore_event_show(struct kobject *kobj, - struct kobj_attribute *attr, char *buf); +ssize_t uncore_event_show(struct device *dev, + struct device_attribute *attr, char *buf); static inline struct intel_uncore_pmu *dev_to_uncore_pmu(struct device *dev) { @@ -201,14 +201,14 @@ extern int __uncore_max_dies; } #define DEFINE_UNCORE_FORMAT_ATTR(_var, _name, _format) \ -static ssize_t __uncore_##_var##_show(struct kobject *kobj, \ - struct kobj_attribute *attr, \ +static ssize_t __uncore_##_var##_show(struct device *dev, \ + struct device_attribute *attr, \ char *page) \ { \ BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE); \ return sprintf(page, _format "\n"); \ } \ -static struct kobj_attribute format_attr_##_var = \ +static struct device_attribute format_attr_##_var = \ __ATTR(_name, 0444, __uncore_##_var##_show, NULL) static inline bool uncore_pmc_fixed(int idx) diff --git a/arch/x86/events/rapl.c b/arch/x86/events/rapl.c index 7c0120e2e957..7dbbeaacd995 100644 --- a/arch/x86/events/rapl.c +++ b/arch/x86/events/rapl.c @@ -93,18 +93,6 @@ static const char *const rapl_domain_names[NR_RAPL_DOMAINS] __initconst = { * any other bit is reserved */ #define RAPL_EVENT_MASK 0xFFULL - -#define DEFINE_RAPL_FORMAT_ATTR(_var, _name, _format) \ -static ssize_t __rapl_##_var##_show(struct kobject *kobj, \ - struct kobj_attribute *attr, \ - char *page) \ -{ \ - BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE); \ - return sprintf(page, _format "\n"); \ -} \ -static struct kobj_attribute format_attr_##_var = \ - __ATTR(_name, 0444, __rapl_##_var##_show, NULL) - #define RAPL_CNTR_WIDTH 32 #define RAPL_EVENT_ATTR_STR(_name, v, str) \ @@ -441,7 +429,7 @@ static struct attribute_group rapl_pmu_events_group = { .attrs = attrs_empty, }; -DEFINE_RAPL_FORMAT_ATTR(event, event, "config:0-7"); +PMU_FORMAT_ATTR(event, "config:0-7"); static struct attribute *rapl_formats_attr[] = { &format_attr_event.attr, NULL, -- cgit v1.2.3-70-g09d2 From dc293f2106903ab9c24e9cea18c276e32c394c33 Mon Sep 17 00:00:00 2001 From: Laurent Pinchart <laurent.pinchart@ideasonboard.com> Date: Tue, 1 Sep 2020 00:09:37 +0300 Subject: xtensa: uaccess: Add missing __user to strncpy_from_user() prototype When adding __user annotations in commit 2adf5352a34a, the strncpy_from_user() function declaration for the CONFIG_GENERIC_STRNCPY_FROM_USER case was missed. Fix it. Reported-by: kernel test robot <lkp@intel.com> Signed-off-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com> Message-Id: <20200831210937.17938-1-laurent.pinchart@ideasonboard.com> Signed-off-by: Max Filippov <jcmvbkbc@gmail.com> --- arch/xtensa/include/asm/uaccess.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/xtensa/include/asm/uaccess.h b/arch/xtensa/include/asm/uaccess.h index b9758119feca..5c9fb8005aa8 100644 --- a/arch/xtensa/include/asm/uaccess.h +++ b/arch/xtensa/include/asm/uaccess.h @@ -302,7 +302,7 @@ strncpy_from_user(char *dst, const char __user *src, long count) return -EFAULT; } #else -long strncpy_from_user(char *dst, const char *src, long count); +long strncpy_from_user(char *dst, const char __user *src, long count); #endif /* -- cgit v1.2.3-70-g09d2 From 2013a4b684b6eb614ee5c9a3c07b0ae6f5ca96d9 Mon Sep 17 00:00:00 2001 From: Zhen Lei <thunder.leizhen@huawei.com> Date: Fri, 16 Oct 2020 17:08:32 +0800 Subject: arm64: dts: broadcom: clear the warnings caused by empty dma-ranges The scripts/dtc/checks.c requires that the node have empty "dma-ranges" property must have the same "#address-cells" and "#size-cells" values as the parent node. Otherwise, the following warnings is reported: arch/arm64/boot/dts/broadcom/stingray/stingray-usb.dtsi:7.3-14: Warning \ (dma_ranges_format): /usb:dma-ranges: empty "dma-ranges" property but \ its #address-cells (1) differs from / (2) arch/arm64/boot/dts/broadcom/stingray/stingray-usb.dtsi:7.3-14: Warning \ (dma_ranges_format): /usb:dma-ranges: empty "dma-ranges" property but \ its #size-cells (1) differs from / (2) Arnd Bergmann figured out why it's necessary: Also note that the #address-cells=<1> means that any device under this bus is assumed to only support 32-bit addressing, and DMA will have to go through a slow swiotlb in the absence of an IOMMU. Suggested-by: Arnd Bergmann <arnd@arndb.de> Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com> Link: https://lore.kernel.org/r/20201016090833.1892-2-thunder.leizhen@huawei.com' Acked-by: Florian Fainelli <f.fainelli@gmail.com> Signed-off-by: Arnd Bergmann <arnd@arndb.de> --- .../boot/dts/broadcom/stingray/stingray-usb.dtsi | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/arm64/boot/dts/broadcom/stingray/stingray-usb.dtsi b/arch/arm64/boot/dts/broadcom/stingray/stingray-usb.dtsi index 55259f973b5a..aef8f2b00778 100644 --- a/arch/arm64/boot/dts/broadcom/stingray/stingray-usb.dtsi +++ b/arch/arm64/boot/dts/broadcom/stingray/stingray-usb.dtsi @@ -5,20 +5,20 @@ usb { compatible = "simple-bus"; dma-ranges; - #address-cells = <1>; - #size-cells = <1>; - ranges = <0x0 0x0 0x68500000 0x00400000>; + #address-cells = <2>; + #size-cells = <2>; + ranges = <0x0 0x0 0x0 0x68500000 0x0 0x00400000>; usbphy0: usb-phy@0 { compatible = "brcm,sr-usb-combo-phy"; - reg = <0x00000000 0x100>; + reg = <0x0 0x00000000 0x0 0x100>; #phy-cells = <1>; status = "disabled"; }; xhci0: usb@1000 { compatible = "generic-xhci"; - reg = <0x00001000 0x1000>; + reg = <0x0 0x00001000 0x0 0x1000>; interrupts = <GIC_SPI 256 IRQ_TYPE_LEVEL_HIGH>; phys = <&usbphy0 1>, <&usbphy0 0>; phy-names = "phy0", "phy1"; @@ -28,7 +28,7 @@ bdc0: usb@2000 { compatible = "brcm,bdc-v0.16"; - reg = <0x00002000 0x1000>; + reg = <0x0 0x00002000 0x0 0x1000>; interrupts = <GIC_SPI 259 IRQ_TYPE_LEVEL_HIGH>; phys = <&usbphy0 0>, <&usbphy0 1>; phy-names = "phy0", "phy1"; @@ -38,21 +38,21 @@ usbphy1: usb-phy@10000 { compatible = "brcm,sr-usb-combo-phy"; - reg = <0x00010000 0x100>; + reg = <0x0 0x00010000 0x0 0x100>; #phy-cells = <1>; status = "disabled"; }; usbphy2: usb-phy@20000 { compatible = "brcm,sr-usb-hs-phy"; - reg = <0x00020000 0x100>; + reg = <0x0 0x00020000 0x0 0x100>; #phy-cells = <0>; status = "disabled"; }; xhci1: usb@11000 { compatible = "generic-xhci"; - reg = <0x00011000 0x1000>; + reg = <0x0 0x00011000 0x0 0x1000>; interrupts = <GIC_SPI 263 IRQ_TYPE_LEVEL_HIGH>; phys = <&usbphy1 1>, <&usbphy2>, <&usbphy1 0>; phy-names = "phy0", "phy1", "phy2"; @@ -62,7 +62,7 @@ bdc1: usb@21000 { compatible = "brcm,bdc-v0.16"; - reg = <0x00021000 0x1000>; + reg = <0x0 0x00021000 0x0 0x1000>; interrupts = <GIC_SPI 266 IRQ_TYPE_LEVEL_HIGH>; phys = <&usbphy2>; phy-names = "phy0"; -- cgit v1.2.3-70-g09d2 From e3389b0a14952aac7f2998bb98f633afb21eaa92 Mon Sep 17 00:00:00 2001 From: Zhen Lei <thunder.leizhen@huawei.com> Date: Fri, 16 Oct 2020 17:08:33 +0800 Subject: arm64: dts: qcom: clear the warnings caused by empty dma-ranges The scripts/dtc/checks.c requires that the node have empty "dma-ranges" property must have the same "#address-cells" and "#size-cells" values as the parent node. Otherwise, the following warnings is reported: arch/arm64/boot/dts/qcom/ipq6018.dtsi:185.3-14: Warning \ (dma_ranges_format): /soc:dma-ranges: empty "dma-ranges" property but \ its #address-cells (1) differs from / (2) arch/arm64/boot/dts/qcom/ipq6018.dtsi:185.3-14: Warning \ (dma_ranges_format): /soc:dma-ranges: empty "dma-ranges" property but \ its #size-cells (1) differs from / (2) Arnd Bergmann figured out why it's necessary: Also note that the #address-cells=<1> means that any device under this bus is assumed to only support 32-bit addressing, and DMA will have to go through a slow swiotlb in the absence of an IOMMU. Suggested-by: Arnd Bergmann <arnd@arndb.de> Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com> Reviewed-by: Bjorn Andersson <bjorn.andersson@linaro.org> Link: https://lore.kernel.org/r/20201016090833.1892-3-thunder.leizhen@huawei.com' Signed-off-by: Arnd Bergmann <arnd@arndb.de> --- arch/arm64/boot/dts/qcom/ipq6018.dtsi | 72 +++++++++++++++++------------------ 1 file changed, 36 insertions(+), 36 deletions(-) (limited to 'arch') diff --git a/arch/arm64/boot/dts/qcom/ipq6018.dtsi b/arch/arm64/boot/dts/qcom/ipq6018.dtsi index a94dac76bf3f..59e0cbfa2214 100644 --- a/arch/arm64/boot/dts/qcom/ipq6018.dtsi +++ b/arch/arm64/boot/dts/qcom/ipq6018.dtsi @@ -179,22 +179,22 @@ }; soc: soc { - #address-cells = <1>; - #size-cells = <1>; - ranges = <0 0 0 0xffffffff>; + #address-cells = <2>; + #size-cells = <2>; + ranges = <0 0 0 0 0x0 0xffffffff>; dma-ranges; compatible = "simple-bus"; prng: qrng@e1000 { compatible = "qcom,prng-ee"; - reg = <0xe3000 0x1000>; + reg = <0x0 0xe3000 0x0 0x1000>; clocks = <&gcc GCC_PRNG_AHB_CLK>; clock-names = "core"; }; cryptobam: dma@704000 { compatible = "qcom,bam-v1.7.0"; - reg = <0x00704000 0x20000>; + reg = <0x0 0x00704000 0x0 0x20000>; interrupts = <GIC_SPI 207 IRQ_TYPE_LEVEL_HIGH>; clocks = <&gcc GCC_CRYPTO_AHB_CLK>; clock-names = "bam_clk"; @@ -206,7 +206,7 @@ crypto: crypto@73a000 { compatible = "qcom,crypto-v5.1"; - reg = <0x0073a000 0x6000>; + reg = <0x0 0x0073a000 0x0 0x6000>; clocks = <&gcc GCC_CRYPTO_AHB_CLK>, <&gcc GCC_CRYPTO_AXI_CLK>, <&gcc GCC_CRYPTO_CLK>; @@ -217,7 +217,7 @@ tlmm: pinctrl@1000000 { compatible = "qcom,ipq6018-pinctrl"; - reg = <0x01000000 0x300000>; + reg = <0x0 0x01000000 0x0 0x300000>; interrupts = <GIC_SPI 208 IRQ_TYPE_LEVEL_HIGH>; gpio-controller; #gpio-cells = <2>; @@ -235,7 +235,7 @@ gcc: gcc@1800000 { compatible = "qcom,gcc-ipq6018"; - reg = <0x01800000 0x80000>; + reg = <0x0 0x01800000 0x0 0x80000>; clocks = <&xo>, <&sleep_clk>; clock-names = "xo", "sleep_clk"; #clock-cells = <1>; @@ -244,17 +244,17 @@ tcsr_mutex_regs: syscon@1905000 { compatible = "syscon"; - reg = <0x01905000 0x8000>; + reg = <0x0 0x01905000 0x0 0x8000>; }; tcsr_q6: syscon@1945000 { compatible = "syscon"; - reg = <0x01945000 0xe000>; + reg = <0x0 0x01945000 0x0 0xe000>; }; blsp_dma: dma@7884000 { compatible = "qcom,bam-v1.7.0"; - reg = <0x07884000 0x2b000>; + reg = <0x0 0x07884000 0x0 0x2b000>; interrupts = <GIC_SPI 238 IRQ_TYPE_LEVEL_HIGH>; clocks = <&gcc GCC_BLSP1_AHB_CLK>; clock-names = "bam_clk"; @@ -264,7 +264,7 @@ blsp1_uart3: serial@78b1000 { compatible = "qcom,msm-uartdm-v1.4", "qcom,msm-uartdm"; - reg = <0x078b1000 0x200>; + reg = <0x0 0x078b1000 0x0 0x200>; interrupts = <GIC_SPI 306 IRQ_TYPE_LEVEL_HIGH>; clocks = <&gcc GCC_BLSP1_UART3_APPS_CLK>, <&gcc GCC_BLSP1_AHB_CLK>; @@ -276,7 +276,7 @@ compatible = "qcom,spi-qup-v2.2.1"; #address-cells = <1>; #size-cells = <0>; - reg = <0x078b5000 0x600>; + reg = <0x0 0x078b5000 0x0 0x600>; interrupts = <GIC_SPI 95 IRQ_TYPE_LEVEL_HIGH>; spi-max-frequency = <50000000>; clocks = <&gcc GCC_BLSP1_QUP1_SPI_APPS_CLK>, @@ -291,7 +291,7 @@ compatible = "qcom,spi-qup-v2.2.1"; #address-cells = <1>; #size-cells = <0>; - reg = <0x078b6000 0x600>; + reg = <0x0 0x078b6000 0x0 0x600>; interrupts = <GIC_SPI 96 IRQ_TYPE_LEVEL_HIGH>; spi-max-frequency = <50000000>; clocks = <&gcc GCC_BLSP1_QUP2_SPI_APPS_CLK>, @@ -306,7 +306,7 @@ compatible = "qcom,i2c-qup-v2.2.1"; #address-cells = <1>; #size-cells = <0>; - reg = <0x078b6000 0x600>; + reg = <0x0 0x078b6000 0x0 0x600>; interrupts = <GIC_SPI 96 IRQ_TYPE_LEVEL_HIGH>; clocks = <&gcc GCC_BLSP1_AHB_CLK>, <&gcc GCC_BLSP1_QUP2_I2C_APPS_CLK>; @@ -321,7 +321,7 @@ compatible = "qcom,i2c-qup-v2.2.1"; #address-cells = <1>; #size-cells = <0>; - reg = <0x078b7000 0x600>; + reg = <0x0 0x078b7000 0x0 0x600>; interrupts = <GIC_SPI 97 IRQ_TYPE_LEVEL_HIGH>; clocks = <&gcc GCC_BLSP1_AHB_CLK>, <&gcc GCC_BLSP1_QUP3_I2C_APPS_CLK>; @@ -336,24 +336,24 @@ compatible = "qcom,msm-qgic2"; interrupt-controller; #interrupt-cells = <0x3>; - reg = <0x0b000000 0x1000>, /*GICD*/ - <0x0b002000 0x1000>, /*GICC*/ - <0x0b001000 0x1000>, /*GICH*/ - <0x0b004000 0x1000>; /*GICV*/ + reg = <0x0 0x0b000000 0x0 0x1000>, /*GICD*/ + <0x0 0x0b002000 0x0 0x1000>, /*GICC*/ + <0x0 0x0b001000 0x0 0x1000>, /*GICH*/ + <0x0 0x0b004000 0x0 0x1000>; /*GICV*/ interrupts = <GIC_PPI 9 IRQ_TYPE_LEVEL_HIGH>; }; watchdog@b017000 { compatible = "qcom,kpss-wdt"; interrupts = <GIC_SPI 3 IRQ_TYPE_EDGE_RISING>; - reg = <0x0b017000 0x40>; + reg = <0x0 0x0b017000 0x0 0x40>; clocks = <&sleep_clk>; timeout-sec = <10>; }; apcs_glb: mailbox@b111000 { compatible = "qcom,ipq6018-apcs-apps-global"; - reg = <0x0b111000 0x1000>; + reg = <0x0 0x0b111000 0x0 0x1000>; #clock-cells = <1>; clocks = <&a53pll>, <&xo>; clock-names = "pll", "xo"; @@ -362,7 +362,7 @@ a53pll: clock@b116000 { compatible = "qcom,ipq6018-a53pll"; - reg = <0x0b116000 0x40>; + reg = <0x0 0x0b116000 0x0 0x40>; #clock-cells = <0>; clocks = <&xo>; clock-names = "xo"; @@ -377,68 +377,68 @@ }; timer@b120000 { - #address-cells = <1>; - #size-cells = <1>; + #address-cells = <2>; + #size-cells = <2>; ranges; compatible = "arm,armv7-timer-mem"; - reg = <0x0b120000 0x1000>; + reg = <0x0 0x0b120000 0x0 0x1000>; clock-frequency = <19200000>; frame@b120000 { frame-number = <0>; interrupts = <GIC_SPI 8 IRQ_TYPE_LEVEL_HIGH>, <GIC_SPI 7 IRQ_TYPE_LEVEL_HIGH>; - reg = <0x0b121000 0x1000>, - <0x0b122000 0x1000>; + reg = <0x0 0x0b121000 0x0 0x1000>, + <0x0 0x0b122000 0x0 0x1000>; }; frame@b123000 { frame-number = <1>; interrupts = <GIC_SPI 9 IRQ_TYPE_LEVEL_HIGH>; - reg = <0xb123000 0x1000>; + reg = <0x0 0xb123000 0x0 0x1000>; status = "disabled"; }; frame@b124000 { frame-number = <2>; interrupts = <GIC_SPI 10 IRQ_TYPE_LEVEL_HIGH>; - reg = <0x0b124000 0x1000>; + reg = <0x0 0x0b124000 0x0 0x1000>; status = "disabled"; }; frame@b125000 { frame-number = <3>; interrupts = <GIC_SPI 11 IRQ_TYPE_LEVEL_HIGH>; - reg = <0x0b125000 0x1000>; + reg = <0x0 0x0b125000 0x0 0x1000>; status = "disabled"; }; frame@b126000 { frame-number = <4>; interrupts = <GIC_SPI 12 IRQ_TYPE_LEVEL_HIGH>; - reg = <0x0b126000 0x1000>; + reg = <0x0 0x0b126000 0x0 0x1000>; status = "disabled"; }; frame@b127000 { frame-number = <5>; interrupts = <GIC_SPI 13 IRQ_TYPE_LEVEL_HIGH>; - reg = <0x0b127000 0x1000>; + reg = <0x0 0x0b127000 0x0 0x1000>; status = "disabled"; }; frame@b128000 { frame-number = <6>; interrupts = <GIC_SPI 14 IRQ_TYPE_LEVEL_HIGH>; - reg = <0x0b128000 0x1000>; + reg = <0x0 0x0b128000 0x0 0x1000>; status = "disabled"; }; }; q6v5_wcss: remoteproc@cd00000 { compatible = "qcom,ipq8074-wcss-pil"; - reg = <0x0cd00000 0x4040>, - <0x004ab000 0x20>; + reg = <0x0 0x0cd00000 0x0 0x4040>, + <0x0 0x004ab000 0x0 0x20>; reg-names = "qdsp6", "rmb"; interrupts-extended = <&intc GIC_SPI 325 IRQ_TYPE_EDGE_RISING>, -- cgit v1.2.3-70-g09d2 From 23bde34771f1ea92fb5e6682c0d8c04304d34b3b Mon Sep 17 00:00:00 2001 From: Zenghui Yu <yuzenghui@huawei.com> Date: Tue, 17 Nov 2020 23:16:29 +0800 Subject: KVM: arm64: vgic-v3: Drop the reporting of GICR_TYPER.Last for userspace It was recently reported that if GICR_TYPER is accessed before the RD base address is set, we'll suffer from the unset @rdreg dereferencing. Oops... gpa_t last_rdist_typer = rdreg->base + GICR_TYPER + (rdreg->free_index - 1) * KVM_VGIC_V3_REDIST_SIZE; It's "expected" that users will access registers in the redistributor if the RD has been properly configured (e.g., the RD base address is set). But it hasn't yet been covered by the existing documentation. Per discussion on the list [1], the reporting of the GICR_TYPER.Last bit for userspace never actually worked. And it's difficult for us to emulate it correctly given that userspace has the flexibility to access it any time. Let's just drop the reporting of the Last bit for userspace for now (userspace should have full knowledge about it anyway) and it at least prevents kernel from panic ;-) [1] https://lore.kernel.org/kvmarm/c20865a267e44d1e2c0d52ce4e012263@kernel.org/ Fixes: ba7b3f1275fd ("KVM: arm/arm64: Revisit Redistributor TYPER last bit computation") Reported-by: Keqian Zhu <zhukeqian1@huawei.com> Signed-off-by: Zenghui Yu <yuzenghui@huawei.com> Signed-off-by: Marc Zyngier <maz@kernel.org> Reviewed-by: Eric Auger <eric.auger@redhat.com> Link: https://lore.kernel.org/r/20201117151629.1738-1-yuzenghui@huawei.com Cc: stable@vger.kernel.org --- arch/arm64/kvm/vgic/vgic-mmio-v3.c | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/vgic/vgic-mmio-v3.c b/arch/arm64/kvm/vgic/vgic-mmio-v3.c index 52d6f24f65dc..15a6c98ee92f 100644 --- a/arch/arm64/kvm/vgic/vgic-mmio-v3.c +++ b/arch/arm64/kvm/vgic/vgic-mmio-v3.c @@ -273,6 +273,23 @@ static unsigned long vgic_mmio_read_v3r_typer(struct kvm_vcpu *vcpu, return extract_bytes(value, addr & 7, len); } +static unsigned long vgic_uaccess_read_v3r_typer(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len) +{ + unsigned long mpidr = kvm_vcpu_get_mpidr_aff(vcpu); + int target_vcpu_id = vcpu->vcpu_id; + u64 value; + + value = (u64)(mpidr & GENMASK(23, 0)) << 32; + value |= ((target_vcpu_id & 0xffff) << 8); + + if (vgic_has_its(vcpu->kvm)) + value |= GICR_TYPER_PLPIS; + + /* reporting of the Last bit is not supported for userspace */ + return extract_bytes(value, addr & 7, len); +} + static unsigned long vgic_mmio_read_v3r_iidr(struct kvm_vcpu *vcpu, gpa_t addr, unsigned int len) { @@ -593,8 +610,9 @@ static const struct vgic_register_region vgic_v3_rd_registers[] = { REGISTER_DESC_WITH_LENGTH(GICR_IIDR, vgic_mmio_read_v3r_iidr, vgic_mmio_write_wi, 4, VGIC_ACCESS_32bit), - REGISTER_DESC_WITH_LENGTH(GICR_TYPER, - vgic_mmio_read_v3r_typer, vgic_mmio_write_wi, 8, + REGISTER_DESC_WITH_LENGTH_UACCESS(GICR_TYPER, + vgic_mmio_read_v3r_typer, vgic_mmio_write_wi, + vgic_uaccess_read_v3r_typer, vgic_mmio_uaccess_write_wi, 8, VGIC_ACCESS_64bit | VGIC_ACCESS_32bit), REGISTER_DESC_WITH_LENGTH(GICR_WAKER, vgic_mmio_read_raz, vgic_mmio_write_wi, 4, -- cgit v1.2.3-70-g09d2 From cd81acc600a9684ea4b4d25a47900d38a3890eab Mon Sep 17 00:00:00 2001 From: Nicholas Piggin <npiggin@gmail.com> Date: Tue, 17 Nov 2020 23:56:17 +1000 Subject: powerpc/64s/exception: KVM Fix for host DSI being taken in HPT guest MMU context Commit 2284ffea8f0c ("powerpc/64s/exception: Only test KVM in SRR interrupts when PR KVM is supported") removed KVM guest tests from interrupts that do not set HV=1, when PR-KVM is not configured. This is wrong for HV-KVM HPT guest MMIO emulation case which attempts to load the faulting instruction word with MSR[DR]=1 and MSR[HV]=1 with the guest MMU context loaded. This can cause host DSI, DSLB interrupts which must test for KVM guest. Restore this and add a comment. Fixes: 2284ffea8f0c ("powerpc/64s/exception: Only test KVM in SRR interrupts when PR KVM is supported") Cc: stable@vger.kernel.org # v5.7+ Signed-off-by: Nicholas Piggin <npiggin@gmail.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/20201117135617.3521127-1-npiggin@gmail.com --- arch/powerpc/kernel/exceptions-64s.S | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 07d64883c0b5..8e6b2cc8db67 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -1410,6 +1410,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) * If none is found, do a Linux page fault. Linux page faults can happen in * kernel mode due to user copy operations of course. * + * KVM: The KVM HDSI handler may perform a load with MSR[DR]=1 in guest + * MMU context, which may cause a DSI in the host, which must go to the + * KVM handler. MSR[IR] is not enabled, so the real-mode handler will + * always be used regardless of AIL setting. + * * - Radix MMU * The hardware loads from the Linux page table directly, so a fault goes * immediately to Linux page fault. @@ -1420,10 +1425,8 @@ INT_DEFINE_BEGIN(data_access) IVEC=0x300 IDAR=1 IDSISR=1 -#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE IKVM_SKIP=1 IKVM_REAL=1 -#endif INT_DEFINE_END(data_access) EXC_REAL_BEGIN(data_access, 0x300, 0x80) @@ -1462,6 +1465,8 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) * ppc64_bolted_size (first segment). The kernel handler must avoid stomping * on user-handler data structures. * + * KVM: Same as 0x300, DSLB must test for KVM guest. + * * A dedicated save area EXSLB is used (XXX: but it actually need not be * these days, we could use EXGEN). */ @@ -1470,10 +1475,8 @@ INT_DEFINE_BEGIN(data_access_slb) IAREA=PACA_EXSLB IRECONCILE=0 IDAR=1 -#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE IKVM_SKIP=1 IKVM_REAL=1 -#endif INT_DEFINE_END(data_access_slb) EXC_REAL_BEGIN(data_access_slb, 0x380, 0x80) -- cgit v1.2.3-70-g09d2 From 78aec9bb1f3c79e4570eb50260d6320063f823a2 Mon Sep 17 00:00:00 2001 From: Gustavo Pimentel <gustavo.pimentel@synopsys.com> Date: Wed, 21 Oct 2020 23:12:20 +0200 Subject: ARC: bitops: Remove unecessary operation and value The 1-bit shift rotation to the left on x variable located on 4 last if statement can be removed because the computed value is will not be used afront. Signed-off-by: Gustavo Pimentel <gustavo.pimentel@synopsys.com> Signed-off-by: Vineet Gupta <vgupta@synopsys.com> --- arch/arc/include/asm/bitops.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/arc/include/asm/bitops.h b/arch/arc/include/asm/bitops.h index c6606f4d20d6..fb98440c0bd4 100644 --- a/arch/arc/include/asm/bitops.h +++ b/arch/arc/include/asm/bitops.h @@ -243,10 +243,8 @@ static inline int constant_fls(unsigned int x) x <<= 2; r -= 2; } - if (!(x & 0x80000000u)) { - x <<= 1; + if (!(x & 0x80000000u)) r -= 1; - } return r; } -- cgit v1.2.3-70-g09d2 From 5f840df591a9554e4e1355ef1f8946bc2120ca9f Mon Sep 17 00:00:00 2001 From: Flavio Suligoi <f.suligoi@asem.it> Date: Mon, 9 Nov 2020 14:21:30 +0100 Subject: ARC: mm: fix spelling mistakes Signed-off-by: Flavio Suligoi <f.suligoi@asem.it> Signed-off-by: Vineet Gupta <vgupta@synopsys.com> --- arch/arc/mm/tlb.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) (limited to 'arch') diff --git a/arch/arc/mm/tlb.c b/arch/arc/mm/tlb.c index c340acd989a0..9bb3c24f3677 100644 --- a/arch/arc/mm/tlb.c +++ b/arch/arc/mm/tlb.c @@ -30,14 +30,14 @@ * -Changes related to MMU v2 (Rel 4.8) * * Vineetg: Aug 29th 2008 - * -In TLB Flush operations (Metal Fix MMU) there is a explict command to + * -In TLB Flush operations (Metal Fix MMU) there is a explicit command to * flush Micro-TLBS. If TLB Index Reg is invalid prior to TLBIVUTLB cmd, * it fails. Thus need to load it with ANY valid value before invoking * TLBIVUTLB cmd * * Vineetg: Aug 21th 2008: * -Reduced the duration of IRQ lockouts in TLB Flush routines - * -Multiple copies of TLB erase code seperated into a "single" function + * -Multiple copies of TLB erase code separated into a "single" function * -In TLB Flush routines, interrupt disabling moved UP to retrieve ASID * in interrupt-safe region. * @@ -66,7 +66,7 @@ * * Although J-TLB is 2 way set assoc, ARC700 caches J-TLB into uTLBS which has * much higher associativity. u-D-TLB is 8 ways, u-I-TLB is 4 ways. - * Given this, the thrasing problem should never happen because once the 3 + * Given this, the thrashing problem should never happen because once the 3 * J-TLB entries are created (even though 3rd will knock out one of the prev * two), the u-D-TLB and u-I-TLB will have what is required to accomplish memcpy * @@ -127,7 +127,7 @@ static void utlb_invalidate(void) * There was however an obscure hardware bug, where uTLB flush would * fail when a prior probe for J-TLB (both totally unrelated) would * return lkup err - because the entry didn't exist in MMU. - * The Workround was to set Index reg with some valid value, prior to + * The Workaround was to set Index reg with some valid value, prior to * flush. This was fixed in MMU v3 */ unsigned int idx; @@ -272,7 +272,7 @@ noinline void local_flush_tlb_all(void) } /* - * Flush the entrie MM for userland. The fastest way is to move to Next ASID + * Flush the entire MM for userland. The fastest way is to move to Next ASID */ noinline void local_flush_tlb_mm(struct mm_struct *mm) { @@ -303,7 +303,7 @@ noinline void local_flush_tlb_mm(struct mm_struct *mm) * Difference between this and Kernel Range Flush is * -Here the fastest way (if range is too large) is to move to next ASID * without doing any explicit Shootdown - * -In case of kernel Flush, entry has to be shot down explictly + * -In case of kernel Flush, entry has to be shot down explicitly */ void local_flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) @@ -620,7 +620,7 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long vaddr_unaligned, * Super Page size is configurable in hardware (4K to 16M), but fixed once * RTL builds. * - * The exact THP size a Linx configuration will support is a function of: + * The exact THP size a Linux configuration will support is a function of: * - MMU page size (typical 8K, RTL fixed) * - software page walker address split between PGD:PTE:PFN (typical * 11:8:13, but can be changed with 1 line) @@ -698,7 +698,7 @@ void local_flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start, #endif -/* Read the Cache Build Confuration Registers, Decode them and save into +/* Read the Cache Build Configuration Registers, Decode them and save into * the cpuinfo structure for later use. * No Validation is done here, simply read/convert the BCRs */ @@ -803,13 +803,13 @@ void arc_mmu_init(void) pr_info("%s", arc_mmu_mumbojumbo(0, str, sizeof(str))); /* - * Can't be done in processor.h due to header include depenedencies + * Can't be done in processor.h due to header include dependencies */ BUILD_BUG_ON(!IS_ALIGNED((CONFIG_ARC_KVADDR_SIZE << 20), PMD_SIZE)); /* * stack top size sanity check, - * Can't be done in processor.h due to header include depenedencies + * Can't be done in processor.h due to header include dependencies */ BUILD_BUG_ON(!IS_ALIGNED(STACK_TOP, PMD_SIZE)); @@ -881,7 +881,7 @@ void arc_mmu_init(void) * the duplicate one. * -Knob to be verbose abt it.(TODO: hook them up to debugfs) */ -volatile int dup_pd_silent; /* Be slient abt it or complain (default) */ +volatile int dup_pd_silent; /* Be silent abt it or complain (default) */ void do_tlb_overlap_fault(unsigned long cause, unsigned long address, struct pt_regs *regs) @@ -948,7 +948,7 @@ void do_tlb_overlap_fault(unsigned long cause, unsigned long address, /*********************************************************************** * Diagnostic Routines - * -Called from Low Level TLB Hanlders if things don;t look good + * -Called from Low Level TLB Handlers if things don;t look good **********************************************************************/ #ifdef CONFIG_ARC_DBG_TLB_PARANOIA -- cgit v1.2.3-70-g09d2 From e42404fa10fd11fe72d0a0e149a321d10e577715 Mon Sep 17 00:00:00 2001 From: Vineet Gupta <vgupta@synopsys.com> Date: Fri, 6 Nov 2020 16:59:27 -0800 Subject: ARC: stack unwinding: don't assume non-current task is sleeping To start stack unwinding (SP, PC and BLINK) are needed. When the explicit execution context (pt_regs etc) is not available, unwinder assumes the task is sleeping (in __switch_to()) and fetches SP and BLINK from kernel mode stack. But this assumption is not true, specially in a SMP system, when top runs on 1 core, there may be active running processes on all cores. So when unwinding non courrent tasks, ensure they are NOT running. And while at it, handle the self unwinding case explicitly. This came out of investigation of a customer reported hang with rcutorture+top Link: https://github.com/foss-for-synopsys-dwc-arc-processors/linux/issues/31 Signed-off-by: Vineet Gupta <vgupta@synopsys.com> --- arch/arc/kernel/stacktrace.c | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/arc/kernel/stacktrace.c b/arch/arc/kernel/stacktrace.c index b23986f98450..b2557f581ea8 100644 --- a/arch/arc/kernel/stacktrace.c +++ b/arch/arc/kernel/stacktrace.c @@ -38,15 +38,15 @@ #ifdef CONFIG_ARC_DW2_UNWIND -static void seed_unwind_frame_info(struct task_struct *tsk, - struct pt_regs *regs, - struct unwind_frame_info *frame_info) +static int +seed_unwind_frame_info(struct task_struct *tsk, struct pt_regs *regs, + struct unwind_frame_info *frame_info) { /* * synchronous unwinding (e.g. dump_stack) * - uses current values of SP and friends */ - if (tsk == NULL && regs == NULL) { + if (regs == NULL && (tsk == NULL || tsk == current)) { unsigned long fp, sp, blink, ret; frame_info->task = current; @@ -65,11 +65,15 @@ static void seed_unwind_frame_info(struct task_struct *tsk, frame_info->call_frame = 0; } else if (regs == NULL) { /* - * Asynchronous unwinding of sleeping task - * - Gets SP etc from task's pt_regs (saved bottom of kernel - * mode stack of task) + * Asynchronous unwinding of a likely sleeping task + * - first ensure it is actually sleeping + * - if so, it will be in __switch_to, kernel mode SP of task + * is safe-kept and BLINK at a well known location in there */ + if (tsk->state == TASK_RUNNING) + return -1; + frame_info->task = tsk; frame_info->regs.r27 = TSK_K_FP(tsk); @@ -103,6 +107,8 @@ static void seed_unwind_frame_info(struct task_struct *tsk, frame_info->regs.r63 = regs->ret; frame_info->call_frame = 0; } + + return 0; } #endif @@ -116,7 +122,8 @@ arc_unwind_core(struct task_struct *tsk, struct pt_regs *regs, unsigned int address; struct unwind_frame_info frame_info; - seed_unwind_frame_info(tsk, regs, &frame_info); + if (seed_unwind_frame_info(tsk, regs, &frame_info)) + return 0; while (1) { address = UNW_PC(&frame_info); -- cgit v1.2.3-70-g09d2 From f737561c709667013d832316dd3198a7fe3d1260 Mon Sep 17 00:00:00 2001 From: Vineet Gupta <vgupta@synopsys.com> Date: Fri, 6 Nov 2020 17:37:34 -0800 Subject: ARC: stack unwinding: reorganize how initial register state setup This is a non-functional change, if anything a better fall-back handling. Signed-off-by: Vineet Gupta <vgupta@synopsys.com> --- arch/arc/kernel/stacktrace.c | 37 ++++++++++++++++++------------------- 1 file changed, 18 insertions(+), 19 deletions(-) (limited to 'arch') diff --git a/arch/arc/kernel/stacktrace.c b/arch/arc/kernel/stacktrace.c index b2557f581ea8..f73da203b170 100644 --- a/arch/arc/kernel/stacktrace.c +++ b/arch/arc/kernel/stacktrace.c @@ -42,11 +42,23 @@ static int seed_unwind_frame_info(struct task_struct *tsk, struct pt_regs *regs, struct unwind_frame_info *frame_info) { - /* - * synchronous unwinding (e.g. dump_stack) - * - uses current values of SP and friends - */ - if (regs == NULL && (tsk == NULL || tsk == current)) { + if (regs) { + /* + * Asynchronous unwinding of intr/exception + * - Just uses the pt_regs passed + */ + frame_info->task = tsk; + + frame_info->regs.r27 = regs->fp; + frame_info->regs.r28 = regs->sp; + frame_info->regs.r31 = regs->blink; + frame_info->regs.r63 = regs->ret; + frame_info->call_frame = 0; + } else if (tsk == NULL || tsk == current) { + /* + * synchronous unwinding (e.g. dump_stack) + * - uses current values of SP and friends + */ unsigned long fp, sp, blink, ret; frame_info->task = current; @@ -63,7 +75,7 @@ seed_unwind_frame_info(struct task_struct *tsk, struct pt_regs *regs, frame_info->regs.r31 = blink; frame_info->regs.r63 = ret; frame_info->call_frame = 0; - } else if (regs == NULL) { + } else { /* * Asynchronous unwinding of a likely sleeping task * - first ensure it is actually sleeping @@ -94,20 +106,7 @@ seed_unwind_frame_info(struct task_struct *tsk, struct pt_regs *regs, frame_info->regs.r28 += 60; frame_info->call_frame = 0; - } else { - /* - * Asynchronous unwinding of intr/exception - * - Just uses the pt_regs passed - */ - frame_info->task = tsk; - - frame_info->regs.r27 = regs->fp; - frame_info->regs.r28 = regs->sp; - frame_info->regs.r31 = regs->blink; - frame_info->regs.r63 = regs->ret; - frame_info->call_frame = 0; } - return 0; } -- cgit v1.2.3-70-g09d2 From 860aaabac8235cfde10fe556aa82abbbe3117888 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner <tglx@linutronix.de> Date: Tue, 17 Nov 2020 21:23:34 +0100 Subject: x86/dumpstack: Do not try to access user space code of other tasks sysrq-t ends up invoking show_opcodes() for each task which tries to access the user space code of other processes, which is obviously bogus. It either manages to dump where the foreign task's regs->ip points to in a valid mapping of the current task or triggers a pagefault and prints "Code: Bad RIP value.". Both is just wrong. Add a safeguard in copy_code() and check whether the @regs pointer matches currents pt_regs. If not, do not even try to access it. While at it, add commentary why using copy_from_user_nmi() is safe in copy_code() even if the function name suggests otherwise. Reported-by: Oleg Nesterov <oleg@redhat.com> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Signed-off-by: Borislav Petkov <bp@suse.de> Reviewed-by: Borislav Petkov <bp@suse.de> Acked-by: Oleg Nesterov <oleg@redhat.com> Tested-by: Borislav Petkov <bp@suse.de> Link: https://lkml.kernel.org/r/20201117202753.667274723@linutronix.de --- arch/x86/kernel/dumpstack.c | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index 25c06b67e7e0..97aa900386cb 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -78,6 +78,9 @@ static int copy_code(struct pt_regs *regs, u8 *buf, unsigned long src, if (!user_mode(regs)) return copy_from_kernel_nofault(buf, (u8 *)src, nbytes); + /* The user space code from other tasks cannot be accessed. */ + if (regs != task_pt_regs(current)) + return -EPERM; /* * Make sure userspace isn't trying to trick us into dumping kernel * memory by pointing the userspace instruction pointer at it. @@ -85,6 +88,12 @@ static int copy_code(struct pt_regs *regs, u8 *buf, unsigned long src, if (__chk_range_not_ok(src, nbytes, TASK_SIZE_MAX)) return -EINVAL; + /* + * Even if named copy_from_user_nmi() this can be invoked from + * other contexts and will not try to resolve a pagefault, which is + * the correct thing to do here as this code can be called from any + * context. + */ return copy_from_user_nmi(buf, (void __user *)src, nbytes); } @@ -115,13 +124,19 @@ void show_opcodes(struct pt_regs *regs, const char *loglvl) u8 opcodes[OPCODE_BUFSIZE]; unsigned long prologue = regs->ip - PROLOGUE_SIZE; - if (copy_code(regs, opcodes, prologue, sizeof(opcodes))) { - printk("%sCode: Unable to access opcode bytes at RIP 0x%lx.\n", - loglvl, prologue); - } else { + switch (copy_code(regs, opcodes, prologue, sizeof(opcodes))) { + case 0: printk("%sCode: %" __stringify(PROLOGUE_SIZE) "ph <%02x> %" __stringify(EPILOGUE_SIZE) "ph\n", loglvl, opcodes, opcodes[PROLOGUE_SIZE], opcodes + PROLOGUE_SIZE + 1); + break; + case -EPERM: + /* No access to the user space stack of other tasks. Ignore. */ + break; + default: + printk("%sCode: Unable to access opcode bytes at RIP 0x%lx.\n", + loglvl, prologue); + break; } } -- cgit v1.2.3-70-g09d2 From 4c80d05714d347405865802b7098f1c97362cbef Mon Sep 17 00:00:00 2001 From: Christian Borntraeger <borntraeger@de.ibm.com> Date: Tue, 17 Nov 2020 20:00:33 +0100 Subject: s390/uv: handle destroy page legacy interface Older firmware can return rc=0x107 rrc=0xd for destroy page if the page is already non-secure. This should be handled like a success as already done by newer firmware. Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com> Fixes: 1a80b54d1ce1 ("s390/uv: add destroy page call") Reviewed-by: David Hildenbrand <david@redhat.com> Acked-by: Cornelia Huck <cohuck@redhat.com> Reviewed-by: Janosch Frank <frankja@linux.ibm.com> --- arch/s390/kernel/uv.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/s390/kernel/uv.c b/arch/s390/kernel/uv.c index 14bd9d58edc9..883bfed9f5c2 100644 --- a/arch/s390/kernel/uv.c +++ b/arch/s390/kernel/uv.c @@ -129,8 +129,15 @@ int uv_destroy_page(unsigned long paddr) .paddr = paddr }; - if (uv_call(0, (u64)&uvcb)) + if (uv_call(0, (u64)&uvcb)) { + /* + * Older firmware uses 107/d as an indication of a non secure + * page. Let us emulate the newer variant (no-op). + */ + if (uvcb.header.rc == 0x107 && uvcb.header.rrc == 0xd) + return 0; return -EINVAL; + } return 0; } -- cgit v1.2.3-70-g09d2 From 4d213e76a359e540ca786ee937da7f35faa8e5f8 Mon Sep 17 00:00:00 2001 From: Zhenzhong Duan <zhenzhong.duan@gmail.com> Date: Tue, 10 Nov 2020 15:19:08 +0800 Subject: iommu/vt-d: Avoid panic if iommu init fails in tboot system "intel_iommu=off" command line is used to disable iommu but iommu is force enabled in a tboot system for security reason. However for better performance on high speed network device, a new option "intel_iommu=tboot_noforce" is introduced to disable the force on. By default kernel should panic if iommu init fail in tboot for security reason, but it's unnecessory if we use "intel_iommu=tboot_noforce,off". Fix the code setting force_on and move intel_iommu_tboot_noforce from tboot code to intel iommu code. Fixes: 7304e8f28bb2 ("iommu/vt-d: Correctly disable Intel IOMMU force on") Signed-off-by: Zhenzhong Duan <zhenzhong.duan@gmail.com> Tested-by: Lukasz Hawrylko <lukasz.hawrylko@linux.intel.com> Acked-by: Lu Baolu <baolu.lu@linux.intel.com> Link: https://lore.kernel.org/r/20201110071908.3133-1-zhenzhong.duan@gmail.com Signed-off-by: Will Deacon <will@kernel.org> --- arch/x86/kernel/tboot.c | 3 --- drivers/iommu/intel/iommu.c | 5 +++-- include/linux/intel-iommu.h | 1 - 3 files changed, 3 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c index 992fb1415c0f..420be871d9d4 100644 --- a/arch/x86/kernel/tboot.c +++ b/arch/x86/kernel/tboot.c @@ -514,9 +514,6 @@ int tboot_force_iommu(void) if (!tboot_enabled()) return 0; - if (intel_iommu_tboot_noforce) - return 1; - if (no_iommu || swiotlb || dmar_disabled) pr_warn("Forcing Intel-IOMMU to enabled\n"); diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 1b1ca63e6bbe..4d9b298002f0 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -179,7 +179,7 @@ static int rwbf_quirk; * (used when kernel is launched w/ TXT) */ static int force_on = 0; -int intel_iommu_tboot_noforce; +static int intel_iommu_tboot_noforce; static int no_platform_optin; #define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry)) @@ -4885,7 +4885,8 @@ int __init intel_iommu_init(void) * Intel IOMMU is required for a TXT/tboot launch or platform * opt in, so enforce that. */ - force_on = tboot_force_iommu() || platform_optin_force_iommu(); + force_on = (!intel_iommu_tboot_noforce && tboot_force_iommu()) || + platform_optin_force_iommu(); if (iommu_init_mempool()) { if (force_on) diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index fbf5b3e7707e..d956987ed032 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -798,7 +798,6 @@ extern int iommu_calculate_agaw(struct intel_iommu *iommu); extern int iommu_calculate_max_sagaw(struct intel_iommu *iommu); extern int dmar_disabled; extern int intel_iommu_enabled; -extern int intel_iommu_tboot_noforce; extern int intel_iommu_gfx_mapped; #else static inline int iommu_calculate_agaw(struct intel_iommu *iommu) -- cgit v1.2.3-70-g09d2 From 05d5de6ba7dbe490dd413b5ca11d0875bd2bc006 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde <mkl@pengutronix.de> Date: Wed, 11 Nov 2020 15:12:11 +0100 Subject: ARM: dts: dra76x: m_can: fix order of clocks According to the bosch,m_can.yaml bindings the first clock shall be the "hclk", while the second clock "cclk". This patch fixes the order accordingly. Fixes: 0adbe832f21a ("ARM: dts: dra76x: Add MCAN node") Cc: Faiz Abbas <faiz_abbas@ti.com> Cc: Tony Lindgren <tony@atomide.com> Cc: linux-omap@vger.kernel.org Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de> Signed-off-by: Tony Lindgren <tony@atomide.com> --- arch/arm/boot/dts/dra76x.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/dra76x.dtsi b/arch/arm/boot/dts/dra76x.dtsi index b69c7d40f5d8..2f326151116b 100644 --- a/arch/arm/boot/dts/dra76x.dtsi +++ b/arch/arm/boot/dts/dra76x.dtsi @@ -32,8 +32,8 @@ interrupts = <GIC_SPI 67 IRQ_TYPE_LEVEL_HIGH>, <GIC_SPI 68 IRQ_TYPE_LEVEL_HIGH>; interrupt-names = "int0", "int1"; - clocks = <&mcan_clk>, <&l3_iclk_div>; - clock-names = "cclk", "hclk"; + clocks = <&l3_iclk_div>, <&mcan_clk>; + clock-names = "hclk", "cclk"; bosch,mram-cfg = <0x0 0 0 32 0 0 1 1>; }; }; -- cgit v1.2.3-70-g09d2 From f79643787e0a0762d2409b7b8334e83f22d85695 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin <npiggin@gmail.com> Date: Tue, 17 Nov 2020 16:59:12 +1100 Subject: powerpc/64s: flush L1D on kernel entry IBM Power9 processors can speculatively operate on data in the L1 cache before it has been completely validated, via a way-prediction mechanism. It is not possible for an attacker to determine the contents of impermissible memory using this method, since these systems implement a combination of hardware and software security measures to prevent scenarios where protected data could be leaked. However these measures don't address the scenario where an attacker induces the operating system to speculatively execute instructions using data that the attacker controls. This can be used for example to speculatively bypass "kernel user access prevention" techniques, as discovered by Anthony Steinhauser of Google's Safeside Project. This is not an attack by itself, but there is a possibility it could be used in conjunction with side-channels or other weaknesses in the privileged code to construct an attack. This issue can be mitigated by flushing the L1 cache between privilege boundaries of concern. This patch flushes the L1 cache on kernel entry. This is part of the fix for CVE-2020-4788. Signed-off-by: Nicholas Piggin <npiggin@gmail.com> Signed-off-by: Daniel Axtens <dja@axtens.net> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> --- Documentation/admin-guide/kernel-parameters.txt | 3 ++ arch/powerpc/include/asm/exception-64s.h | 9 +++- arch/powerpc/include/asm/feature-fixups.h | 10 +++++ arch/powerpc/include/asm/security_features.h | 4 ++ arch/powerpc/include/asm/setup.h | 3 ++ arch/powerpc/kernel/exceptions-64s.S | 37 +++++++++++++++ arch/powerpc/kernel/setup_64.c | 60 ++++++++++++++++++++++++- arch/powerpc/kernel/vmlinux.lds.S | 7 +++ arch/powerpc/lib/feature-fixups.c | 54 ++++++++++++++++++++++ arch/powerpc/platforms/powernv/setup.c | 11 +++++ arch/powerpc/platforms/pseries/setup.c | 4 ++ 11 files changed, 200 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 526d65d8573a..383889fe771f 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -2858,6 +2858,7 @@ mds=off [X86] tsx_async_abort=off [X86] kvm.nx_huge_pages=off [X86] + no_entry_flush [PPC] Exceptions: This does not have any effect on @@ -3186,6 +3187,8 @@ noefi Disable EFI runtime services support. + no_entry_flush [PPC] Don't flush the L1-D cache when entering the kernel. + noexec [IA-64] noexec [X86] diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index ebe95aa04d53..83fa88bc9935 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -57,11 +57,18 @@ nop; \ nop +#define ENTRY_FLUSH_SLOT \ + ENTRY_FLUSH_FIXUP_SECTION; \ + nop; \ + nop; \ + nop; + /* * r10 must be free to use, r13 must be paca */ #define INTERRUPT_TO_KERNEL \ - STF_ENTRY_BARRIER_SLOT + STF_ENTRY_BARRIER_SLOT; \ + ENTRY_FLUSH_SLOT /* * Macros for annotating the expected destination of (h)rfid diff --git a/arch/powerpc/include/asm/feature-fixups.h b/arch/powerpc/include/asm/feature-fixups.h index b0af97add751..06a48219bbf2 100644 --- a/arch/powerpc/include/asm/feature-fixups.h +++ b/arch/powerpc/include/asm/feature-fixups.h @@ -205,6 +205,14 @@ label##3: \ FTR_ENTRY_OFFSET 955b-956b; \ .popsection; +#define ENTRY_FLUSH_FIXUP_SECTION \ +957: \ + .pushsection __entry_flush_fixup,"a"; \ + .align 2; \ +958: \ + FTR_ENTRY_OFFSET 957b-958b; \ + .popsection; + #define RFI_FLUSH_FIXUP_SECTION \ 951: \ .pushsection __rfi_flush_fixup,"a"; \ @@ -237,8 +245,10 @@ label##3: \ #include <linux/types.h> extern long stf_barrier_fallback; +extern long entry_flush_fallback; extern long __start___stf_entry_barrier_fixup, __stop___stf_entry_barrier_fixup; extern long __start___stf_exit_barrier_fixup, __stop___stf_exit_barrier_fixup; +extern long __start___entry_flush_fixup, __stop___entry_flush_fixup; extern long __start___rfi_flush_fixup, __stop___rfi_flush_fixup; extern long __start___barrier_nospec_fixup, __stop___barrier_nospec_fixup; extern long __start__btb_flush_fixup, __stop__btb_flush_fixup; diff --git a/arch/powerpc/include/asm/security_features.h b/arch/powerpc/include/asm/security_features.h index fbb8fa32150f..9e7459d2edca 100644 --- a/arch/powerpc/include/asm/security_features.h +++ b/arch/powerpc/include/asm/security_features.h @@ -86,12 +86,16 @@ static inline bool security_ftr_enabled(u64 feature) // Software required to flush link stack on context switch #define SEC_FTR_FLUSH_LINK_STACK 0x0000000000001000ull +// The L1-D cache should be flushed when entering the kernel +#define SEC_FTR_L1D_FLUSH_ENTRY 0x0000000000004000ull + // Features enabled by default #define SEC_FTR_DEFAULT \ (SEC_FTR_L1D_FLUSH_HV | \ SEC_FTR_L1D_FLUSH_PR | \ SEC_FTR_BNDS_CHK_SPEC_BAR | \ + SEC_FTR_L1D_FLUSH_ENTRY | \ SEC_FTR_FAVOUR_SECURITY) #endif /* _ASM_POWERPC_SECURITY_FEATURES_H */ diff --git a/arch/powerpc/include/asm/setup.h b/arch/powerpc/include/asm/setup.h index 9efbddee2bca..e2fcd3e874f8 100644 --- a/arch/powerpc/include/asm/setup.h +++ b/arch/powerpc/include/asm/setup.h @@ -52,12 +52,15 @@ enum l1d_flush_type { }; void setup_rfi_flush(enum l1d_flush_type, bool enable); +void setup_entry_flush(bool enable); +void setup_uaccess_flush(bool enable); void do_rfi_flush_fixups(enum l1d_flush_type types); #ifdef CONFIG_PPC_BARRIER_NOSPEC void setup_barrier_nospec(void); #else static inline void setup_barrier_nospec(void) { }; #endif +void do_entry_flush_fixups(enum l1d_flush_type types); void do_barrier_nospec_fixups(bool enable); extern bool barrier_nospec_enabled; diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index f7d748b88705..5577dd887d37 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -2951,6 +2951,43 @@ TRAMP_REAL_BEGIN(stf_barrier_fallback) .endr blr +TRAMP_REAL_BEGIN(entry_flush_fallback) + std r9,PACA_EXRFI+EX_R9(r13) + std r10,PACA_EXRFI+EX_R10(r13) + std r11,PACA_EXRFI+EX_R11(r13) + mfctr r9 + ld r10,PACA_RFI_FLUSH_FALLBACK_AREA(r13) + ld r11,PACA_L1D_FLUSH_SIZE(r13) + srdi r11,r11,(7 + 3) /* 128 byte lines, unrolled 8x */ + mtctr r11 + DCBT_BOOK3S_STOP_ALL_STREAM_IDS(r11) /* Stop prefetch streams */ + + /* order ld/st prior to dcbt stop all streams with flushing */ + sync + + /* + * The load addresses are at staggered offsets within cachelines, + * which suits some pipelines better (on others it should not + * hurt). + */ +1: + ld r11,(0x80 + 8)*0(r10) + ld r11,(0x80 + 8)*1(r10) + ld r11,(0x80 + 8)*2(r10) + ld r11,(0x80 + 8)*3(r10) + ld r11,(0x80 + 8)*4(r10) + ld r11,(0x80 + 8)*5(r10) + ld r11,(0x80 + 8)*6(r10) + ld r11,(0x80 + 8)*7(r10) + addi r10,r10,0x80*8 + bdnz 1b + + mtctr r9 + ld r9,PACA_EXRFI+EX_R9(r13) + ld r10,PACA_EXRFI+EX_R10(r13) + ld r11,PACA_EXRFI+EX_R11(r13) + blr + TRAMP_REAL_BEGIN(rfi_flush_fallback) SET_SCRATCH0(r13); GET_PACA(r13); diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index bb9cab3641d7..7d447b43ad13 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -945,7 +945,9 @@ early_initcall(disable_hardlockup_detector); static enum l1d_flush_type enabled_flush_types; static void *l1d_flush_fallback_area; static bool no_rfi_flush; +static bool no_entry_flush; bool rfi_flush; +bool entry_flush; static int __init handle_no_rfi_flush(char *p) { @@ -955,6 +957,14 @@ static int __init handle_no_rfi_flush(char *p) } early_param("no_rfi_flush", handle_no_rfi_flush); +static int __init handle_no_entry_flush(char *p) +{ + pr_info("entry-flush: disabled on command line."); + no_entry_flush = true; + return 0; +} +early_param("no_entry_flush", handle_no_entry_flush); + /* * The RFI flush is not KPTI, but because users will see doco that says to use * nopti we hijack that option here to also disable the RFI flush. @@ -986,6 +996,18 @@ void rfi_flush_enable(bool enable) rfi_flush = enable; } +void entry_flush_enable(bool enable) +{ + if (enable) { + do_entry_flush_fixups(enabled_flush_types); + on_each_cpu(do_nothing, NULL, 1); + } else { + do_entry_flush_fixups(L1D_FLUSH_NONE); + } + + entry_flush = enable; +} + static void __ref init_fallback_flush(void) { u64 l1d_size, limit; @@ -1044,10 +1066,19 @@ void setup_rfi_flush(enum l1d_flush_type types, bool enable) enabled_flush_types = types; - if (!no_rfi_flush && !cpu_mitigations_off()) + if (!cpu_mitigations_off() && !no_rfi_flush) rfi_flush_enable(enable); } +void setup_entry_flush(bool enable) +{ + if (cpu_mitigations_off()) + return; + + if (!no_entry_flush) + entry_flush_enable(enable); +} + #ifdef CONFIG_DEBUG_FS static int rfi_flush_set(void *data, u64 val) { @@ -1075,9 +1106,36 @@ static int rfi_flush_get(void *data, u64 *val) DEFINE_SIMPLE_ATTRIBUTE(fops_rfi_flush, rfi_flush_get, rfi_flush_set, "%llu\n"); +static int entry_flush_set(void *data, u64 val) +{ + bool enable; + + if (val == 1) + enable = true; + else if (val == 0) + enable = false; + else + return -EINVAL; + + /* Only do anything if we're changing state */ + if (enable != entry_flush) + entry_flush_enable(enable); + + return 0; +} + +static int entry_flush_get(void *data, u64 *val) +{ + *val = entry_flush ? 1 : 0; + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_entry_flush, entry_flush_get, entry_flush_set, "%llu\n"); + static __init int rfi_flush_debugfs_init(void) { debugfs_create_file("rfi_flush", 0600, powerpc_debugfs_root, NULL, &fops_rfi_flush); + debugfs_create_file("entry_flush", 0600, powerpc_debugfs_root, NULL, &fops_entry_flush); return 0; } device_initcall(rfi_flush_debugfs_init); diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index e0548b4950de..715ec4d4614a 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -131,6 +131,13 @@ SECTIONS __stop___stf_entry_barrier_fixup = .; } + . = ALIGN(8); + __entry_flush_fixup : AT(ADDR(__entry_flush_fixup) - LOAD_OFFSET) { + __start___entry_flush_fixup = .; + *(__entry_flush_fixup) + __stop___entry_flush_fixup = .; + } + . = ALIGN(8); __stf_exit_barrier_fixup : AT(ADDR(__stf_exit_barrier_fixup) - LOAD_OFFSET) { __start___stf_exit_barrier_fixup = .; diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c index 4c0a7ee9fa00..70e83cfd74aa 100644 --- a/arch/powerpc/lib/feature-fixups.c +++ b/arch/powerpc/lib/feature-fixups.c @@ -234,6 +234,60 @@ void do_stf_barrier_fixups(enum stf_barrier_type types) do_stf_exit_barrier_fixups(types); } +void do_entry_flush_fixups(enum l1d_flush_type types) +{ + unsigned int instrs[3], *dest; + long *start, *end; + int i; + + start = PTRRELOC(&__start___entry_flush_fixup); + end = PTRRELOC(&__stop___entry_flush_fixup); + + instrs[0] = 0x60000000; /* nop */ + instrs[1] = 0x60000000; /* nop */ + instrs[2] = 0x60000000; /* nop */ + + i = 0; + if (types == L1D_FLUSH_FALLBACK) { + instrs[i++] = 0x7d4802a6; /* mflr r10 */ + instrs[i++] = 0x60000000; /* branch patched below */ + instrs[i++] = 0x7d4803a6; /* mtlr r10 */ + } + + if (types & L1D_FLUSH_ORI) { + instrs[i++] = 0x63ff0000; /* ori 31,31,0 speculation barrier */ + instrs[i++] = 0x63de0000; /* ori 30,30,0 L1d flush*/ + } + + if (types & L1D_FLUSH_MTTRIG) + instrs[i++] = 0x7c12dba6; /* mtspr TRIG2,r0 (SPR #882) */ + + for (i = 0; start < end; start++, i++) { + dest = (void *)start + *start; + + pr_devel("patching dest %lx\n", (unsigned long)dest); + + patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0])); + + if (types == L1D_FLUSH_FALLBACK) + patch_branch((struct ppc_inst *)(dest + 1), (unsigned long)&entry_flush_fallback, + BRANCH_SET_LINK); + else + patch_instruction((struct ppc_inst *)(dest + 1), ppc_inst(instrs[1])); + + patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2])); + } + + printk(KERN_DEBUG "entry-flush: patched %d locations (%s flush)\n", i, + (types == L1D_FLUSH_NONE) ? "no" : + (types == L1D_FLUSH_FALLBACK) ? "fallback displacement" : + (types & L1D_FLUSH_ORI) ? (types & L1D_FLUSH_MTTRIG) + ? "ori+mttrig type" + : "ori type" : + (types & L1D_FLUSH_MTTRIG) ? "mttrig type" + : "unknown"); +} + void do_rfi_flush_fixups(enum l1d_flush_type types) { unsigned int instrs[3], *dest; diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c index 9acaa0f131b9..d04a085c423d 100644 --- a/arch/powerpc/platforms/powernv/setup.c +++ b/arch/powerpc/platforms/powernv/setup.c @@ -122,12 +122,23 @@ static void pnv_setup_rfi_flush(void) type = L1D_FLUSH_ORI; } + /* + * If we are non-Power9 bare metal, we don't need to flush on kernel + * entry: it fixes a P9 specific vulnerability. + */ + if (!pvr_version_is(PVR_POWER9)) + security_ftr_clear(SEC_FTR_L1D_FLUSH_ENTRY); + enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) && \ (security_ftr_enabled(SEC_FTR_L1D_FLUSH_PR) || \ security_ftr_enabled(SEC_FTR_L1D_FLUSH_HV)); setup_rfi_flush(type, enable); setup_count_cache_flush(); + + enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) && + security_ftr_enabled(SEC_FTR_L1D_FLUSH_ENTRY); + setup_entry_flush(enable); } static void __init pnv_check_guarded_cores(void) diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 633c45ec406d..8136c5368ee4 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -579,6 +579,10 @@ void pseries_setup_rfi_flush(void) setup_rfi_flush(types, enable); setup_count_cache_flush(); + + enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) && + security_ftr_enabled(SEC_FTR_L1D_FLUSH_ENTRY); + setup_entry_flush(enable); } #ifdef CONFIG_PCI_IOV -- cgit v1.2.3-70-g09d2 From 9a32a7e78bd0cd9a9b6332cbdc345ee5ffd0c5de Mon Sep 17 00:00:00 2001 From: Nicholas Piggin <npiggin@gmail.com> Date: Tue, 17 Nov 2020 16:59:13 +1100 Subject: powerpc/64s: flush L1D after user accesses IBM Power9 processors can speculatively operate on data in the L1 cache before it has been completely validated, via a way-prediction mechanism. It is not possible for an attacker to determine the contents of impermissible memory using this method, since these systems implement a combination of hardware and software security measures to prevent scenarios where protected data could be leaked. However these measures don't address the scenario where an attacker induces the operating system to speculatively execute instructions using data that the attacker controls. This can be used for example to speculatively bypass "kernel user access prevention" techniques, as discovered by Anthony Steinhauser of Google's Safeside Project. This is not an attack by itself, but there is a possibility it could be used in conjunction with side-channels or other weaknesses in the privileged code to construct an attack. This issue can be mitigated by flushing the L1 cache between privilege boundaries of concern. This patch flushes the L1 cache after user accesses. This is part of the fix for CVE-2020-4788. Signed-off-by: Nicholas Piggin <npiggin@gmail.com> Signed-off-by: Daniel Axtens <dja@axtens.net> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> --- Documentation/admin-guide/kernel-parameters.txt | 4 ++ arch/powerpc/include/asm/book3s/64/kup-radix.h | 66 ++++++++++++------- arch/powerpc/include/asm/exception-64s.h | 3 + arch/powerpc/include/asm/feature-fixups.h | 9 +++ arch/powerpc/include/asm/kup.h | 19 ++++-- arch/powerpc/include/asm/security_features.h | 3 + arch/powerpc/include/asm/setup.h | 1 + arch/powerpc/kernel/exceptions-64s.S | 85 ++++++++----------------- arch/powerpc/kernel/setup_64.c | 62 ++++++++++++++++++ arch/powerpc/kernel/vmlinux.lds.S | 7 ++ arch/powerpc/lib/feature-fixups.c | 50 +++++++++++++++ arch/powerpc/platforms/powernv/setup.c | 10 ++- arch/powerpc/platforms/pseries/setup.c | 4 ++ 13 files changed, 233 insertions(+), 90 deletions(-) (limited to 'arch') diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 383889fe771f..44fde25bb221 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -2859,6 +2859,7 @@ tsx_async_abort=off [X86] kvm.nx_huge_pages=off [X86] no_entry_flush [PPC] + no_uaccess_flush [PPC] Exceptions: This does not have any effect on @@ -3238,6 +3239,9 @@ nospec_store_bypass_disable [HW] Disable all mitigations for the Speculative Store Bypass vulnerability + no_uaccess_flush + [PPC] Don't flush the L1-D cache after accessing user data. + noxsave [BUGS=X86] Disables x86 extended register state save and restore using xsave. The kernel will fallback to enabling legacy floating-point and sse state. diff --git a/arch/powerpc/include/asm/book3s/64/kup-radix.h b/arch/powerpc/include/asm/book3s/64/kup-radix.h index 3ee1ec60be84..97c2394e7dea 100644 --- a/arch/powerpc/include/asm/book3s/64/kup-radix.h +++ b/arch/powerpc/include/asm/book3s/64/kup-radix.h @@ -61,6 +61,8 @@ #else /* !__ASSEMBLY__ */ +DECLARE_STATIC_KEY_FALSE(uaccess_flush_key); + #ifdef CONFIG_PPC_KUAP #include <asm/mmu.h> @@ -103,8 +105,16 @@ static inline void kuap_check_amr(void) static inline unsigned long get_kuap(void) { + /* + * We return AMR_KUAP_BLOCKED when we don't support KUAP because + * prevent_user_access_return needs to return AMR_KUAP_BLOCKED to + * cause restore_user_access to do a flush. + * + * This has no effect in terms of actually blocking things on hash, + * so it doesn't break anything. + */ if (!early_mmu_has_feature(MMU_FTR_RADIX_KUAP)) - return 0; + return AMR_KUAP_BLOCKED; return mfspr(SPRN_AMR); } @@ -123,6 +133,31 @@ static inline void set_kuap(unsigned long value) isync(); } +static inline bool +bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write) +{ + return WARN(mmu_has_feature(MMU_FTR_RADIX_KUAP) && + (regs->kuap & (is_write ? AMR_KUAP_BLOCK_WRITE : AMR_KUAP_BLOCK_READ)), + "Bug: %s fault blocked by AMR!", is_write ? "Write" : "Read"); +} +#else /* CONFIG_PPC_KUAP */ +static inline void kuap_restore_amr(struct pt_regs *regs, unsigned long amr) { } + +static inline unsigned long kuap_get_and_check_amr(void) +{ + return 0UL; +} + +static inline void kuap_check_amr(void) { } + +static inline unsigned long get_kuap(void) +{ + return AMR_KUAP_BLOCKED; +} + +static inline void set_kuap(unsigned long value) { } +#endif /* !CONFIG_PPC_KUAP */ + static __always_inline void allow_user_access(void __user *to, const void __user *from, unsigned long size, unsigned long dir) { @@ -142,6 +177,8 @@ static inline void prevent_user_access(void __user *to, const void __user *from, unsigned long size, unsigned long dir) { set_kuap(AMR_KUAP_BLOCKED); + if (static_branch_unlikely(&uaccess_flush_key)) + do_uaccess_flush(); } static inline unsigned long prevent_user_access_return(void) @@ -149,6 +186,8 @@ static inline unsigned long prevent_user_access_return(void) unsigned long flags = get_kuap(); set_kuap(AMR_KUAP_BLOCKED); + if (static_branch_unlikely(&uaccess_flush_key)) + do_uaccess_flush(); return flags; } @@ -156,30 +195,9 @@ static inline unsigned long prevent_user_access_return(void) static inline void restore_user_access(unsigned long flags) { set_kuap(flags); + if (static_branch_unlikely(&uaccess_flush_key) && flags == AMR_KUAP_BLOCKED) + do_uaccess_flush(); } - -static inline bool -bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write) -{ - return WARN(mmu_has_feature(MMU_FTR_RADIX_KUAP) && - (regs->kuap & (is_write ? AMR_KUAP_BLOCK_WRITE : AMR_KUAP_BLOCK_READ)), - "Bug: %s fault blocked by AMR!", is_write ? "Write" : "Read"); -} -#else /* CONFIG_PPC_KUAP */ -static inline void kuap_restore_amr(struct pt_regs *regs, unsigned long amr) -{ -} - -static inline void kuap_check_amr(void) -{ -} - -static inline unsigned long kuap_get_and_check_amr(void) -{ - return 0; -} -#endif /* CONFIG_PPC_KUAP */ - #endif /* __ASSEMBLY__ */ #endif /* _ASM_POWERPC_BOOK3S_64_KUP_RADIX_H */ diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index 83fa88bc9935..1d32b174ab6a 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -144,6 +144,9 @@ RFSCV; \ b rfscv_flush_fallback +#else /* __ASSEMBLY__ */ +/* Prototype for function defined in exceptions-64s.S */ +void do_uaccess_flush(void); #endif /* __ASSEMBLY__ */ #endif /* _ASM_POWERPC_EXCEPTION_H */ diff --git a/arch/powerpc/include/asm/feature-fixups.h b/arch/powerpc/include/asm/feature-fixups.h index 06a48219bbf2..fbd406cd6916 100644 --- a/arch/powerpc/include/asm/feature-fixups.h +++ b/arch/powerpc/include/asm/feature-fixups.h @@ -205,6 +205,14 @@ label##3: \ FTR_ENTRY_OFFSET 955b-956b; \ .popsection; +#define UACCESS_FLUSH_FIXUP_SECTION \ +959: \ + .pushsection __uaccess_flush_fixup,"a"; \ + .align 2; \ +960: \ + FTR_ENTRY_OFFSET 959b-960b; \ + .popsection; + #define ENTRY_FLUSH_FIXUP_SECTION \ 957: \ .pushsection __entry_flush_fixup,"a"; \ @@ -248,6 +256,7 @@ extern long stf_barrier_fallback; extern long entry_flush_fallback; extern long __start___stf_entry_barrier_fixup, __stop___stf_entry_barrier_fixup; extern long __start___stf_exit_barrier_fixup, __stop___stf_exit_barrier_fixup; +extern long __start___uaccess_flush_fixup, __stop___uaccess_flush_fixup; extern long __start___entry_flush_fixup, __stop___entry_flush_fixup; extern long __start___rfi_flush_fixup, __stop___rfi_flush_fixup; extern long __start___barrier_nospec_fixup, __stop___barrier_nospec_fixup; diff --git a/arch/powerpc/include/asm/kup.h b/arch/powerpc/include/asm/kup.h index 1d0f7d838b2e..0f5c606ae057 100644 --- a/arch/powerpc/include/asm/kup.h +++ b/arch/powerpc/include/asm/kup.h @@ -53,17 +53,26 @@ static inline void setup_kuep(bool disabled) { } void setup_kuap(bool disabled); #else static inline void setup_kuap(bool disabled) { } + +static inline bool +bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write) +{ + return false; +} + +/* + * book3s/64/kup-radix.h defines these functions for the !KUAP case to flush + * the L1D cache after user accesses. Only include the empty stubs for other + * platforms. + */ +#ifndef CONFIG_PPC64 static inline void allow_user_access(void __user *to, const void __user *from, unsigned long size, unsigned long dir) { } static inline void prevent_user_access(void __user *to, const void __user *from, unsigned long size, unsigned long dir) { } static inline unsigned long prevent_user_access_return(void) { return 0UL; } static inline void restore_user_access(unsigned long flags) { } -static inline bool -bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write) -{ - return false; -} +#endif /* CONFIG_PPC64 */ #endif /* CONFIG_PPC_KUAP */ static inline void allow_read_from_user(const void __user *from, unsigned long size) diff --git a/arch/powerpc/include/asm/security_features.h b/arch/powerpc/include/asm/security_features.h index 9e7459d2edca..b774a4477d5f 100644 --- a/arch/powerpc/include/asm/security_features.h +++ b/arch/powerpc/include/asm/security_features.h @@ -89,6 +89,8 @@ static inline bool security_ftr_enabled(u64 feature) // The L1-D cache should be flushed when entering the kernel #define SEC_FTR_L1D_FLUSH_ENTRY 0x0000000000004000ull +// The L1-D cache should be flushed after user accesses from the kernel +#define SEC_FTR_L1D_FLUSH_UACCESS 0x0000000000008000ull // Features enabled by default #define SEC_FTR_DEFAULT \ @@ -96,6 +98,7 @@ static inline bool security_ftr_enabled(u64 feature) SEC_FTR_L1D_FLUSH_PR | \ SEC_FTR_BNDS_CHK_SPEC_BAR | \ SEC_FTR_L1D_FLUSH_ENTRY | \ + SEC_FTR_L1D_FLUSH_UACCESS | \ SEC_FTR_FAVOUR_SECURITY) #endif /* _ASM_POWERPC_SECURITY_FEATURES_H */ diff --git a/arch/powerpc/include/asm/setup.h b/arch/powerpc/include/asm/setup.h index e2fcd3e874f8..a466749703f1 100644 --- a/arch/powerpc/include/asm/setup.h +++ b/arch/powerpc/include/asm/setup.h @@ -60,6 +60,7 @@ void setup_barrier_nospec(void); #else static inline void setup_barrier_nospec(void) { }; #endif +void do_uaccess_flush_fixups(enum l1d_flush_type types); void do_entry_flush_fixups(enum l1d_flush_type types); void do_barrier_nospec_fixups(bool enable); extern bool barrier_nospec_enabled; diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 5577dd887d37..f63a3d3bca3d 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -2951,11 +2951,8 @@ TRAMP_REAL_BEGIN(stf_barrier_fallback) .endr blr -TRAMP_REAL_BEGIN(entry_flush_fallback) - std r9,PACA_EXRFI+EX_R9(r13) - std r10,PACA_EXRFI+EX_R10(r13) - std r11,PACA_EXRFI+EX_R11(r13) - mfctr r9 +/* Clobbers r10, r11, ctr */ +.macro L1D_DISPLACEMENT_FLUSH ld r10,PACA_RFI_FLUSH_FALLBACK_AREA(r13) ld r11,PACA_L1D_FLUSH_SIZE(r13) srdi r11,r11,(7 + 3) /* 128 byte lines, unrolled 8x */ @@ -2981,7 +2978,14 @@ TRAMP_REAL_BEGIN(entry_flush_fallback) ld r11,(0x80 + 8)*7(r10) addi r10,r10,0x80*8 bdnz 1b +.endm +TRAMP_REAL_BEGIN(entry_flush_fallback) + std r9,PACA_EXRFI+EX_R9(r13) + std r10,PACA_EXRFI+EX_R10(r13) + std r11,PACA_EXRFI+EX_R11(r13) + mfctr r9 + L1D_DISPLACEMENT_FLUSH mtctr r9 ld r9,PACA_EXRFI+EX_R9(r13) ld r10,PACA_EXRFI+EX_R10(r13) @@ -2997,32 +3001,7 @@ TRAMP_REAL_BEGIN(rfi_flush_fallback) std r10,PACA_EXRFI+EX_R10(r13) std r11,PACA_EXRFI+EX_R11(r13) mfctr r9 - ld r10,PACA_RFI_FLUSH_FALLBACK_AREA(r13) - ld r11,PACA_L1D_FLUSH_SIZE(r13) - srdi r11,r11,(7 + 3) /* 128 byte lines, unrolled 8x */ - mtctr r11 - DCBT_BOOK3S_STOP_ALL_STREAM_IDS(r11) /* Stop prefetch streams */ - - /* order ld/st prior to dcbt stop all streams with flushing */ - sync - - /* - * The load adresses are at staggered offsets within cachelines, - * which suits some pipelines better (on others it should not - * hurt). - */ -1: - ld r11,(0x80 + 8)*0(r10) - ld r11,(0x80 + 8)*1(r10) - ld r11,(0x80 + 8)*2(r10) - ld r11,(0x80 + 8)*3(r10) - ld r11,(0x80 + 8)*4(r10) - ld r11,(0x80 + 8)*5(r10) - ld r11,(0x80 + 8)*6(r10) - ld r11,(0x80 + 8)*7(r10) - addi r10,r10,0x80*8 - bdnz 1b - + L1D_DISPLACEMENT_FLUSH mtctr r9 ld r9,PACA_EXRFI+EX_R9(r13) ld r10,PACA_EXRFI+EX_R10(r13) @@ -3040,32 +3019,7 @@ TRAMP_REAL_BEGIN(hrfi_flush_fallback) std r10,PACA_EXRFI+EX_R10(r13) std r11,PACA_EXRFI+EX_R11(r13) mfctr r9 - ld r10,PACA_RFI_FLUSH_FALLBACK_AREA(r13) - ld r11,PACA_L1D_FLUSH_SIZE(r13) - srdi r11,r11,(7 + 3) /* 128 byte lines, unrolled 8x */ - mtctr r11 - DCBT_BOOK3S_STOP_ALL_STREAM_IDS(r11) /* Stop prefetch streams */ - - /* order ld/st prior to dcbt stop all streams with flushing */ - sync - - /* - * The load adresses are at staggered offsets within cachelines, - * which suits some pipelines better (on others it should not - * hurt). - */ -1: - ld r11,(0x80 + 8)*0(r10) - ld r11,(0x80 + 8)*1(r10) - ld r11,(0x80 + 8)*2(r10) - ld r11,(0x80 + 8)*3(r10) - ld r11,(0x80 + 8)*4(r10) - ld r11,(0x80 + 8)*5(r10) - ld r11,(0x80 + 8)*6(r10) - ld r11,(0x80 + 8)*7(r10) - addi r10,r10,0x80*8 - bdnz 1b - + L1D_DISPLACEMENT_FLUSH mtctr r9 ld r9,PACA_EXRFI+EX_R9(r13) ld r10,PACA_EXRFI+EX_R10(r13) @@ -3116,8 +3070,21 @@ TRAMP_REAL_BEGIN(rfscv_flush_fallback) RFSCV USE_TEXT_SECTION() - MASKED_INTERRUPT - MASKED_INTERRUPT hsrr=1 + +_GLOBAL(do_uaccess_flush) + UACCESS_FLUSH_FIXUP_SECTION + nop + nop + nop + blr + L1D_DISPLACEMENT_FLUSH + blr +_ASM_NOKPROBE_SYMBOL(do_uaccess_flush) +EXPORT_SYMBOL(do_uaccess_flush) + + +MASKED_INTERRUPT +MASKED_INTERRUPT hsrr=1 #ifdef CONFIG_KVM_BOOK3S_64_HANDLER kvmppc_skip_interrupt: diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 7d447b43ad13..74fd47f46fa5 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -946,8 +946,12 @@ static enum l1d_flush_type enabled_flush_types; static void *l1d_flush_fallback_area; static bool no_rfi_flush; static bool no_entry_flush; +static bool no_uaccess_flush; bool rfi_flush; bool entry_flush; +bool uaccess_flush; +DEFINE_STATIC_KEY_FALSE(uaccess_flush_key); +EXPORT_SYMBOL(uaccess_flush_key); static int __init handle_no_rfi_flush(char *p) { @@ -965,6 +969,14 @@ static int __init handle_no_entry_flush(char *p) } early_param("no_entry_flush", handle_no_entry_flush); +static int __init handle_no_uaccess_flush(char *p) +{ + pr_info("uaccess-flush: disabled on command line."); + no_uaccess_flush = true; + return 0; +} +early_param("no_uaccess_flush", handle_no_uaccess_flush); + /* * The RFI flush is not KPTI, but because users will see doco that says to use * nopti we hijack that option here to also disable the RFI flush. @@ -1008,6 +1020,20 @@ void entry_flush_enable(bool enable) entry_flush = enable; } +void uaccess_flush_enable(bool enable) +{ + if (enable) { + do_uaccess_flush_fixups(enabled_flush_types); + static_branch_enable(&uaccess_flush_key); + on_each_cpu(do_nothing, NULL, 1); + } else { + static_branch_disable(&uaccess_flush_key); + do_uaccess_flush_fixups(L1D_FLUSH_NONE); + } + + uaccess_flush = enable; +} + static void __ref init_fallback_flush(void) { u64 l1d_size, limit; @@ -1079,6 +1105,15 @@ void setup_entry_flush(bool enable) entry_flush_enable(enable); } +void setup_uaccess_flush(bool enable) +{ + if (cpu_mitigations_off()) + return; + + if (!no_uaccess_flush) + uaccess_flush_enable(enable); +} + #ifdef CONFIG_DEBUG_FS static int rfi_flush_set(void *data, u64 val) { @@ -1132,10 +1167,37 @@ static int entry_flush_get(void *data, u64 *val) DEFINE_SIMPLE_ATTRIBUTE(fops_entry_flush, entry_flush_get, entry_flush_set, "%llu\n"); +static int uaccess_flush_set(void *data, u64 val) +{ + bool enable; + + if (val == 1) + enable = true; + else if (val == 0) + enable = false; + else + return -EINVAL; + + /* Only do anything if we're changing state */ + if (enable != uaccess_flush) + uaccess_flush_enable(enable); + + return 0; +} + +static int uaccess_flush_get(void *data, u64 *val) +{ + *val = uaccess_flush ? 1 : 0; + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_uaccess_flush, uaccess_flush_get, uaccess_flush_set, "%llu\n"); + static __init int rfi_flush_debugfs_init(void) { debugfs_create_file("rfi_flush", 0600, powerpc_debugfs_root, NULL, &fops_rfi_flush); debugfs_create_file("entry_flush", 0600, powerpc_debugfs_root, NULL, &fops_entry_flush); + debugfs_create_file("uaccess_flush", 0600, powerpc_debugfs_root, NULL, &fops_uaccess_flush); return 0; } device_initcall(rfi_flush_debugfs_init); diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index 715ec4d4614a..6db90cdf11da 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -131,6 +131,13 @@ SECTIONS __stop___stf_entry_barrier_fixup = .; } + . = ALIGN(8); + __uaccess_flush_fixup : AT(ADDR(__uaccess_flush_fixup) - LOAD_OFFSET) { + __start___uaccess_flush_fixup = .; + *(__uaccess_flush_fixup) + __stop___uaccess_flush_fixup = .; + } + . = ALIGN(8); __entry_flush_fixup : AT(ADDR(__entry_flush_fixup) - LOAD_OFFSET) { __start___entry_flush_fixup = .; diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c index 70e83cfd74aa..321c12a9ef6b 100644 --- a/arch/powerpc/lib/feature-fixups.c +++ b/arch/powerpc/lib/feature-fixups.c @@ -234,6 +234,56 @@ void do_stf_barrier_fixups(enum stf_barrier_type types) do_stf_exit_barrier_fixups(types); } +void do_uaccess_flush_fixups(enum l1d_flush_type types) +{ + unsigned int instrs[4], *dest; + long *start, *end; + int i; + + start = PTRRELOC(&__start___uaccess_flush_fixup); + end = PTRRELOC(&__stop___uaccess_flush_fixup); + + instrs[0] = 0x60000000; /* nop */ + instrs[1] = 0x60000000; /* nop */ + instrs[2] = 0x60000000; /* nop */ + instrs[3] = 0x4e800020; /* blr */ + + i = 0; + if (types == L1D_FLUSH_FALLBACK) { + instrs[3] = 0x60000000; /* nop */ + /* fallthrough to fallback flush */ + } + + if (types & L1D_FLUSH_ORI) { + instrs[i++] = 0x63ff0000; /* ori 31,31,0 speculation barrier */ + instrs[i++] = 0x63de0000; /* ori 30,30,0 L1d flush*/ + } + + if (types & L1D_FLUSH_MTTRIG) + instrs[i++] = 0x7c12dba6; /* mtspr TRIG2,r0 (SPR #882) */ + + for (i = 0; start < end; start++, i++) { + dest = (void *)start + *start; + + pr_devel("patching dest %lx\n", (unsigned long)dest); + + patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0])); + + patch_instruction((struct ppc_inst *)(dest + 1), ppc_inst(instrs[1])); + patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2])); + patch_instruction((struct ppc_inst *)(dest + 3), ppc_inst(instrs[3])); + } + + printk(KERN_DEBUG "uaccess-flush: patched %d locations (%s flush)\n", i, + (types == L1D_FLUSH_NONE) ? "no" : + (types == L1D_FLUSH_FALLBACK) ? "fallback displacement" : + (types & L1D_FLUSH_ORI) ? (types & L1D_FLUSH_MTTRIG) + ? "ori+mttrig type" + : "ori type" : + (types & L1D_FLUSH_MTTRIG) ? "mttrig type" + : "unknown"); +} + void do_entry_flush_fixups(enum l1d_flush_type types) { unsigned int instrs[3], *dest; diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c index d04a085c423d..087ec92acfc4 100644 --- a/arch/powerpc/platforms/powernv/setup.c +++ b/arch/powerpc/platforms/powernv/setup.c @@ -124,10 +124,12 @@ static void pnv_setup_rfi_flush(void) /* * If we are non-Power9 bare metal, we don't need to flush on kernel - * entry: it fixes a P9 specific vulnerability. + * entry or after user access: they fix a P9 specific vulnerability. */ - if (!pvr_version_is(PVR_POWER9)) + if (!pvr_version_is(PVR_POWER9)) { security_ftr_clear(SEC_FTR_L1D_FLUSH_ENTRY); + security_ftr_clear(SEC_FTR_L1D_FLUSH_UACCESS); + } enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) && \ (security_ftr_enabled(SEC_FTR_L1D_FLUSH_PR) || \ @@ -139,6 +141,10 @@ static void pnv_setup_rfi_flush(void) enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) && security_ftr_enabled(SEC_FTR_L1D_FLUSH_ENTRY); setup_entry_flush(enable); + + enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) && + security_ftr_enabled(SEC_FTR_L1D_FLUSH_UACCESS); + setup_uaccess_flush(enable); } static void __init pnv_check_guarded_cores(void) diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 8136c5368ee4..3617cdb079f6 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -583,6 +583,10 @@ void pseries_setup_rfi_flush(void) enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) && security_ftr_enabled(SEC_FTR_L1D_FLUSH_ENTRY); setup_entry_flush(enable); + + enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) && + security_ftr_enabled(SEC_FTR_L1D_FLUSH_UACCESS); + setup_uaccess_flush(enable); } #ifdef CONFIG_PCI_IOV -- cgit v1.2.3-70-g09d2 From 178d52c6e89c38d0553b0ac8b99927b11eb995b0 Mon Sep 17 00:00:00 2001 From: Michael Ellerman <mpe@ellerman.id.au> Date: Thu, 19 Nov 2020 23:43:53 +1100 Subject: powerpc: Only include kup-radix.h for 64-bit Book3S In kup.h we currently include kup-radix.h for all 64-bit builds, which includes Book3S and Book3E. The latter doesn't make sense, Book3E never uses the Radix MMU. This has worked up until now, but almost by accident, and the recent uaccess flush changes introduced a build breakage on Book3E because of the bad structure of the code. So disentangle things so that we only use kup-radix.h for Book3S. This requires some more stubs in kup.h and fixing an include in syscall_64.c. Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> --- arch/powerpc/include/asm/book3s/64/kup-radix.h | 4 ++-- arch/powerpc/include/asm/kup.h | 11 ++++++++--- arch/powerpc/kernel/syscall_64.c | 2 +- 3 files changed, 11 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/book3s/64/kup-radix.h b/arch/powerpc/include/asm/book3s/64/kup-radix.h index 97c2394e7dea..28716e2f13e3 100644 --- a/arch/powerpc/include/asm/book3s/64/kup-radix.h +++ b/arch/powerpc/include/asm/book3s/64/kup-radix.h @@ -27,6 +27,7 @@ #endif .endm +#ifdef CONFIG_PPC_KUAP .macro kuap_check_amr gpr1, gpr2 #ifdef CONFIG_PPC_KUAP_DEBUG BEGIN_MMU_FTR_SECTION_NESTED(67) @@ -38,6 +39,7 @@ END_MMU_FTR_SECTION_NESTED_IFSET(MMU_FTR_RADIX_KUAP, 67) #endif .endm +#endif .macro kuap_save_amr_and_lock gpr1, gpr2, use_cr, msr_pr_cr #ifdef CONFIG_PPC_KUAP @@ -148,8 +150,6 @@ static inline unsigned long kuap_get_and_check_amr(void) return 0UL; } -static inline void kuap_check_amr(void) { } - static inline unsigned long get_kuap(void) { return AMR_KUAP_BLOCKED; diff --git a/arch/powerpc/include/asm/kup.h b/arch/powerpc/include/asm/kup.h index 0f5c606ae057..0d93331d0fab 100644 --- a/arch/powerpc/include/asm/kup.h +++ b/arch/powerpc/include/asm/kup.h @@ -14,7 +14,7 @@ #define KUAP_CURRENT_WRITE 8 #define KUAP_CURRENT (KUAP_CURRENT_READ | KUAP_CURRENT_WRITE) -#ifdef CONFIG_PPC64 +#ifdef CONFIG_PPC_BOOK3S_64 #include <asm/book3s/64/kup-radix.h> #endif #ifdef CONFIG_PPC_8xx @@ -35,6 +35,9 @@ .macro kuap_check current, gpr .endm +.macro kuap_check_amr gpr1, gpr2 +.endm + #endif #else /* !__ASSEMBLY__ */ @@ -60,19 +63,21 @@ bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write) return false; } +static inline void kuap_check_amr(void) { } + /* * book3s/64/kup-radix.h defines these functions for the !KUAP case to flush * the L1D cache after user accesses. Only include the empty stubs for other * platforms. */ -#ifndef CONFIG_PPC64 +#ifndef CONFIG_PPC_BOOK3S_64 static inline void allow_user_access(void __user *to, const void __user *from, unsigned long size, unsigned long dir) { } static inline void prevent_user_access(void __user *to, const void __user *from, unsigned long size, unsigned long dir) { } static inline unsigned long prevent_user_access_return(void) { return 0UL; } static inline void restore_user_access(unsigned long flags) { } -#endif /* CONFIG_PPC64 */ +#endif /* CONFIG_PPC_BOOK3S_64 */ #endif /* CONFIG_PPC_KUAP */ static inline void allow_read_from_user(const void __user *from, unsigned long size) diff --git a/arch/powerpc/kernel/syscall_64.c b/arch/powerpc/kernel/syscall_64.c index 8e50818aa50b..310bcd768cd5 100644 --- a/arch/powerpc/kernel/syscall_64.c +++ b/arch/powerpc/kernel/syscall_64.c @@ -2,7 +2,7 @@ #include <linux/err.h> #include <asm/asm-prototypes.h> -#include <asm/book3s/64/kup-radix.h> +#include <asm/kup.h> #include <asm/cputime.h> #include <asm/hw_irq.h> #include <asm/kprobes.h> -- cgit v1.2.3-70-g09d2 From da631f7fd623b6c180c8d93a93040d1e0d61291f Mon Sep 17 00:00:00 2001 From: Daniel Axtens <dja@axtens.net> Date: Tue, 17 Nov 2020 16:59:16 +1100 Subject: powerpc/64s: rename pnv|pseries_setup_rfi_flush to _setup_security_mitigations pseries|pnv_setup_rfi_flush already does the count cache flush setup, and we just added entry and uaccess flushes. So the name is not very accurate any more. In both platforms we then also immediately setup the STF flush. Rename them to _setup_security_mitigations and fold the STF flush in. Signed-off-by: Daniel Axtens <dja@axtens.net> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> --- arch/powerpc/platforms/powernv/setup.c | 7 ++++--- arch/powerpc/platforms/pseries/mobility.c | 4 ++-- arch/powerpc/platforms/pseries/pseries.h | 2 +- arch/powerpc/platforms/pseries/setup.c | 7 ++++--- 4 files changed, 11 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c index 087ec92acfc4..46115231a3b2 100644 --- a/arch/powerpc/platforms/powernv/setup.c +++ b/arch/powerpc/platforms/powernv/setup.c @@ -98,7 +98,7 @@ static void init_fw_feat_flags(struct device_node *np) security_ftr_clear(SEC_FTR_BNDS_CHK_SPEC_BAR); } -static void pnv_setup_rfi_flush(void) +static void pnv_setup_security_mitigations(void) { struct device_node *np, *fw_features; enum l1d_flush_type type; @@ -145,6 +145,8 @@ static void pnv_setup_rfi_flush(void) enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) && security_ftr_enabled(SEC_FTR_L1D_FLUSH_UACCESS); setup_uaccess_flush(enable); + + setup_stf_barrier(); } static void __init pnv_check_guarded_cores(void) @@ -173,8 +175,7 @@ static void __init pnv_setup_arch(void) { set_arch_panic_timeout(10, ARCH_PANIC_TIMEOUT); - pnv_setup_rfi_flush(); - setup_stf_barrier(); + pnv_setup_security_mitigations(); /* Initialize SMP */ pnv_smp_init(); diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c index d6f4162478a5..2f73cb5bf12d 100644 --- a/arch/powerpc/platforms/pseries/mobility.c +++ b/arch/powerpc/platforms/pseries/mobility.c @@ -349,8 +349,8 @@ void post_mobility_fixup(void) cpus_read_unlock(); - /* Possibly switch to a new RFI flush type */ - pseries_setup_rfi_flush(); + /* Possibly switch to a new L1 flush type */ + pseries_setup_security_mitigations(); /* Reinitialise system information for hv-24x7 */ read_24x7_sys_info(); diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h index 13fa370a87e4..593840847cd3 100644 --- a/arch/powerpc/platforms/pseries/pseries.h +++ b/arch/powerpc/platforms/pseries/pseries.h @@ -111,7 +111,7 @@ static inline unsigned long cmo_get_page_size(void) int dlpar_workqueue_init(void); -void pseries_setup_rfi_flush(void); +void pseries_setup_security_mitigations(void); void pseries_lpar_read_hblkrm_characteristics(void); #endif /* _PSERIES_PSERIES_H */ diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 3617cdb079f6..090c13f6c881 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -542,7 +542,7 @@ static void init_cpu_char_feature_flags(struct h_cpu_char_result *result) security_ftr_clear(SEC_FTR_BNDS_CHK_SPEC_BAR); } -void pseries_setup_rfi_flush(void) +void pseries_setup_security_mitigations(void) { struct h_cpu_char_result result; enum l1d_flush_type types; @@ -587,6 +587,8 @@ void pseries_setup_rfi_flush(void) enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) && security_ftr_enabled(SEC_FTR_L1D_FLUSH_UACCESS); setup_uaccess_flush(enable); + + setup_stf_barrier(); } #ifdef CONFIG_PCI_IOV @@ -776,8 +778,7 @@ static void __init pSeries_setup_arch(void) fwnmi_init(); - pseries_setup_rfi_flush(); - setup_stf_barrier(); + pseries_setup_security_mitigations(); pseries_lpar_read_hblkrm_characteristics(); /* By default, only probe PCI (can be overridden by rtas_pci) */ -- cgit v1.2.3-70-g09d2 From a98fd117a2553ab1a6d2fe3c7acae88c1eca4372 Mon Sep 17 00:00:00 2001 From: Icenowy Zheng <icenowy@aosc.io> Date: Fri, 20 Nov 2020 13:08:51 +0800 Subject: ARM: dts: sun8i: v3s: fix GIC node memory range Currently the GIC node in V3s DTSI follows some old DT examples, and being broken. This leads a warning at boot. Fix this. Fixes: f989086ccbc6 ("ARM: dts: sunxi: add dtsi file for V3s SoC") Signed-off-by: Icenowy Zheng <icenowy@aosc.io> Signed-off-by: Maxime Ripard <maxime@cerno.tech> Link: https://lore.kernel.org/r/20201120050851.4123759-1-icenowy@aosc.io --- arch/arm/boot/dts/sun8i-v3s.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/sun8i-v3s.dtsi b/arch/arm/boot/dts/sun8i-v3s.dtsi index 0c7341676921..89abd4cc7e23 100644 --- a/arch/arm/boot/dts/sun8i-v3s.dtsi +++ b/arch/arm/boot/dts/sun8i-v3s.dtsi @@ -539,7 +539,7 @@ gic: interrupt-controller@1c81000 { compatible = "arm,gic-400"; reg = <0x01c81000 0x1000>, - <0x01c82000 0x1000>, + <0x01c82000 0x2000>, <0x01c84000 0x2000>, <0x01c86000 0x2000>; interrupt-controller; -- cgit v1.2.3-70-g09d2 From a927bd6ba952d13c52b8b385030943032f659a3e Mon Sep 17 00:00:00 2001 From: Dan Williams <dan.j.williams@intel.com> Date: Sat, 21 Nov 2020 22:17:05 -0800 Subject: mm: fix phys_to_target_node() and memory_add_physaddr_to_nid() exports The core-mm has a default __weak implementation of phys_to_target_node() to mirror the weak definition of memory_add_physaddr_to_nid(). That symbol is exported for modules. However, while the export in mm/memory_hotplug.c exported the symbol in the configuration cases of: CONFIG_NUMA_KEEP_MEMINFO=y CONFIG_MEMORY_HOTPLUG=y ...and: CONFIG_NUMA_KEEP_MEMINFO=n CONFIG_MEMORY_HOTPLUG=y ...it failed to export the symbol in the case of: CONFIG_NUMA_KEEP_MEMINFO=y CONFIG_MEMORY_HOTPLUG=n Not only is that broken, but Christoph points out that the kernel should not be exporting any __weak symbol, which means that memory_add_physaddr_to_nid() example that phys_to_target_node() copied is broken too. Rework the definition of phys_to_target_node() and memory_add_physaddr_to_nid() to not require weak symbols. Move to the common arch override design-pattern of an asm header defining a symbol to replace the default implementation. The only common header that all memory_add_physaddr_to_nid() producing architectures implement is asm/sparsemem.h. In fact, powerpc already defines its memory_add_physaddr_to_nid() helper in sparsemem.h. Double-down on that observation and define phys_to_target_node() where necessary in asm/sparsemem.h. An alternate consideration that was discarded was to put this override in asm/numa.h, but that entangles with the definition of MAX_NUMNODES relative to the inclusion of linux/nodemask.h, and requires powerpc to grow a new header. The dependency on NUMA_KEEP_MEMINFO for DEV_DAX_HMEM_DEVICES is invalid now that the symbol is properly exported / stubbed in all combinations of CONFIG_NUMA_KEEP_MEMINFO and CONFIG_MEMORY_HOTPLUG. [dan.j.williams@intel.com: v4] Link: https://lkml.kernel.org/r/160461461867.1505359.5301571728749534585.stgit@dwillia2-desk3.amr.corp.intel.com [dan.j.williams@intel.com: powerpc: fix create_section_mapping compile warning] Link: https://lkml.kernel.org/r/160558386174.2948926.2740149041249041764.stgit@dwillia2-desk3.amr.corp.intel.com Fixes: a035b6bf863e ("mm/memory_hotplug: introduce default phys_to_target_node() implementation") Reported-by: Randy Dunlap <rdunlap@infradead.org> Reported-by: Thomas Gleixner <tglx@linutronix.de> Reported-by: kernel test robot <lkp@intel.com> Reported-by: Christoph Hellwig <hch@infradead.org> Signed-off-by: Dan Williams <dan.j.williams@intel.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Tested-by: Randy Dunlap <rdunlap@infradead.org> Tested-by: Thomas Gleixner <tglx@linutronix.de> Reviewed-by: Thomas Gleixner <tglx@linutronix.de> Reviewed-by: Christoph Hellwig <hch@lst.de> Cc: Joao Martins <joao.m.martins@oracle.com> Cc: Tony Luck <tony.luck@intel.com> Cc: Fenghua Yu <fenghua.yu@intel.com> Cc: Michael Ellerman <mpe@ellerman.id.au> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org> Cc: Paul Mackerras <paulus@samba.org> Cc: Vishal Verma <vishal.l.verma@intel.com> Cc: Stephen Rothwell <sfr@canb.auug.org.au> Link: https://lkml.kernel.org/r/160447639846.1133764.7044090803980177548.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> --- arch/ia64/include/asm/sparsemem.h | 6 ++++++ arch/powerpc/include/asm/mmzone.h | 5 +++++ arch/powerpc/include/asm/sparsemem.h | 5 ++--- arch/powerpc/mm/mem.c | 1 + arch/x86/include/asm/sparsemem.h | 10 ++++++++++ arch/x86/mm/numa.c | 2 ++ drivers/dax/Kconfig | 1 - include/linux/memory_hotplug.h | 14 -------------- include/linux/numa.h | 30 +++++++++++++++++++++++++++++- mm/memory_hotplug.c | 18 ------------------ 10 files changed, 55 insertions(+), 37 deletions(-) (limited to 'arch') diff --git a/arch/ia64/include/asm/sparsemem.h b/arch/ia64/include/asm/sparsemem.h index 336d0570e1fa..dd8c166ffd7b 100644 --- a/arch/ia64/include/asm/sparsemem.h +++ b/arch/ia64/include/asm/sparsemem.h @@ -18,4 +18,10 @@ #endif #endif /* CONFIG_SPARSEMEM */ + +#ifdef CONFIG_MEMORY_HOTPLUG +int memory_add_physaddr_to_nid(u64 addr); +#define memory_add_physaddr_to_nid memory_add_physaddr_to_nid +#endif + #endif /* _ASM_IA64_SPARSEMEM_H */ diff --git a/arch/powerpc/include/asm/mmzone.h b/arch/powerpc/include/asm/mmzone.h index 91c69ff53a8a..6cda76b57c5d 100644 --- a/arch/powerpc/include/asm/mmzone.h +++ b/arch/powerpc/include/asm/mmzone.h @@ -46,5 +46,10 @@ u64 memory_hotplug_max(void); #define __HAVE_ARCH_RESERVED_KERNEL_PAGES #endif +#ifdef CONFIG_MEMORY_HOTPLUG +extern int create_section_mapping(unsigned long start, unsigned long end, + int nid, pgprot_t prot); +#endif + #endif /* __KERNEL__ */ #endif /* _ASM_MMZONE_H_ */ diff --git a/arch/powerpc/include/asm/sparsemem.h b/arch/powerpc/include/asm/sparsemem.h index 1e6fa371cc38..d072866842e4 100644 --- a/arch/powerpc/include/asm/sparsemem.h +++ b/arch/powerpc/include/asm/sparsemem.h @@ -13,9 +13,9 @@ #endif /* CONFIG_SPARSEMEM */ #ifdef CONFIG_MEMORY_HOTPLUG -extern int create_section_mapping(unsigned long start, unsigned long end, - int nid, pgprot_t prot); extern int remove_section_mapping(unsigned long start, unsigned long end); +extern int memory_add_physaddr_to_nid(u64 start); +#define memory_add_physaddr_to_nid memory_add_physaddr_to_nid #ifdef CONFIG_NUMA extern int hot_add_scn_to_nid(unsigned long scn_addr); @@ -26,6 +26,5 @@ static inline int hot_add_scn_to_nid(unsigned long scn_addr) } #endif /* CONFIG_NUMA */ #endif /* CONFIG_MEMORY_HOTPLUG */ - #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_SPARSEMEM_H */ diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 01ec2a252f09..3fc325bebe4d 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -50,6 +50,7 @@ #include <asm/rtas.h> #include <asm/kasan.h> #include <asm/svm.h> +#include <asm/mmzone.h> #include <mm/mmu_decl.h> diff --git a/arch/x86/include/asm/sparsemem.h b/arch/x86/include/asm/sparsemem.h index 6bfc878f6771..6a9ccc1b2be5 100644 --- a/arch/x86/include/asm/sparsemem.h +++ b/arch/x86/include/asm/sparsemem.h @@ -28,4 +28,14 @@ #endif #endif /* CONFIG_SPARSEMEM */ + +#ifndef __ASSEMBLY__ +#ifdef CONFIG_NUMA_KEEP_MEMINFO +extern int phys_to_target_node(phys_addr_t start); +#define phys_to_target_node phys_to_target_node +extern int memory_add_physaddr_to_nid(u64 start); +#define memory_add_physaddr_to_nid memory_add_physaddr_to_nid +#endif +#endif /* __ASSEMBLY__ */ + #endif /* _ASM_X86_SPARSEMEM_H */ diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c index 44148691d78b..5eb4dc2b97da 100644 --- a/arch/x86/mm/numa.c +++ b/arch/x86/mm/numa.c @@ -938,6 +938,7 @@ int phys_to_target_node(phys_addr_t start) return meminfo_to_nid(&numa_reserved_meminfo, start); } +EXPORT_SYMBOL_GPL(phys_to_target_node); int memory_add_physaddr_to_nid(u64 start) { @@ -947,4 +948,5 @@ int memory_add_physaddr_to_nid(u64 start) nid = numa_meminfo.blk[0].nid; return nid; } +EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid); #endif diff --git a/drivers/dax/Kconfig b/drivers/dax/Kconfig index 567428e10b7b..d2834c2cfa10 100644 --- a/drivers/dax/Kconfig +++ b/drivers/dax/Kconfig @@ -50,7 +50,6 @@ config DEV_DAX_HMEM Say M if unsure. config DEV_DAX_HMEM_DEVICES - depends on NUMA_KEEP_MEMINFO # for phys_to_target_node() depends on DEV_DAX_HMEM && DAX=y def_bool y diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index d65c6fdc5cfc..551093b74596 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -281,20 +281,6 @@ static inline bool movable_node_is_enabled(void) } #endif /* ! CONFIG_MEMORY_HOTPLUG */ -#ifdef CONFIG_NUMA -extern int memory_add_physaddr_to_nid(u64 start); -extern int phys_to_target_node(u64 start); -#else -static inline int memory_add_physaddr_to_nid(u64 start) -{ - return 0; -} -static inline int phys_to_target_node(u64 start) -{ - return 0; -} -#endif - #if defined(CONFIG_MEMORY_HOTPLUG) || defined(CONFIG_DEFERRED_STRUCT_PAGE_INIT) /* * pgdat resizing functions diff --git a/include/linux/numa.h b/include/linux/numa.h index 8cb33ccfb671..cb44cfe2b725 100644 --- a/include/linux/numa.h +++ b/include/linux/numa.h @@ -21,13 +21,41 @@ #endif #ifdef CONFIG_NUMA +#include <linux/printk.h> +#include <asm/sparsemem.h> + /* Generic implementation available */ int numa_map_to_online_node(int node); -#else + +#ifndef memory_add_physaddr_to_nid +static inline int memory_add_physaddr_to_nid(u64 start) +{ + pr_info_once("Unknown online node for memory at 0x%llx, assuming node 0\n", + start); + return 0; +} +#endif +#ifndef phys_to_target_node +static inline int phys_to_target_node(u64 start) +{ + pr_info_once("Unknown target node for memory at 0x%llx, assuming node 0\n", + start); + return 0; +} +#endif +#else /* !CONFIG_NUMA */ static inline int numa_map_to_online_node(int node) { return NUMA_NO_NODE; } +static inline int memory_add_physaddr_to_nid(u64 start) +{ + return 0; +} +static inline int phys_to_target_node(u64 start) +{ + return 0; +} #endif #endif /* _LINUX_NUMA_H */ diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index b44d4c7ba73b..63b2e46b6555 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -350,24 +350,6 @@ int __ref __add_pages(int nid, unsigned long pfn, unsigned long nr_pages, return err; } -#ifdef CONFIG_NUMA -int __weak memory_add_physaddr_to_nid(u64 start) -{ - pr_info_once("Unknown online node for memory at 0x%llx, assuming node 0\n", - start); - return 0; -} -EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid); - -int __weak phys_to_target_node(u64 start) -{ - pr_info_once("Unknown target node for memory at 0x%llx, assuming node 0\n", - start); - return 0; -} -EXPORT_SYMBOL_GPL(phys_to_target_node); -#endif - /* find the smallest valid pfn in the range [start_pfn, end_pfn) */ static unsigned long find_smallest_section_pfn(int nid, struct zone *zone, unsigned long start_pfn, -- cgit v1.2.3-70-g09d2 From b6b79dd53082db11070b4368d85dd6699ff0b063 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell <sfr@canb.auug.org.au> Date: Mon, 23 Nov 2020 18:40:16 +1100 Subject: powerpc/64s: Fix allnoconfig build since uaccess flush Using DECLARE_STATIC_KEY_FALSE needs linux/jump_table.h. Otherwise the build fails with eg: arch/powerpc/include/asm/book3s/64/kup-radix.h:66:1: warning: data definition has no type or storage class 66 | DECLARE_STATIC_KEY_FALSE(uaccess_flush_key); Fixes: 9a32a7e78bd0 ("powerpc/64s: flush L1D after user accesses") Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au> [mpe: Massage change log] Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/20201123184016.693fe464@canb.auug.org.au --- arch/powerpc/include/asm/book3s/64/kup-radix.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/powerpc/include/asm/book3s/64/kup-radix.h b/arch/powerpc/include/asm/book3s/64/kup-radix.h index 28716e2f13e3..a39e2d193fdc 100644 --- a/arch/powerpc/include/asm/book3s/64/kup-radix.h +++ b/arch/powerpc/include/asm/book3s/64/kup-radix.h @@ -63,6 +63,8 @@ #else /* !__ASSEMBLY__ */ +#include <linux/jump_label.h> + DECLARE_STATIC_KEY_FALSE(uaccess_flush_key); #ifdef CONFIG_PPC_KUAP -- cgit v1.2.3-70-g09d2 From 1179f170b6f0af7bb0b3b7628136eaac450ddf31 Mon Sep 17 00:00:00 2001 From: Sven Schnelle <svens@linux.ibm.com> Date: Fri, 20 Nov 2020 14:17:52 +0100 Subject: s390: fix fpu restore in entry.S We need to disable interrupts in load_fpu_regs(). Otherwise an interrupt might come in after the registers are loaded, but before CIF_FPU is cleared in load_fpu_regs(). When the interrupt returns, CIF_FPU will be cleared and the registers will never be restored. The entry.S code usually saves the interrupt state in __SF_EMPTY on the stack when disabling/restoring interrupts. sie64a however saves the pointer to the sie control block in __SF_SIE_CONTROL, which references the same location. This is non-obvious to the reader. To avoid thrashing the sie control block pointer in load_fpu_regs(), move the __SIE_* offsets eight bytes after __SF_EMPTY on the stack. Cc: <stable@vger.kernel.org> # 5.8 Fixes: 0b0ed657fe00 ("s390: remove critical section cleanup from entry.S") Reported-by: Pierre Morel <pmorel@linux.ibm.com> Signed-off-by: Sven Schnelle <svens@linux.ibm.com> Acked-by: Christian Borntraeger <borntraeger@de.ibm.com> Reviewed-by: Heiko Carstens <hca@linux.ibm.com> Signed-off-by: Heiko Carstens <hca@linux.ibm.com> --- arch/s390/kernel/asm-offsets.c | 10 +++++----- arch/s390/kernel/entry.S | 2 ++ 2 files changed, 7 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index 2012c1cf0853..483051e10db3 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -53,11 +53,11 @@ int main(void) /* stack_frame offsets */ OFFSET(__SF_BACKCHAIN, stack_frame, back_chain); OFFSET(__SF_GPRS, stack_frame, gprs); - OFFSET(__SF_EMPTY, stack_frame, empty1); - OFFSET(__SF_SIE_CONTROL, stack_frame, empty1[0]); - OFFSET(__SF_SIE_SAVEAREA, stack_frame, empty1[1]); - OFFSET(__SF_SIE_REASON, stack_frame, empty1[2]); - OFFSET(__SF_SIE_FLAGS, stack_frame, empty1[3]); + OFFSET(__SF_EMPTY, stack_frame, empty1[0]); + OFFSET(__SF_SIE_CONTROL, stack_frame, empty1[1]); + OFFSET(__SF_SIE_SAVEAREA, stack_frame, empty1[2]); + OFFSET(__SF_SIE_REASON, stack_frame, empty1[3]); + OFFSET(__SF_SIE_FLAGS, stack_frame, empty1[4]); BLANK(); OFFSET(__VDSO_GETCPU_VAL, vdso_per_cpu_data, getcpu_val); BLANK(); diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 5346545b9860..26bb0603c5a1 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -1068,6 +1068,7 @@ EXPORT_SYMBOL(save_fpu_regs) * %r4 */ load_fpu_regs: + stnsm __SF_EMPTY(%r15),0xfc lg %r4,__LC_CURRENT aghi %r4,__TASK_thread TSTMSK __LC_CPU_FLAGS,_CIF_FPU @@ -1099,6 +1100,7 @@ load_fpu_regs: .Lload_fpu_regs_done: ni __LC_CPU_FLAGS+7,255-_CIF_FPU .Lload_fpu_regs_exit: + ssm __SF_EMPTY(%r15) BR_EX %r14 .Lload_fpu_regs_end: ENDPROC(load_fpu_regs) -- cgit v1.2.3-70-g09d2 From 03659efe4287230b1d65b31c993708f335c8de82 Mon Sep 17 00:00:00 2001 From: Randy Dunlap <rdunlap@infradead.org> Date: Sun, 22 Nov 2020 20:45:10 -0800 Subject: arm64/fpsimd: add <asm/insn.h> to <asm/kprobes.h> to fix fpsimd build Adding <asm/exception.h> brought in <asm/kprobes.h> which uses <asm/probes.h>, which uses 'pstate_check_t' so the latter needs to #include <asm/insn.h> for this typedef. Fixes this build error: In file included from arch/arm64/include/asm/kprobes.h:24, from arch/arm64/include/asm/exception.h:11, from arch/arm64/kernel/fpsimd.c:35: arch/arm64/include/asm/probes.h:16:2: error: unknown type name 'pstate_check_t' 16 | pstate_check_t *pstate_cc; Fixes: c6b90d5cf637 ("arm64/fpsimd: Fix missing-prototypes in fpsimd.c") Reported-by: kernel test robot <lkp@intel.com> Signed-off-by: Randy Dunlap <rdunlap@infradead.org> Cc: Will Deacon <will@kernel.org> Cc: Tian Tao <tiantao6@hisilicon.com> Link: https://lore.kernel.org/r/20201123044510.9942-1-rdunlap@infradead.org Signed-off-by: Will Deacon <will@kernel.org> --- arch/arm64/include/asm/probes.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/arm64/include/asm/probes.h b/arch/arm64/include/asm/probes.h index 4266262101fe..006946745352 100644 --- a/arch/arm64/include/asm/probes.h +++ b/arch/arm64/include/asm/probes.h @@ -7,6 +7,8 @@ #ifndef _ARM_PROBES_H #define _ARM_PROBES_H +#include <asm/insn.h> + typedef u32 probe_opcode_t; typedef void (probes_handler_t) (u32 opcode, long addr, struct pt_regs *); -- cgit v1.2.3-70-g09d2 From 07509e10dcc77627f8b6a57381e878fe269958d3 Mon Sep 17 00:00:00 2001 From: Will Deacon <will@kernel.org> Date: Fri, 20 Nov 2020 13:28:01 +0000 Subject: arm64: pgtable: Fix pte_accessible() pte_accessible() is used by ptep_clear_flush() to figure out whether TLB invalidation is necessary when unmapping pages for reclaim. Although our implementation is correct according to the architecture, returning true only for valid, young ptes in the absence of racing page-table modifications, this is in fact flawed due to lazy invalidation of old ptes in ptep_clear_flush_young() where we elide the expensive DSB instruction for completing the TLB invalidation. Rather than penalise the aging path, adjust pte_accessible() to return true for any valid pte, even if the access flag is cleared. Cc: <stable@vger.kernel.org> Fixes: 76c714be0e5e ("arm64: pgtable: implement pte_accessible()") Reported-by: Yu Zhao <yuzhao@google.com> Acked-by: Yu Zhao <yuzhao@google.com> Reviewed-by: Minchan Kim <minchan@kernel.org> Reviewed-by: Catalin Marinas <catalin.marinas@arm.com> Link: https://lore.kernel.org/r/20201120143557.6715-2-will@kernel.org Signed-off-by: Will Deacon <will@kernel.org> --- arch/arm64/include/asm/pgtable.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 4ff12a7adcfd..326c34677e86 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -115,8 +115,6 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]; #define pte_valid(pte) (!!(pte_val(pte) & PTE_VALID)) #define pte_valid_not_user(pte) \ ((pte_val(pte) & (PTE_VALID | PTE_USER)) == PTE_VALID) -#define pte_valid_young(pte) \ - ((pte_val(pte) & (PTE_VALID | PTE_AF)) == (PTE_VALID | PTE_AF)) #define pte_valid_user(pte) \ ((pte_val(pte) & (PTE_VALID | PTE_USER)) == (PTE_VALID | PTE_USER)) @@ -124,9 +122,12 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]; * Could the pte be present in the TLB? We must check mm_tlb_flush_pending * so that we don't erroneously return false for pages that have been * remapped as PROT_NONE but are yet to be flushed from the TLB. + * Note that we can't make any assumptions based on the state of the access + * flag, since ptep_clear_flush_young() elides a DSB when invalidating the + * TLB. */ #define pte_accessible(mm, pte) \ - (mm_tlb_flush_pending(mm) ? pte_present(pte) : pte_valid_young(pte)) + (mm_tlb_flush_pending(mm) ? pte_present(pte) : pte_valid(pte)) /* * p??_access_permitted() is true for valid user mappings (subject to the -- cgit v1.2.3-70-g09d2 From ff1712f953e27f0b0718762ec17d0adb15c9fd0b Mon Sep 17 00:00:00 2001 From: Will Deacon <will@kernel.org> Date: Fri, 20 Nov 2020 13:57:48 +0000 Subject: arm64: pgtable: Ensure dirty bit is preserved across pte_wrprotect() With hardware dirty bit management, calling pte_wrprotect() on a writable, dirty PTE will lose the dirty state and return a read-only, clean entry. Move the logic from ptep_set_wrprotect() into pte_wrprotect() to ensure that the dirty bit is preserved for writable entries, as this is required for soft-dirty bit management if we enable it in the future. Cc: <stable@vger.kernel.org> Fixes: 2f4b829c625e ("arm64: Add support for hardware updates of the access and dirty pte bits") Reviewed-by: Catalin Marinas <catalin.marinas@arm.com> Link: https://lore.kernel.org/r/20201120143557.6715-3-will@kernel.org Signed-off-by: Will Deacon <will@kernel.org> --- arch/arm64/include/asm/pgtable.h | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 326c34677e86..5628289b9d5e 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -165,13 +165,6 @@ static inline pmd_t set_pmd_bit(pmd_t pmd, pgprot_t prot) return pmd; } -static inline pte_t pte_wrprotect(pte_t pte) -{ - pte = clear_pte_bit(pte, __pgprot(PTE_WRITE)); - pte = set_pte_bit(pte, __pgprot(PTE_RDONLY)); - return pte; -} - static inline pte_t pte_mkwrite(pte_t pte) { pte = set_pte_bit(pte, __pgprot(PTE_WRITE)); @@ -197,6 +190,20 @@ static inline pte_t pte_mkdirty(pte_t pte) return pte; } +static inline pte_t pte_wrprotect(pte_t pte) +{ + /* + * If hardware-dirty (PTE_WRITE/DBM bit set and PTE_RDONLY + * clear), set the PTE_DIRTY bit. + */ + if (pte_hw_dirty(pte)) + pte = pte_mkdirty(pte); + + pte = clear_pte_bit(pte, __pgprot(PTE_WRITE)); + pte = set_pte_bit(pte, __pgprot(PTE_RDONLY)); + return pte; +} + static inline pte_t pte_mkold(pte_t pte) { return clear_pte_bit(pte, __pgprot(PTE_AF)); @@ -846,12 +853,6 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addres pte = READ_ONCE(*ptep); do { old_pte = pte; - /* - * If hardware-dirty (PTE_WRITE/DBM bit set and PTE_RDONLY - * clear), set the PTE_DIRTY bit. - */ - if (pte_hw_dirty(pte)) - pte = pte_mkdirty(pte); pte = pte_wrprotect(pte); pte_val(pte) = cmpxchg_relaxed(&pte_val(*ptep), pte_val(old_pte), pte_val(pte)); -- cgit v1.2.3-70-g09d2 From fd8d9db3559a29fd737bcdb7c4fcbe1940caae34 Mon Sep 17 00:00:00 2001 From: Xiaochen Shen <xiaochen.shen@intel.com> Date: Sat, 31 Oct 2020 03:10:53 +0800 Subject: x86/resctrl: Remove superfluous kernfs_get() calls to prevent refcount leak Willem reported growing of kernfs_node_cache entries in slabtop when repeatedly creating and removing resctrl subdirectories as well as when repeatedly mounting and unmounting the resctrl filesystem. On resource group (control as well as monitoring) creation via a mkdir an extra kernfs_node reference is obtained to ensure that the rdtgroup structure remains accessible for the rdtgroup_kn_unlock() calls where it is removed on deletion. The kernfs_node reference count is dropped by kernfs_put() in rdtgroup_kn_unlock(). With the above explaining the need for one kernfs_get()/kernfs_put() pair in resctrl there are more places where a kernfs_node reference is obtained without a corresponding release. The excessive amount of reference count on kernfs nodes will never be dropped to 0 and the kernfs nodes will never be freed in the call paths of rmdir and umount. It leads to reference count leak and kernfs_node_cache memory leak. Remove the superfluous kernfs_get() calls and expand the existing comments surrounding the remaining kernfs_get()/kernfs_put() pair that remains in use. Superfluous kernfs_get() calls are removed from two areas: (1) In call paths of mount and mkdir, when kernfs nodes for "info", "mon_groups" and "mon_data" directories and sub-directories are created, the reference count of newly created kernfs node is set to 1. But after kernfs_create_dir() returns, superfluous kernfs_get() are called to take an additional reference. (2) kernfs_get() calls in rmdir call paths. Fixes: 17eafd076291 ("x86/intel_rdt: Split resource group removal in two") Fixes: 4af4a88e0c92 ("x86/intel_rdt/cqm: Add mount,umount support") Fixes: f3cbeacaa06e ("x86/intel_rdt/cqm: Add rmdir support") Fixes: d89b7379015f ("x86/intel_rdt/cqm: Add mon_data") Fixes: c7d9aac61311 ("x86/intel_rdt/cqm: Add mkdir support for RDT monitoring") Fixes: 5dc1d5c6bac2 ("x86/intel_rdt: Simplify info and base file lists") Fixes: 60cf5e101fd4 ("x86/intel_rdt: Add mkdir to resctrl file system") Fixes: 4e978d06dedb ("x86/intel_rdt: Add "info" files to resctrl file system") Reported-by: Willem de Bruijn <willemb@google.com> Signed-off-by: Xiaochen Shen <xiaochen.shen@intel.com> Signed-off-by: Borislav Petkov <bp@suse.de> Reviewed-by: Reinette Chatre <reinette.chatre@intel.com> Tested-by: Willem de Bruijn <willemb@google.com> Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/1604085053-31639-1-git-send-email-xiaochen.shen@intel.com --- arch/x86/kernel/cpu/resctrl/rdtgroup.c | 35 ++-------------------------------- 1 file changed, 2 insertions(+), 33 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c index af323e2e3100..2ab1266a5f14 100644 --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c @@ -1769,7 +1769,6 @@ static int rdtgroup_mkdir_info_resdir(struct rdt_resource *r, char *name, if (IS_ERR(kn_subdir)) return PTR_ERR(kn_subdir); - kernfs_get(kn_subdir); ret = rdtgroup_kn_set_ugid(kn_subdir); if (ret) return ret; @@ -1792,7 +1791,6 @@ static int rdtgroup_create_info_dir(struct kernfs_node *parent_kn) kn_info = kernfs_create_dir(parent_kn, "info", parent_kn->mode, NULL); if (IS_ERR(kn_info)) return PTR_ERR(kn_info); - kernfs_get(kn_info); ret = rdtgroup_add_files(kn_info, RF_TOP_INFO); if (ret) @@ -1813,12 +1811,6 @@ static int rdtgroup_create_info_dir(struct kernfs_node *parent_kn) goto out_destroy; } - /* - * This extra ref will be put in kernfs_remove() and guarantees - * that @rdtgrp->kn is always accessible. - */ - kernfs_get(kn_info); - ret = rdtgroup_kn_set_ugid(kn_info); if (ret) goto out_destroy; @@ -1847,12 +1839,6 @@ mongroup_create_dir(struct kernfs_node *parent_kn, struct rdtgroup *prgrp, if (dest_kn) *dest_kn = kn; - /* - * This extra ref will be put in kernfs_remove() and guarantees - * that @rdtgrp->kn is always accessible. - */ - kernfs_get(kn); - ret = rdtgroup_kn_set_ugid(kn); if (ret) goto out_destroy; @@ -2139,13 +2125,11 @@ static int rdt_get_tree(struct fs_context *fc) &kn_mongrp); if (ret < 0) goto out_info; - kernfs_get(kn_mongrp); ret = mkdir_mondata_all(rdtgroup_default.kn, &rdtgroup_default, &kn_mondata); if (ret < 0) goto out_mongrp; - kernfs_get(kn_mondata); rdtgroup_default.mon.mon_data_kn = kn_mondata; } @@ -2499,11 +2483,6 @@ static int mkdir_mondata_subdir(struct kernfs_node *parent_kn, if (IS_ERR(kn)) return PTR_ERR(kn); - /* - * This extra ref will be put in kernfs_remove() and guarantees - * that kn is always accessible. - */ - kernfs_get(kn); ret = rdtgroup_kn_set_ugid(kn); if (ret) goto out_destroy; @@ -2838,8 +2817,8 @@ static int mkdir_rdt_prepare(struct kernfs_node *parent_kn, /* * kernfs_remove() will drop the reference count on "kn" which * will free it. But we still need it to stick around for the - * rdtgroup_kn_unlock(kn} call below. Take one extra reference - * here, which will be dropped inside rdtgroup_kn_unlock(). + * rdtgroup_kn_unlock(kn) call. Take one extra reference here, + * which will be dropped inside rdtgroup_kn_unlock(). */ kernfs_get(kn); @@ -3049,11 +3028,6 @@ static int rdtgroup_rmdir_mon(struct kernfs_node *kn, struct rdtgroup *rdtgrp, WARN_ON(list_empty(&prdtgrp->mon.crdtgrp_list)); list_del(&rdtgrp->mon.crdtgrp_list); - /* - * one extra hold on this, will drop when we kfree(rdtgrp) - * in rdtgroup_kn_unlock() - */ - kernfs_get(kn); kernfs_remove(rdtgrp->kn); return 0; @@ -3065,11 +3039,6 @@ static int rdtgroup_ctrl_remove(struct kernfs_node *kn, rdtgrp->flags = RDT_DELETED; list_del(&rdtgrp->rdtgroup_list); - /* - * one extra hold on this, will drop when we kfree(rdtgrp) - * in rdtgroup_kn_unlock() - */ - kernfs_get(kn); kernfs_remove(rdtgrp->kn); return 0; } -- cgit v1.2.3-70-g09d2 From 758999246965eeb8b253d47e72f7bfe508804b16 Mon Sep 17 00:00:00 2001 From: Xiaochen Shen <xiaochen.shen@intel.com> Date: Sat, 31 Oct 2020 03:11:28 +0800 Subject: x86/resctrl: Add necessary kernfs_put() calls to prevent refcount leak On resource group creation via a mkdir an extra kernfs_node reference is obtained by kernfs_get() to ensure that the rdtgroup structure remains accessible for the rdtgroup_kn_unlock() calls where it is removed on deletion. Currently the extra kernfs_node reference count is only dropped by kernfs_put() in rdtgroup_kn_unlock() while the rdtgroup structure is removed in a few other locations that lack the matching reference drop. In call paths of rmdir and umount, when a control group is removed, kernfs_remove() is called to remove the whole kernfs nodes tree of the control group (including the kernfs nodes trees of all child monitoring groups), and then rdtgroup structure is freed by kfree(). The rdtgroup structures of all child monitoring groups under the control group are freed by kfree() in free_all_child_rdtgrp(). Before calling kfree() to free the rdtgroup structures, the kernfs node of the control group itself as well as the kernfs nodes of all child monitoring groups still take the extra references which will never be dropped to 0 and the kernfs nodes will never be freed. It leads to reference count leak and kernfs_node_cache memory leak. For example, reference count leak is observed in these two cases: (1) mount -t resctrl resctrl /sys/fs/resctrl mkdir /sys/fs/resctrl/c1 mkdir /sys/fs/resctrl/c1/mon_groups/m1 umount /sys/fs/resctrl (2) mkdir /sys/fs/resctrl/c1 mkdir /sys/fs/resctrl/c1/mon_groups/m1 rmdir /sys/fs/resctrl/c1 The same reference count leak issue also exists in the error exit paths of mkdir in mkdir_rdt_prepare() and rdtgroup_mkdir_ctrl_mon(). Fix this issue by following changes to make sure the extra kernfs_node reference on rdtgroup is dropped before freeing the rdtgroup structure. (1) Introduce rdtgroup removal helper rdtgroup_remove() to wrap up kernfs_put() and kfree(). (2) Call rdtgroup_remove() in rdtgroup removal path where the rdtgroup structure is about to be freed by kfree(). (3) Call rdtgroup_remove() or kernfs_put() as appropriate in the error exit paths of mkdir where an extra reference is taken by kernfs_get(). Fixes: f3cbeacaa06e ("x86/intel_rdt/cqm: Add rmdir support") Fixes: e02737d5b826 ("x86/intel_rdt: Add tasks files") Fixes: 60cf5e101fd4 ("x86/intel_rdt: Add mkdir to resctrl file system") Reported-by: Willem de Bruijn <willemb@google.com> Signed-off-by: Xiaochen Shen <xiaochen.shen@intel.com> Signed-off-by: Borislav Petkov <bp@suse.de> Reviewed-by: Reinette Chatre <reinette.chatre@intel.com> Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/1604085088-31707-1-git-send-email-xiaochen.shen@intel.com --- arch/x86/kernel/cpu/resctrl/rdtgroup.c | 32 +++++++++++++++++++++++++------- 1 file changed, 25 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c index 2ab1266a5f14..6f4ca4bea625 100644 --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c @@ -507,6 +507,24 @@ unlock: return ret ?: nbytes; } +/** + * rdtgroup_remove - the helper to remove resource group safely + * @rdtgrp: resource group to remove + * + * On resource group creation via a mkdir, an extra kernfs_node reference is + * taken to ensure that the rdtgroup structure remains accessible for the + * rdtgroup_kn_unlock() calls where it is removed. + * + * Drop the extra reference here, then free the rdtgroup structure. + * + * Return: void + */ +static void rdtgroup_remove(struct rdtgroup *rdtgrp) +{ + kernfs_put(rdtgrp->kn); + kfree(rdtgrp); +} + struct task_move_callback { struct callback_head work; struct rdtgroup *rdtgrp; @@ -529,7 +547,7 @@ static void move_myself(struct callback_head *head) (rdtgrp->flags & RDT_DELETED)) { current->closid = 0; current->rmid = 0; - kfree(rdtgrp); + rdtgroup_remove(rdtgrp); } if (unlikely(current->flags & PF_EXITING)) @@ -2065,8 +2083,7 @@ void rdtgroup_kn_unlock(struct kernfs_node *kn) rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) rdtgroup_pseudo_lock_remove(rdtgrp); kernfs_unbreak_active_protection(kn); - kernfs_put(rdtgrp->kn); - kfree(rdtgrp); + rdtgroup_remove(rdtgrp); } else { kernfs_unbreak_active_protection(kn); } @@ -2341,7 +2358,7 @@ static void free_all_child_rdtgrp(struct rdtgroup *rdtgrp) if (atomic_read(&sentry->waitcount) != 0) sentry->flags = RDT_DELETED; else - kfree(sentry); + rdtgroup_remove(sentry); } } @@ -2383,7 +2400,7 @@ static void rmdir_all_sub(void) if (atomic_read(&rdtgrp->waitcount) != 0) rdtgrp->flags = RDT_DELETED; else - kfree(rdtgrp); + rdtgroup_remove(rdtgrp); } /* Notify online CPUs to update per cpu storage and PQR_ASSOC MSR */ update_closid_rmid(cpu_online_mask, &rdtgroup_default); @@ -2818,7 +2835,7 @@ static int mkdir_rdt_prepare(struct kernfs_node *parent_kn, * kernfs_remove() will drop the reference count on "kn" which * will free it. But we still need it to stick around for the * rdtgroup_kn_unlock(kn) call. Take one extra reference here, - * which will be dropped inside rdtgroup_kn_unlock(). + * which will be dropped by kernfs_put() in rdtgroup_remove(). */ kernfs_get(kn); @@ -2859,6 +2876,7 @@ static int mkdir_rdt_prepare(struct kernfs_node *parent_kn, out_idfree: free_rmid(rdtgrp->mon.rmid); out_destroy: + kernfs_put(rdtgrp->kn); kernfs_remove(rdtgrp->kn); out_free_rgrp: kfree(rdtgrp); @@ -2871,7 +2889,7 @@ static void mkdir_rdt_prepare_clean(struct rdtgroup *rgrp) { kernfs_remove(rgrp->kn); free_rmid(rgrp->mon.rmid); - kfree(rgrp); + rdtgroup_remove(rgrp); } /* -- cgit v1.2.3-70-g09d2 From a7361b9c4615951f52ffd2b1afa09a1384c7b4e4 Mon Sep 17 00:00:00 2001 From: Adam Sampson <ats@offog.org> Date: Mon, 23 Nov 2020 17:47:39 +0000 Subject: ARM: dts: sun7i: pcduino3-nano: enable RGMII RX/TX delay on PHY The RX/TX delays for the Ethernet PHY on the Linksprite pcDuino 3 Nano are configured in hardware, using resistors that are populated to pull the RTL8211E's RXDLY/TXDLY pins low or high as needed. phy-mode should be set to rgmii-id to reflect this. Previously it was set to rgmii, which used to work but now results in the delays being disabled again as a result of the bugfix in commit bbc4d71d6354 ("net: phy: realtek: fix rtl8211e rx/tx delay config"). Tested on two pcDuino 3 Nano boards purchased in 2015. Without this fix, Ethernet works unreliably on one board and doesn't work at all on the other. Fixes: 061035d456c9 ("ARM: dts: sun7i: Add dts file for pcDuino 3 Nano board") Signed-off-by: Adam Sampson <ats@offog.org> Signed-off-by: Maxime Ripard <maxime@cerno.tech> Reviewed-by: Andrew Lunn <andrew@lunn.ch> Acked-by: Chen-Yu Tsai <wens@csie.org> Link: https://lore.kernel.org/r/20201123174739.6809-1-ats@offog.org --- arch/arm/boot/dts/sun7i-a20-pcduino3-nano.dts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/sun7i-a20-pcduino3-nano.dts b/arch/arm/boot/dts/sun7i-a20-pcduino3-nano.dts index fce2f7fcd084..bf38c66c1815 100644 --- a/arch/arm/boot/dts/sun7i-a20-pcduino3-nano.dts +++ b/arch/arm/boot/dts/sun7i-a20-pcduino3-nano.dts @@ -1,5 +1,5 @@ /* - * Copyright 2015 Adam Sampson <ats@offog.org> + * Copyright 2015-2020 Adam Sampson <ats@offog.org> * * This file is dual-licensed: you can use it either under the terms * of the GPL or the X11 license, at your option. Note that this dual @@ -115,7 +115,7 @@ pinctrl-names = "default"; pinctrl-0 = <&gmac_rgmii_pins>; phy-handle = <&phy1>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; status = "okay"; }; -- cgit v1.2.3-70-g09d2 From 58c644ba512cfbc2e39b758dd979edd1d6d00e27 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra <peterz@infradead.org> Date: Fri, 20 Nov 2020 11:50:35 +0100 Subject: sched/idle: Fix arch_cpu_idle() vs tracing We call arch_cpu_idle() with RCU disabled, but then use local_irq_{en,dis}able(), which invokes tracing, which relies on RCU. Switch all arch_cpu_idle() implementations to use raw_local_irq_{en,dis}able() and carefully manage the lockdep,rcu,tracing state like we do in entry. (XXX: we really should change arch_cpu_idle() to not return with interrupts enabled) Reported-by: Sven Schnelle <svens@linux.ibm.com> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Reviewed-by: Mark Rutland <mark.rutland@arm.com> Tested-by: Mark Rutland <mark.rutland@arm.com> Link: https://lkml.kernel.org/r/20201120114925.594122626@infradead.org --- arch/alpha/kernel/process.c | 2 +- arch/arm/kernel/process.c | 2 +- arch/arm64/kernel/process.c | 2 +- arch/csky/kernel/process.c | 2 +- arch/h8300/kernel/process.c | 2 +- arch/hexagon/kernel/process.c | 2 +- arch/ia64/kernel/process.c | 2 +- arch/microblaze/kernel/process.c | 2 +- arch/mips/kernel/idle.c | 12 ++++++------ arch/nios2/kernel/process.c | 2 +- arch/openrisc/kernel/process.c | 2 +- arch/parisc/kernel/process.c | 2 +- arch/powerpc/kernel/idle.c | 4 ++-- arch/riscv/kernel/process.c | 2 +- arch/s390/kernel/idle.c | 6 +++--- arch/sh/kernel/idle.c | 2 +- arch/sparc/kernel/leon_pmc.c | 4 ++-- arch/sparc/kernel/process_32.c | 2 +- arch/sparc/kernel/process_64.c | 4 ++-- arch/um/kernel/process.c | 2 +- arch/x86/include/asm/mwait.h | 2 -- arch/x86/kernel/process.c | 12 +++++++----- kernel/sched/idle.c | 28 +++++++++++++++++++++++++++- 23 files changed, 64 insertions(+), 38 deletions(-) (limited to 'arch') diff --git a/arch/alpha/kernel/process.c b/arch/alpha/kernel/process.c index 7462a7911002..4c7b0414a3ff 100644 --- a/arch/alpha/kernel/process.c +++ b/arch/alpha/kernel/process.c @@ -57,7 +57,7 @@ EXPORT_SYMBOL(pm_power_off); void arch_cpu_idle(void) { wtint(0); - local_irq_enable(); + raw_local_irq_enable(); } void arch_cpu_idle_dead(void) diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c index 8e6ace03e960..9f199b1e8383 100644 --- a/arch/arm/kernel/process.c +++ b/arch/arm/kernel/process.c @@ -71,7 +71,7 @@ void arch_cpu_idle(void) arm_pm_idle(); else cpu_do_idle(); - local_irq_enable(); + raw_local_irq_enable(); } void arch_cpu_idle_prepare(void) diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 4784011cecac..9ebe02574127 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -126,7 +126,7 @@ void arch_cpu_idle(void) * tricks */ cpu_do_idle(); - local_irq_enable(); + raw_local_irq_enable(); } #ifdef CONFIG_HOTPLUG_CPU diff --git a/arch/csky/kernel/process.c b/arch/csky/kernel/process.c index f730869e21ee..69af6bc87e64 100644 --- a/arch/csky/kernel/process.c +++ b/arch/csky/kernel/process.c @@ -102,6 +102,6 @@ void arch_cpu_idle(void) #ifdef CONFIG_CPU_PM_STOP asm volatile("stop\n"); #endif - local_irq_enable(); + raw_local_irq_enable(); } #endif diff --git a/arch/h8300/kernel/process.c b/arch/h8300/kernel/process.c index aea0a40b77a9..bc1364db58fe 100644 --- a/arch/h8300/kernel/process.c +++ b/arch/h8300/kernel/process.c @@ -57,7 +57,7 @@ asmlinkage void ret_from_kernel_thread(void); */ void arch_cpu_idle(void) { - local_irq_enable(); + raw_local_irq_enable(); __asm__("sleep"); } diff --git a/arch/hexagon/kernel/process.c b/arch/hexagon/kernel/process.c index 5a0a95d93ddb..67767c5ed98c 100644 --- a/arch/hexagon/kernel/process.c +++ b/arch/hexagon/kernel/process.c @@ -44,7 +44,7 @@ void arch_cpu_idle(void) { __vmwait(); /* interrupts wake us up, but irqs are still disabled */ - local_irq_enable(); + raw_local_irq_enable(); } /* diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c index 6b61a703bcf5..c9ff8796b509 100644 --- a/arch/ia64/kernel/process.c +++ b/arch/ia64/kernel/process.c @@ -239,7 +239,7 @@ void arch_cpu_idle(void) if (mark_idle) (*mark_idle)(1); - safe_halt(); + raw_safe_halt(); if (mark_idle) (*mark_idle)(0); diff --git a/arch/microblaze/kernel/process.c b/arch/microblaze/kernel/process.c index a9e46e525cd0..f99860771ff4 100644 --- a/arch/microblaze/kernel/process.c +++ b/arch/microblaze/kernel/process.c @@ -149,5 +149,5 @@ int dump_fpu(struct pt_regs *regs, elf_fpregset_t *fpregs) void arch_cpu_idle(void) { - local_irq_enable(); + raw_local_irq_enable(); } diff --git a/arch/mips/kernel/idle.c b/arch/mips/kernel/idle.c index 5bc3b04693c7..18e69ebf5691 100644 --- a/arch/mips/kernel/idle.c +++ b/arch/mips/kernel/idle.c @@ -33,19 +33,19 @@ static void __cpuidle r3081_wait(void) { unsigned long cfg = read_c0_conf(); write_c0_conf(cfg | R30XX_CONF_HALT); - local_irq_enable(); + raw_local_irq_enable(); } static void __cpuidle r39xx_wait(void) { if (!need_resched()) write_c0_conf(read_c0_conf() | TX39_CONF_HALT); - local_irq_enable(); + raw_local_irq_enable(); } void __cpuidle r4k_wait(void) { - local_irq_enable(); + raw_local_irq_enable(); __r4k_wait(); } @@ -64,7 +64,7 @@ void __cpuidle r4k_wait_irqoff(void) " .set arch=r4000 \n" " wait \n" " .set pop \n"); - local_irq_enable(); + raw_local_irq_enable(); } /* @@ -84,7 +84,7 @@ static void __cpuidle rm7k_wait_irqoff(void) " wait \n" " mtc0 $1, $12 # stalls until W stage \n" " .set pop \n"); - local_irq_enable(); + raw_local_irq_enable(); } /* @@ -257,7 +257,7 @@ void arch_cpu_idle(void) if (cpu_wait) cpu_wait(); else - local_irq_enable(); + raw_local_irq_enable(); } #ifdef CONFIG_CPU_IDLE diff --git a/arch/nios2/kernel/process.c b/arch/nios2/kernel/process.c index 4ffe857e6ada..50b4eb19a6cc 100644 --- a/arch/nios2/kernel/process.c +++ b/arch/nios2/kernel/process.c @@ -33,7 +33,7 @@ EXPORT_SYMBOL(pm_power_off); void arch_cpu_idle(void) { - local_irq_enable(); + raw_local_irq_enable(); } /* diff --git a/arch/openrisc/kernel/process.c b/arch/openrisc/kernel/process.c index 0ff391f00334..3c98728cce24 100644 --- a/arch/openrisc/kernel/process.c +++ b/arch/openrisc/kernel/process.c @@ -79,7 +79,7 @@ void machine_power_off(void) */ void arch_cpu_idle(void) { - local_irq_enable(); + raw_local_irq_enable(); if (mfspr(SPR_UPR) & SPR_UPR_PMP) mtspr(SPR_PMR, mfspr(SPR_PMR) | SPR_PMR_DME); } diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c index f196d96e2f9f..a92a23d6acd9 100644 --- a/arch/parisc/kernel/process.c +++ b/arch/parisc/kernel/process.c @@ -169,7 +169,7 @@ void __cpuidle arch_cpu_idle_dead(void) void __cpuidle arch_cpu_idle(void) { - local_irq_enable(); + raw_local_irq_enable(); /* nop on real hardware, qemu will idle sleep. */ asm volatile("or %%r10,%%r10,%%r10\n":::); diff --git a/arch/powerpc/kernel/idle.c b/arch/powerpc/kernel/idle.c index ae0e2632393d..1f835539fda4 100644 --- a/arch/powerpc/kernel/idle.c +++ b/arch/powerpc/kernel/idle.c @@ -52,9 +52,9 @@ void arch_cpu_idle(void) * interrupts enabled, some don't. */ if (irqs_disabled()) - local_irq_enable(); + raw_local_irq_enable(); } else { - local_irq_enable(); + raw_local_irq_enable(); /* * Go into low thread priority and possibly * low power mode. diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c index 19225ec65db6..dd5f985b1f40 100644 --- a/arch/riscv/kernel/process.c +++ b/arch/riscv/kernel/process.c @@ -36,7 +36,7 @@ extern asmlinkage void ret_from_kernel_thread(void); void arch_cpu_idle(void) { wait_for_interrupt(); - local_irq_enable(); + raw_local_irq_enable(); } void show_regs(struct pt_regs *regs) diff --git a/arch/s390/kernel/idle.c b/arch/s390/kernel/idle.c index f7f1e64e0d98..2b85096964f8 100644 --- a/arch/s390/kernel/idle.c +++ b/arch/s390/kernel/idle.c @@ -33,10 +33,10 @@ void enabled_wait(void) PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK; clear_cpu_flag(CIF_NOHZ_DELAY); - local_irq_save(flags); + raw_local_irq_save(flags); /* Call the assembler magic in entry.S */ psw_idle(idle, psw_mask); - local_irq_restore(flags); + raw_local_irq_restore(flags); /* Account time spent with enabled wait psw loaded as idle time. */ raw_write_seqcount_begin(&idle->seqcount); @@ -123,7 +123,7 @@ void arch_cpu_idle_enter(void) void arch_cpu_idle(void) { enabled_wait(); - local_irq_enable(); + raw_local_irq_enable(); } void arch_cpu_idle_exit(void) diff --git a/arch/sh/kernel/idle.c b/arch/sh/kernel/idle.c index 0dc0f52f9bb8..f59814983bd5 100644 --- a/arch/sh/kernel/idle.c +++ b/arch/sh/kernel/idle.c @@ -22,7 +22,7 @@ static void (*sh_idle)(void); void default_idle(void) { set_bl_bit(); - local_irq_enable(); + raw_local_irq_enable(); /* Isn't this racy ? */ cpu_sleep(); clear_bl_bit(); diff --git a/arch/sparc/kernel/leon_pmc.c b/arch/sparc/kernel/leon_pmc.c index 065e2d4b7290..396f46bca52e 100644 --- a/arch/sparc/kernel/leon_pmc.c +++ b/arch/sparc/kernel/leon_pmc.c @@ -50,7 +50,7 @@ static void pmc_leon_idle_fixup(void) register unsigned int address = (unsigned int)leon3_irqctrl_regs; /* Interrupts need to be enabled to not hang the CPU */ - local_irq_enable(); + raw_local_irq_enable(); __asm__ __volatile__ ( "wr %%g0, %%asr19\n" @@ -66,7 +66,7 @@ static void pmc_leon_idle_fixup(void) static void pmc_leon_idle(void) { /* Interrupts need to be enabled to not hang the CPU */ - local_irq_enable(); + raw_local_irq_enable(); /* For systems without power-down, this will be no-op */ __asm__ __volatile__ ("wr %g0, %asr19\n\t"); diff --git a/arch/sparc/kernel/process_32.c b/arch/sparc/kernel/process_32.c index adfcaeab3ddc..a02363735915 100644 --- a/arch/sparc/kernel/process_32.c +++ b/arch/sparc/kernel/process_32.c @@ -74,7 +74,7 @@ void arch_cpu_idle(void) { if (sparc_idle) (*sparc_idle)(); - local_irq_enable(); + raw_local_irq_enable(); } /* XXX cli/sti -> local_irq_xxx here, check this works once SMP is fixed. */ diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c index a75093b993f9..6f8c7822fc06 100644 --- a/arch/sparc/kernel/process_64.c +++ b/arch/sparc/kernel/process_64.c @@ -62,11 +62,11 @@ void arch_cpu_idle(void) { if (tlb_type != hypervisor) { touch_nmi_watchdog(); - local_irq_enable(); + raw_local_irq_enable(); } else { unsigned long pstate; - local_irq_enable(); + raw_local_irq_enable(); /* The sun4v sleeping code requires that we have PSTATE.IE cleared over * the cpu sleep hypervisor call. diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c index 3bed09538dd9..9505a7e87396 100644 --- a/arch/um/kernel/process.c +++ b/arch/um/kernel/process.c @@ -217,7 +217,7 @@ void arch_cpu_idle(void) { cpu_tasks[current_thread_info()->cpu].pid = os_getpid(); um_idle_sleep(); - local_irq_enable(); + raw_local_irq_enable(); } int __cant_sleep(void) { diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h index e039a933aca3..29dd27b5a339 100644 --- a/arch/x86/include/asm/mwait.h +++ b/arch/x86/include/asm/mwait.h @@ -88,8 +88,6 @@ static inline void __mwaitx(unsigned long eax, unsigned long ebx, static inline void __sti_mwait(unsigned long eax, unsigned long ecx) { - trace_hardirqs_on(); - mds_idle_clear_cpu_buffers(); /* "mwait %eax, %ecx;" */ asm volatile("sti; .byte 0x0f, 0x01, 0xc9;" diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index ba4593a913fa..145a7ac0c19a 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -685,7 +685,7 @@ void arch_cpu_idle(void) */ void __cpuidle default_idle(void) { - safe_halt(); + raw_safe_halt(); } #if defined(CONFIG_APM_MODULE) || defined(CONFIG_HALTPOLL_CPUIDLE_MODULE) EXPORT_SYMBOL(default_idle); @@ -736,6 +736,8 @@ void stop_this_cpu(void *dummy) /* * AMD Erratum 400 aware idle routine. We handle it the same way as C3 power * states (local apic timer and TSC stop). + * + * XXX this function is completely buggered vs RCU and tracing. */ static void amd_e400_idle(void) { @@ -757,9 +759,9 @@ static void amd_e400_idle(void) * The switch back from broadcast mode needs to be called with * interrupts disabled. */ - local_irq_disable(); + raw_local_irq_disable(); tick_broadcast_exit(); - local_irq_enable(); + raw_local_irq_enable(); } /* @@ -801,9 +803,9 @@ static __cpuidle void mwait_idle(void) if (!need_resched()) __sti_mwait(0, 0); else - local_irq_enable(); + raw_local_irq_enable(); } else { - local_irq_enable(); + raw_local_irq_enable(); } __current_clr_polling(); } diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c index 24d0ee26377d..c6932b8f4467 100644 --- a/kernel/sched/idle.c +++ b/kernel/sched/idle.c @@ -78,7 +78,7 @@ void __weak arch_cpu_idle_dead(void) { } void __weak arch_cpu_idle(void) { cpu_idle_force_poll = 1; - local_irq_enable(); + raw_local_irq_enable(); } /** @@ -94,9 +94,35 @@ void __cpuidle default_idle_call(void) trace_cpu_idle(1, smp_processor_id()); stop_critical_timings(); + + /* + * arch_cpu_idle() is supposed to enable IRQs, however + * we can't do that because of RCU and tracing. + * + * Trace IRQs enable here, then switch off RCU, and have + * arch_cpu_idle() use raw_local_irq_enable(). Note that + * rcu_idle_enter() relies on lockdep IRQ state, so switch that + * last -- this is very similar to the entry code. + */ + trace_hardirqs_on_prepare(); + lockdep_hardirqs_on_prepare(_THIS_IP_); rcu_idle_enter(); + lockdep_hardirqs_on(_THIS_IP_); + arch_cpu_idle(); + + /* + * OK, so IRQs are enabled here, but RCU needs them disabled to + * turn itself back on.. funny thing is that disabling IRQs + * will cause tracing, which needs RCU. Jump through hoops to + * make it 'work'. + */ + raw_local_irq_disable(); + lockdep_hardirqs_off(_THIS_IP_); rcu_idle_exit(); + lockdep_hardirqs_on(_THIS_IP_); + raw_local_irq_enable(); + start_critical_timings(); trace_cpu_idle(PWR_EVENT_EXIT, smp_processor_id()); } -- cgit v1.2.3-70-g09d2 From e2be2a833ab5338fa5b8b99ba622b911d96f1795 Mon Sep 17 00:00:00 2001 From: Lu Baolu <baolu.lu@linux.intel.com> Date: Wed, 25 Nov 2020 09:41:24 +0800 Subject: x86/tboot: Don't disable swiotlb when iommu is forced on After commit 327d5b2fee91c ("iommu/vt-d: Allow 32bit devices to uses DMA domain"), swiotlb could also be used for direct memory access if IOMMU is enabled but a device is configured to pass through the DMA translation. Keep swiotlb when IOMMU is forced on, otherwise, some devices won't work if "iommu=pt" kernel parameter is used. Fixes: 327d5b2fee91 ("iommu/vt-d: Allow 32bit devices to uses DMA domain") Reported-and-tested-by: Adrian Huang <ahuang12@lenovo.com> Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com> Link: https://lore.kernel.org/r/20201125014124.4070776-1-baolu.lu@linux.intel.com Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=210237 Signed-off-by: Will Deacon <will@kernel.org> --- arch/x86/kernel/tboot.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c index 420be871d9d4..ae64f98ec2ab 100644 --- a/arch/x86/kernel/tboot.c +++ b/arch/x86/kernel/tboot.c @@ -514,13 +514,10 @@ int tboot_force_iommu(void) if (!tboot_enabled()) return 0; - if (no_iommu || swiotlb || dmar_disabled) + if (no_iommu || dmar_disabled) pr_warn("Forcing Intel-IOMMU to enabled\n"); dmar_disabled = 0; -#ifdef CONFIG_SWIOTLB - swiotlb = 0; -#endif no_iommu = 0; return 1; -- cgit v1.2.3-70-g09d2 From fb319496935b7475a863a00c76895e8bb3216704 Mon Sep 17 00:00:00 2001 From: Jon Hunter <jonathanh@nvidia.com> Date: Mon, 16 Nov 2020 16:20:26 +0000 Subject: arm64: tegra: Disable the ACONNECT for Jetson TX2 Commit ff4c371d2bc0 ("arm64: defconfig: Build ADMA and ACONNECT driver") enable the Tegra ADMA and ACONNECT drivers and this is causing resume from system suspend to fail on Jetson TX2. Resume is failing because the ACONNECT driver is being resumed before the BPMP driver, and the ACONNECT driver is attempting to power on a power-domain that is provided by the BPMP. While a proper fix for the resume sequencing problem is identified, disable the ACONNECT for Jetson TX2 temporarily to avoid breaking system suspend. Please note that ACONNECT driver is used by the Audio Processing Engine (APE) on Tegra, but because there is no mainline support for APE on Jetson TX2 currently, disabling the ACONNECT does not disable any useful feature at the moment. Signed-off-by: Jon Hunter <jonathanh@nvidia.com> Signed-off-by: Thierry Reding <treding@nvidia.com> --- arch/arm64/boot/dts/nvidia/tegra186-p2771-0000.dts | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'arch') diff --git a/arch/arm64/boot/dts/nvidia/tegra186-p2771-0000.dts b/arch/arm64/boot/dts/nvidia/tegra186-p2771-0000.dts index 381a84912ba8..c28d51cc5797 100644 --- a/arch/arm64/boot/dts/nvidia/tegra186-p2771-0000.dts +++ b/arch/arm64/boot/dts/nvidia/tegra186-p2771-0000.dts @@ -10,18 +10,6 @@ model = "NVIDIA Jetson TX2 Developer Kit"; compatible = "nvidia,p2771-0000", "nvidia,tegra186"; - aconnect { - status = "okay"; - - dma-controller@2930000 { - status = "okay"; - }; - - interrupt-controller@2a40000 { - status = "okay"; - }; - }; - i2c@3160000 { power-monitor@42 { compatible = "ti,ina3221"; -- cgit v1.2.3-70-g09d2 From 476e23f4c540949ac5ea4fad4f6f6fa0e2d41f42 Mon Sep 17 00:00:00 2001 From: Jon Hunter <jonathanh@nvidia.com> Date: Wed, 11 Nov 2020 10:41:17 +0000 Subject: arm64: tegra: Correct the UART for Jetson Xavier NX The Jetson Xavier NX board routes UARTA to the 40-pin header and UARTC to a 12-pin debug header. The UARTs can be used by either the Tegra Combined UART (TCU) driver or the Tegra 8250 driver. By default, the TCU will use UARTC on Jetson Xavier NX. Currently, device-tree for Xavier NX enables the TCU and the Tegra 8250 node for UARTC. Fix this by disabling the Tegra 8250 node for UARTC and enabling the Tegra 8250 node for UARTA. Fixes: 3f9efbbe57bc ("arm64: tegra: Add support for Jetson Xavier NX") Cc: stable@vger.kernel.org Signed-off-by: Jon Hunter <jonathanh@nvidia.com> Signed-off-by: Thierry Reding <treding@nvidia.com> --- arch/arm64/boot/dts/nvidia/tegra194-p3668-0000.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/boot/dts/nvidia/tegra194-p3668-0000.dtsi b/arch/arm64/boot/dts/nvidia/tegra194-p3668-0000.dtsi index a2893be80507..0dc8304a2edd 100644 --- a/arch/arm64/boot/dts/nvidia/tegra194-p3668-0000.dtsi +++ b/arch/arm64/boot/dts/nvidia/tegra194-p3668-0000.dtsi @@ -54,7 +54,7 @@ status = "okay"; }; - serial@c280000 { + serial@3100000 { status = "okay"; }; -- cgit v1.2.3-70-g09d2 From f24a2acc15bcc7bbd295f9759efc873b88fbe429 Mon Sep 17 00:00:00 2001 From: JC Kuo <jckuo@nvidia.com> Date: Thu, 19 Nov 2020 15:23:45 +0800 Subject: arm64: tegra: Fix USB_VBUS_EN0 regulator on Jetson TX1 USB host mode is broken on the OTG port of Jetson TX1 platform because the USB_VBUS_EN0 regulator (regulator@11) is being overwritten by the vdd-cam-1v2 regulator. This commit rearranges USB_VBUS_EN0 to be regulator@14. Fixes: 257c8047be44 ("arm64: tegra: jetson-tx1: Add camera supplies") Cc: stable@vger.kernel.org Signed-off-by: JC Kuo <jckuo@nvidia.com> Reviewed-by: Jon Hunter <jonathanh@nvidia.com> Signed-off-by: Thierry Reding <treding@nvidia.com> --- arch/arm64/boot/dts/nvidia/tegra210-p2597.dtsi | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/arm64/boot/dts/nvidia/tegra210-p2597.dtsi b/arch/arm64/boot/dts/nvidia/tegra210-p2597.dtsi index e18e1a9a3011..a9caaf7c0d67 100644 --- a/arch/arm64/boot/dts/nvidia/tegra210-p2597.dtsi +++ b/arch/arm64/boot/dts/nvidia/tegra210-p2597.dtsi @@ -1663,16 +1663,6 @@ vin-supply = <&vdd_5v0_sys>; }; - vdd_usb_vbus_otg: regulator@11 { - compatible = "regulator-fixed"; - regulator-name = "USB_VBUS_EN0"; - regulator-min-microvolt = <5000000>; - regulator-max-microvolt = <5000000>; - gpio = <&gpio TEGRA_GPIO(CC, 4) GPIO_ACTIVE_HIGH>; - enable-active-high; - vin-supply = <&vdd_5v0_sys>; - }; - vdd_hdmi: regulator@10 { compatible = "regulator-fixed"; regulator-name = "VDD_HDMI_5V0"; @@ -1712,4 +1702,14 @@ enable-active-high; vin-supply = <&vdd_3v3_sys>; }; + + vdd_usb_vbus_otg: regulator@14 { + compatible = "regulator-fixed"; + regulator-name = "USB_VBUS_EN0"; + regulator-min-microvolt = <5000000>; + regulator-max-microvolt = <5000000>; + gpio = <&gpio TEGRA_GPIO(CC, 4) GPIO_ACTIVE_HIGH>; + enable-active-high; + vin-supply = <&vdd_5v0_sys>; + }; }; -- cgit v1.2.3-70-g09d2 From 1741e18737948c140ccc4cc643e8126d95ee6e79 Mon Sep 17 00:00:00 2001 From: Dipen Patel <dipenp@nvidia.com> Date: Fri, 11 Sep 2020 19:26:45 -0700 Subject: arm64: tegra: Wrong AON HSP reg property size The AON HSP node's "reg" property size 0xa0000 will overlap with other resources. This patch fixes that wrong value with correct size 0x90000. Reviewed-by: Mikko Perttunen <mperttunen@nvidia.com> Signed-off-by: Dipen Patel <dipenp@nvidia.com> Fixes: a38570c22e9d ("arm64: tegra: Add nodes for TCU on Tegra194") Signed-off-by: Thierry Reding <treding@nvidia.com> --- arch/arm64/boot/dts/nvidia/tegra194.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/boot/dts/nvidia/tegra194.dtsi b/arch/arm64/boot/dts/nvidia/tegra194.dtsi index e9c90f0f44ff..93438d2b9469 100644 --- a/arch/arm64/boot/dts/nvidia/tegra194.dtsi +++ b/arch/arm64/boot/dts/nvidia/tegra194.dtsi @@ -1161,7 +1161,7 @@ hsp_aon: hsp@c150000 { compatible = "nvidia,tegra194-hsp", "nvidia,tegra186-hsp"; - reg = <0x0c150000 0xa0000>; + reg = <0x0c150000 0x90000>; interrupts = <GIC_SPI 133 IRQ_TYPE_LEVEL_HIGH>, <GIC_SPI 134 IRQ_TYPE_LEVEL_HIGH>, <GIC_SPI 135 IRQ_TYPE_LEVEL_HIGH>, -- cgit v1.2.3-70-g09d2 From d98bccf10dd0f36cabee71a425381fce0908de3b Mon Sep 17 00:00:00 2001 From: Jon Hunter <jonathanh@nvidia.com> Date: Wed, 18 Nov 2020 16:04:58 +0000 Subject: arm64: tegra: Fix Tegra234 VDK node names When the device-tree board file was added for the Tegra234 VDK simulator it incorrectly used the names 'cbb' and 'sdhci' instead of 'bus' and 'mmc', respectively. The names 'bus' and 'mmc' are required by the device-tree json-schema validation tools. Therefore, fix this by renaming these nodes accordingly. Fixes: 639448912ba1 ("arm64: tegra: Initial Tegra234 VDK support") Reported-by: Ashish Singhal <ashishsingha@nvidia.com> Signed-off-by: Jon Hunter <jonathanh@nvidia.com> Signed-off-by: Thierry Reding <treding@nvidia.com> --- arch/arm64/boot/dts/nvidia/tegra234-sim-vdk.dts | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/arm64/boot/dts/nvidia/tegra234-sim-vdk.dts b/arch/arm64/boot/dts/nvidia/tegra234-sim-vdk.dts index f6e6a24829af..b5d9a5526272 100644 --- a/arch/arm64/boot/dts/nvidia/tegra234-sim-vdk.dts +++ b/arch/arm64/boot/dts/nvidia/tegra234-sim-vdk.dts @@ -8,7 +8,7 @@ compatible = "nvidia,tegra234-vdk", "nvidia,tegra234"; aliases { - sdhci3 = "/cbb@0/sdhci@3460000"; + mmc3 = "/bus@0/mmc@3460000"; serial0 = &uarta; }; @@ -17,12 +17,12 @@ stdout-path = "serial0:115200n8"; }; - cbb@0 { + bus@0 { serial@3100000 { status = "okay"; }; - sdhci@3460000 { + mmc@3460000 { status = "okay"; bus-width = <8>; non-removable; -- cgit v1.2.3-70-g09d2 From e553fdc8105ac2ef3f321739da3908bb6673f7de Mon Sep 17 00:00:00 2001 From: Nathan Chancellor <natechancellor@gmail.com> Date: Sun, 8 Nov 2020 13:37:37 -0700 Subject: riscv: Explicitly specify the build id style in vDSO Makefile again Commit a96843372331 ("kbuild: explicitly specify the build id style") explicitly set the build ID style to SHA1. Commit c2c81bb2f691 ("RISC-V: Fix the VDSO symbol generaton for binutils-2.35+") undid this change, likely unintentionally. Restore it so that the build ID style stays consistent across the tree regardless of linker. Fixes: c2c81bb2f691 ("RISC-V: Fix the VDSO symbol generaton for binutils-2.35+") Signed-off-by: Nathan Chancellor <natechancellor@gmail.com> Reviewed-by: Nick Desaulniers <ndesaulniers@google.com> Reviewed-by: Bill Wendling <morbo@google.com> Signed-off-by: Palmer Dabbelt <palmerdabbelt@google.com> --- arch/riscv/kernel/vdso/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/riscv/kernel/vdso/Makefile b/arch/riscv/kernel/vdso/Makefile index cb8f9e4cfcbf..0cfd6da784f8 100644 --- a/arch/riscv/kernel/vdso/Makefile +++ b/arch/riscv/kernel/vdso/Makefile @@ -44,7 +44,7 @@ SYSCFLAGS_vdso.so.dbg = $(c_flags) $(obj)/vdso.so.dbg: $(src)/vdso.lds $(obj-vdso) FORCE $(call if_changed,vdsold) SYSCFLAGS_vdso.so.dbg = -shared -s -Wl,-soname=linux-vdso.so.1 \ - -Wl,--build-id -Wl,--hash-style=both + -Wl,--build-id=sha1 -Wl,--hash-style=both # We also create a special relocatable object that should mirror the symbol # table and layout of the linked DSO. With ld --just-symbols we can then -- cgit v1.2.3-70-g09d2 From 6134b110f97178d6919441a82dc91a7f3664b4e0 Mon Sep 17 00:00:00 2001 From: Anup Patel <anup.patel@wdc.com> Date: Fri, 6 Nov 2020 13:23:59 +0530 Subject: RISC-V: Add missing jump label initialization The jump_label_init() should be called from setup_arch() very early for proper functioning of jump label support. Fixes: ebc00dde8a97 ("riscv: Add jump-label implementation") Signed-off-by: Anup Patel <anup.patel@wdc.com> Signed-off-by: Palmer Dabbelt <palmerdabbelt@google.com> --- arch/riscv/kernel/setup.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c index c424cc6dd833..117f3212a8e4 100644 --- a/arch/riscv/kernel/setup.c +++ b/arch/riscv/kernel/setup.c @@ -75,6 +75,7 @@ void __init setup_arch(char **cmdline_p) *cmdline_p = boot_command_line; early_ioremap_setup(); + jump_label_init(); parse_early_param(); efi_init(); -- cgit v1.2.3-70-g09d2 From 30aca1bacb398dec6c1ed5eeca33f355bd7b6203 Mon Sep 17 00:00:00 2001 From: Randy Dunlap <rdunlap@infradead.org> Date: Mon, 16 Nov 2020 17:39:51 -0800 Subject: RISC-V: fix barrier() use in <vdso/processor.h> riscv's <vdso/processor.h> uses barrier() so it should include <asm/barrier.h> Fixes this build error: CC [M] drivers/net/ethernet/emulex/benet/be_main.o In file included from ./include/vdso/processor.h:10, from ./arch/riscv/include/asm/processor.h:11, from ./include/linux/prefetch.h:15, from drivers/net/ethernet/emulex/benet/be_main.c:14: ./arch/riscv/include/asm/vdso/processor.h: In function 'cpu_relax': ./arch/riscv/include/asm/vdso/processor.h:14:2: error: implicit declaration of function 'barrier' [-Werror=implicit-function-declaration] 14 | barrier(); This happens with a total of 5 networking drivers -- they all use <linux/prefetch.h>. rv64 allmodconfig now builds cleanly after this patch. Fixes fallout from: 815f0ddb346c ("include/linux/compiler*.h: make compiler-*.h mutually exclusive") Fixes: ad5d1122b82f ("riscv: use vDSO common flow to reduce the latency of the time-related functions") Reported-by: Andreas Schwab <schwab@linux-m68k.org> Signed-off-by: Randy Dunlap <rdunlap@infradead.org> Acked-by: Arvind Sankar <nivedita@alum.mit.edu> Signed-off-by: Palmer Dabbelt <palmerdabbelt@google.com> Reviewed-by: Nick Desaulniers <ndesaulniers@google.com> Signed-off-by: Palmer Dabbelt <palmerdabbelt@google.com> --- arch/riscv/include/asm/vdso/processor.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/riscv/include/asm/vdso/processor.h b/arch/riscv/include/asm/vdso/processor.h index 82a5693b1861..134388cbaaa1 100644 --- a/arch/riscv/include/asm/vdso/processor.h +++ b/arch/riscv/include/asm/vdso/processor.h @@ -4,6 +4,8 @@ #ifndef __ASSEMBLY__ +#include <asm/barrier.h> + static inline void cpu_relax(void) { #ifdef __riscv_muldiv -- cgit v1.2.3-70-g09d2 From 33fc379df76b4991e5ae312f07bcd6820811971e Mon Sep 17 00:00:00 2001 From: Anand K Mistry <amistry@google.com> Date: Tue, 10 Nov 2020 12:33:53 +1100 Subject: x86/speculation: Fix prctl() when spectre_v2_user={seccomp,prctl},ibpb When spectre_v2_user={seccomp,prctl},ibpb is specified on the command line, IBPB is force-enabled and STIPB is conditionally-enabled (or not available). However, since 21998a351512 ("x86/speculation: Avoid force-disabling IBPB based on STIBP and enhanced IBRS.") the spectre_v2_user_ibpb variable is set to SPECTRE_V2_USER_{PRCTL,SECCOMP} instead of SPECTRE_V2_USER_STRICT, which is the actual behaviour. Because the issuing of IBPB relies on the switch_mm_*_ibpb static branches, the mitigations behave as expected. Since 1978b3a53a74 ("x86/speculation: Allow IBPB to be conditionally enabled on CPUs with always-on STIBP") this discrepency caused the misreporting of IB speculation via prctl(). On CPUs with STIBP always-on and spectre_v2_user=seccomp,ibpb, prctl(PR_GET_SPECULATION_CTRL) would return PR_SPEC_PRCTL | PR_SPEC_ENABLE instead of PR_SPEC_DISABLE since both IBPB and STIPB are always on. It also allowed prctl(PR_SET_SPECULATION_CTRL) to set the IB speculation mode, even though the flag is ignored. Similarly, for CPUs without SMT, prctl(PR_GET_SPECULATION_CTRL) should also return PR_SPEC_DISABLE since IBPB is always on and STIBP is not available. [ bp: Massage commit message. ] Fixes: 21998a351512 ("x86/speculation: Avoid force-disabling IBPB based on STIBP and enhanced IBRS.") Fixes: 1978b3a53a74 ("x86/speculation: Allow IBPB to be conditionally enabled on CPUs with always-on STIBP") Signed-off-by: Anand K Mistry <amistry@google.com> Signed-off-by: Borislav Petkov <bp@suse.de> Cc: <stable@vger.kernel.org> Link: https://lkml.kernel.org/r/20201110123349.1.Id0cbf996d2151f4c143c90f9028651a5b49a5908@changeid --- arch/x86/kernel/cpu/bugs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 581fb7223ad0..d41b70fe4918 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -739,11 +739,13 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd) if (boot_cpu_has(X86_FEATURE_IBPB)) { setup_force_cpu_cap(X86_FEATURE_USE_IBPB); + spectre_v2_user_ibpb = mode; switch (cmd) { case SPECTRE_V2_USER_CMD_FORCE: case SPECTRE_V2_USER_CMD_PRCTL_IBPB: case SPECTRE_V2_USER_CMD_SECCOMP_IBPB: static_branch_enable(&switch_mm_always_ibpb); + spectre_v2_user_ibpb = SPECTRE_V2_USER_STRICT; break; case SPECTRE_V2_USER_CMD_PRCTL: case SPECTRE_V2_USER_CMD_AUTO: @@ -757,8 +759,6 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd) pr_info("mitigation: Enabling %s Indirect Branch Prediction Barrier\n", static_key_enabled(&switch_mm_always_ibpb) ? "always-on" : "conditional"); - - spectre_v2_user_ibpb = mode; } /* -- cgit v1.2.3-70-g09d2 From 5844cc25fd121074de7895181a2fa1ce100a0fdd Mon Sep 17 00:00:00 2001 From: Nicholas Piggin <npiggin@gmail.com> Date: Thu, 26 Nov 2020 20:25:27 +1000 Subject: powerpc/64s: Fix hash ISA v3.0 TLBIEL instruction generation A typo has the R field of the instruction assigned by lucky dip a la register allocator. Fixes: d4748276ae14c ("powerpc/64s: Improve local TLB flush for boot and MCE on POWER9") Signed-off-by: Nicholas Piggin <npiggin@gmail.com> Reviewed-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/20201126102530.691335-2-npiggin@gmail.com --- arch/powerpc/mm/book3s64/hash_native.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/mm/book3s64/hash_native.c b/arch/powerpc/mm/book3s64/hash_native.c index 0203cdf48c54..97fa42d7027e 100644 --- a/arch/powerpc/mm/book3s64/hash_native.c +++ b/arch/powerpc/mm/book3s64/hash_native.c @@ -68,7 +68,7 @@ static __always_inline void tlbiel_hash_set_isa300(unsigned int set, unsigned in rs = ((unsigned long)pid << PPC_BITLSHIFT(31)); asm volatile(PPC_TLBIEL(%0, %1, %2, %3, %4) - : : "r"(rb), "r"(rs), "i"(ric), "i"(prs), "r"(r) + : : "r"(rb), "r"(rs), "i"(ric), "i"(prs), "i"(r) : "memory"); } -- cgit v1.2.3-70-g09d2 From c0b27c517acf8a2b359dd373a7e7e88b01a8308e Mon Sep 17 00:00:00 2001 From: Nicholas Piggin <npiggin@gmail.com> Date: Thu, 26 Nov 2020 20:25:28 +1000 Subject: powerpc/64s/pseries: Fix hash tlbiel_all_isa300 for guest kernels tlbiel_all() can not be usable in !HVMODE when running hash presently, remove HV privileged flushes when running in guest to make it usable. Signed-off-by: Nicholas Piggin <npiggin@gmail.com> Reviewed-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/20201126102530.691335-3-npiggin@gmail.com --- arch/powerpc/mm/book3s64/hash_native.c | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/mm/book3s64/hash_native.c b/arch/powerpc/mm/book3s64/hash_native.c index 97fa42d7027e..52e170bd95ae 100644 --- a/arch/powerpc/mm/book3s64/hash_native.c +++ b/arch/powerpc/mm/book3s64/hash_native.c @@ -92,16 +92,15 @@ static void tlbiel_all_isa300(unsigned int num_sets, unsigned int is) asm volatile("ptesync": : :"memory"); /* - * Flush the first set of the TLB, and any caching of partition table - * entries. Then flush the remaining sets of the TLB. Hash mode uses - * partition scoped TLB translations. + * Flush the partition table cache if this is HV mode. */ - tlbiel_hash_set_isa300(0, is, 0, 2, 0); - for (set = 1; set < num_sets; set++) - tlbiel_hash_set_isa300(set, is, 0, 0, 0); + if (early_cpu_has_feature(CPU_FTR_HVMODE)) + tlbiel_hash_set_isa300(0, is, 0, 2, 0); /* - * Now invalidate the process table cache. + * Now invalidate the process table cache. UPRT=0 HPT modes (what + * current hardware implements) do not use the process table, but + * add the flushes anyway. * * From ISA v3.0B p. 1078: * The following forms are invalid. @@ -110,6 +109,14 @@ static void tlbiel_all_isa300(unsigned int num_sets, unsigned int is) */ tlbiel_hash_set_isa300(0, is, 0, 2, 1); + /* + * Then flush the sets of the TLB proper. Hash mode uses + * partition scoped TLB translations, which may be flushed + * in !HV mode. + */ + for (set = 0; set < num_sets; set++) + tlbiel_hash_set_isa300(set, is, 0, 0, 0); + ppc_after_tlbiel_barrier(); asm volatile(PPC_ISA_3_0_INVALIDATE_ERAT "; isync" : : :"memory"); -- cgit v1.2.3-70-g09d2 From 01b0f0eae0812e80efeee4ee17687e5386335e08 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin <npiggin@gmail.com> Date: Thu, 26 Nov 2020 20:25:30 +1000 Subject: powerpc/64s: Trim offlined CPUs from mm_cpumasks When offlining a CPU, powerpc/64s does not flush TLBs, rather it just leaves the CPU set in mm_cpumasks, so it continues to receive TLBIEs to manage its TLBs. However the exit_flush_lazy_tlbs() function expects that after returning, all CPUs (except self) have flushed TLBs for that mm, in which case TLBIEL can be used for this flush. This breaks for offline CPUs because they don't get the IPI to flush their TLB. This can lead to stale translations. Fix this by clearing the CPU from mm_cpumasks, then flushing all TLBs before going offline. These offlined CPU bits stuck in the cpumask also prevents the cpumask from being trimmed back to local mode, which means continual broadcast IPIs or TLBIEs are needed for TLB flushing. This patch prevents that situation too. A cast of many were involved in working this out, but in particular Milton, Aneesh, Paul made key discoveries. Fixes: 0cef77c7798a7 ("powerpc/64s/radix: flush remote CPUs out of single-threaded mm_cpumask") Signed-off-by: Nicholas Piggin <npiggin@gmail.com> Reviewed-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com> Debugged-by: Milton Miller <miltonm@us.ibm.com> Debugged-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com> Debugged-by: Paul Mackerras <paulus@samba.org> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/20201126102530.691335-5-npiggin@gmail.com --- arch/powerpc/include/asm/book3s/64/mmu.h | 12 ++++++++++++ arch/powerpc/mm/book3s64/mmu_context.c | 20 ++++++++++++++++++++ arch/powerpc/platforms/powermac/smp.c | 2 ++ arch/powerpc/platforms/powernv/smp.c | 3 +++ arch/powerpc/platforms/pseries/hotplug-cpu.c | 3 +++ 5 files changed, 40 insertions(+) (limited to 'arch') diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h index e0b52940e43c..750918451dd2 100644 --- a/arch/powerpc/include/asm/book3s/64/mmu.h +++ b/arch/powerpc/include/asm/book3s/64/mmu.h @@ -242,6 +242,18 @@ extern void radix_init_pseries(void); static inline void radix_init_pseries(void) { }; #endif +#ifdef CONFIG_HOTPLUG_CPU +#define arch_clear_mm_cpumask_cpu(cpu, mm) \ + do { \ + if (cpumask_test_cpu(cpu, mm_cpumask(mm))) { \ + atomic_dec(&(mm)->context.active_cpus); \ + cpumask_clear_cpu(cpu, mm_cpumask(mm)); \ + } \ + } while (0) + +void cleanup_cpu_mmu_context(void); +#endif + static inline int get_user_context(mm_context_t *ctx, unsigned long ea) { int index = ea >> MAX_EA_BITS_PER_CONTEXT; diff --git a/arch/powerpc/mm/book3s64/mmu_context.c b/arch/powerpc/mm/book3s64/mmu_context.c index 1c54821de7bf..0c8557220ae2 100644 --- a/arch/powerpc/mm/book3s64/mmu_context.c +++ b/arch/powerpc/mm/book3s64/mmu_context.c @@ -17,6 +17,7 @@ #include <linux/export.h> #include <linux/gfp.h> #include <linux/slab.h> +#include <linux/cpu.h> #include <asm/mmu_context.h> #include <asm/pgalloc.h> @@ -307,3 +308,22 @@ void radix__switch_mmu_context(struct mm_struct *prev, struct mm_struct *next) isync(); } #endif + +/** + * cleanup_cpu_mmu_context - Clean up MMU details for this CPU (newly offlined) + * + * This clears the CPU from mm_cpumask for all processes, and then flushes the + * local TLB to ensure TLB coherency in case the CPU is onlined again. + * + * KVM guest translations are not necessarily flushed here. If KVM started + * using mm_cpumask or the Linux APIs which do, this would have to be resolved. + */ +#ifdef CONFIG_HOTPLUG_CPU +void cleanup_cpu_mmu_context(void) +{ + int cpu = smp_processor_id(); + + clear_tasks_mm_cpumask(cpu); + tlbiel_all(); +} +#endif diff --git a/arch/powerpc/platforms/powermac/smp.c b/arch/powerpc/platforms/powermac/smp.c index 74ebe664b016..adae2a6712e1 100644 --- a/arch/powerpc/platforms/powermac/smp.c +++ b/arch/powerpc/platforms/powermac/smp.c @@ -911,6 +911,8 @@ static int smp_core99_cpu_disable(void) mpic_cpu_set_priority(0xf); + cleanup_cpu_mmu_context(); + return 0; } diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c index 54c4ba45c7ce..cbb67813cd5d 100644 --- a/arch/powerpc/platforms/powernv/smp.c +++ b/arch/powerpc/platforms/powernv/smp.c @@ -143,6 +143,9 @@ static int pnv_smp_cpu_disable(void) xive_smp_disable_cpu(); else xics_migrate_irqs_away(); + + cleanup_cpu_mmu_context(); + return 0; } diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c index f2837e33bf5d..a02012f1b04a 100644 --- a/arch/powerpc/platforms/pseries/hotplug-cpu.c +++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c @@ -90,6 +90,9 @@ static int pseries_cpu_disable(void) xive_smp_disable_cpu(); else xics_migrate_irqs_away(); + + cleanup_cpu_mmu_context(); + return 0; } -- cgit v1.2.3-70-g09d2 From 10f78fd0dabbc3856ddd67b09a46abdedb045913 Mon Sep 17 00:00:00 2001 From: Srikar Dronamraju <srikar@linux.vnet.ibm.com> Date: Fri, 27 Nov 2020 11:07:38 +0530 Subject: powerpc/numa: Fix a regression on memoryless node 0 Commit e75130f20b1f ("powerpc/numa: Offline memoryless cpuless node 0") offlines node 0 and expects nodes to be subsequently onlined when CPUs or nodes are detected. Commit 6398eaa26816 ("powerpc/numa: Prefer node id queried from vphn") skips onlining node 0 when CPUs are associated with node 0. On systems with node 0 having CPUs but no memory, this causes node 0 be marked offline. This causes issues at boot time when trying to set memory node for online CPUs while building the zonelist. 0:mon> t [link register ] c000000000400354 __build_all_zonelists+0x164/0x280 [c00000000161bda0] c0000000016533c8 node_states+0x20/0xa0 (unreliable) [c00000000161bdc0] c000000000400384 __build_all_zonelists+0x194/0x280 [c00000000161be30] c000000001041800 build_all_zonelists_init+0x4c/0x118 [c00000000161be80] c0000000004020d0 build_all_zonelists+0x190/0x1b0 [c00000000161bef0] c000000001003cf8 start_kernel+0x18c/0x6a8 [c00000000161bf90] c00000000000adb4 start_here_common+0x1c/0x3e8 0:mon> r R00 = c000000000400354 R16 = 000000000b57a0e8 R01 = c00000000161bda0 R17 = 000000000b57a6b0 R02 = c00000000161ce00 R18 = 000000000b5afee8 R03 = 0000000000000000 R19 = 000000000b6448a0 R04 = 0000000000000000 R20 = fffffffffffffffd R05 = 0000000000000000 R21 = 0000000001400000 R06 = 0000000000000000 R22 = 000000001ec00000 R07 = 0000000000000001 R23 = c000000001175580 R08 = 0000000000000000 R24 = c000000001651ed8 R09 = c0000000017e84d8 R25 = c000000001652480 R10 = 0000000000000000 R26 = c000000001175584 R11 = c000000c7fac0d10 R27 = c0000000019568d0 R12 = c000000000400180 R28 = 0000000000000000 R13 = c000000002200000 R29 = c00000000164dd78 R14 = 000000000b579f78 R30 = 0000000000000000 R15 = 000000000b57a2b8 R31 = c000000001175584 pc = c000000000400194 local_memory_node+0x24/0x80 cfar= c000000000074334 mcount+0xc/0x10 lr = c000000000400354 __build_all_zonelists+0x164/0x280 msr = 8000000002001033 cr = 44002284 ctr = c000000000400180 xer = 0000000000000001 trap = 380 dar = 0000000000001388 dsisr = c00000000161bc90 0:mon> Fix this by setting node to be online while onlining CPUs that belong to node 0. Fixes: e75130f20b1f ("powerpc/numa: Offline memoryless cpuless node 0") Fixes: 6398eaa26816 ("powerpc/numa: Prefer node id queried from vphn") Reported-by: Milan Mohanty <milmohan@in.ibm.com> Signed-off-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/20201127053738.10085-1-srikar@linux.vnet.ibm.com --- arch/powerpc/mm/numa.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 63f61d8b55e5..f2bf98bdcea2 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -742,8 +742,7 @@ static int __init parse_numa_properties(void) of_node_put(cpu); } - if (likely(nid > 0)) - node_set_online(nid); + node_set_online(nid); } get_n_mem_cells(&n_mem_addr_cells, &n_mem_size_cells); -- cgit v1.2.3-70-g09d2 From 72c3bcdcda494cbd600712a32e67702cdee60c07 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini <pbonzini@redhat.com> Date: Fri, 27 Nov 2020 08:53:52 +0100 Subject: KVM: x86: handle !lapic_in_kernel case in kvm_cpu_*_extint Centralize handling of interrupts from the userspace APIC in kvm_cpu_has_extint and kvm_cpu_get_extint, since userspace APIC interrupts are handled more or less the same as ExtINTs are with split irqchip. This removes duplicated code from kvm_cpu_has_injectable_intr and kvm_cpu_has_interrupt, and makes the code more similar between kvm_cpu_has_{extint,interrupt} on one side and kvm_cpu_get_{extint,interrupt} on the other. Cc: stable@vger.kernel.org Reviewed-by: Filippo Sironi <sironi@amazon.de> Reviewed-by: David Woodhouse <dwmw@amazon.co.uk> Tested-by: David Woodhouse <dwmw@amazon.co.uk> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> --- arch/x86/kvm/irq.c | 83 +++++++++++++++++++++------------------------------- arch/x86/kvm/lapic.c | 2 +- 2 files changed, 34 insertions(+), 51 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c index 99d118ffc67d..ee94671272e3 100644 --- a/arch/x86/kvm/irq.c +++ b/arch/x86/kvm/irq.c @@ -41,28 +41,9 @@ static int pending_userspace_extint(struct kvm_vcpu *v) * non-APIC source without intack. */ static int kvm_cpu_has_extint(struct kvm_vcpu *v) -{ - u8 accept = kvm_apic_accept_pic_intr(v); - - if (accept) { - if (irqchip_split(v->kvm)) - return pending_userspace_extint(v); - else - return v->kvm->arch.vpic->output; - } else - return 0; -} - -/* - * check if there is injectable interrupt: - * when virtual interrupt delivery enabled, - * interrupt from apic will handled by hardware, - * we don't need to check it here. - */ -int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v) { /* - * FIXME: interrupt.injected represents an interrupt that it's + * FIXME: interrupt.injected represents an interrupt whose * side-effects have already been applied (e.g. bit from IRR * already moved to ISR). Therefore, it is incorrect to rely * on interrupt.injected to know if there is a pending @@ -75,6 +56,23 @@ int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v) if (!lapic_in_kernel(v)) return v->arch.interrupt.injected; + if (!kvm_apic_accept_pic_intr(v)) + return 0; + + if (irqchip_split(v->kvm)) + return pending_userspace_extint(v); + else + return v->kvm->arch.vpic->output; +} + +/* + * check if there is injectable interrupt: + * when virtual interrupt delivery enabled, + * interrupt from apic will handled by hardware, + * we don't need to check it here. + */ +int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v) +{ if (kvm_cpu_has_extint(v)) return 1; @@ -91,20 +89,6 @@ EXPORT_SYMBOL_GPL(kvm_cpu_has_injectable_intr); */ int kvm_cpu_has_interrupt(struct kvm_vcpu *v) { - /* - * FIXME: interrupt.injected represents an interrupt that it's - * side-effects have already been applied (e.g. bit from IRR - * already moved to ISR). Therefore, it is incorrect to rely - * on interrupt.injected to know if there is a pending - * interrupt in the user-mode LAPIC. - * This leads to nVMX/nSVM not be able to distinguish - * if it should exit from L2 to L1 on EXTERNAL_INTERRUPT on - * pending interrupt or should re-inject an injected - * interrupt. - */ - if (!lapic_in_kernel(v)) - return v->arch.interrupt.injected; - if (kvm_cpu_has_extint(v)) return 1; @@ -118,16 +102,21 @@ EXPORT_SYMBOL_GPL(kvm_cpu_has_interrupt); */ static int kvm_cpu_get_extint(struct kvm_vcpu *v) { - if (kvm_cpu_has_extint(v)) { - if (irqchip_split(v->kvm)) { - int vector = v->arch.pending_external_vector; - - v->arch.pending_external_vector = -1; - return vector; - } else - return kvm_pic_read_irq(v->kvm); /* PIC */ - } else + if (!kvm_cpu_has_extint(v)) { + WARN_ON(!lapic_in_kernel(v)); return -1; + } + + if (!lapic_in_kernel(v)) + return v->arch.interrupt.nr; + + if (irqchip_split(v->kvm)) { + int vector = v->arch.pending_external_vector; + + v->arch.pending_external_vector = -1; + return vector; + } else + return kvm_pic_read_irq(v->kvm); /* PIC */ } /* @@ -135,13 +124,7 @@ static int kvm_cpu_get_extint(struct kvm_vcpu *v) */ int kvm_cpu_get_interrupt(struct kvm_vcpu *v) { - int vector; - - if (!lapic_in_kernel(v)) - return v->arch.interrupt.nr; - - vector = kvm_cpu_get_extint(v); - + int vector = kvm_cpu_get_extint(v); if (vector != -1) return vector; /* PIC */ diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 105e7859d1f2..86c33d53c90a 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -2465,7 +2465,7 @@ int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu) struct kvm_lapic *apic = vcpu->arch.apic; u32 ppr; - if (!kvm_apic_hw_enabled(apic)) + if (!kvm_apic_present(vcpu)) return -1; __apic_update_ppr(apic, &ppr); -- cgit v1.2.3-70-g09d2 From 71cc849b7093bb83af966c0e60cb11b7f35cd746 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini <pbonzini@redhat.com> Date: Fri, 27 Nov 2020 09:18:20 +0100 Subject: KVM: x86: Fix split-irqchip vs interrupt injection window request kvm_cpu_accept_dm_intr and kvm_vcpu_ready_for_interrupt_injection are a hodge-podge of conditions, hacked together to get something that more or less works. But what is actually needed is much simpler; in both cases the fundamental question is, do we have a place to stash an interrupt if userspace does KVM_INTERRUPT? In userspace irqchip mode, that is !vcpu->arch.interrupt.injected. Currently kvm_event_needs_reinjection(vcpu) covers it, but it is unnecessarily restrictive. In split irqchip mode it's a bit more complicated, we need to check kvm_apic_accept_pic_intr(vcpu) (the IRQ window exit is basically an INTACK cycle and thus requires ExtINTs not to be masked) as well as !pending_userspace_extint(vcpu). However, there is no need to check kvm_event_needs_reinjection(vcpu), since split irqchip keeps pending ExtINT state separate from event injection state, and checking kvm_cpu_has_interrupt(vcpu) is wrong too since ExtINT has higher priority than APIC interrupts. In fact the latter fixes a bug: when userspace requests an IRQ window vmexit, an interrupt in the local APIC can cause kvm_cpu_has_interrupt() to be true and thus kvm_vcpu_ready_for_interrupt_injection() to return false. When this happens, vcpu_run does not exit to userspace but the interrupt window vmexits keep occurring. The VM loops without any hope of making progress. Once we try to fix these with something like return kvm_arch_interrupt_allowed(vcpu) && - !kvm_cpu_has_interrupt(vcpu) && - !kvm_event_needs_reinjection(vcpu) && - kvm_cpu_accept_dm_intr(vcpu); + (!lapic_in_kernel(vcpu) + ? !vcpu->arch.interrupt.injected + : (kvm_apic_accept_pic_intr(vcpu) + && !pending_userspace_extint(v))); we realize two things. First, thanks to the previous patch the complex conditional can reuse !kvm_cpu_has_extint(vcpu). Second, the interrupt window request in vcpu_enter_guest() bool req_int_win = dm_request_for_irq_injection(vcpu) && kvm_cpu_accept_dm_intr(vcpu); should be kept in sync with kvm_vcpu_ready_for_interrupt_injection(): it is unnecessary to ask the processor for an interrupt window if we would not be able to return to userspace. Therefore, kvm_cpu_accept_dm_intr(vcpu) is basically !kvm_cpu_has_extint(vcpu) ANDed with the existing check for masked ExtINT. It all makes sense: - we can accept an interrupt from userspace if there is a place to stash it (and, for irqchip split, ExtINTs are not masked). Interrupts from userspace _can_ be accepted even if right now EFLAGS.IF=0. - in order to tell userspace we will inject its interrupt ("IRQ window open" i.e. kvm_vcpu_ready_for_interrupt_injection), both KVM and the vCPU need to be ready to accept the interrupt. ... and this is what the patch implements. Reported-by: David Woodhouse <dwmw@amazon.co.uk> Analyzed-by: David Woodhouse <dwmw@amazon.co.uk> Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> Reviewed-by: Nikos Tsironis <ntsironis@arrikto.com> Reviewed-by: David Woodhouse <dwmw@amazon.co.uk> Tested-by: David Woodhouse <dwmw@amazon.co.uk> --- arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/irq.c | 2 +- arch/x86/kvm/x86.c | 18 ++++++++++-------- 3 files changed, 12 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 324ddd7fd0aa..7e5f33a0d0e2 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1656,6 +1656,7 @@ int kvm_test_age_hva(struct kvm *kvm, unsigned long hva); int kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte); int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v); int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu); +int kvm_cpu_has_extint(struct kvm_vcpu *v); int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu); int kvm_cpu_get_interrupt(struct kvm_vcpu *v); void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event); diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c index ee94671272e3..814698e5b152 100644 --- a/arch/x86/kvm/irq.c +++ b/arch/x86/kvm/irq.c @@ -40,7 +40,7 @@ static int pending_userspace_extint(struct kvm_vcpu *v) * check if there is pending interrupt from * non-APIC source without intack. */ -static int kvm_cpu_has_extint(struct kvm_vcpu *v) +int kvm_cpu_has_extint(struct kvm_vcpu *v) { /* * FIXME: interrupt.injected represents an interrupt whose diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 078a39d489fe..e545a8a613b1 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4051,21 +4051,23 @@ static int kvm_vcpu_ioctl_set_lapic(struct kvm_vcpu *vcpu, static int kvm_cpu_accept_dm_intr(struct kvm_vcpu *vcpu) { + /* + * We can accept userspace's request for interrupt injection + * as long as we have a place to store the interrupt number. + * The actual injection will happen when the CPU is able to + * deliver the interrupt. + */ + if (kvm_cpu_has_extint(vcpu)) + return false; + + /* Acknowledging ExtINT does not happen if LINT0 is masked. */ return (!lapic_in_kernel(vcpu) || kvm_apic_accept_pic_intr(vcpu)); } -/* - * if userspace requested an interrupt window, check that the - * interrupt window is open. - * - * No need to exit to userspace if we already have an interrupt queued. - */ static int kvm_vcpu_ready_for_interrupt_injection(struct kvm_vcpu *vcpu) { return kvm_arch_interrupt_allowed(vcpu) && - !kvm_cpu_has_interrupt(vcpu) && - !kvm_event_needs_reinjection(vcpu) && kvm_cpu_accept_dm_intr(vcpu); } -- cgit v1.2.3-70-g09d2 From 9a2a0d3ca163fc645991804b8b032f7d59326bb5 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov <vkuznets@redhat.com> Date: Thu, 26 Nov 2020 12:02:06 +0100 Subject: kvm: x86/mmu: Fix get_mmio_spte() on CPUs supporting 5-level PT Commit 95fb5b0258b7 ("kvm: x86/mmu: Support MMIO in the TDP MMU") caused the following WARNING on an Intel Ice Lake CPU: get_mmio_spte: detect reserved bits on spte, addr 0xb80a0, dump hierarchy: ------ spte 0xb80a0 level 5. ------ spte 0xfcd210107 level 4. ------ spte 0x1004c40107 level 3. ------ spte 0x1004c41107 level 2. ------ spte 0x1db00000000b83b6 level 1. WARNING: CPU: 109 PID: 10254 at arch/x86/kvm/mmu/mmu.c:3569 kvm_mmu_page_fault.cold.150+0x54/0x22f [kvm] ... Call Trace: ? kvm_io_bus_get_first_dev+0x55/0x110 [kvm] vcpu_enter_guest+0xaa1/0x16a0 [kvm] ? vmx_get_cs_db_l_bits+0x17/0x30 [kvm_intel] ? skip_emulated_instruction+0xaa/0x150 [kvm_intel] kvm_arch_vcpu_ioctl_run+0xca/0x520 [kvm] The guest triggering this crashes. Note, this happens with the traditional MMU and EPT enabled, not with the newly introduced TDP MMU. Turns out, there was a subtle change in the above mentioned commit. Previously, walk_shadow_page_get_mmio_spte() was setting 'root' to 'iterator.level' which is returned by shadow_walk_init() and this equals to 'vcpu->arch.mmu->shadow_root_level'. Now, get_mmio_spte() sets it to 'int root = vcpu->arch.mmu->root_level'. The difference between 'root_level' and 'shadow_root_level' on CPUs supporting 5-level page tables is that in some case we don't want to use 5-level, in particular when 'cpuid_maxphyaddr(vcpu) <= 48' kvm_mmu_get_tdp_level() returns '4'. In case upper layer is not used, the corresponding SPTE will fail '__is_rsvd_bits_set()' check. Revert to using 'shadow_root_level'. Fixes: 95fb5b0258b7 ("kvm: x86/mmu: Support MMIO in the TDP MMU") Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com> Message-Id: <20201126110206.2118959-1-vkuznets@redhat.com> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> --- arch/x86/kvm/mmu/mmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 5bb1939b65d8..7a6ae9e90bd7 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -3517,7 +3517,7 @@ static bool get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep) { u64 sptes[PT64_ROOT_MAX_LEVEL]; struct rsvd_bits_validate *rsvd_check; - int root = vcpu->arch.mmu->root_level; + int root = vcpu->arch.mmu->shadow_root_level; int leaf; int level; bool reserved = false; -- cgit v1.2.3-70-g09d2 From 25bc65d8ddfc17cc1d7a45bd48e9bdc0e729ced3 Mon Sep 17 00:00:00 2001 From: Gabriele Paoloni <gabriele.paoloni@intel.com> Date: Fri, 27 Nov 2020 16:18:15 +0000 Subject: x86/mce: Do not overwrite no_way_out if mce_end() fails Currently, if mce_end() fails, no_way_out - the variable denoting whether the machine can recover from this MCE - is determined by whether the worst severity that was found across the MCA banks associated with the current CPU, is of panic severity. However, at this point no_way_out could have been already set by mca_start() after looking at all severities of all CPUs that entered the MCE handler. If mce_end() fails, check first if no_way_out is already set and, if so, stick to it, otherwise use the local worst value. [ bp: Massage. ] Signed-off-by: Gabriele Paoloni <gabriele.paoloni@intel.com> Signed-off-by: Borislav Petkov <bp@suse.de> Reviewed-by: Tony Luck <tony.luck@intel.com> Cc: <stable@vger.kernel.org> Link: https://lkml.kernel.org/r/20201127161819.3106432-2-gabriele.paoloni@intel.com --- arch/x86/kernel/cpu/mce/core.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c index 4102b866e7c0..32b7099e3511 100644 --- a/arch/x86/kernel/cpu/mce/core.c +++ b/arch/x86/kernel/cpu/mce/core.c @@ -1384,8 +1384,10 @@ noinstr void do_machine_check(struct pt_regs *regs) * When there's any problem use only local no_way_out state. */ if (!lmce) { - if (mce_end(order) < 0) - no_way_out = worst >= MCE_PANIC_SEVERITY; + if (mce_end(order) < 0) { + if (!no_way_out) + no_way_out = worst >= MCE_PANIC_SEVERITY; + } } else { /* * If there was a fatal machine check we should have -- cgit v1.2.3-70-g09d2 From 9ea69a55b3b9a71cded9726af591949c1138f235 Mon Sep 17 00:00:00 2001 From: Laurent Vivier <lvivier@redhat.com> Date: Thu, 26 Nov 2020 09:28:52 +0100 Subject: powerpc/pseries: Pass MSI affinity to irq_create_mapping() With virtio multiqueue, normally each queue IRQ is mapped to a CPU. Commit 0d9f0a52c8b9f ("virtio_scsi: use virtio IRQ affinity") exposed an existing shortcoming of the arch code by moving virtio_scsi to the automatic IRQ affinity assignment. The affinity is correctly computed in msi_desc but this is not applied to the system IRQs. It appears the affinity is correctly passed to rtas_setup_msi_irqs() but lost at this point and never passed to irq_domain_alloc_descs() (see commit 06ee6d571f0e ("genirq: Add affinity hint to irq allocation")) because irq_create_mapping() doesn't take an affinity parameter. Use the new irq_create_mapping_affinity() function, which allows to forward the affinity setting from rtas_setup_msi_irqs() to irq_domain_alloc_descs(). With this change, the virtqueues are correctly dispatched between the CPUs on pseries. Fixes: e75eafb9b039 ("genirq/msi: Switch to new irq spreading infrastructure") Signed-off-by: Laurent Vivier <lvivier@redhat.com> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Reviewed-by: Greg Kurz <groug@kaod.org> Acked-by: Michael Ellerman <mpe@ellerman.id.au> Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20201126082852.1178497-3-lvivier@redhat.com --- arch/powerpc/platforms/pseries/msi.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/platforms/pseries/msi.c b/arch/powerpc/platforms/pseries/msi.c index 133f6adcb39c..b3ac2455faad 100644 --- a/arch/powerpc/platforms/pseries/msi.c +++ b/arch/powerpc/platforms/pseries/msi.c @@ -458,7 +458,8 @@ again: return hwirq; } - virq = irq_create_mapping(NULL, hwirq); + virq = irq_create_mapping_affinity(NULL, hwirq, + entry->affinity); if (!virq) { pr_debug("rtas_msi: Failed mapping hwirq %d\n", hwirq); -- cgit v1.2.3-70-g09d2 From ca1314d73eed493c49bb1932c60a8605530db2e4 Mon Sep 17 00:00:00 2001 From: Mark Rutland <mark.rutland@arm.com> Date: Mon, 30 Nov 2020 11:59:40 +0000 Subject: arm64: syscall: exit userspace before unmasking exceptions In el0_svc_common() we unmask exceptions before we call user_exit(), and so there's a window where an IRQ or debug exception can be taken while RCU is not watching. In do_debug_exception() we account for this in via debug_exception_{enter,exit}(), but in the el1_irq asm we do not and we call trace functions which rely on RCU before we have a guarantee that RCU is watching. Let's avoid this by having el0_svc_common() exit userspace before unmasking exceptions, matching what we do for all other EL0 entry paths. We can use user_exit_irqoff() to avoid the pointless save/restore of IRQ flags while we're sure exceptions are masked in DAIF. The workaround for Cortex-A76 erratum 1463225 may trigger a debug exception before this point, but the debug code invoked in this case is safe even when RCU is not watching. Signed-off-by: Mark Rutland <mark.rutland@arm.com> Cc: Catalin Marinas <catalin.marinas@arm.com> Cc: James Morse <james.morse@arm.com> Cc: Will Deacon <will@kernel.org> Link: https://lore.kernel.org/r/20201130115950.22492-2-mark.rutland@arm.com Signed-off-by: Will Deacon <will@kernel.org> --- arch/arm64/kernel/syscall.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/kernel/syscall.c b/arch/arm64/kernel/syscall.c index e4c0dadf0d92..13fe79f8e2db 100644 --- a/arch/arm64/kernel/syscall.c +++ b/arch/arm64/kernel/syscall.c @@ -120,8 +120,8 @@ static void el0_svc_common(struct pt_regs *regs, int scno, int sc_nr, */ cortex_a76_erratum_1463225_svc_handler(); + user_exit_irqoff(); local_daif_restore(DAIF_PROCCTX); - user_exit(); if (system_supports_mte() && (flags & _TIF_MTE_ASYNC_FAULT)) { /* -- cgit v1.2.3-70-g09d2 From 114e0a684753516ef4b71ccb55a8ebcfa8735edb Mon Sep 17 00:00:00 2001 From: Mark Rutland <mark.rutland@arm.com> Date: Mon, 30 Nov 2020 11:59:41 +0000 Subject: arm64: mark idle code as noinstr Core code disables RCU when calling arch_cpu_idle(), so it's not safe for arch_cpu_idle() or its calees to be instrumented, as the instrumentation callbacks may attempt to use RCU or other features which are unsafe to use in this context. Mark them noinstr to prevent issues. The use of local_irq_enable() in arch_cpu_idle() is similarly problematic, and the "sched/idle: Fix arch_cpu_idle() vs tracing" patch queued in the tip tree addresses that case. Reported-by: Marco Elver <elver@google.com> Signed-off-by: Mark Rutland <mark.rutland@arm.com> Cc: Catalin Marinas <catalin.marinas@arm.com> Cc: James Morse <james.morse@arm.com> Cc: Will Deacon <will@kernel.org> Link: https://lore.kernel.org/r/20201130115950.22492-3-mark.rutland@arm.com Signed-off-by: Will Deacon <will@kernel.org> --- arch/arm64/kernel/process.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index a47a40ec6ad9..0c65a91f8f53 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -72,13 +72,13 @@ EXPORT_SYMBOL_GPL(pm_power_off); void (*arm_pm_restart)(enum reboot_mode reboot_mode, const char *cmd); -static void __cpu_do_idle(void) +static void noinstr __cpu_do_idle(void) { dsb(sy); wfi(); } -static void __cpu_do_idle_irqprio(void) +static void noinstr __cpu_do_idle_irqprio(void) { unsigned long pmr; unsigned long daif_bits; @@ -108,7 +108,7 @@ static void __cpu_do_idle_irqprio(void) * ensure that interrupts are not masked at the PMR (because the core will * not wake up if we block the wake up signal in the interrupt controller). */ -void cpu_do_idle(void) +void noinstr cpu_do_idle(void) { if (system_uses_irq_prio_masking()) __cpu_do_idle_irqprio(); @@ -119,7 +119,7 @@ void cpu_do_idle(void) /* * This is our default idle handler. */ -void arch_cpu_idle(void) +void noinstr arch_cpu_idle(void) { /* * This should do all the clock switching and wait for interrupt -- cgit v1.2.3-70-g09d2 From da192676483232a0a9478c89cdddd412e5167470 Mon Sep 17 00:00:00 2001 From: Mark Rutland <mark.rutland@arm.com> Date: Mon, 30 Nov 2020 11:59:42 +0000 Subject: arm64: entry: mark entry code as noinstr Functions in entry-common.c are marked as notrace and NOKPROBE_SYMBOL(), but they're still subject to other instrumentation which may rely on lockdep/rcu/context-tracking being up-to-date, and may cause nested exceptions (e.g. for WARN/BUG or KASAN's use of BRK) which will corrupt exceptions registers which have not yet been read. Prevent this by marking all functions in entry-common.c as noinstr to prevent compiler instrumentation. This also blacklists the functions for tracing and kprobes, so we don't need to handle that separately. Functions elsewhere will be dealt with in subsequent patches. Signed-off-by: Mark Rutland <mark.rutland@arm.com> Cc: Catalin Marinas <catalin.marinas@arm.com> Cc: James Morse <james.morse@arm.com> Cc: Will Deacon <will@kernel.org> Link: https://lore.kernel.org/r/20201130115950.22492-4-mark.rutland@arm.com Signed-off-by: Will Deacon <will@kernel.org> --- arch/arm64/kernel/entry-common.c | 75 ++++++++++++++-------------------------- 1 file changed, 25 insertions(+), 50 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c index 43d4c329775f..75e99161f79e 100644 --- a/arch/arm64/kernel/entry-common.c +++ b/arch/arm64/kernel/entry-common.c @@ -17,7 +17,7 @@ #include <asm/mmu.h> #include <asm/sysreg.h> -static void notrace el1_abort(struct pt_regs *regs, unsigned long esr) +static void noinstr el1_abort(struct pt_regs *regs, unsigned long esr) { unsigned long far = read_sysreg(far_el1); @@ -25,32 +25,28 @@ static void notrace el1_abort(struct pt_regs *regs, unsigned long esr) far = untagged_addr(far); do_mem_abort(far, esr, regs); } -NOKPROBE_SYMBOL(el1_abort); -static void notrace el1_pc(struct pt_regs *regs, unsigned long esr) +static void noinstr el1_pc(struct pt_regs *regs, unsigned long esr) { unsigned long far = read_sysreg(far_el1); local_daif_inherit(regs); do_sp_pc_abort(far, esr, regs); } -NOKPROBE_SYMBOL(el1_pc); -static void notrace el1_undef(struct pt_regs *regs) +static void noinstr el1_undef(struct pt_regs *regs) { local_daif_inherit(regs); do_undefinstr(regs); } -NOKPROBE_SYMBOL(el1_undef); -static void notrace el1_inv(struct pt_regs *regs, unsigned long esr) +static void noinstr el1_inv(struct pt_regs *regs, unsigned long esr) { local_daif_inherit(regs); bad_mode(regs, 0, esr); } -NOKPROBE_SYMBOL(el1_inv); -static void notrace el1_dbg(struct pt_regs *regs, unsigned long esr) +static void noinstr el1_dbg(struct pt_regs *regs, unsigned long esr) { unsigned long far = read_sysreg(far_el1); @@ -64,16 +60,14 @@ static void notrace el1_dbg(struct pt_regs *regs, unsigned long esr) do_debug_exception(far, esr, regs); } -NOKPROBE_SYMBOL(el1_dbg); -static void notrace el1_fpac(struct pt_regs *regs, unsigned long esr) +static void noinstr el1_fpac(struct pt_regs *regs, unsigned long esr) { local_daif_inherit(regs); do_ptrauth_fault(regs, esr); } -NOKPROBE_SYMBOL(el1_fpac); -asmlinkage void notrace el1_sync_handler(struct pt_regs *regs) +asmlinkage void noinstr el1_sync_handler(struct pt_regs *regs) { unsigned long esr = read_sysreg(esr_el1); @@ -106,9 +100,8 @@ asmlinkage void notrace el1_sync_handler(struct pt_regs *regs) el1_inv(regs, esr); } } -NOKPROBE_SYMBOL(el1_sync_handler); -static void notrace el0_da(struct pt_regs *regs, unsigned long esr) +static void noinstr el0_da(struct pt_regs *regs, unsigned long esr) { unsigned long far = read_sysreg(far_el1); @@ -117,9 +110,8 @@ static void notrace el0_da(struct pt_regs *regs, unsigned long esr) far = untagged_addr(far); do_mem_abort(far, esr, regs); } -NOKPROBE_SYMBOL(el0_da); -static void notrace el0_ia(struct pt_regs *regs, unsigned long esr) +static void noinstr el0_ia(struct pt_regs *regs, unsigned long esr) { unsigned long far = read_sysreg(far_el1); @@ -135,41 +127,36 @@ static void notrace el0_ia(struct pt_regs *regs, unsigned long esr) local_daif_restore(DAIF_PROCCTX); do_mem_abort(far, esr, regs); } -NOKPROBE_SYMBOL(el0_ia); -static void notrace el0_fpsimd_acc(struct pt_regs *regs, unsigned long esr) +static void noinstr el0_fpsimd_acc(struct pt_regs *regs, unsigned long esr) { user_exit_irqoff(); local_daif_restore(DAIF_PROCCTX); do_fpsimd_acc(esr, regs); } -NOKPROBE_SYMBOL(el0_fpsimd_acc); -static void notrace el0_sve_acc(struct pt_regs *regs, unsigned long esr) +static void noinstr el0_sve_acc(struct pt_regs *regs, unsigned long esr) { user_exit_irqoff(); local_daif_restore(DAIF_PROCCTX); do_sve_acc(esr, regs); } -NOKPROBE_SYMBOL(el0_sve_acc); -static void notrace el0_fpsimd_exc(struct pt_regs *regs, unsigned long esr) +static void noinstr el0_fpsimd_exc(struct pt_regs *regs, unsigned long esr) { user_exit_irqoff(); local_daif_restore(DAIF_PROCCTX); do_fpsimd_exc(esr, regs); } -NOKPROBE_SYMBOL(el0_fpsimd_exc); -static void notrace el0_sys(struct pt_regs *regs, unsigned long esr) +static void noinstr el0_sys(struct pt_regs *regs, unsigned long esr) { user_exit_irqoff(); local_daif_restore(DAIF_PROCCTX); do_sysinstr(esr, regs); } -NOKPROBE_SYMBOL(el0_sys); -static void notrace el0_pc(struct pt_regs *regs, unsigned long esr) +static void noinstr el0_pc(struct pt_regs *regs, unsigned long esr) { unsigned long far = read_sysreg(far_el1); @@ -180,41 +167,36 @@ static void notrace el0_pc(struct pt_regs *regs, unsigned long esr) local_daif_restore(DAIF_PROCCTX); do_sp_pc_abort(far, esr, regs); } -NOKPROBE_SYMBOL(el0_pc); -static void notrace el0_sp(struct pt_regs *regs, unsigned long esr) +static void noinstr el0_sp(struct pt_regs *regs, unsigned long esr) { user_exit_irqoff(); local_daif_restore(DAIF_PROCCTX); do_sp_pc_abort(regs->sp, esr, regs); } -NOKPROBE_SYMBOL(el0_sp); -static void notrace el0_undef(struct pt_regs *regs) +static void noinstr el0_undef(struct pt_regs *regs) { user_exit_irqoff(); local_daif_restore(DAIF_PROCCTX); do_undefinstr(regs); } -NOKPROBE_SYMBOL(el0_undef); -static void notrace el0_bti(struct pt_regs *regs) +static void noinstr el0_bti(struct pt_regs *regs) { user_exit_irqoff(); local_daif_restore(DAIF_PROCCTX); do_bti(regs); } -NOKPROBE_SYMBOL(el0_bti); -static void notrace el0_inv(struct pt_regs *regs, unsigned long esr) +static void noinstr el0_inv(struct pt_regs *regs, unsigned long esr) { user_exit_irqoff(); local_daif_restore(DAIF_PROCCTX); bad_el0_sync(regs, 0, esr); } -NOKPROBE_SYMBOL(el0_inv); -static void notrace el0_dbg(struct pt_regs *regs, unsigned long esr) +static void noinstr el0_dbg(struct pt_regs *regs, unsigned long esr) { /* Only watchpoints write FAR_EL1, otherwise its UNKNOWN */ unsigned long far = read_sysreg(far_el1); @@ -226,26 +208,23 @@ static void notrace el0_dbg(struct pt_regs *regs, unsigned long esr) do_debug_exception(far, esr, regs); local_daif_restore(DAIF_PROCCTX_NOIRQ); } -NOKPROBE_SYMBOL(el0_dbg); -static void notrace el0_svc(struct pt_regs *regs) +static void noinstr el0_svc(struct pt_regs *regs) { if (system_uses_irq_prio_masking()) gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET); do_el0_svc(regs); } -NOKPROBE_SYMBOL(el0_svc); -static void notrace el0_fpac(struct pt_regs *regs, unsigned long esr) +static void noinstr el0_fpac(struct pt_regs *regs, unsigned long esr) { user_exit_irqoff(); local_daif_restore(DAIF_PROCCTX); do_ptrauth_fault(regs, esr); } -NOKPROBE_SYMBOL(el0_fpac); -asmlinkage void notrace el0_sync_handler(struct pt_regs *regs) +asmlinkage void noinstr el0_sync_handler(struct pt_regs *regs) { unsigned long esr = read_sysreg(esr_el1); @@ -297,27 +276,24 @@ asmlinkage void notrace el0_sync_handler(struct pt_regs *regs) el0_inv(regs, esr); } } -NOKPROBE_SYMBOL(el0_sync_handler); #ifdef CONFIG_COMPAT -static void notrace el0_cp15(struct pt_regs *regs, unsigned long esr) +static void noinstr el0_cp15(struct pt_regs *regs, unsigned long esr) { user_exit_irqoff(); local_daif_restore(DAIF_PROCCTX); do_cp15instr(esr, regs); } -NOKPROBE_SYMBOL(el0_cp15); -static void notrace el0_svc_compat(struct pt_regs *regs) +static void noinstr el0_svc_compat(struct pt_regs *regs) { if (system_uses_irq_prio_masking()) gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET); do_el0_svc_compat(regs); } -NOKPROBE_SYMBOL(el0_svc_compat); -asmlinkage void notrace el0_sync_compat_handler(struct pt_regs *regs) +asmlinkage void noinstr el0_sync_compat_handler(struct pt_regs *regs) { unsigned long esr = read_sysreg(esr_el1); @@ -360,5 +336,4 @@ asmlinkage void notrace el0_sync_compat_handler(struct pt_regs *regs) el0_inv(regs, esr); } } -NOKPROBE_SYMBOL(el0_sync_compat_handler); #endif /* CONFIG_COMPAT */ -- cgit v1.2.3-70-g09d2 From 2f911d494f3f028bbe6346e383a354225682cf1b Mon Sep 17 00:00:00 2001 From: Mark Rutland <mark.rutland@arm.com> Date: Mon, 30 Nov 2020 11:59:43 +0000 Subject: arm64: entry: move enter_from_user_mode to entry-common.c In later patches we'll want to extend enter_from_user_mode() and add a corresponding exit_to_user_mode(). As these will be common for all entries/exits from userspace, it'd be better for these to live in entry-common.c with the rest of the entry logic. This patch moves enter_from_user_mode() into entry-common.c. As with other functions in entry-common.c it is marked as noinstr (which prevents all instrumentation, tracing, and kprobes) but there are no other functional changes. Signed-off-by: Mark Rutland <mark.rutland@arm.com> Cc: Catalin Marinas <catalin.marinas@arm.com> Cc: James Morse <james.morse@arm.com> Cc: Will Deacon <will@kernel.org> Link: https://lore.kernel.org/r/20201130115950.22492-5-mark.rutland@arm.com Signed-off-by: Will Deacon <will@kernel.org> --- arch/arm64/kernel/entry-common.c | 6 ++++++ arch/arm64/kernel/traps.c | 7 ------- 2 files changed, 6 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c index 75e99161f79e..9a685e7686fe 100644 --- a/arch/arm64/kernel/entry-common.c +++ b/arch/arm64/kernel/entry-common.c @@ -101,6 +101,12 @@ asmlinkage void noinstr el1_sync_handler(struct pt_regs *regs) } } +asmlinkage void noinstr enter_from_user_mode(void) +{ + CT_WARN_ON(ct_state() != CONTEXT_USER); + user_exit_irqoff(); +} + static void noinstr el0_da(struct pt_regs *regs, unsigned long esr) { unsigned long far = read_sysreg(far_el1); diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 8af4e0e85736..580c60afc39a 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -876,13 +876,6 @@ asmlinkage void do_serror(struct pt_regs *regs, unsigned int esr) nmi_exit(); } -asmlinkage void enter_from_user_mode(void) -{ - CT_WARN_ON(ct_state() != CONTEXT_USER); - user_exit_irqoff(); -} -NOKPROBE_SYMBOL(enter_from_user_mode); - /* GENERIC_BUG traps */ int is_valid_bugaddr(unsigned long addr) -- cgit v1.2.3-70-g09d2 From 3cb5ed4d76c15fb97c10e5e9f5268d92c68222ca Mon Sep 17 00:00:00 2001 From: Mark Rutland <mark.rutland@arm.com> Date: Mon, 30 Nov 2020 11:59:44 +0000 Subject: arm64: entry: prepare ret_to_user for function call In a subsequent patch ret_to_user will need to make a C function call (in some configurations) which may clobber x0-x18 at the start of the finish_ret_to_user block, before enable_step_tsk consumes the flags loaded into x1. In preparation for this, let's load the flags into x19, which is preserved across C function calls. This avoids a redundant reload of the flags and ensures we operate on a consistent shapshot regardless. There should be no functional change as a result of this patch. At this point of the entry/exit paths we only need to preserve x28 (tsk) and the sp, and x19 is free for this use. Signed-off-by: Mark Rutland <mark.rutland@arm.com> Cc: Catalin Marinas <catalin.marinas@arm.com> Cc: James Morse <james.morse@arm.com> Cc: Will Deacon <will@kernel.org> Link: https://lore.kernel.org/r/20201130115950.22492-6-mark.rutland@arm.com Signed-off-by: Will Deacon <will@kernel.org> --- arch/arm64/kernel/entry.S | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index b295fb912b12..84aec600eeed 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -774,13 +774,13 @@ SYM_CODE_END(el0_error) SYM_CODE_START_LOCAL(ret_to_user) disable_daif gic_prio_kentry_setup tmp=x3 - ldr x1, [tsk, #TSK_TI_FLAGS] - and x2, x1, #_TIF_WORK_MASK + ldr x19, [tsk, #TSK_TI_FLAGS] + and x2, x19, #_TIF_WORK_MASK cbnz x2, work_pending finish_ret_to_user: /* Ignore asynchronous tag check faults in the uaccess routines */ clear_mte_async_tcf - enable_step_tsk x1, x2 + enable_step_tsk x19, x2 #ifdef CONFIG_GCC_PLUGIN_STACKLEAK bl stackleak_erase #endif @@ -791,11 +791,12 @@ finish_ret_to_user: */ work_pending: mov x0, sp // 'regs' + mov x1, x19 bl do_notify_resume #ifdef CONFIG_TRACE_IRQFLAGS bl trace_hardirqs_on // enabled while in userspace #endif - ldr x1, [tsk, #TSK_TI_FLAGS] // re-check for single-step + ldr x19, [tsk, #TSK_TI_FLAGS] // re-check for single-step b finish_ret_to_user SYM_CODE_END(ret_to_user) -- cgit v1.2.3-70-g09d2 From 105fc3352077bba5faaf12cf39f7e3aad26fb70b Mon Sep 17 00:00:00 2001 From: Mark Rutland <mark.rutland@arm.com> Date: Mon, 30 Nov 2020 11:59:45 +0000 Subject: arm64: entry: move el1 irq/nmi logic to C In preparation for reworking the EL1 irq/nmi entry code, move the existing logic to C. We no longer need the asm_nmi_enter() and asm_nmi_exit() wrappers, so these are removed. The new C functions are marked noinstr, which prevents compiler instrumentation and runtime probing. In subsequent patches we'll want the new C helpers to be called in all cases, so we don't bother wrapping the calls with ifdeferry. Even when the new C functions are stubs the trivial calls are unlikely to have a measurable impact on the IRQ or NMI paths anyway. Prototypes are added to <asm/exception.h> as otherwise (in some configurations) GCC will complain about the lack of a forward declaration. We already do this for existing function, e.g. enter_from_user_mode(). The new helpers are marked as noinstr (which prevents all instrumentation, tracing, and kprobes). Otherwise, there should be no functional change as a result of this patch. Signed-off-by: Mark Rutland <mark.rutland@arm.com> Cc: Catalin Marinas <catalin.marinas@arm.com> Cc: James Morse <james.morse@arm.com> Cc: Will Deacon <will@kernel.org> Link: https://lore.kernel.org/r/20201130115950.22492-7-mark.rutland@arm.com Signed-off-by: Will Deacon <will@kernel.org> --- arch/arm64/include/asm/exception.h | 2 ++ arch/arm64/kernel/entry-common.c | 16 ++++++++++++++++ arch/arm64/kernel/entry.S | 34 ++++------------------------------ arch/arm64/kernel/irq.c | 15 --------------- 4 files changed, 22 insertions(+), 45 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/exception.h b/arch/arm64/include/asm/exception.h index 99b9383cd036..d69d53dd7be7 100644 --- a/arch/arm64/include/asm/exception.h +++ b/arch/arm64/include/asm/exception.h @@ -31,6 +31,8 @@ static inline u32 disr_to_esr(u64 disr) return esr; } +asmlinkage void noinstr enter_el1_irq_or_nmi(struct pt_regs *regs); +asmlinkage void noinstr exit_el1_irq_or_nmi(struct pt_regs *regs); asmlinkage void enter_from_user_mode(void); void do_mem_abort(unsigned long addr, unsigned int esr, struct pt_regs *regs); void do_undefinstr(struct pt_regs *regs); diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c index 9a685e7686fe..920da254be1d 100644 --- a/arch/arm64/kernel/entry-common.c +++ b/arch/arm64/kernel/entry-common.c @@ -17,6 +17,22 @@ #include <asm/mmu.h> #include <asm/sysreg.h> +asmlinkage void noinstr enter_el1_irq_or_nmi(struct pt_regs *regs) +{ + if (IS_ENABLED(CONFIG_ARM64_PSEUDO_NMI) && !interrupts_enabled(regs)) + nmi_enter(); + + trace_hardirqs_off(); +} + +asmlinkage void noinstr exit_el1_irq_or_nmi(struct pt_regs *regs) +{ + if (IS_ENABLED(CONFIG_ARM64_PSEUDO_NMI) && !interrupts_enabled(regs)) + nmi_exit(); + else + trace_hardirqs_on(); +} + static void noinstr el1_abort(struct pt_regs *regs, unsigned long esr) { unsigned long far = read_sysreg(far_el1); diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 84aec600eeed..53e30750fc28 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -637,16 +637,8 @@ SYM_CODE_START_LOCAL_NOALIGN(el1_irq) gic_prio_irq_setup pmr=x20, tmp=x1 enable_da_f -#ifdef CONFIG_ARM64_PSEUDO_NMI - test_irqs_unmasked res=x0, pmr=x20 - cbz x0, 1f - bl asm_nmi_enter -1: -#endif - -#ifdef CONFIG_TRACE_IRQFLAGS - bl trace_hardirqs_off -#endif + mov x0, sp + bl enter_el1_irq_or_nmi irq_handler @@ -665,26 +657,8 @@ alternative_else_nop_endif 1: #endif -#ifdef CONFIG_ARM64_PSEUDO_NMI - /* - * When using IRQ priority masking, we can get spurious interrupts while - * PMR is set to GIC_PRIO_IRQOFF. An NMI might also have occurred in a - * section with interrupts disabled. Skip tracing in those cases. - */ - test_irqs_unmasked res=x0, pmr=x20 - cbz x0, 1f - bl asm_nmi_exit -1: -#endif - -#ifdef CONFIG_TRACE_IRQFLAGS -#ifdef CONFIG_ARM64_PSEUDO_NMI - test_irqs_unmasked res=x0, pmr=x20 - cbnz x0, 1f -#endif - bl trace_hardirqs_on -1: -#endif + mov x0, sp + bl exit_el1_irq_or_nmi kernel_exit 1 SYM_CODE_END(el1_irq) diff --git a/arch/arm64/kernel/irq.c b/arch/arm64/kernel/irq.c index 9cf2fb87584a..60456a62da11 100644 --- a/arch/arm64/kernel/irq.c +++ b/arch/arm64/kernel/irq.c @@ -67,18 +67,3 @@ void __init init_IRQ(void) local_daif_restore(DAIF_PROCCTX_NOIRQ); } } - -/* - * Stubs to make nmi_enter/exit() code callable from ASM - */ -asmlinkage void notrace asm_nmi_enter(void) -{ - nmi_enter(); -} -NOKPROBE_SYMBOL(asm_nmi_enter); - -asmlinkage void notrace asm_nmi_exit(void) -{ - nmi_exit(); -} -NOKPROBE_SYMBOL(asm_nmi_exit); -- cgit v1.2.3-70-g09d2 From 23529049c68423820487304f244144e0d576e85a Mon Sep 17 00:00:00 2001 From: Mark Rutland <mark.rutland@arm.com> Date: Mon, 30 Nov 2020 11:59:46 +0000 Subject: arm64: entry: fix non-NMI user<->kernel transitions When built with PROVE_LOCKING, NO_HZ_FULL, and CONTEXT_TRACKING_FORCE will WARN() at boot time that interrupts are enabled when we call context_tracking_user_enter(), despite the DAIF flags indicating that IRQs are masked. The problem is that we're not tracking IRQ flag changes accurately, and so lockdep believes interrupts are enabled when they are not (and vice-versa). We can shuffle things so to make this more accurate. For kernel->user transitions there are a number of constraints we need to consider: 1) When we call __context_tracking_user_enter() HW IRQs must be disabled and lockdep must be up-to-date with this. 2) Userspace should be treated as having IRQs enabled from the PoV of both lockdep and tracing. 3) As context_tracking_user_enter() stops RCU from watching, we cannot use RCU after calling it. 4) IRQ flag tracing and lockdep have state that must be manipulated before RCU is disabled. ... with similar constraints applying for user->kernel transitions, with the ordering reversed. The generic entry code has enter_from_user_mode() and exit_to_user_mode() helpers to handle this. We can't use those directly, so we add arm64 copies for now (without the instrumentation markers which aren't used on arm64). These replace the existing user_exit() and user_exit_irqoff() calls spread throughout handlers, and the exception unmasking is left as-is. Note that: * The accounting for debug exceptions from userspace now happens in el0_dbg() and ret_to_user(), so this is removed from debug_exception_enter() and debug_exception_exit(). As user_exit_irqoff() wakes RCU, the userspace-specific check is removed. * The accounting for syscalls now happens in el0_svc(), el0_svc_compat(), and ret_to_user(), so this is removed from el0_svc_common(). This does not adversely affect the workaround for erratum 1463225, as this does not depend on any of the state tracking. * In ret_to_user() we mask interrupts with local_daif_mask(), and so we need to inform lockdep and tracing. Here a trace_hardirqs_off() is sufficient and safe as we have not yet exited kernel context and RCU is usable. * As PROVE_LOCKING selects TRACE_IRQFLAGS, the ifdeferry in entry.S only needs to check for the latter. * EL0 SError handling will be dealt with in a subsequent patch, as this needs to be treated as an NMI. Prior to this patch, booting an appropriately-configured kernel would result in spats as below: | DEBUG_LOCKS_WARN_ON(lockdep_hardirqs_enabled()) | WARNING: CPU: 2 PID: 1 at kernel/locking/lockdep.c:5280 check_flags.part.54+0x1dc/0x1f0 | Modules linked in: | CPU: 2 PID: 1 Comm: init Not tainted 5.10.0-rc3 #3 | Hardware name: linux,dummy-virt (DT) | pstate: 804003c5 (Nzcv DAIF +PAN -UAO -TCO BTYPE=--) | pc : check_flags.part.54+0x1dc/0x1f0 | lr : check_flags.part.54+0x1dc/0x1f0 | sp : ffff80001003bd80 | x29: ffff80001003bd80 x28: ffff66ce801e0000 | x27: 00000000ffffffff x26: 00000000000003c0 | x25: 0000000000000000 x24: ffffc31842527258 | x23: ffffc31842491368 x22: ffffc3184282d000 | x21: 0000000000000000 x20: 0000000000000001 | x19: ffffc318432ce000 x18: 0080000000000000 | x17: 0000000000000000 x16: ffffc31840f18a78 | x15: 0000000000000001 x14: ffffc3184285c810 | x13: 0000000000000001 x12: 0000000000000000 | x11: ffffc318415857a0 x10: ffffc318406614c0 | x9 : ffffc318415857a0 x8 : ffffc31841f1d000 | x7 : 647261685f706564 x6 : ffffc3183ff7c66c | x5 : ffff66ce801e0000 x4 : 0000000000000000 | x3 : ffffc3183fe00000 x2 : ffffc31841500000 | x1 : e956dc24146b3500 x0 : 0000000000000000 | Call trace: | check_flags.part.54+0x1dc/0x1f0 | lock_is_held_type+0x10c/0x188 | rcu_read_lock_sched_held+0x70/0x98 | __context_tracking_enter+0x310/0x350 | context_tracking_enter.part.3+0x5c/0xc8 | context_tracking_user_enter+0x6c/0x80 | finish_ret_to_user+0x2c/0x13cr Signed-off-by: Mark Rutland <mark.rutland@arm.com> Cc: Catalin Marinas <catalin.marinas@arm.com> Cc: James Morse <james.morse@arm.com> Cc: Will Deacon <will@kernel.org> Link: https://lore.kernel.org/r/20201130115950.22492-8-mark.rutland@arm.com Signed-off-by: Will Deacon <will@kernel.org> --- arch/arm64/include/asm/exception.h | 1 + arch/arm64/kernel/entry-common.c | 40 +++++++++++++++++++++++++------------- arch/arm64/kernel/entry.S | 35 +++++++++++++-------------------- arch/arm64/kernel/syscall.c | 1 - arch/arm64/mm/fault.c | 22 ++++++++++----------- 5 files changed, 51 insertions(+), 48 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/exception.h b/arch/arm64/include/asm/exception.h index d69d53dd7be7..d579b2e6db7a 100644 --- a/arch/arm64/include/asm/exception.h +++ b/arch/arm64/include/asm/exception.h @@ -34,6 +34,7 @@ static inline u32 disr_to_esr(u64 disr) asmlinkage void noinstr enter_el1_irq_or_nmi(struct pt_regs *regs); asmlinkage void noinstr exit_el1_irq_or_nmi(struct pt_regs *regs); asmlinkage void enter_from_user_mode(void); +asmlinkage void exit_to_user_mode(void); void do_mem_abort(unsigned long addr, unsigned int esr, struct pt_regs *regs); void do_undefinstr(struct pt_regs *regs); void do_bti(struct pt_regs *regs); diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c index 920da254be1d..49d1c1dd9baf 100644 --- a/arch/arm64/kernel/entry-common.c +++ b/arch/arm64/kernel/entry-common.c @@ -119,15 +119,25 @@ asmlinkage void noinstr el1_sync_handler(struct pt_regs *regs) asmlinkage void noinstr enter_from_user_mode(void) { + lockdep_hardirqs_off(CALLER_ADDR0); CT_WARN_ON(ct_state() != CONTEXT_USER); user_exit_irqoff(); + trace_hardirqs_off_finish(); +} + +asmlinkage void noinstr exit_to_user_mode(void) +{ + trace_hardirqs_on_prepare(); + lockdep_hardirqs_on_prepare(CALLER_ADDR0); + user_enter_irqoff(); + lockdep_hardirqs_on(CALLER_ADDR0); } static void noinstr el0_da(struct pt_regs *regs, unsigned long esr) { unsigned long far = read_sysreg(far_el1); - user_exit_irqoff(); + enter_from_user_mode(); local_daif_restore(DAIF_PROCCTX); far = untagged_addr(far); do_mem_abort(far, esr, regs); @@ -145,35 +155,35 @@ static void noinstr el0_ia(struct pt_regs *regs, unsigned long esr) if (!is_ttbr0_addr(far)) arm64_apply_bp_hardening(); - user_exit_irqoff(); + enter_from_user_mode(); local_daif_restore(DAIF_PROCCTX); do_mem_abort(far, esr, regs); } static void noinstr el0_fpsimd_acc(struct pt_regs *regs, unsigned long esr) { - user_exit_irqoff(); + enter_from_user_mode(); local_daif_restore(DAIF_PROCCTX); do_fpsimd_acc(esr, regs); } static void noinstr el0_sve_acc(struct pt_regs *regs, unsigned long esr) { - user_exit_irqoff(); + enter_from_user_mode(); local_daif_restore(DAIF_PROCCTX); do_sve_acc(esr, regs); } static void noinstr el0_fpsimd_exc(struct pt_regs *regs, unsigned long esr) { - user_exit_irqoff(); + enter_from_user_mode(); local_daif_restore(DAIF_PROCCTX); do_fpsimd_exc(esr, regs); } static void noinstr el0_sys(struct pt_regs *regs, unsigned long esr) { - user_exit_irqoff(); + enter_from_user_mode(); local_daif_restore(DAIF_PROCCTX); do_sysinstr(esr, regs); } @@ -185,35 +195,35 @@ static void noinstr el0_pc(struct pt_regs *regs, unsigned long esr) if (!is_ttbr0_addr(instruction_pointer(regs))) arm64_apply_bp_hardening(); - user_exit_irqoff(); + enter_from_user_mode(); local_daif_restore(DAIF_PROCCTX); do_sp_pc_abort(far, esr, regs); } static void noinstr el0_sp(struct pt_regs *regs, unsigned long esr) { - user_exit_irqoff(); + enter_from_user_mode(); local_daif_restore(DAIF_PROCCTX); do_sp_pc_abort(regs->sp, esr, regs); } static void noinstr el0_undef(struct pt_regs *regs) { - user_exit_irqoff(); + enter_from_user_mode(); local_daif_restore(DAIF_PROCCTX); do_undefinstr(regs); } static void noinstr el0_bti(struct pt_regs *regs) { - user_exit_irqoff(); + enter_from_user_mode(); local_daif_restore(DAIF_PROCCTX); do_bti(regs); } static void noinstr el0_inv(struct pt_regs *regs, unsigned long esr) { - user_exit_irqoff(); + enter_from_user_mode(); local_daif_restore(DAIF_PROCCTX); bad_el0_sync(regs, 0, esr); } @@ -226,7 +236,7 @@ static void noinstr el0_dbg(struct pt_regs *regs, unsigned long esr) if (system_uses_irq_prio_masking()) gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET); - user_exit_irqoff(); + enter_from_user_mode(); do_debug_exception(far, esr, regs); local_daif_restore(DAIF_PROCCTX_NOIRQ); } @@ -236,12 +246,13 @@ static void noinstr el0_svc(struct pt_regs *regs) if (system_uses_irq_prio_masking()) gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET); + enter_from_user_mode(); do_el0_svc(regs); } static void noinstr el0_fpac(struct pt_regs *regs, unsigned long esr) { - user_exit_irqoff(); + enter_from_user_mode(); local_daif_restore(DAIF_PROCCTX); do_ptrauth_fault(regs, esr); } @@ -302,7 +313,7 @@ asmlinkage void noinstr el0_sync_handler(struct pt_regs *regs) #ifdef CONFIG_COMPAT static void noinstr el0_cp15(struct pt_regs *regs, unsigned long esr) { - user_exit_irqoff(); + enter_from_user_mode(); local_daif_restore(DAIF_PROCCTX); do_cp15instr(esr, regs); } @@ -312,6 +323,7 @@ static void noinstr el0_svc_compat(struct pt_regs *regs) if (system_uses_irq_prio_masking()) gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET); + enter_from_user_mode(); do_el0_svc_compat(regs); } diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 53e30750fc28..d72c818b019c 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -30,18 +30,18 @@ #include <asm/unistd.h> /* - * Context tracking subsystem. Used to instrument transitions - * between user and kernel mode. + * Context tracking and irqflag tracing need to instrument transitions between + * user and kernel mode. */ - .macro ct_user_exit_irqoff -#ifdef CONFIG_CONTEXT_TRACKING + .macro user_exit_irqoff +#if defined(CONFIG_CONTEXT_TRACKING) || defined(CONFIG_TRACE_IRQFLAGS) bl enter_from_user_mode #endif .endm - .macro ct_user_enter -#ifdef CONFIG_CONTEXT_TRACKING - bl context_tracking_user_enter + .macro user_enter_irqoff +#if defined(CONFIG_CONTEXT_TRACKING) || defined(CONFIG_TRACE_IRQFLAGS) + bl exit_to_user_mode #endif .endm @@ -298,9 +298,6 @@ alternative_if ARM64_HAS_IRQ_PRIO_MASKING alternative_else_nop_endif ldp x21, x22, [sp, #S_PC] // load ELR, SPSR - .if \el == 0 - ct_user_enter - .endif #ifdef CONFIG_ARM64_SW_TTBR0_PAN alternative_if_not ARM64_HAS_PAN @@ -700,21 +697,14 @@ SYM_CODE_START_LOCAL_NOALIGN(el0_irq) kernel_entry 0 el0_irq_naked: gic_prio_irq_setup pmr=x20, tmp=x0 - ct_user_exit_irqoff + user_exit_irqoff enable_da_f -#ifdef CONFIG_TRACE_IRQFLAGS - bl trace_hardirqs_off -#endif - tbz x22, #55, 1f bl do_el0_irq_bp_hardening 1: irq_handler -#ifdef CONFIG_TRACE_IRQFLAGS - bl trace_hardirqs_on -#endif b ret_to_user SYM_CODE_END(el0_irq) @@ -733,7 +723,7 @@ SYM_CODE_START_LOCAL(el0_error) el0_error_naked: mrs x25, esr_el1 gic_prio_kentry_setup tmp=x2 - ct_user_exit_irqoff + user_exit_irqoff enable_dbg mov x0, sp mov x1, x25 @@ -748,10 +738,14 @@ SYM_CODE_END(el0_error) SYM_CODE_START_LOCAL(ret_to_user) disable_daif gic_prio_kentry_setup tmp=x3 +#ifdef CONFIG_TRACE_IRQFLAGS + bl trace_hardirqs_off +#endif ldr x19, [tsk, #TSK_TI_FLAGS] and x2, x19, #_TIF_WORK_MASK cbnz x2, work_pending finish_ret_to_user: + user_enter_irqoff /* Ignore asynchronous tag check faults in the uaccess routines */ clear_mte_async_tcf enable_step_tsk x19, x2 @@ -767,9 +761,6 @@ work_pending: mov x0, sp // 'regs' mov x1, x19 bl do_notify_resume -#ifdef CONFIG_TRACE_IRQFLAGS - bl trace_hardirqs_on // enabled while in userspace -#endif ldr x19, [tsk, #TSK_TI_FLAGS] // re-check for single-step b finish_ret_to_user SYM_CODE_END(ret_to_user) diff --git a/arch/arm64/kernel/syscall.c b/arch/arm64/kernel/syscall.c index 13fe79f8e2db..f8f758e4a306 100644 --- a/arch/arm64/kernel/syscall.c +++ b/arch/arm64/kernel/syscall.c @@ -120,7 +120,6 @@ static void el0_svc_common(struct pt_regs *regs, int scno, int sc_nr, */ cortex_a76_erratum_1463225_svc_handler(); - user_exit_irqoff(); local_daif_restore(DAIF_PROCCTX); if (system_supports_mte() && (flags & _TIF_MTE_ASYNC_FAULT)) { diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 1ee94002801f..1f450b784d2c 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -789,16 +789,14 @@ void __init hook_debug_fault_code(int nr, */ static void debug_exception_enter(struct pt_regs *regs) { - /* - * Tell lockdep we disabled irqs in entry.S. Do nothing if they were - * already disabled to preserve the last enabled/disabled addresses. - */ - if (interrupts_enabled(regs)) - trace_hardirqs_off(); + if (!user_mode(regs)) { + /* + * Tell lockdep we disabled irqs in entry.S. Do nothing if they were + * already disabled to preserve the last enabled/disabled addresses. + */ + if (interrupts_enabled(regs)) + trace_hardirqs_off(); - if (user_mode(regs)) { - RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU"); - } else { /* * We might have interrupted pretty much anything. In * fact, if we're a debug exception, we can even interrupt @@ -819,8 +817,10 @@ static void debug_exception_exit(struct pt_regs *regs) { preempt_enable_no_resched(); - if (!user_mode(regs)) - rcu_nmi_exit(); + if (user_mode(regs)) + return; + + rcu_nmi_exit(); if (interrupts_enabled(regs)) trace_hardirqs_on(); -- cgit v1.2.3-70-g09d2 From 1ec2f2c05b2ab845d068bff29bd32dbfc6a6ad4c Mon Sep 17 00:00:00 2001 From: Mark Rutland <mark.rutland@arm.com> Date: Mon, 30 Nov 2020 11:59:47 +0000 Subject: arm64: ptrace: prepare for EL1 irq/rcu tracking Exceptions from EL1 may be taken when RCU isn't watching (e.g. in idle sequences), or when the lockdep hardirqs transiently out-of-sync with the hardware state (e.g. in the middle of local_irq_enable()). To correctly handle these cases, we'll need to save/restore this state across some exceptions taken from EL1. A series of subsequent patches will update EL1 exception handlers to handle this. In preparation for this, and to avoid dependencies between those patches, this patch adds two new fields to struct pt_regs so that exception handlers can track this state. Note that this is placed in pt_regs as some entry/exit sequences such as el1_irq are invoked from assembly, which makes it very difficult to add a separate structure as with the irqentry_state used by x86. We can separate this once more of the exception logic is moved to C. While the fields only need to be bool, they are both made u64 to keep pt_regs 16-byte aligned. There should be no functional change as a result of this patch. Signed-off-by: Mark Rutland <mark.rutland@arm.com> Cc: Catalin Marinas <catalin.marinas@arm.com> Cc: James Morse <james.morse@arm.com> Cc: Will Deacon <will@kernel.org> Link: https://lore.kernel.org/r/20201130115950.22492-9-mark.rutland@arm.com Signed-off-by: Will Deacon <will@kernel.org> --- arch/arm64/include/asm/ptrace.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch') diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h index 997cf8c8cd52..28c85b87b8cd 100644 --- a/arch/arm64/include/asm/ptrace.h +++ b/arch/arm64/include/asm/ptrace.h @@ -193,6 +193,10 @@ struct pt_regs { /* Only valid when ARM64_HAS_IRQ_PRIO_MASKING is enabled. */ u64 pmr_save; u64 stackframe[2]; + + /* Only valid for some EL1 exceptions. */ + u64 lockdep_hardirqs; + u64 exit_rcu; }; static inline bool in_syscall(struct pt_regs const *regs) -- cgit v1.2.3-70-g09d2 From 7cd1ea1010acbede7eb87b6abb6198921fb36957 Mon Sep 17 00:00:00 2001 From: Mark Rutland <mark.rutland@arm.com> Date: Mon, 30 Nov 2020 11:59:48 +0000 Subject: arm64: entry: fix non-NMI kernel<->kernel transitions There are periods in kernel mode when RCU is not watching and/or the scheduler tick is disabled, but we can still take exceptions such as interrupts. The arm64 exception handlers do not account for this, and it's possible that RCU is not watching while an exception handler runs. The x86/generic entry code handles this by ensuring that all (non-NMI) kernel exception handlers call irqentry_enter() and irqentry_exit(), which handle RCU, lockdep, and IRQ flag tracing. We can't yet move to the generic entry code, and already hadnle the user<->kernel transitions elsewhere, so we add new kernel<->kernel transition helpers alog the lines of the generic entry code. Since we now track interrupts becoming masked when an exception is taken, local_daif_inherit() is modified to track interrupts becoming re-enabled when the original context is inherited. To balance the entry/exit paths, each handler masks all DAIF exceptions before exit_to_kernel_mode(). Signed-off-by: Mark Rutland <mark.rutland@arm.com> Cc: Catalin Marinas <catalin.marinas@arm.com> Cc: James Morse <james.morse@arm.com> Cc: Will Deacon <will@kernel.org> Link: https://lore.kernel.org/r/20201130115950.22492-10-mark.rutland@arm.com Signed-off-by: Will Deacon <will@kernel.org> --- arch/arm64/include/asm/daifflags.h | 3 ++ arch/arm64/kernel/entry-common.c | 67 ++++++++++++++++++++++++++++++++++++-- 2 files changed, 67 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/daifflags.h b/arch/arm64/include/asm/daifflags.h index ec213b4a1650..1c26d7baa67f 100644 --- a/arch/arm64/include/asm/daifflags.h +++ b/arch/arm64/include/asm/daifflags.h @@ -128,6 +128,9 @@ static inline void local_daif_inherit(struct pt_regs *regs) { unsigned long flags = regs->pstate & DAIF_MASK; + if (interrupts_enabled(regs)) + trace_hardirqs_on(); + /* * We can't use local_daif_restore(regs->pstate) here as * system_has_prio_mask_debugging() won't restore the I bit if it can diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c index 49d1c1dd9baf..86622d8dd0d8 100644 --- a/arch/arm64/kernel/entry-common.c +++ b/arch/arm64/kernel/entry-common.c @@ -17,12 +17,58 @@ #include <asm/mmu.h> #include <asm/sysreg.h> +/* + * This is intended to match the logic in irqentry_enter(), handling the kernel + * mode transitions only. + */ +static void noinstr enter_from_kernel_mode(struct pt_regs *regs) +{ + regs->exit_rcu = false; + + if (!IS_ENABLED(CONFIG_TINY_RCU) && is_idle_task(current)) { + lockdep_hardirqs_off(CALLER_ADDR0); + rcu_irq_enter(); + trace_hardirqs_off_finish(); + + regs->exit_rcu = true; + return; + } + + lockdep_hardirqs_off(CALLER_ADDR0); + rcu_irq_enter_check_tick(); + trace_hardirqs_off_finish(); +} + +/* + * This is intended to match the logic in irqentry_exit(), handling the kernel + * mode transitions only, and with preemption handled elsewhere. + */ +static void noinstr exit_to_kernel_mode(struct pt_regs *regs) +{ + lockdep_assert_irqs_disabled(); + + if (interrupts_enabled(regs)) { + if (regs->exit_rcu) { + trace_hardirqs_on_prepare(); + lockdep_hardirqs_on_prepare(CALLER_ADDR0); + rcu_irq_exit(); + lockdep_hardirqs_on(CALLER_ADDR0); + return; + } + + trace_hardirqs_on(); + } else { + if (regs->exit_rcu) + rcu_irq_exit(); + } +} + asmlinkage void noinstr enter_el1_irq_or_nmi(struct pt_regs *regs) { if (IS_ENABLED(CONFIG_ARM64_PSEUDO_NMI) && !interrupts_enabled(regs)) nmi_enter(); - - trace_hardirqs_off(); + else + enter_from_kernel_mode(regs); } asmlinkage void noinstr exit_el1_irq_or_nmi(struct pt_regs *regs) @@ -30,36 +76,48 @@ asmlinkage void noinstr exit_el1_irq_or_nmi(struct pt_regs *regs) if (IS_ENABLED(CONFIG_ARM64_PSEUDO_NMI) && !interrupts_enabled(regs)) nmi_exit(); else - trace_hardirqs_on(); + exit_to_kernel_mode(regs); } static void noinstr el1_abort(struct pt_regs *regs, unsigned long esr) { unsigned long far = read_sysreg(far_el1); + enter_from_kernel_mode(regs); local_daif_inherit(regs); far = untagged_addr(far); do_mem_abort(far, esr, regs); + local_daif_mask(); + exit_to_kernel_mode(regs); } static void noinstr el1_pc(struct pt_regs *regs, unsigned long esr) { unsigned long far = read_sysreg(far_el1); + enter_from_kernel_mode(regs); local_daif_inherit(regs); do_sp_pc_abort(far, esr, regs); + local_daif_mask(); + exit_to_kernel_mode(regs); } static void noinstr el1_undef(struct pt_regs *regs) { + enter_from_kernel_mode(regs); local_daif_inherit(regs); do_undefinstr(regs); + local_daif_mask(); + exit_to_kernel_mode(regs); } static void noinstr el1_inv(struct pt_regs *regs, unsigned long esr) { + enter_from_kernel_mode(regs); local_daif_inherit(regs); bad_mode(regs, 0, esr); + local_daif_mask(); + exit_to_kernel_mode(regs); } static void noinstr el1_dbg(struct pt_regs *regs, unsigned long esr) @@ -79,8 +137,11 @@ static void noinstr el1_dbg(struct pt_regs *regs, unsigned long esr) static void noinstr el1_fpac(struct pt_regs *regs, unsigned long esr) { + enter_from_kernel_mode(regs); local_daif_inherit(regs); do_ptrauth_fault(regs, esr); + local_daif_mask(); + exit_to_kernel_mode(regs); } asmlinkage void noinstr el1_sync_handler(struct pt_regs *regs) -- cgit v1.2.3-70-g09d2 From f0cd5ac1e4c53cb691b3ed3cda1031e1c42153e2 Mon Sep 17 00:00:00 2001 From: Mark Rutland <mark.rutland@arm.com> Date: Mon, 30 Nov 2020 11:59:49 +0000 Subject: arm64: entry: fix NMI {user, kernel}->kernel transitions Exceptions which can be taken at (almost) any time are consdiered to be NMIs. On arm64 that includes: * SDEI events * GICv3 Pseudo-NMIs * Kernel stack overflows * Unexpected/unhandled exceptions ... but currently debug exceptions (BRKs, breakpoints, watchpoints, single-step) are not considered NMIs. As these can be taken at any time, kernel features (lockdep, RCU, ftrace) may not be in a consistent kernel state. For example, we may take an NMI from the idle code or partway through an entry/exit path. While nmi_enter() and nmi_exit() handle most of this state, notably they don't save/restore the lockdep state across an NMI being taken and handled. When interrupts are enabled and an NMI is taken, lockdep may see interrupts become disabled within the NMI code, but not see interrupts become enabled when returning from the NMI, leaving lockdep believing interrupts are disabled when they are actually disabled. The x86 code handles this in idtentry_{enter,exit}_nmi(), which will shortly be moved to the generic entry code. As we can't use either yet, we copy the x86 approach in arm64-specific helpers. All the NMI entrypoints are marked as noinstr to prevent any instrumentation handling code being invoked before the state has been corrected. Signed-off-by: Mark Rutland <mark.rutland@arm.com> Cc: Catalin Marinas <catalin.marinas@arm.com> Cc: James Morse <james.morse@arm.com> Cc: Will Deacon <will@kernel.org> Link: https://lore.kernel.org/r/20201130115950.22492-11-mark.rutland@arm.com Signed-off-by: Will Deacon <will@kernel.org> --- arch/arm64/include/asm/exception.h | 2 ++ arch/arm64/kernel/entry-common.c | 34 ++++++++++++++++++++++++++++++++-- arch/arm64/kernel/sdei.c | 7 ++++--- arch/arm64/kernel/traps.c | 15 ++++++++++----- 4 files changed, 48 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/exception.h b/arch/arm64/include/asm/exception.h index d579b2e6db7a..0756191f44f6 100644 --- a/arch/arm64/include/asm/exception.h +++ b/arch/arm64/include/asm/exception.h @@ -35,6 +35,8 @@ asmlinkage void noinstr enter_el1_irq_or_nmi(struct pt_regs *regs); asmlinkage void noinstr exit_el1_irq_or_nmi(struct pt_regs *regs); asmlinkage void enter_from_user_mode(void); asmlinkage void exit_to_user_mode(void); +void arm64_enter_nmi(struct pt_regs *regs); +void arm64_exit_nmi(struct pt_regs *regs); void do_mem_abort(unsigned long addr, unsigned int esr, struct pt_regs *regs); void do_undefinstr(struct pt_regs *regs); void do_bti(struct pt_regs *regs); diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c index 86622d8dd0d8..6d70be0d3e8f 100644 --- a/arch/arm64/kernel/entry-common.c +++ b/arch/arm64/kernel/entry-common.c @@ -63,10 +63,40 @@ static void noinstr exit_to_kernel_mode(struct pt_regs *regs) } } +void noinstr arm64_enter_nmi(struct pt_regs *regs) +{ + regs->lockdep_hardirqs = lockdep_hardirqs_enabled(); + + __nmi_enter(); + lockdep_hardirqs_off(CALLER_ADDR0); + lockdep_hardirq_enter(); + rcu_nmi_enter(); + + trace_hardirqs_off_finish(); + ftrace_nmi_enter(); +} + +void noinstr arm64_exit_nmi(struct pt_regs *regs) +{ + bool restore = regs->lockdep_hardirqs; + + ftrace_nmi_exit(); + if (restore) { + trace_hardirqs_on_prepare(); + lockdep_hardirqs_on_prepare(CALLER_ADDR0); + } + + rcu_nmi_exit(); + lockdep_hardirq_exit(); + if (restore) + lockdep_hardirqs_on(CALLER_ADDR0); + __nmi_exit(); +} + asmlinkage void noinstr enter_el1_irq_or_nmi(struct pt_regs *regs) { if (IS_ENABLED(CONFIG_ARM64_PSEUDO_NMI) && !interrupts_enabled(regs)) - nmi_enter(); + arm64_enter_nmi(regs); else enter_from_kernel_mode(regs); } @@ -74,7 +104,7 @@ asmlinkage void noinstr enter_el1_irq_or_nmi(struct pt_regs *regs) asmlinkage void noinstr exit_el1_irq_or_nmi(struct pt_regs *regs) { if (IS_ENABLED(CONFIG_ARM64_PSEUDO_NMI) && !interrupts_enabled(regs)) - nmi_exit(); + arm64_exit_nmi(regs); else exit_to_kernel_mode(regs); } diff --git a/arch/arm64/kernel/sdei.c b/arch/arm64/kernel/sdei.c index 7689f2031c0c..793c46d6a447 100644 --- a/arch/arm64/kernel/sdei.c +++ b/arch/arm64/kernel/sdei.c @@ -10,6 +10,7 @@ #include <linux/uaccess.h> #include <asm/alternative.h> +#include <asm/exception.h> #include <asm/kprobes.h> #include <asm/mmu.h> #include <asm/ptrace.h> @@ -223,16 +224,16 @@ static __kprobes unsigned long _sdei_handler(struct pt_regs *regs, } -asmlinkage __kprobes notrace unsigned long +asmlinkage noinstr unsigned long __sdei_handler(struct pt_regs *regs, struct sdei_registered_event *arg) { unsigned long ret; - nmi_enter(); + arm64_enter_nmi(regs); ret = _sdei_handler(regs, arg); - nmi_exit(); + arm64_exit_nmi(regs); return ret; } diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 580c60afc39a..2059d8f43f55 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -34,6 +34,7 @@ #include <asm/daifflags.h> #include <asm/debug-monitors.h> #include <asm/esr.h> +#include <asm/exception.h> #include <asm/extable.h> #include <asm/insn.h> #include <asm/kprobes.h> @@ -753,8 +754,10 @@ const char *esr_get_class_string(u32 esr) * bad_mode handles the impossible case in the exception vector. This is always * fatal. */ -asmlinkage void bad_mode(struct pt_regs *regs, int reason, unsigned int esr) +asmlinkage void notrace bad_mode(struct pt_regs *regs, int reason, unsigned int esr) { + arm64_enter_nmi(regs); + console_verbose(); pr_crit("Bad mode in %s handler detected on CPU%d, code 0x%08x -- %s\n", @@ -786,7 +789,7 @@ void bad_el0_sync(struct pt_regs *regs, int reason, unsigned int esr) DEFINE_PER_CPU(unsigned long [OVERFLOW_STACK_SIZE/sizeof(long)], overflow_stack) __aligned(16); -asmlinkage void handle_bad_stack(struct pt_regs *regs) +asmlinkage void noinstr handle_bad_stack(struct pt_regs *regs) { unsigned long tsk_stk = (unsigned long)current->stack; unsigned long irq_stk = (unsigned long)this_cpu_read(irq_stack_ptr); @@ -794,6 +797,8 @@ asmlinkage void handle_bad_stack(struct pt_regs *regs) unsigned int esr = read_sysreg(esr_el1); unsigned long far = read_sysreg(far_el1); + arm64_enter_nmi(regs); + console_verbose(); pr_emerg("Insufficient stack space to handle exception!"); @@ -865,15 +870,15 @@ bool arm64_is_fatal_ras_serror(struct pt_regs *regs, unsigned int esr) } } -asmlinkage void do_serror(struct pt_regs *regs, unsigned int esr) +asmlinkage void noinstr do_serror(struct pt_regs *regs, unsigned int esr) { - nmi_enter(); + arm64_enter_nmi(regs); /* non-RAS errors are not containable */ if (!arm64_is_ras_serror(esr) || arm64_is_fatal_ras_serror(regs, esr)) arm64_serror_panic(regs, esr); - nmi_exit(); + arm64_exit_nmi(regs); } /* GENERIC_BUG traps */ -- cgit v1.2.3-70-g09d2 From 2a9b3e6ac69a8bf177d8496a11e749e2dc72fa22 Mon Sep 17 00:00:00 2001 From: Mark Rutland <mark.rutland@arm.com> Date: Mon, 30 Nov 2020 11:59:50 +0000 Subject: arm64: entry: fix EL1 debug transitions In debug_exception_enter() and debug_exception_exit() we trace hardirqs on/off while RCU isn't guaranteed to be watching, and we don't save and restore the hardirq state, and so may return with this having changed. Handle this appropriately with new entry/exit helpers which do the bare minimum to ensure this is appropriately maintained, without marking debug exceptions as NMIs. These are placed in entry-common.c with the other entry/exit helpers. In future we'll want to reconsider whether some debug exceptions should be NMIs, but this will require a significant refactoring, and for now this should prevent issues with lockdep and RCU. Signed-off-by: Mark Rutland <mark.rutland@arm.com> Cc: Catalin Marins <catalin.marinas@arm.com> Cc: James Morse <james.morse@arm.com> Cc: Will Deacon <will@kernel.org> Link: https://lore.kernel.org/r/20201130115950.22492-12-mark.rutland@arm.com Signed-off-by: Will Deacon <will@kernel.org> --- arch/arm64/kernel/entry-common.c | 26 ++++++++++++++++++++++++++ arch/arm64/mm/fault.c | 25 ------------------------- 2 files changed, 26 insertions(+), 25 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c index 6d70be0d3e8f..70e0a7591245 100644 --- a/arch/arm64/kernel/entry-common.c +++ b/arch/arm64/kernel/entry-common.c @@ -150,6 +150,30 @@ static void noinstr el1_inv(struct pt_regs *regs, unsigned long esr) exit_to_kernel_mode(regs); } +static void noinstr arm64_enter_el1_dbg(struct pt_regs *regs) +{ + regs->lockdep_hardirqs = lockdep_hardirqs_enabled(); + + lockdep_hardirqs_off(CALLER_ADDR0); + rcu_nmi_enter(); + + trace_hardirqs_off_finish(); +} + +static void noinstr arm64_exit_el1_dbg(struct pt_regs *regs) +{ + bool restore = regs->lockdep_hardirqs; + + if (restore) { + trace_hardirqs_on_prepare(); + lockdep_hardirqs_on_prepare(CALLER_ADDR0); + } + + rcu_nmi_exit(); + if (restore) + lockdep_hardirqs_on(CALLER_ADDR0); +} + static void noinstr el1_dbg(struct pt_regs *regs, unsigned long esr) { unsigned long far = read_sysreg(far_el1); @@ -162,7 +186,9 @@ static void noinstr el1_dbg(struct pt_regs *regs, unsigned long esr) if (system_uses_irq_prio_masking()) gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET); + arm64_enter_el1_dbg(regs); do_debug_exception(far, esr, regs); + arm64_exit_el1_dbg(regs); } static void noinstr el1_fpac(struct pt_regs *regs, unsigned long esr) diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 1f450b784d2c..795d224f184f 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -789,23 +789,6 @@ void __init hook_debug_fault_code(int nr, */ static void debug_exception_enter(struct pt_regs *regs) { - if (!user_mode(regs)) { - /* - * Tell lockdep we disabled irqs in entry.S. Do nothing if they were - * already disabled to preserve the last enabled/disabled addresses. - */ - if (interrupts_enabled(regs)) - trace_hardirqs_off(); - - /* - * We might have interrupted pretty much anything. In - * fact, if we're a debug exception, we can even interrupt - * NMI processing. We don't want this code makes in_nmi() - * to return true, but we need to notify RCU. - */ - rcu_nmi_enter(); - } - preempt_disable(); /* This code is a bit fragile. Test it. */ @@ -816,14 +799,6 @@ NOKPROBE_SYMBOL(debug_exception_enter); static void debug_exception_exit(struct pt_regs *regs) { preempt_enable_no_resched(); - - if (user_mode(regs)) - return; - - rcu_nmi_exit(); - - if (interrupts_enabled(regs)) - trace_hardirqs_on(); } NOKPROBE_SYMBOL(debug_exception_exit); -- cgit v1.2.3-70-g09d2 From 9e5344e0ffc33f4fee899f98b6939a0682b1d9c3 Mon Sep 17 00:00:00 2001 From: Vincenzo Frascino <vincenzo.frascino@arm.com> Date: Mon, 30 Nov 2020 17:07:09 +0000 Subject: arm64: mte: Fix typo in macro definition UL in the definition of SYS_TFSR_EL1_TF1 was misspelled causing compilation issues when trying to implement in kernel MTE async mode. Fix the macro correcting the typo. Note: MTE async mode will be introduced with a future series. Fixes: c058b1c4a5ea ("arm64: mte: system register definitions") Cc: Catalin Marinas <catalin.marinas@arm.com> Cc: Will Deacon <will@kernel.org> Signed-off-by: Vincenzo Frascino <vincenzo.frascino@arm.com> Acked-by: Catalin Marinas <catalin.marinas@arm.com> Link: https://lore.kernel.org/r/20201130170709.22309-1-vincenzo.frascino@arm.com Signed-off-by: Will Deacon <will@kernel.org> --- arch/arm64/include/asm/sysreg.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 174817ba119c..c8ab9900293f 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -983,7 +983,7 @@ #define SYS_TFSR_EL1_TF0_SHIFT 0 #define SYS_TFSR_EL1_TF1_SHIFT 1 #define SYS_TFSR_EL1_TF0 (UL(1) << SYS_TFSR_EL1_TF0_SHIFT) -#define SYS_TFSR_EL1_TF1 (UK(2) << SYS_TFSR_EL1_TF1_SHIFT) +#define SYS_TFSR_EL1_TF1 (UL(1) << SYS_TFSR_EL1_TF1_SHIFT) /* Safe value for MPIDR_EL1: Bit31:RES1, Bit30:U:0, Bit24:MT:0 */ #define SYS_MPIDR_SAFE_VAL (BIT(31)) -- cgit v1.2.3-70-g09d2 From 70e734fed740939704d1b3b76d6f2e6909698586 Mon Sep 17 00:00:00 2001 From: Robert Karszniewicz <r.karszniewicz@phytec.de> Date: Fri, 20 Nov 2020 18:51:24 +0100 Subject: ARM: imx: Use correct SRC base address Commit 4a4fb66119eb ("ARM: imx: Add missing of_node_put()") accidentally forgot to rename a variable, which caused the wrong address to be used and, in our case, the ULL getting falsely identified as ULZ. Fixes: 4a4fb66119eb ("ARM: imx: Add missing of_node_put()") Signed-off-by: Robert Karszniewicz <r.karszniewicz@phytec.de> Reviewed-by: Fabio Estevam <festevam@gmail.com> Signed-off-by: Shawn Guo <shawnguo@kernel.org> --- arch/arm/mach-imx/anatop.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/mach-imx/anatop.c b/arch/arm/mach-imx/anatop.c index d841bed8664d..7bb47eb3fc07 100644 --- a/arch/arm/mach-imx/anatop.c +++ b/arch/arm/mach-imx/anatop.c @@ -136,7 +136,7 @@ void __init imx_init_revision_from_anatop(void) src_np = of_find_compatible_node(NULL, NULL, "fsl,imx6ul-src"); - src_base = of_iomap(np, 0); + src_base = of_iomap(src_np, 0); of_node_put(src_np); WARN_ON(!src_base); sbmr2 = readl_relaxed(src_base + SRC_SBMR2); -- cgit v1.2.3-70-g09d2 From 58d6bca5efc73235b0f84c0d53321737177c651e Mon Sep 17 00:00:00 2001 From: Fabio Estevam <festevam@gmail.com> Date: Mon, 30 Nov 2020 17:54:47 -0300 Subject: ARM: dts: imx6qdl-wandboard-revd1: Remove PAD_GPIO_6 from enetgrp Since commit 8ad2d1dcce54 ("ARM: dts: imx6qdl-wandboard: Add OV5645 camera support") the PAD_GPIO_6 is used for providing the camera sensor clock. Remove it from the enetgrp to fix the following IOMXU conflict: [ 9.972414] imx6q-pinctrl 20e0000.pinctrl: pin MX6Q_PAD_GPIO_6 already requested by 2188000.ethernet; cannot claim for 1-003c [ 9.983857] imx6q-pinctrl 20e0000.pinctrl: pin-140 (1-003c) status -22 [ 9.990514] imx6q-pinctrl 20e0000.pinctrl: could not request pin 140 (MX6Q_PAD_GPIO_6) from group ov5645grp on device 20e0000.pinctrl Fixes: 8ad2d1dcce54 ("ARM: dts: imx6qdl-wandboard: Add OV5645 camera support") Signed-off-by: Fabio Estevam <festevam@gmail.com> Signed-off-by: Shawn Guo <shawnguo@kernel.org> --- arch/arm/boot/dts/imx6qdl-wandboard-revd1.dtsi | 1 - 1 file changed, 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/imx6qdl-wandboard-revd1.dtsi b/arch/arm/boot/dts/imx6qdl-wandboard-revd1.dtsi index 93909796885a..b9b698f72b26 100644 --- a/arch/arm/boot/dts/imx6qdl-wandboard-revd1.dtsi +++ b/arch/arm/boot/dts/imx6qdl-wandboard-revd1.dtsi @@ -166,7 +166,6 @@ MX6QDL_PAD_RGMII_RD2__RGMII_RD2 0x1b030 MX6QDL_PAD_RGMII_RD3__RGMII_RD3 0x1b030 MX6QDL_PAD_RGMII_RX_CTL__RGMII_RX_CTL 0x1b030 - MX6QDL_PAD_GPIO_6__ENET_IRQ 0x000b1 >; }; -- cgit v1.2.3-70-g09d2 From 19ba8fb810c60b46869acc9f455613de454e0fca Mon Sep 17 00:00:00 2001 From: Bernd Bauer <bernd.bauer@anton-paar.com> Date: Thu, 26 Nov 2020 18:56:28 +0100 Subject: ARM: dts: imx6qdl-kontron-samx6i: fix I2C_PM scl pin Use the correct pin for the i2c scl signal else we can't access the SoM eeprom. Fixes: 2a51f9dae13d ("ARM: dts: imx6qdl-kontron-samx6i: Add iMX6-based Kontron SMARC-sAMX6i module") Signed-off-by: Bernd Bauer <bernd.bauer@anton-paar.com> [m.felsch@pengutronix.de: Adapt commit message] Signed-off-by: Marco Felsch <m.felsch@pengutronix.de> Signed-off-by: Shawn Guo <shawnguo@kernel.org> --- arch/arm/boot/dts/imx6qdl-kontron-samx6i.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/imx6qdl-kontron-samx6i.dtsi b/arch/arm/boot/dts/imx6qdl-kontron-samx6i.dtsi index 265f5f3dbff6..24f793ca2886 100644 --- a/arch/arm/boot/dts/imx6qdl-kontron-samx6i.dtsi +++ b/arch/arm/boot/dts/imx6qdl-kontron-samx6i.dtsi @@ -551,7 +551,7 @@ pinctrl_i2c3: i2c3grp { fsl,pins = < - MX6QDL_PAD_GPIO_3__I2C3_SCL 0x4001b8b1 + MX6QDL_PAD_GPIO_5__I2C3_SCL 0x4001b8b1 MX6QDL_PAD_GPIO_16__I2C3_SDA 0x4001b8b1 >; }; -- cgit v1.2.3-70-g09d2 From c2b111e59a7be1534bbd62b3f8f933f714c5ba71 Mon Sep 17 00:00:00 2001 From: Heinrich Schuchardt <xypron.glpk@gmx.de> Date: Sun, 29 Nov 2020 17:26:27 +0100 Subject: arm64: dts: allwinner: A64 Sopine: phy-mode rgmii-id Since commit bbc4d71d6354 ("net: phy: realtek: fix rtl8211e rx/tx delay config") iSCSI booting fails on the Pine A64 LTS. This patch changes the phy-mode to use internal delays both for RX and TX as has been done for other boards affected by the same commit. Fixes: bbc4d71d6354 ("net: phy: realtek: fix rtl8211e rx/tx delay config") Signed-off-by: Heinrich Schuchardt <xypron.glpk@gmx.de> Signed-off-by: Maxime Ripard <maxime@cerno.tech> Link: https://lore.kernel.org/r/20201129162627.1244808-1-xypron.glpk@gmx.de --- arch/arm64/boot/dts/allwinner/sun50i-a64-sopine-baseboard.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-sopine-baseboard.dts b/arch/arm64/boot/dts/allwinner/sun50i-a64-sopine-baseboard.dts index 9ebb9e07fae3..d4069749d721 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-a64-sopine-baseboard.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-sopine-baseboard.dts @@ -79,7 +79,7 @@ &emac { pinctrl-names = "default"; pinctrl-0 = <&rgmii_pins>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; phy-handle = <&ext_rgmii_phy>; phy-supply = <®_dc1sw>; status = "okay"; -- cgit v1.2.3-70-g09d2 From d0c6707ca4235b78d06bcd62f0e24fbeac3e6d10 Mon Sep 17 00:00:00 2001 From: Heinrich Schuchardt <xypron.glpk@gmx.de> Date: Sun, 29 Nov 2020 20:45:12 +0100 Subject: arm64: dts: allwinner: H5: NanoPi Neo Plus2: phy-mode rgmii-id Since commit bbc4d71d6354 ("net: phy: realtek: fix rtl8211e rx/tx delay config") network is broken on the NanoPi Neo Plus2. This patch changes the phy-mode to use internal delays both for RX and TX as has been done for other boards affected by the same commit. Fixes: bbc4d71d6354 ("net: phy: realtek: fix rtl8211e rx/tx delay config") Signed-off-by: Heinrich Schuchardt <xypron.glpk@gmx.de> Signed-off-by: Maxime Ripard <maxime@cerno.tech> Reviewed-by: Andrew Lunn <andrew@lunn.ch> Link: https://lore.kernel.org/r/20201129194512.1475586-1-xypron.glpk@gmx.de --- arch/arm64/boot/dts/allwinner/sun50i-h5-nanopi-neo-plus2.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h5-nanopi-neo-plus2.dts b/arch/arm64/boot/dts/allwinner/sun50i-h5-nanopi-neo-plus2.dts index 4f9ba53ffaae..9d93fe153689 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-h5-nanopi-neo-plus2.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-h5-nanopi-neo-plus2.dts @@ -96,7 +96,7 @@ pinctrl-0 = <&emac_rgmii_pins>; phy-supply = <®_gmac_3v3>; phy-handle = <&ext_rgmii_phy>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; status = "okay"; }; -- cgit v1.2.3-70-g09d2 From f54db39fbe40731c40aefdd3bc26e7d56d668c64 Mon Sep 17 00:00:00 2001 From: Greg Kurz <groug@kaod.org> Date: Mon, 30 Nov 2020 13:19:27 +0100 Subject: KVM: PPC: Book3S HV: XIVE: Fix vCPU id sanity check MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 062cfab7069f ("KVM: PPC: Book3S HV: XIVE: Make VP block size configurable") updated kvmppc_xive_vcpu_id_valid() in a way that allows userspace to trigger an assertion in skiboot and crash the host: [ 696.186248988,3] XIVE[ IC 08 ] eq_blk != vp_blk (0 vs. 1) for target 0x4300008c/0 [ 696.186314757,0] Assert fail: hw/xive.c:2370:0 [ 696.186342458,0] Aborting! xive-kvCPU 0043 Backtrace: S: 0000000031e2b8f0 R: 0000000030013840 .backtrace+0x48 S: 0000000031e2b990 R: 000000003001b2d0 ._abort+0x4c S: 0000000031e2ba10 R: 000000003001b34c .assert_fail+0x34 S: 0000000031e2ba90 R: 0000000030058984 .xive_eq_for_target.part.20+0xb0 S: 0000000031e2bb40 R: 0000000030059fdc .xive_setup_silent_gather+0x2c S: 0000000031e2bc20 R: 000000003005a334 .opal_xive_set_vp_info+0x124 S: 0000000031e2bd20 R: 00000000300051a4 opal_entry+0x134 --- OPAL call token: 0x8a caller R1: 0xc000001f28563850 --- XIVE maintains the interrupt context state of non-dispatched vCPUs in an internal VP structure. We allocate a bunch of those on startup to accommodate all possible vCPUs. Each VP has an id, that we derive from the vCPU id for efficiency: static inline u32 kvmppc_xive_vp(struct kvmppc_xive *xive, u32 server) { return xive->vp_base + kvmppc_pack_vcpu_id(xive->kvm, server); } The KVM XIVE device used to allocate KVM_MAX_VCPUS VPs. This was limitting the number of concurrent VMs because the VP space is limited on the HW. Since most of the time, VMs run with a lot less vCPUs, commit 062cfab7069f ("KVM: PPC: Book3S HV: XIVE: Make VP block size configurable") gave the possibility for userspace to tune the size of the VP block through the KVM_DEV_XIVE_NR_SERVERS attribute. The check in kvmppc_pack_vcpu_id() was changed from cpu < KVM_MAX_VCPUS * xive->kvm->arch.emul_smt_mode to cpu < xive->nr_servers * xive->kvm->arch.emul_smt_mode The previous check was based on the fact that the VP block had KVM_MAX_VCPUS entries and that kvmppc_pack_vcpu_id() guarantees that packed vCPU ids are below KVM_MAX_VCPUS. We've changed the size of the VP block, but kvmppc_pack_vcpu_id() has nothing to do with it and it certainly doesn't ensure that the packed vCPU ids are below xive->nr_servers. kvmppc_xive_vcpu_id_valid() might thus return true when the VM was configured with a non-standard VSMT mode, even if the packed vCPU id is higher than what we expect. We end up using an unallocated VP id, which confuses OPAL. The assert in OPAL is probably abusive and should be converted to a regular error that the kernel can handle, but we shouldn't really use broken VP ids in the first place. Fix kvmppc_xive_vcpu_id_valid() so that it checks the packed vCPU id is below xive->nr_servers, which is explicitly what we want. Fixes: 062cfab7069f ("KVM: PPC: Book3S HV: XIVE: Make VP block size configurable") Cc: stable@vger.kernel.org # v5.5+ Signed-off-by: Greg Kurz <groug@kaod.org> Reviewed-by: Cédric Le Goater <clg@kaod.org> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/160673876747.695514.1809676603724514920.stgit@bahia.lan --- arch/powerpc/kvm/book3s_xive.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c index 85215e79db42..a0ebc29f30b2 100644 --- a/arch/powerpc/kvm/book3s_xive.c +++ b/arch/powerpc/kvm/book3s_xive.c @@ -1214,12 +1214,9 @@ void kvmppc_xive_cleanup_vcpu(struct kvm_vcpu *vcpu) static bool kvmppc_xive_vcpu_id_valid(struct kvmppc_xive *xive, u32 cpu) { /* We have a block of xive->nr_servers VPs. We just need to check - * raw vCPU ids are below the expected limit for this guest's - * core stride ; kvmppc_pack_vcpu_id() will pack them down to an - * index that can be safely used to compute a VP id that belongs - * to the VP block. + * packed vCPU ids are below that. */ - return cpu < xive->nr_servers * xive->kvm->arch.emul_smt_mode; + return kvmppc_pack_vcpu_id(xive->kvm, cpu) < xive->nr_servers; } int kvmppc_xive_compute_vp_id(struct kvmppc_xive *xive, u32 cpu, u32 *vp) -- cgit v1.2.3-70-g09d2 From 59612b24f78a0b61fe078ec9dff2e48e9cec52c0 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor <natechancellor@gmail.com> Date: Thu, 19 Nov 2020 13:46:56 -0700 Subject: kbuild: Hoist '--orphan-handling' into Kconfig Currently, '--orphan-handling=warn' is spread out across four different architectures in their respective Makefiles, which makes it a little unruly to deal with in case it needs to be disabled for a specific linker version (in this case, ld.lld 10.0.1). To make it easier to control this, hoist this warning into Kconfig and the main Makefile so that disabling it is simpler, as the warning will only be enabled in a couple places (main Makefile and a couple of compressed boot folders that blow away LDFLAGS_vmlinx) and making it conditional is easier due to Kconfig syntax. One small additional benefit of this is saving a call to ld-option on incremental builds because we will have already evaluated it for CONFIG_LD_ORPHAN_WARN. To keep the list of supported architectures the same, introduce CONFIG_ARCH_WANT_LD_ORPHAN_WARN, which an architecture can select to gain this automatically after all of the sections are specified and size asserted. A special thanks to Kees Cook for the help text on this config. Link: https://github.com/ClangBuiltLinux/linux/issues/1187 Acked-by: Kees Cook <keescook@chromium.org> Acked-by: Michael Ellerman <mpe@ellerman.id.au> (powerpc) Reviewed-by: Nick Desaulniers <ndesaulniers@google.com> Tested-by: Nick Desaulniers <ndesaulniers@google.com> Signed-off-by: Nathan Chancellor <natechancellor@gmail.com> Signed-off-by: Masahiro Yamada <masahiroy@kernel.org> --- Makefile | 6 ++++++ arch/Kconfig | 9 +++++++++ arch/arm/Kconfig | 1 + arch/arm/Makefile | 4 ---- arch/arm/boot/compressed/Makefile | 4 +++- arch/arm64/Kconfig | 1 + arch/arm64/Makefile | 4 ---- arch/powerpc/Kconfig | 1 + arch/powerpc/Makefile | 1 - arch/x86/Kconfig | 1 + arch/x86/Makefile | 3 --- arch/x86/boot/compressed/Makefile | 4 +++- init/Kconfig | 5 +++++ 13 files changed, 30 insertions(+), 14 deletions(-) (limited to 'arch') diff --git a/Makefile b/Makefile index ae1592c1f5d6..8327725e5d76 100644 --- a/Makefile +++ b/Makefile @@ -986,6 +986,12 @@ ifeq ($(CONFIG_RELR),y) LDFLAGS_vmlinux += --pack-dyn-relocs=relr endif +# We never want expected sections to be placed heuristically by the +# linker. All sections should be explicitly named in the linker script. +ifdef CONFIG_LD_ORPHAN_WARN +LDFLAGS_vmlinux += --orphan-handling=warn +endif + # Align the bit size of userspace programs with the kernel KBUILD_USERCFLAGS += $(filter -m32 -m64 --target=%, $(KBUILD_CFLAGS)) KBUILD_USERLDFLAGS += $(filter -m32 -m64 --target=%, $(KBUILD_CFLAGS)) diff --git a/arch/Kconfig b/arch/Kconfig index 56b6ccc0e32d..ba4e966484ab 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -1028,6 +1028,15 @@ config HAVE_STATIC_CALL_INLINE bool depends on HAVE_STATIC_CALL +config ARCH_WANT_LD_ORPHAN_WARN + bool + help + An arch should select this symbol once all linker sections are explicitly + included, size-asserted, or discarded in the linker scripts. This is + important because we never want expected sections to be placed heuristically + by the linker, since the locations of such sections can change between linker + versions. + source "kernel/gcov/Kconfig" source "scripts/gcc-plugins/Kconfig" diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index fe2f17eb2b50..002e0cf025f5 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -35,6 +35,7 @@ config ARM select ARCH_USE_CMPXCHG_LOCKREF select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU select ARCH_WANT_IPC_PARSE_VERSION + select ARCH_WANT_LD_ORPHAN_WARN select BINFMT_FLAT_ARGVP_ENVP_ON_STACK select BUILDTIME_TABLE_SORT if MMU select CLONE_BACKWARDS diff --git a/arch/arm/Makefile b/arch/arm/Makefile index 4d76eab2b22d..e15f76ca2887 100644 --- a/arch/arm/Makefile +++ b/arch/arm/Makefile @@ -16,10 +16,6 @@ LDFLAGS_vmlinux += --be8 KBUILD_LDFLAGS_MODULE += --be8 endif -# We never want expected sections to be placed heuristically by the -# linker. All sections should be explicitly named in the linker script. -LDFLAGS_vmlinux += $(call ld-option, --orphan-handling=warn) - GZFLAGS :=-9 #KBUILD_CFLAGS +=-pipe diff --git a/arch/arm/boot/compressed/Makefile b/arch/arm/boot/compressed/Makefile index 47f001ca5499..e1567418a2b1 100644 --- a/arch/arm/boot/compressed/Makefile +++ b/arch/arm/boot/compressed/Makefile @@ -129,7 +129,9 @@ LDFLAGS_vmlinux += --no-undefined # Delete all temporary local symbols LDFLAGS_vmlinux += -X # Report orphan sections -LDFLAGS_vmlinux += $(call ld-option, --orphan-handling=warn) +ifdef CONFIG_LD_ORPHAN_WARN +LDFLAGS_vmlinux += --orphan-handling=warn +endif # Next argument is a linker script LDFLAGS_vmlinux += -T diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 1515f6f153a0..a6b5b7ef40ae 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -81,6 +81,7 @@ config ARM64 select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT select ARCH_WANT_FRAME_POINTERS select ARCH_WANT_HUGE_PMD_SHARE if ARM64_4K_PAGES || (ARM64_16K_PAGES && !ARM64_VA_BITS_36) + select ARCH_WANT_LD_ORPHAN_WARN select ARCH_HAS_UBSAN_SANITIZE_ALL select ARM_AMBA select ARM_ARCH_TIMER diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 5789c2d18d43..6a87d592bd00 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -28,10 +28,6 @@ LDFLAGS_vmlinux += --fix-cortex-a53-843419 endif endif -# We never want expected sections to be placed heuristically by the -# linker. All sections should be explicitly named in the linker script. -LDFLAGS_vmlinux += $(call ld-option, --orphan-handling=warn) - ifeq ($(CONFIG_ARM64_USE_LSE_ATOMICS), y) ifneq ($(CONFIG_ARM64_LSE_ATOMICS), y) $(warning LSE atomics not supported by binutils) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index e9f13fe08492..5181872f9452 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -152,6 +152,7 @@ config PPC select ARCH_USE_QUEUED_SPINLOCKS if PPC_QUEUED_SPINLOCKS select ARCH_WANT_IPC_PARSE_VERSION select ARCH_WANT_IRQS_OFF_ACTIVATE_MM + select ARCH_WANT_LD_ORPHAN_WARN select ARCH_WEAK_RELEASE_ACQUIRE select BINFMT_ELF select BUILDTIME_TABLE_SORT diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index a4d56f0a41d9..d9eb0da845e1 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -123,7 +123,6 @@ endif LDFLAGS_vmlinux-y := -Bstatic LDFLAGS_vmlinux-$(CONFIG_RELOCATABLE) := -pie LDFLAGS_vmlinux := $(LDFLAGS_vmlinux-y) -LDFLAGS_vmlinux += $(call ld-option,--orphan-handling=warn) ifdef CONFIG_PPC64 ifeq ($(call cc-option-yn,-mcmodel=medium),y) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index f6946b81f74a..fbf26e0f7a6a 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -100,6 +100,7 @@ config X86 select ARCH_WANT_DEFAULT_BPF_JIT if X86_64 select ARCH_WANTS_DYNAMIC_TASK_STRUCT select ARCH_WANT_HUGE_PMD_SHARE + select ARCH_WANT_LD_ORPHAN_WARN select ARCH_WANTS_THP_SWAP if X86_64 select BUILDTIME_TABLE_SORT select CLKEVT_I8253 diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 154259f18b8b..1bf21746f4ce 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -209,9 +209,6 @@ ifdef CONFIG_X86_64 LDFLAGS_vmlinux += -z max-page-size=0x200000 endif -# We never want expected sections to be placed heuristically by the -# linker. All sections should be explicitly named in the linker script. -LDFLAGS_vmlinux += $(call ld-option, --orphan-handling=warn) archscripts: scripts_basic $(Q)$(MAKE) $(build)=arch/x86/tools relocs diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index ee249088cbfe..40b8fd375d52 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -61,7 +61,9 @@ KBUILD_LDFLAGS += $(call ld-option,--no-ld-generated-unwind-info) # Compressed kernel should be built as PIE since it may be loaded at any # address by the bootloader. LDFLAGS_vmlinux := -pie $(call ld-option, --no-dynamic-linker) -LDFLAGS_vmlinux += $(call ld-option, --orphan-handling=warn) +ifdef CONFIG_LD_ORPHAN_WARN +LDFLAGS_vmlinux += --orphan-handling=warn +endif LDFLAGS_vmlinux += -T hostprogs := mkpiggy diff --git a/init/Kconfig b/init/Kconfig index c9446911cf41..92c58b45abb8 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1348,6 +1348,11 @@ config LD_DEAD_CODE_DATA_ELIMINATION present. This option is not well tested yet, so use at your own risk. +config LD_ORPHAN_WARN + def_bool y + depends on ARCH_WANT_LD_ORPHAN_WARN + depends on $(ld-option,--orphan-handling=warn) + config SYSCTL bool -- cgit v1.2.3-70-g09d2 From fae3a13d2a3d49a89391889808428cf1e72afbd7 Mon Sep 17 00:00:00 2001 From: Babu Moger <babu.moger@amd.com> Date: Mon, 30 Nov 2020 09:57:20 -0600 Subject: x86/resctrl: Fix AMD L3 QOS CDP enable/disable When the AMD QoS feature CDP (code and data prioritization) is enabled or disabled, the CDP bit in MSR 0000_0C81 is written on one of the CPUs in an L3 domain (core complex). That is not correct - the CDP bit needs to be updated on all the logical CPUs in the domain. This was not spelled out clearly in the spec earlier. The specification has been updated and the updated document, "AMD64 Technology Platform Quality of Service Extensions Publication # 56375 Revision: 1.02 Issue Date: October 2020" is available now. Refer the section: Code and Data Prioritization. Fix the issue by adding a new flag arch_has_per_cpu_cfg in rdt_cache data structure. The documentation can be obtained at: https://developer.amd.com/wp-content/resources/56375.pdf Link: https://bugzilla.kernel.org/show_bug.cgi?id=206537 [ bp: Massage commit message. ] Fixes: 4d05bf71f157 ("x86/resctrl: Introduce AMD QOS feature") Signed-off-by: Babu Moger <babu.moger@amd.com> Signed-off-by: Borislav Petkov <bp@suse.de> Reviewed-by: Reinette Chatre <reinette.chatre@intel.com> Link: https://lkml.kernel.org/r/160675180380.15628.3309402017215002347.stgit@bmoger-ubuntu --- arch/x86/kernel/cpu/resctrl/core.c | 4 ++++ arch/x86/kernel/cpu/resctrl/internal.h | 3 +++ arch/x86/kernel/cpu/resctrl/rdtgroup.c | 9 +++++++-- 3 files changed, 14 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c index e5f4ee8f4c3b..e8b5f1cf1ae8 100644 --- a/arch/x86/kernel/cpu/resctrl/core.c +++ b/arch/x86/kernel/cpu/resctrl/core.c @@ -570,6 +570,8 @@ static void domain_add_cpu(int cpu, struct rdt_resource *r) if (d) { cpumask_set_cpu(cpu, &d->cpu_mask); + if (r->cache.arch_has_per_cpu_cfg) + rdt_domain_reconfigure_cdp(r); return; } @@ -923,6 +925,7 @@ static __init void rdt_init_res_defs_intel(void) r->rid == RDT_RESOURCE_L2CODE) { r->cache.arch_has_sparse_bitmaps = false; r->cache.arch_has_empty_bitmaps = false; + r->cache.arch_has_per_cpu_cfg = false; } else if (r->rid == RDT_RESOURCE_MBA) { r->msr_base = MSR_IA32_MBA_THRTL_BASE; r->msr_update = mba_wrmsr_intel; @@ -943,6 +946,7 @@ static __init void rdt_init_res_defs_amd(void) r->rid == RDT_RESOURCE_L2CODE) { r->cache.arch_has_sparse_bitmaps = true; r->cache.arch_has_empty_bitmaps = true; + r->cache.arch_has_per_cpu_cfg = true; } else if (r->rid == RDT_RESOURCE_MBA) { r->msr_base = MSR_IA32_MBA_BW_BASE; r->msr_update = mba_wrmsr_amd; diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h index 80fa997fae60..f65d3c0dbc41 100644 --- a/arch/x86/kernel/cpu/resctrl/internal.h +++ b/arch/x86/kernel/cpu/resctrl/internal.h @@ -360,6 +360,8 @@ struct msr_param { * executing entities * @arch_has_sparse_bitmaps: True if a bitmap like f00f is valid. * @arch_has_empty_bitmaps: True if the '0' bitmap is valid. + * @arch_has_per_cpu_cfg: True if QOS_CFG register for this cache + * level has CPU scope. */ struct rdt_cache { unsigned int cbm_len; @@ -369,6 +371,7 @@ struct rdt_cache { unsigned int shareable_bits; bool arch_has_sparse_bitmaps; bool arch_has_empty_bitmaps; + bool arch_has_per_cpu_cfg; }; /** diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c index 6f4ca4bea625..f3418428682b 100644 --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c @@ -1909,8 +1909,13 @@ static int set_cache_qos_cfg(int level, bool enable) r_l = &rdt_resources_all[level]; list_for_each_entry(d, &r_l->domains, list) { - /* Pick one CPU from each domain instance to update MSR */ - cpumask_set_cpu(cpumask_any(&d->cpu_mask), cpu_mask); + if (r_l->cache.arch_has_per_cpu_cfg) + /* Pick all the CPUs in the domain instance */ + for_each_cpu(cpu, &d->cpu_mask) + cpumask_set_cpu(cpu, cpu_mask); + else + /* Pick one CPU from each domain instance to update MSR */ + cpumask_set_cpu(cpumask_any(&d->cpu_mask), cpu_mask); } cpu = get_cpu(); /* Update QOS_CFG MSR on this cpu if it's in cpu_mask. */ -- cgit v1.2.3-70-g09d2 From 5c646b7e1d8bcb12317426287c516dfa4c5171c2 Mon Sep 17 00:00:00 2001 From: Yanan Wang <wangyanan55@huawei.com> Date: Wed, 2 Dec 2020 04:10:32 +0800 Subject: KVM: arm64: Fix memory leak on stage2 update of a valid PTE When installing a new leaf PTE onto an invalid ptep, we need to get_page(ptep) to account for the new mapping. However, simply updating a valid PTE shouldn't result in any additional refcounting, as there is new mapping. This otherwise results in a page being forever wasted. Address this by fixing-up the refcount in stage2_map_walker_try_leaf() if the PTE was already valid, balancing out the later get_page() in stage2_map_walk_leaf(). Signed-off-by: Yanan Wang <wangyanan55@huawei.com> [maz: update commit message, add comment in the code] Signed-off-by: Marc Zyngier <maz@kernel.org> Acked-by: Will Deacon <will@kernel.org> Link: https://lore.kernel.org/r/20201201201034.116760-2-wangyanan55@huawei.com --- arch/arm64/kvm/hyp/pgtable.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'arch') diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index 0271b4a3b9fe..2beba1dc40ec 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -470,6 +470,15 @@ static bool stage2_map_walker_try_leaf(u64 addr, u64 end, u32 level, if (!kvm_block_mapping_supported(addr, end, phys, level)) return false; + /* + * If the PTE was already valid, drop the refcount on the table + * early, as it will be bumped-up again in stage2_map_walk_leaf(). + * This ensures that the refcount stays constant across a valid to + * valid PTE update. + */ + if (kvm_pte_valid(*ptep)) + put_page(virt_to_page(ptep)); + if (kvm_set_valid_leaf_pte(ptep, phys, data->attr, level)) goto out; -- cgit v1.2.3-70-g09d2 From 3a0b870e3448302ca2ba703bea1b79b61c3f33c6 Mon Sep 17 00:00:00 2001 From: Yanan Wang <wangyanan55@huawei.com> Date: Wed, 2 Dec 2020 04:10:33 +0800 Subject: KVM: arm64: Fix handling of merging tables into a block entry When dirty logging is enabled, we collapse block entries into tables as necessary. If dirty logging gets canceled, we can end-up merging tables back into block entries. When this happens, we must not only free the non-huge page-table pages but also invalidate all the TLB entries that can potentially cover the block. Otherwise, we end-up with multiple possible translations for the same physical page, which can legitimately result in a TLB conflict. To address this, replease the bogus invalidation by IPA with a full VM invalidation. Although this is pretty heavy handed, it happens very infrequently and saves a bunch of invalidations by IPA. Signed-off-by: Yanan Wang <wangyanan55@huawei.com> [maz: fixup commit message] Signed-off-by: Marc Zyngier <maz@kernel.org> Acked-by: Will Deacon <will@kernel.org> Link: https://lore.kernel.org/r/20201201201034.116760-3-wangyanan55@huawei.com --- arch/arm64/kvm/hyp/pgtable.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index 2beba1dc40ec..bdf8e55ed308 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -502,7 +502,13 @@ static int stage2_map_walk_table_pre(u64 addr, u64 end, u32 level, return 0; kvm_set_invalid_pte(ptep); - kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, data->mmu, addr, 0); + + /* + * Invalidate the whole stage-2, as we may have numerous leaf + * entries below us which would otherwise need invalidating + * individually. + */ + kvm_call_hyp(__kvm_tlb_flush_vmid, data->mmu); data->anchor = ptep; return 0; } -- cgit v1.2.3-70-g09d2 From 7d894834a305568a0168c55d4729216f5f8cb4e6 Mon Sep 17 00:00:00 2001 From: Yanan Wang <wangyanan55@huawei.com> Date: Wed, 2 Dec 2020 04:10:34 +0800 Subject: KVM: arm64: Add usage of stage 2 fault lookup level in user_mem_abort() If we get a FSC_PERM fault, just using (logging_active && writable) to determine calling kvm_pgtable_stage2_map(). There will be two more cases we should consider. (1) After logging_active is configged back to false from true. When we get a FSC_PERM fault with write_fault and adjustment of hugepage is needed, we should merge tables back to a block entry. This case is ignored by still calling kvm_pgtable_stage2_relax_perms(), which will lead to an endless loop and guest panic due to soft lockup. (2) We use (FSC_PERM && logging_active && writable) to determine collapsing a block entry into a table by calling kvm_pgtable_stage2_map(). But sometimes we may only need to relax permissions when trying to write to a page other than a block. In this condition,using kvm_pgtable_stage2_relax_perms() will be fine. The ISS filed bit[1:0] in ESR_EL2 regesiter indicates the stage2 lookup level at which a D-abort or I-abort occurred. By comparing granule of the fault lookup level with vma_pagesize, we can strictly distinguish conditions of calling kvm_pgtable_stage2_relax_perms() or kvm_pgtable_stage2_map(), and the above two cases will be well considered. Suggested-by: Keqian Zhu <zhukeqian1@huawei.com> Signed-off-by: Yanan Wang <wangyanan55@huawei.com> Signed-off-by: Marc Zyngier <maz@kernel.org> Acked-by: Will Deacon <will@kernel.org> Link: https://lore.kernel.org/r/20201201201034.116760-4-wangyanan55@huawei.com --- arch/arm64/include/asm/esr.h | 1 + arch/arm64/include/asm/kvm_emulate.h | 5 +++++ arch/arm64/kvm/mmu.c | 11 +++++++++-- 3 files changed, 15 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h index 22c81f1edda2..85a3e49f92f4 100644 --- a/arch/arm64/include/asm/esr.h +++ b/arch/arm64/include/asm/esr.h @@ -104,6 +104,7 @@ /* Shared ISS fault status code(IFSC/DFSC) for Data/Instruction aborts */ #define ESR_ELx_FSC (0x3F) #define ESR_ELx_FSC_TYPE (0x3C) +#define ESR_ELx_FSC_LEVEL (0x03) #define ESR_ELx_FSC_EXTABT (0x10) #define ESR_ELx_FSC_SERROR (0x11) #define ESR_ELx_FSC_ACCESS (0x08) diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 5ef2669ccd6c..00bc6f1234ba 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -350,6 +350,11 @@ static __always_inline u8 kvm_vcpu_trap_get_fault_type(const struct kvm_vcpu *vc return kvm_vcpu_get_esr(vcpu) & ESR_ELx_FSC_TYPE; } +static __always_inline u8 kvm_vcpu_trap_get_fault_level(const struct kvm_vcpu *vcpu) +{ + return kvm_vcpu_get_esr(vcpu) & ESR_ELx_FSC_LEVEL; +} + static __always_inline bool kvm_vcpu_abt_issea(const struct kvm_vcpu *vcpu) { switch (kvm_vcpu_trap_get_fault(vcpu)) { diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 1a01da9fdc99..75814a02d189 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -754,10 +754,12 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, gfn_t gfn; kvm_pfn_t pfn; bool logging_active = memslot_is_logging(memslot); - unsigned long vma_pagesize; + unsigned long fault_level = kvm_vcpu_trap_get_fault_level(vcpu); + unsigned long vma_pagesize, fault_granule; enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_R; struct kvm_pgtable *pgt; + fault_granule = 1UL << ARM64_HW_PGTABLE_LEVEL_SHIFT(fault_level); write_fault = kvm_is_write_fault(vcpu); exec_fault = kvm_vcpu_trap_is_exec_fault(vcpu); VM_BUG_ON(write_fault && exec_fault); @@ -896,7 +898,12 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, else if (cpus_have_const_cap(ARM64_HAS_CACHE_DIC)) prot |= KVM_PGTABLE_PROT_X; - if (fault_status == FSC_PERM && !(logging_active && writable)) { + /* + * Under the premise of getting a FSC_PERM fault, we just need to relax + * permissions only if vma_pagesize equals fault_granule. Otherwise, + * kvm_pgtable_stage2_map() should be called to change block size. + */ + if (fault_status == FSC_PERM && vma_pagesize == fault_granule) { ret = kvm_pgtable_stage2_relax_perms(pgt, fault_ipa, prot); } else { ret = kvm_pgtable_stage2_map(pgt, fault_ipa, vma_pagesize, -- cgit v1.2.3-70-g09d2 From a1ee28117077c3bf24e5ab6324c835eaab629c45 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin <npiggin@gmail.com> Date: Sat, 28 Nov 2020 17:07:21 +1000 Subject: powerpc/64s/powernv: Fix memory corruption when saving SLB entries on MCE This can be hit by an HPT guest running on an HPT host and bring down the host, so it's quite important to fix. Fixes: 7290f3b3d3e6 ("powerpc/64s/powernv: machine check dump SLB contents") Cc: stable@vger.kernel.org # v5.4+ Signed-off-by: Nicholas Piggin <npiggin@gmail.com> Acked-by: Mahesh Salgaonkar <mahesh@linux.ibm.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/20201128070728.825934-2-npiggin@gmail.com --- arch/powerpc/platforms/powernv/setup.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c index 46115231a3b2..4426a109ec2f 100644 --- a/arch/powerpc/platforms/powernv/setup.c +++ b/arch/powerpc/platforms/powernv/setup.c @@ -211,11 +211,16 @@ static void __init pnv_init(void) add_preferred_console("hvc", 0, NULL); if (!radix_enabled()) { + size_t size = sizeof(struct slb_entry) * mmu_slb_size; int i; /* Allocate per cpu area to save old slb contents during MCE */ - for_each_possible_cpu(i) - paca_ptrs[i]->mce_faulty_slbs = memblock_alloc_node(mmu_slb_size, __alignof__(*paca_ptrs[i]->mce_faulty_slbs), cpu_to_node(i)); + for_each_possible_cpu(i) { + paca_ptrs[i]->mce_faulty_slbs = + memblock_alloc_node(size, + __alignof__(struct slb_entry), + cpu_to_node(i)); + } } } -- cgit v1.2.3-70-g09d2 From a2bd4097b3ec242f4de4924db463a9c94530e03a Mon Sep 17 00:00:00 2001 From: Alexander Gordeev <agordeev@linux.ibm.com> Date: Thu, 26 Nov 2020 18:00:37 +0100 Subject: s390/pci: fix CPU address in MSI for directed IRQ The directed MSIs are delivered to CPUs whose address is written to the MSI message address. The current code assumes that a CPU logical number (as it is seen by the kernel) is also the CPU address. The above assumption is not correct, as the CPU address is rather the value returned by STAP instruction. That value does not necessarily match the kernel logical CPU number. Fixes: e979ce7bced2 ("s390/pci: provide support for CPU directed interrupts") Cc: <stable@vger.kernel.org> # v5.2+ Signed-off-by: Alexander Gordeev <agordeev@linux.ibm.com> Reviewed-by: Halil Pasic <pasic@linux.ibm.com> Reviewed-by: Niklas Schnelle <schnelle@linux.ibm.com> Signed-off-by: Niklas Schnelle <schnelle@linux.ibm.com> Signed-off-by: Heiko Carstens <hca@linux.ibm.com> --- arch/s390/pci/pci_irq.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/s390/pci/pci_irq.c b/arch/s390/pci/pci_irq.c index 743f257cf2cb..75217fb63d7b 100644 --- a/arch/s390/pci/pci_irq.c +++ b/arch/s390/pci/pci_irq.c @@ -103,9 +103,10 @@ static int zpci_set_irq_affinity(struct irq_data *data, const struct cpumask *de { struct msi_desc *entry = irq_get_msi_desc(data->irq); struct msi_msg msg = entry->msg; + int cpu_addr = smp_cpu_get_cpu_address(cpumask_first(dest)); msg.address_lo &= 0xff0000ff; - msg.address_lo |= (cpumask_first(dest) << 8); + msg.address_lo |= (cpu_addr << 8); pci_write_msi_msg(data->irq, &msg); return IRQ_SET_MASK_OK; @@ -238,6 +239,7 @@ int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) unsigned long bit; struct msi_desc *msi; struct msi_msg msg; + int cpu_addr; int rc, irq; zdev->aisb = -1UL; @@ -287,9 +289,15 @@ int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) handle_percpu_irq); msg.data = hwirq - bit; if (irq_delivery == DIRECTED) { + if (msi->affinity) + cpu = cpumask_first(&msi->affinity->mask); + else + cpu = 0; + cpu_addr = smp_cpu_get_cpu_address(cpu); + msg.address_lo = zdev->msi_addr & 0xff0000ff; - msg.address_lo |= msi->affinity ? - (cpumask_first(&msi->affinity->mask) << 8) : 0; + msg.address_lo |= (cpu_addr << 8); + for_each_possible_cpu(cpu) { airq_iv_set_data(zpci_ibv[cpu], hwirq, irq); } -- cgit v1.2.3-70-g09d2 From b1cae1f84a0f609a34ebcaa087fbecef32f69882 Mon Sep 17 00:00:00 2001 From: Heiko Carstens <hca@linux.ibm.com> Date: Wed, 2 Dec 2020 11:46:01 +0100 Subject: s390: fix irq state tracing With commit 58c644ba512c ("sched/idle: Fix arch_cpu_idle() vs tracing") common code calls arch_cpu_idle() with a lockdep state that tells irqs are on. This doesn't work very well for s390: psw_idle() will enable interrupts to wait for an interrupt. As soon as an interrupt occurs the interrupt handler will verify if the old context was psw_idle(). If that is the case the interrupt enablement bits in the old program status word will be cleared. A subsequent test in both the external as well as the io interrupt handler checks if in the old context interrupts were enabled. Due to the above patching of the old program status word it is assumed the old context had interrupts disabled, and therefore a call to TRACE_IRQS_OFF (aka trace_hardirqs_off_caller) is skipped. Which in turn makes lockdep incorrectly "think" that interrupts are enabled within the interrupt handler. Fix this by unconditionally calling TRACE_IRQS_OFF when entering interrupt handlers. Also call unconditionally TRACE_IRQS_ON when leaving interrupts handlers. This leaves the special psw_idle() case, which now returns with interrupts disabled, but has an "irqs on" lockdep state. So callers of psw_idle() must adjust the state on their own, if required. This is currently only __udelay_disabled(). Fixes: 58c644ba512c ("sched/idle: Fix arch_cpu_idle() vs tracing") Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org> Signed-off-by: Heiko Carstens <hca@linux.ibm.com> --- arch/s390/kernel/entry.S | 15 --------------- arch/s390/lib/delay.c | 5 ++--- 2 files changed, 2 insertions(+), 18 deletions(-) (limited to 'arch') diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 26bb0603c5a1..92beb1444644 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -763,12 +763,7 @@ ENTRY(io_int_handler) xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11) TSTMSK __LC_CPU_FLAGS,_CIF_IGNORE_IRQ jo .Lio_restore -#if IS_ENABLED(CONFIG_TRACE_IRQFLAGS) - tmhh %r8,0x300 - jz 1f TRACE_IRQS_OFF -1: -#endif xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) .Lio_loop: lgr %r2,%r11 # pass pointer to pt_regs @@ -791,12 +786,7 @@ ENTRY(io_int_handler) TSTMSK __LC_CPU_FLAGS,_CIF_WORK jnz .Lio_work .Lio_restore: -#if IS_ENABLED(CONFIG_TRACE_IRQFLAGS) - tm __PT_PSW(%r11),3 - jno 0f TRACE_IRQS_ON -0: -#endif mvc __LC_RETURN_PSW(16),__PT_PSW(%r11) tm __PT_PSW+1(%r11),0x01 # returning to user ? jno .Lio_exit_kernel @@ -976,12 +966,7 @@ ENTRY(ext_int_handler) xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11) TSTMSK __LC_CPU_FLAGS,_CIF_IGNORE_IRQ jo .Lio_restore -#if IS_ENABLED(CONFIG_TRACE_IRQFLAGS) - tmhh %r8,0x300 - jz 1f TRACE_IRQS_OFF -1: -#endif xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) lgr %r2,%r11 # pass pointer to pt_regs lghi %r3,EXT_INTERRUPT diff --git a/arch/s390/lib/delay.c b/arch/s390/lib/delay.c index daca7bad66de..8c0c68e7770e 100644 --- a/arch/s390/lib/delay.c +++ b/arch/s390/lib/delay.c @@ -33,7 +33,7 @@ EXPORT_SYMBOL(__delay); static void __udelay_disabled(unsigned long long usecs) { - unsigned long cr0, cr0_new, psw_mask, flags; + unsigned long cr0, cr0_new, psw_mask; struct s390_idle_data idle; u64 end; @@ -45,9 +45,8 @@ static void __udelay_disabled(unsigned long long usecs) psw_mask = __extract_psw() | PSW_MASK_EXT | PSW_MASK_WAIT; set_clock_comparator(end); set_cpu_flag(CIF_IGNORE_IRQ); - local_irq_save(flags); psw_idle(&idle, psw_mask); - local_irq_restore(flags); + trace_hardirqs_off(); clear_cpu_flag(CIF_IGNORE_IRQ); set_clock_comparator(S390_lowcore.clock_comparator); __ctl_load(cr0, 0, 0); -- cgit v1.2.3-70-g09d2 From 5debf02131227d39988e44adf5090fb796fa8466 Mon Sep 17 00:00:00 2001 From: Namhyung Kim <namhyung@kernel.org> Date: Thu, 26 Nov 2020 20:09:21 +0900 Subject: perf/x86/intel: Fix a warning on x86_pmu_stop() with large PEBS The commit 3966c3feca3f ("x86/perf/amd: Remove need to check "running" bit in NMI handler") introduced this. It seems x86_pmu_stop can be called recursively (like when it losts some samples) like below: x86_pmu_stop intel_pmu_disable_event (x86_pmu_disable) intel_pmu_pebs_disable intel_pmu_drain_pebs_nhm (x86_pmu_drain_pebs_buffer) x86_pmu_stop While commit 35d1ce6bec13 ("perf/x86/intel/ds: Fix x86_pmu_stop warning for large PEBS") fixed it for the normal cases, there's another path to call x86_pmu_stop() recursively when a PEBS error was detected (like two or more counters overflowed at the same time). Like in the Kan's previous fix, we can skip the interrupt accounting for large PEBS, so check the iregs which is set for PMI only. Fixes: 3966c3feca3f ("x86/perf/amd: Remove need to check "running" bit in NMI handler") Reported-by: John Sperbeck <jsperbeck@google.com> Suggested-by: Peter Zijlstra <peterz@infradead.org> Signed-off-by: Namhyung Kim <namhyung@kernel.org> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Link: https://lkml.kernel.org/r/20201126110922.317681-1-namhyung@kernel.org --- arch/x86/events/intel/ds.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index b47cc4226934..89dba588636e 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -1940,7 +1940,7 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, struct perf_sample_d if (error[bit]) { perf_log_lost_samples(event, error[bit]); - if (perf_event_account_interrupt(event)) + if (iregs && perf_event_account_interrupt(event)) x86_pmu_stop(event, 0); } -- cgit v1.2.3-70-g09d2 From fc17db8aa4c53cbd2d5469bb0521ea0f0a6dbb27 Mon Sep 17 00:00:00 2001 From: Stephane Eranian <eranian@google.com> Date: Thu, 26 Nov 2020 20:09:22 +0900 Subject: perf/x86/intel: Check PEBS status correctly The kernel cannot disambiguate when 2+ PEBS counters overflow at the same time. This is what the comment for this code suggests. However, I see the comparison is done with the unfiltered p->status which is a copy of IA32_PERF_GLOBAL_STATUS at the time of the sample. This register contains more than the PEBS counter overflow bits. It also includes many other bits which could also be set. Signed-off-by: Namhyung Kim <namhyung@kernel.org> Signed-off-by: Stephane Eranian <eranian@google.com> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Link: https://lkml.kernel.org/r/20201126110922.317681-2-namhyung@kernel.org --- arch/x86/events/intel/ds.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index 89dba588636e..485c5066f8b8 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -1916,7 +1916,7 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, struct perf_sample_d * that caused the PEBS record. It's called collision. * If collision happened, the record will be dropped. */ - if (p->status != (1ULL << bit)) { + if (pebs_status != (1ULL << bit)) { for_each_set_bit(i, (unsigned long *)&pebs_status, size) error[i]++; continue; -- cgit v1.2.3-70-g09d2 From 8dcc0e19dfbd73ad6b3172924d6da8f7f3f8b3b0 Mon Sep 17 00:00:00 2001 From: Mike Travis <mike.travis@hpe.com> Date: Thu, 3 Dec 2020 09:22:52 -0600 Subject: x86/platform/uv: Fix UV4 hub revision adjustment Currently, UV4 is incorrectly identified as UV4A and UV4A as UV5. Hub chip starts with revision 1, fix it. [ bp: Massage commit message. ] Fixes: 647128f1536e ("x86/platform/uv: Update UV MMRs for UV5") Signed-off-by: Mike Travis <mike.travis@hpe.com> Signed-off-by: Borislav Petkov <bp@suse.de> Reviewed-by: Steve Wahl <steve.wahl@hpe.com> Acked-by: Dimitri Sivanich <dimitri.sivanich@hpe.com> Link: https://lkml.kernel.org/r/20201203152252.371199-1-mike.travis@hpe.com --- arch/x86/kernel/apic/x2apic_uv_x.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 1b98f8c12b96..235f5cde06fc 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -161,7 +161,7 @@ static int __init early_set_hub_type(void) /* UV4/4A only have a revision difference */ case UV4_HUB_PART_NUMBER: uv_min_hub_revision_id = node_id.s.revision - + UV4_HUB_REVISION_BASE; + + UV4_HUB_REVISION_BASE - 1; uv_hub_type_set(UV4); if (uv_min_hub_revision_id == UV4A_HUB_REVISION_BASE) uv_hub_type_set(UV4|UV4A); -- cgit v1.2.3-70-g09d2 From a2b2d4bf5076832339762556b816eec58ca38f77 Mon Sep 17 00:00:00 2001 From: Jacob Xu <jacobhxu@google.com> Date: Thu, 3 Dec 2020 12:59:39 -0800 Subject: kvm: svm: de-allocate svm_cpu_data for all cpus in svm_cpu_uninit() The cpu arg for svm_cpu_uninit() was previously ignored resulting in the per cpu structure svm_cpu_data not being de-allocated for all cpus. Signed-off-by: Jacob Xu <jacobhxu@google.com> Message-Id: <20201203205939.1783969-1-jacobhxu@google.com> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> --- arch/x86/kvm/svm/svm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 79b3a564f1c9..da7eb4aaf44f 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -530,12 +530,12 @@ static int svm_hardware_enable(void) static void svm_cpu_uninit(int cpu) { - struct svm_cpu_data *sd = per_cpu(svm_data, raw_smp_processor_id()); + struct svm_cpu_data *sd = per_cpu(svm_data, cpu); if (!sd) return; - per_cpu(svm_data, raw_smp_processor_id()) = NULL; + per_cpu(svm_data, cpu) = NULL; kfree(sd->sev_vmcbs); __free_page(sd->save_area); kfree(sd); -- cgit v1.2.3-70-g09d2 From 339f5a7fb2d6350fdb11f067da5240fd97e4f284 Mon Sep 17 00:00:00 2001 From: Rick Edgecombe <rick.p.edgecombe@intel.com> Date: Thu, 3 Dec 2020 15:11:20 -0800 Subject: kvm: x86/mmu: Use cpuid to determine max gfn In the TDP MMU, use shadow_phys_bits to dermine the maximum possible GFN mapped in the guest for zapping operations. boot_cpu_data.x86_phys_bits may be reduced in the case of HW features that steal HPA bits for other purposes. However, this doesn't necessarily reduce GPA space that can be accessed via TDP. So zap based on a maximum gfn calculated with MAXPHYADDR retrieved from CPUID. This is already stored in shadow_phys_bits, so use it instead of x86_phys_bits. Fixes: faaf05b00aec ("kvm: x86/mmu: Support zapping SPTEs in the TDP MMU") Signed-off-by: Rick Edgecombe <rick.p.edgecombe@intel.com> Message-Id: <20201203231120.27307-1-rick.p.edgecombe@intel.com> Reviewed-by: Sean Christopherson <seanjc@google.com> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> --- arch/x86/kvm/mmu/tdp_mmu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c index ff28a5c6abd6..84c8f06bec26 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.c +++ b/arch/x86/kvm/mmu/tdp_mmu.c @@ -66,7 +66,7 @@ static bool zap_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root, void kvm_tdp_mmu_free_root(struct kvm *kvm, struct kvm_mmu_page *root) { - gfn_t max_gfn = 1ULL << (boot_cpu_data.x86_phys_bits - PAGE_SHIFT); + gfn_t max_gfn = 1ULL << (shadow_phys_bits - PAGE_SHIFT); lockdep_assert_held(&kvm->mmu_lock); @@ -456,7 +456,7 @@ bool kvm_tdp_mmu_zap_gfn_range(struct kvm *kvm, gfn_t start, gfn_t end) void kvm_tdp_mmu_zap_all(struct kvm *kvm) { - gfn_t max_gfn = 1ULL << (boot_cpu_data.x86_phys_bits - PAGE_SHIFT); + gfn_t max_gfn = 1ULL << (shadow_phys_bits - PAGE_SHIFT); bool flush; flush = kvm_tdp_mmu_zap_gfn_range(kvm, 0, max_gfn); -- cgit v1.2.3-70-g09d2 From 45c5775460f32ed8cdb7c16986ae1a2c254346b3 Mon Sep 17 00:00:00 2001 From: Linus Walleij <linus.walleij@linaro.org> Date: Mon, 30 Nov 2020 09:30:33 +0100 Subject: usb: ohci-omap: Fix descriptor conversion There were a bunch of issues with the patch converting the OMAP1 OSK board to use descriptors for controlling the USB host: - The chip label was incorrect - The GPIO offset was off-by-one - The code should use sleeping accessors This patch tries to fix all issues at the same time. Cc: Aaro Koskinen <aaro.koskinen@iki.fi> Reported-by: Aaro Koskinen <aaro.koskinen@iki.fi> Fixes: 15d157e87443 ("usb: ohci-omap: Convert to use GPIO descriptors") Signed-off-by: Linus Walleij <linus.walleij@linaro.org> Link: https://lore.kernel.org/r/20201130083033.29435-1-linus.walleij@linaro.org Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> --- arch/arm/mach-omap1/board-osk.c | 2 +- drivers/usb/host/ohci-omap.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/arm/mach-omap1/board-osk.c b/arch/arm/mach-omap1/board-osk.c index 144b9caa935c..a720259099ed 100644 --- a/arch/arm/mach-omap1/board-osk.c +++ b/arch/arm/mach-omap1/board-osk.c @@ -288,7 +288,7 @@ static struct gpiod_lookup_table osk_usb_gpio_table = { .dev_id = "ohci", .table = { /* Power GPIO on the I2C-attached TPS65010 */ - GPIO_LOOKUP("i2c-tps65010", 1, "power", GPIO_ACTIVE_HIGH), + GPIO_LOOKUP("tps65010", 0, "power", GPIO_ACTIVE_HIGH), GPIO_LOOKUP(OMAP_GPIO_LABEL, 9, "overcurrent", GPIO_ACTIVE_HIGH), }, diff --git a/drivers/usb/host/ohci-omap.c b/drivers/usb/host/ohci-omap.c index 9ccdf2c216b5..6374501ba139 100644 --- a/drivers/usb/host/ohci-omap.c +++ b/drivers/usb/host/ohci-omap.c @@ -91,14 +91,14 @@ static int omap_ohci_transceiver_power(struct ohci_omap_priv *priv, int on) | ((1 << 5/*usb1*/) | (1 << 3/*usb2*/)), INNOVATOR_FPGA_CAM_USB_CONTROL); else if (priv->power) - gpiod_set_value(priv->power, 0); + gpiod_set_value_cansleep(priv->power, 0); } else { if (machine_is_omap_innovator() && cpu_is_omap1510()) __raw_writeb(__raw_readb(INNOVATOR_FPGA_CAM_USB_CONTROL) & ~((1 << 5/*usb1*/) | (1 << 3/*usb2*/)), INNOVATOR_FPGA_CAM_USB_CONTROL); else if (priv->power) - gpiod_set_value(priv->power, 1); + gpiod_set_value_cansleep(priv->power, 1); } return 0; -- cgit v1.2.3-70-g09d2 From 4e9a5ae8df5b3365183150f6df49e49dece80d8c Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu <mhiramat@kernel.org> Date: Thu, 3 Dec 2020 13:50:37 +0900 Subject: x86/uprobes: Do not use prefixes.nbytes when looping over prefixes.bytes Since insn.prefixes.nbytes can be bigger than the size of insn.prefixes.bytes[] when a prefix is repeated, the proper check must be insn.prefixes.bytes[i] != 0 and i < 4 instead of using insn.prefixes.nbytes. Introduce a for_each_insn_prefix() macro for this purpose. Debugged by Kees Cook <keescook@chromium.org>. [ bp: Massage commit message, sync with the respective header in tools/ and drop "we". ] Fixes: 2b1444983508 ("uprobes, mm, x86: Add the ability to install and remove uprobes breakpoints") Reported-by: syzbot+9b64b619f10f19d19a7c@syzkaller.appspotmail.com Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org> Signed-off-by: Borislav Petkov <bp@suse.de> Reviewed-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com> Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/160697103739.3146288.7437620795200799020.stgit@devnote2 --- arch/x86/include/asm/insn.h | 15 +++++++++++++++ arch/x86/kernel/uprobes.c | 10 ++++++---- tools/arch/x86/include/asm/insn.h | 15 +++++++++++++++ 3 files changed, 36 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/insn.h b/arch/x86/include/asm/insn.h index 5c1ae3eff9d4..a8c3d284fa46 100644 --- a/arch/x86/include/asm/insn.h +++ b/arch/x86/include/asm/insn.h @@ -201,6 +201,21 @@ static inline int insn_offset_immediate(struct insn *insn) return insn_offset_displacement(insn) + insn->displacement.nbytes; } +/** + * for_each_insn_prefix() -- Iterate prefixes in the instruction + * @insn: Pointer to struct insn. + * @idx: Index storage. + * @prefix: Prefix byte. + * + * Iterate prefix bytes of given @insn. Each prefix byte is stored in @prefix + * and the index is stored in @idx (note that this @idx is just for a cursor, + * do not change it.) + * Since prefixes.nbytes can be bigger than 4 if some prefixes + * are repeated, it cannot be used for looping over the prefixes. + */ +#define for_each_insn_prefix(insn, idx, prefix) \ + for (idx = 0; idx < ARRAY_SIZE(insn->prefixes.bytes) && (prefix = insn->prefixes.bytes[idx]) != 0; idx++) + #define POP_SS_OPCODE 0x1f #define MOV_SREG_OPCODE 0x8e diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c index 3fdaa042823d..138bdb1fd136 100644 --- a/arch/x86/kernel/uprobes.c +++ b/arch/x86/kernel/uprobes.c @@ -255,12 +255,13 @@ static volatile u32 good_2byte_insns[256 / 32] = { static bool is_prefix_bad(struct insn *insn) { + insn_byte_t p; int i; - for (i = 0; i < insn->prefixes.nbytes; i++) { + for_each_insn_prefix(insn, i, p) { insn_attr_t attr; - attr = inat_get_opcode_attribute(insn->prefixes.bytes[i]); + attr = inat_get_opcode_attribute(p); switch (attr) { case INAT_MAKE_PREFIX(INAT_PFX_ES): case INAT_MAKE_PREFIX(INAT_PFX_CS): @@ -715,6 +716,7 @@ static const struct uprobe_xol_ops push_xol_ops = { static int branch_setup_xol_ops(struct arch_uprobe *auprobe, struct insn *insn) { u8 opc1 = OPCODE1(insn); + insn_byte_t p; int i; switch (opc1) { @@ -746,8 +748,8 @@ static int branch_setup_xol_ops(struct arch_uprobe *auprobe, struct insn *insn) * Intel and AMD behavior differ in 64-bit mode: Intel ignores 66 prefix. * No one uses these insns, reject any branch insns with such prefix. */ - for (i = 0; i < insn->prefixes.nbytes; i++) { - if (insn->prefixes.bytes[i] == 0x66) + for_each_insn_prefix(insn, i, p) { + if (p == 0x66) return -ENOTSUPP; } diff --git a/tools/arch/x86/include/asm/insn.h b/tools/arch/x86/include/asm/insn.h index 568854b14d0a..52c6262e6bfd 100644 --- a/tools/arch/x86/include/asm/insn.h +++ b/tools/arch/x86/include/asm/insn.h @@ -201,6 +201,21 @@ static inline int insn_offset_immediate(struct insn *insn) return insn_offset_displacement(insn) + insn->displacement.nbytes; } +/** + * for_each_insn_prefix() -- Iterate prefixes in the instruction + * @insn: Pointer to struct insn. + * @idx: Index storage. + * @prefix: Prefix byte. + * + * Iterate prefix bytes of given @insn. Each prefix byte is stored in @prefix + * and the index is stored in @idx (note that this @idx is just for a cursor, + * do not change it.) + * Since prefixes.nbytes can be bigger than 4 if some prefixes + * are repeated, it cannot be used for looping over the prefixes. + */ +#define for_each_insn_prefix(insn, idx, prefix) \ + for (idx = 0; idx < ARRAY_SIZE(insn->prefixes.bytes) && (prefix = insn->prefixes.bytes[idx]) != 0; idx++) + #define POP_SS_OPCODE 0x1f #define MOV_SREG_OPCODE 0x8e -- cgit v1.2.3-70-g09d2 From 12cb908a11b2544b5f53e9af856e6b6a90ed5533 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu <mhiramat@kernel.org> Date: Thu, 3 Dec 2020 13:50:50 +0900 Subject: x86/insn-eval: Use new for_each_insn_prefix() macro to loop over prefixes bytes Since insn.prefixes.nbytes can be bigger than the size of insn.prefixes.bytes[] when a prefix is repeated, the proper check must be insn.prefixes.bytes[i] != 0 and i < 4 instead of using insn.prefixes.nbytes. Use the new for_each_insn_prefix() macro which does it correctly. Debugged by Kees Cook <keescook@chromium.org>. [ bp: Massage commit message. ] Fixes: 32d0b95300db ("x86/insn-eval: Add utility functions to get segment selector") Reported-by: syzbot+9b64b619f10f19d19a7c@syzkaller.appspotmail.com Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org> Signed-off-by: Borislav Petkov <bp@suse.de> Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/160697104969.3146288.16329307586428270032.stgit@devnote2 --- arch/x86/lib/insn-eval.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/x86/lib/insn-eval.c b/arch/x86/lib/insn-eval.c index 58f7fb95c7f4..4229950a5d78 100644 --- a/arch/x86/lib/insn-eval.c +++ b/arch/x86/lib/insn-eval.c @@ -63,13 +63,12 @@ static bool is_string_insn(struct insn *insn) */ bool insn_has_rep_prefix(struct insn *insn) { + insn_byte_t p; int i; insn_get_prefixes(insn); - for (i = 0; i < insn->prefixes.nbytes; i++) { - insn_byte_t p = insn->prefixes.bytes[i]; - + for_each_insn_prefix(insn, i, p) { if (p == 0xf2 || p == 0xf3) return true; } @@ -95,14 +94,15 @@ static int get_seg_reg_override_idx(struct insn *insn) { int idx = INAT_SEG_REG_DEFAULT; int num_overrides = 0, i; + insn_byte_t p; insn_get_prefixes(insn); /* Look for any segment override prefixes. */ - for (i = 0; i < insn->prefixes.nbytes; i++) { + for_each_insn_prefix(insn, i, p) { insn_attr_t attr; - attr = inat_get_opcode_attribute(insn->prefixes.bytes[i]); + attr = inat_get_opcode_attribute(p); switch (attr) { case INAT_MAKE_PREFIX(INAT_PFX_CS): idx = INAT_SEG_REG_CS; -- cgit v1.2.3-70-g09d2 From 84da009f06e60cf59d5e861f8e2101d2d3885517 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu <mhiramat@kernel.org> Date: Thu, 3 Dec 2020 13:51:01 +0900 Subject: x86/sev-es: Use new for_each_insn_prefix() macro to loop over prefixes bytes Since insn.prefixes.nbytes can be bigger than the size of insn.prefixes.bytes[] when a prefix is repeated, the proper check must be: insn.prefixes.bytes[i] != 0 and i < 4 instead of using insn.prefixes.nbytes. Use the new for_each_insn_prefix() macro which does it correctly. Debugged by Kees Cook <keescook@chromium.org>. [ bp: Massage commit message. ] Fixes: 25189d08e516 ("x86/sev-es: Add support for handling IOIO exceptions") Reported-by: syzbot+9b64b619f10f19d19a7c@syzkaller.appspotmail.com Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org> Signed-off-by: Borislav Petkov <bp@suse.de> Link: https://lkml.kernel.org/r/160697106089.3146288.2052422845039649176.stgit@devnote2 --- arch/x86/boot/compressed/sev-es.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/boot/compressed/sev-es.c b/arch/x86/boot/compressed/sev-es.c index 954cb2702e23..27826c265aab 100644 --- a/arch/x86/boot/compressed/sev-es.c +++ b/arch/x86/boot/compressed/sev-es.c @@ -32,13 +32,12 @@ struct ghcb *boot_ghcb; */ static bool insn_has_rep_prefix(struct insn *insn) { + insn_byte_t p; int i; insn_get_prefixes(insn); - for (i = 0; i < insn->prefixes.nbytes; i++) { - insn_byte_t p = insn->prefixes.bytes[i]; - + for_each_insn_prefix(insn, i, p) { if (p == 0xf2 || p == 0xf3) return true; } -- cgit v1.2.3-70-g09d2 From e91d8d78237de8d7120c320b3645b7100848f24d Mon Sep 17 00:00:00 2001 From: Minchan Kim <minchan@kernel.org> Date: Sat, 5 Dec 2020 22:14:51 -0800 Subject: mm/zsmalloc.c: drop ZSMALLOC_PGTABLE_MAPPING While I was doing zram testing, I found sometimes decompression failed since the compression buffer was corrupted. With investigation, I found below commit calls cond_resched unconditionally so it could make a problem in atomic context if the task is reschedule. BUG: sleeping function called from invalid context at mm/vmalloc.c:108 in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 946, name: memhog 3 locks held by memhog/946: #0: ffff9d01d4b193e8 (&mm->mmap_lock#2){++++}-{4:4}, at: __mm_populate+0x103/0x160 #1: ffffffffa3d53de0 (fs_reclaim){+.+.}-{0:0}, at: __alloc_pages_slowpath.constprop.0+0xa98/0x1160 #2: ffff9d01d56b8110 (&zspage->lock){.+.+}-{3:3}, at: zs_map_object+0x8e/0x1f0 CPU: 0 PID: 946 Comm: memhog Not tainted 5.9.3-00011-gc5bfc0287345-dirty #316 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.13.0-1 04/01/2014 Call Trace: unmap_kernel_range_noflush+0x2eb/0x350 unmap_kernel_range+0x14/0x30 zs_unmap_object+0xd5/0xe0 zram_bvec_rw.isra.0+0x38c/0x8e0 zram_rw_page+0x90/0x101 bdev_write_page+0x92/0xe0 __swap_writepage+0x94/0x4a0 pageout+0xe3/0x3a0 shrink_page_list+0xb94/0xd60 shrink_inactive_list+0x158/0x460 We can fix this by removing the ZSMALLOC_PGTABLE_MAPPING feature (which contains the offending calling code) from zsmalloc. Even though this option showed some amount improvement(e.g., 30%) in some arm32 platforms, it has been headache to maintain since it have abused APIs[1](e.g., unmap_kernel_range in atomic context). Since we are approaching to deprecate 32bit machines and already made the config option available for only builtin build since v5.8, lastly it has been not default option in zsmalloc, it's time to drop the option for better maintenance. [1] http://lore.kernel.org/linux-mm/20201105170249.387069-1-minchan@kernel.org Fixes: e47110e90584 ("mm/vunmap: add cond_resched() in vunmap_pmd_range") Signed-off-by: Minchan Kim <minchan@kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Reviewed-by: Sergey Senozhatsky <sergey.senozhatsky@gmail.com> Cc: Tony Lindgren <tony@atomide.com> Cc: Christoph Hellwig <hch@infradead.org> Cc: Harish Sriram <harish@linux.ibm.com> Cc: Uladzislau Rezki <urezki@gmail.com> Cc: <stable@vger.kernel.org> Link: https://lkml.kernel.org/r/20201117202916.GA3856507@google.com Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> --- arch/arm/configs/omap2plus_defconfig | 1 - include/linux/zsmalloc.h | 1 - mm/Kconfig | 13 --------- mm/zsmalloc.c | 54 ------------------------------------ 4 files changed, 69 deletions(-) (limited to 'arch') diff --git a/arch/arm/configs/omap2plus_defconfig b/arch/arm/configs/omap2plus_defconfig index 34793aabdb65..58df9fd79a76 100644 --- a/arch/arm/configs/omap2plus_defconfig +++ b/arch/arm/configs/omap2plus_defconfig @@ -81,7 +81,6 @@ CONFIG_PARTITION_ADVANCED=y CONFIG_BINFMT_MISC=y CONFIG_CMA=y CONFIG_ZSMALLOC=m -CONFIG_ZSMALLOC_PGTABLE_MAPPING=y CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y diff --git a/include/linux/zsmalloc.h b/include/linux/zsmalloc.h index 0fdbf653b173..4807ca4d52e0 100644 --- a/include/linux/zsmalloc.h +++ b/include/linux/zsmalloc.h @@ -20,7 +20,6 @@ * zsmalloc mapping modes * * NOTE: These only make a difference when a mapped object spans pages. - * They also have no effect when ZSMALLOC_PGTABLE_MAPPING is selected. */ enum zs_mapmode { ZS_MM_RW, /* normal read-write mapping */ diff --git a/mm/Kconfig b/mm/Kconfig index d42423f884a7..390165ffbb0f 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -707,19 +707,6 @@ config ZSMALLOC returned by an alloc(). This handle must be mapped in order to access the allocated space. -config ZSMALLOC_PGTABLE_MAPPING - bool "Use page table mapping to access object in zsmalloc" - depends on ZSMALLOC=y - help - By default, zsmalloc uses a copy-based object mapping method to - access allocations that span two pages. However, if a particular - architecture (ex, ARM) performs VM mapping faster than copying, - then you should select this. This causes zsmalloc to use page table - mapping rather than copying for object mapping. - - You can check speed with zsmalloc benchmark: - https://github.com/spartacus06/zsmapbench - config ZSMALLOC_STAT bool "Export zsmalloc statistics" depends on ZSMALLOC diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c index 918c7b019b3d..cdfaaadea8ff 100644 --- a/mm/zsmalloc.c +++ b/mm/zsmalloc.c @@ -293,11 +293,7 @@ struct zspage { }; struct mapping_area { -#ifdef CONFIG_ZSMALLOC_PGTABLE_MAPPING - struct vm_struct *vm; /* vm area for mapping object that span pages */ -#else char *vm_buf; /* copy buffer for objects that span pages */ -#endif char *vm_addr; /* address of kmap_atomic()'ed pages */ enum zs_mapmode vm_mm; /* mapping mode */ }; @@ -1113,54 +1109,6 @@ static struct zspage *find_get_zspage(struct size_class *class) return zspage; } -#ifdef CONFIG_ZSMALLOC_PGTABLE_MAPPING -static inline int __zs_cpu_up(struct mapping_area *area) -{ - /* - * Make sure we don't leak memory if a cpu UP notification - * and zs_init() race and both call zs_cpu_up() on the same cpu - */ - if (area->vm) - return 0; - area->vm = get_vm_area(PAGE_SIZE * 2, 0); - if (!area->vm) - return -ENOMEM; - - /* - * Populate ptes in advance to avoid pte allocation with GFP_KERNEL - * in non-preemtible context of zs_map_object. - */ - return apply_to_page_range(&init_mm, (unsigned long)area->vm->addr, - PAGE_SIZE * 2, NULL, NULL); -} - -static inline void __zs_cpu_down(struct mapping_area *area) -{ - if (area->vm) - free_vm_area(area->vm); - area->vm = NULL; -} - -static inline void *__zs_map_object(struct mapping_area *area, - struct page *pages[2], int off, int size) -{ - unsigned long addr = (unsigned long)area->vm->addr; - - BUG_ON(map_kernel_range(addr, PAGE_SIZE * 2, PAGE_KERNEL, pages) < 0); - area->vm_addr = area->vm->addr; - return area->vm_addr + off; -} - -static inline void __zs_unmap_object(struct mapping_area *area, - struct page *pages[2], int off, int size) -{ - unsigned long addr = (unsigned long)area->vm_addr; - - unmap_kernel_range(addr, PAGE_SIZE * 2); -} - -#else /* CONFIG_ZSMALLOC_PGTABLE_MAPPING */ - static inline int __zs_cpu_up(struct mapping_area *area) { /* @@ -1241,8 +1189,6 @@ out: pagefault_enable(); } -#endif /* CONFIG_ZSMALLOC_PGTABLE_MAPPING */ - static int zs_cpu_prepare(unsigned int cpu) { struct mapping_area *area; -- cgit v1.2.3-70-g09d2 From 9280f726097b436c8c907825131cd346d7eb0c0f Mon Sep 17 00:00:00 2001 From: Arnd Bergmann <arnd@arndb.de> Date: Fri, 4 Dec 2020 00:18:40 +0100 Subject: ARM: keystone: remove SECTION_SIZE_BITS/MAX_PHYSMEM_BITS These definitions are evidently left over from the days when sparsemem settings were platform specific. This was no longer the case when the platform got merged. There was no warning in the past, but now the asm/sparsemem.h header ends up being included indirectly, causing this warning: In file included from /git/arm-soc/arch/arm/mach-keystone/keystone.c:24: arch/arm/mach-keystone/memory.h:10:9: warning: 'SECTION_SIZE_BITS' macro redefined [-Wmacro-redefined] #define SECTION_SIZE_BITS 34 ^ arch/arm/include/asm/sparsemem.h:23:9: note: previous definition is here #define SECTION_SIZE_BITS 28 ^ Clearly the definitions never had any effect here, so remove them. Signed-off-by: Arnd Bergmann <arnd@arndb.de> Link: https://lore.kernel.org/r/20201203231847.1484900-1-arnd@kernel.org' Signed-off-by: Arnd Bergmann <arnd@arndb.de> --- arch/arm/mach-keystone/memory.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'arch') diff --git a/arch/arm/mach-keystone/memory.h b/arch/arm/mach-keystone/memory.h index 9147565d0581..1b9ed1271e05 100644 --- a/arch/arm/mach-keystone/memory.h +++ b/arch/arm/mach-keystone/memory.h @@ -6,9 +6,6 @@ #ifndef __MEMORY_H #define __MEMORY_H -#define MAX_PHYSMEM_BITS 36 -#define SECTION_SIZE_BITS 34 - #define KEYSTONE_LOW_PHYS_START 0x80000000ULL #define KEYSTONE_LOW_PHYS_SIZE 0x80000000ULL /* 2G */ #define KEYSTONE_LOW_PHYS_END (KEYSTONE_LOW_PHYS_START + \ -- cgit v1.2.3-70-g09d2 From 5eedf9fe8db23313df104576845cec5f481b9b60 Mon Sep 17 00:00:00 2001 From: Christophe Leroy <christophe.leroy@csgroup.eu> Date: Mon, 7 Dec 2020 16:58:01 +0000 Subject: powerpc/mm: Fix KUAP warning by providing copy_from_kernel_nofault_allowed() Since commit c33165253492 ("powerpc: use non-set_fs based maccess routines"), userspace access is not granted anymore when using copy_from_kernel_nofault() However, kthread_probe_data() uses copy_from_kernel_nofault() to check validity of pointers. When the pointer is NULL, it points to userspace, leading to a KUAP fault and triggering the following big hammer warning many times when you request a sysrq "show task": [ 1117.202054] ------------[ cut here ]------------ [ 1117.202102] Bug: fault blocked by AP register ! [ 1117.202261] WARNING: CPU: 0 PID: 377 at arch/powerpc/include/asm/nohash/32/kup-8xx.h:66 do_page_fault+0x4a8/0x5ec [ 1117.202310] Modules linked in: [ 1117.202428] CPU: 0 PID: 377 Comm: sh Tainted: G W 5.10.0-rc5-01340-g83f53be2de31-dirty #4175 [ 1117.202499] NIP: c0012048 LR: c0012048 CTR: 00000000 [ 1117.202573] REGS: cacdbb88 TRAP: 0700 Tainted: G W (5.10.0-rc5-01340-g83f53be2de31-dirty) [ 1117.202625] MSR: 00021032 <ME,IR,DR,RI> CR: 24082222 XER: 20000000 [ 1117.202899] [ 1117.202899] GPR00: c0012048 cacdbc40 c2929290 00000023 c092e554 00000001 c09865e8 c092e640 [ 1117.202899] GPR08: 00001032 00000000 00000000 00014efc 28082224 100d166a 100a0920 00000000 [ 1117.202899] GPR16: 100cac0c 100b0000 1080c3fc 1080d685 100d0000 100d0000 00000000 100a0900 [ 1117.202899] GPR24: 100d0000 c07892ec 00000000 c0921510 c21f4440 0000005c c0000000 cacdbc80 [ 1117.204362] NIP [c0012048] do_page_fault+0x4a8/0x5ec [ 1117.204461] LR [c0012048] do_page_fault+0x4a8/0x5ec [ 1117.204509] Call Trace: [ 1117.204609] [cacdbc40] [c0012048] do_page_fault+0x4a8/0x5ec (unreliable) [ 1117.204771] [cacdbc70] [c00112f0] handle_page_fault+0x8/0x34 [ 1117.204911] --- interrupt: 301 at copy_from_kernel_nofault+0x70/0x1c0 [ 1117.204979] NIP: c010dbec LR: c010dbac CTR: 00000001 [ 1117.205053] REGS: cacdbc80 TRAP: 0301 Tainted: G W (5.10.0-rc5-01340-g83f53be2de31-dirty) [ 1117.205104] MSR: 00009032 <EE,ME,IR,DR,RI> CR: 28082224 XER: 00000000 [ 1117.205416] DAR: 0000005c DSISR: c0000000 [ 1117.205416] GPR00: c0045948 cacdbd38 c2929290 00000001 00000017 00000017 00000027 0000000f [ 1117.205416] GPR08: c09926ec 00000000 00000000 3ffff000 24082224 [ 1117.206106] NIP [c010dbec] copy_from_kernel_nofault+0x70/0x1c0 [ 1117.206202] LR [c010dbac] copy_from_kernel_nofault+0x30/0x1c0 [ 1117.206258] --- interrupt: 301 [ 1117.206372] [cacdbd38] [c004bbb0] kthread_probe_data+0x44/0x70 (unreliable) [ 1117.206561] [cacdbd58] [c0045948] print_worker_info+0xe0/0x194 [ 1117.206717] [cacdbdb8] [c00548ac] sched_show_task+0x134/0x168 [ 1117.206851] [cacdbdd8] [c005a268] show_state_filter+0x70/0x100 [ 1117.206989] [cacdbe08] [c039baa0] sysrq_handle_showstate+0x14/0x24 [ 1117.207122] [cacdbe18] [c039bf18] __handle_sysrq+0xac/0x1d0 [ 1117.207257] [cacdbe48] [c039c0c0] write_sysrq_trigger+0x4c/0x74 [ 1117.207407] [cacdbe68] [c01fba48] proc_reg_write+0xb4/0x114 [ 1117.207550] [cacdbe88] [c0179968] vfs_write+0x12c/0x478 [ 1117.207686] [cacdbf08] [c0179e60] ksys_write+0x78/0x128 [ 1117.207826] [cacdbf38] [c00110d0] ret_from_syscall+0x0/0x34 [ 1117.207938] --- interrupt: c01 at 0xfd4e784 [ 1117.208008] NIP: 0fd4e784 LR: 0fe0f244 CTR: 10048d38 [ 1117.208083] REGS: cacdbf48 TRAP: 0c01 Tainted: G W (5.10.0-rc5-01340-g83f53be2de31-dirty) [ 1117.208134] MSR: 0000d032 <EE,PR,ME,IR,DR,RI> CR: 44002222 XER: 00000000 [ 1117.208470] [ 1117.208470] GPR00: 00000004 7fc34090 77bfb4e0 00000001 1080fa40 00000002 7400000f fefefeff [ 1117.208470] GPR08: 7f7f7f7f 10048d38 1080c414 7fc343c0 00000000 [ 1117.209104] NIP [0fd4e784] 0xfd4e784 [ 1117.209180] LR [0fe0f244] 0xfe0f244 [ 1117.209236] --- interrupt: c01 [ 1117.209274] Instruction dump: [ 1117.209353] 714a4000 418200f0 73ca0001 40820084 73ca0032 408200f8 73c90040 4082ff60 [ 1117.209727] 0fe00000 3c60c082 386399f4 48013b65 <0fe00000> 80010034 3860000b 7c0803a6 [ 1117.210102] ---[ end trace 1927c0323393af3e ]--- To avoid that, copy_from_kernel_nofault_allowed() is used to check whether the address is a valid kernel address. But the default version of it returns true for any address. Provide a powerpc version of copy_from_kernel_nofault_allowed() that returns false when the address is below TASK_USER_MAX, so that copy_from_kernel_nofault() will return -ERANGE. Fixes: c33165253492 ("powerpc: use non-set_fs based maccess routines") Reported-by: Qian Cai <qcai@redhat.com> Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/18bcb456d32a3e74f5ae241fd6f1580c092d07f5.1607360230.git.christophe.leroy@csgroup.eu --- arch/powerpc/mm/Makefile | 2 +- arch/powerpc/mm/maccess.c | 9 +++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) create mode 100644 arch/powerpc/mm/maccess.c (limited to 'arch') diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile index 5e147986400d..55b4a8bd408a 100644 --- a/arch/powerpc/mm/Makefile +++ b/arch/powerpc/mm/Makefile @@ -5,7 +5,7 @@ ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC) -obj-y := fault.o mem.o pgtable.o mmap.o \ +obj-y := fault.o mem.o pgtable.o mmap.o maccess.o \ init_$(BITS).o pgtable_$(BITS).o \ pgtable-frag.o ioremap.o ioremap_$(BITS).o \ init-common.o mmu_context.o drmem.o diff --git a/arch/powerpc/mm/maccess.c b/arch/powerpc/mm/maccess.c new file mode 100644 index 000000000000..fa9a7a718fc6 --- /dev/null +++ b/arch/powerpc/mm/maccess.c @@ -0,0 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include <linux/uaccess.h> +#include <linux/kernel.h> + +bool copy_from_kernel_nofault_allowed(const void *unsafe_src, size_t size) +{ + return is_kernel_addr((unsigned long)unsafe_src); +} -- cgit v1.2.3-70-g09d2 From 6220e48d9640538ff700f2e7d24c2f9277555fd6 Mon Sep 17 00:00:00 2001 From: Al Viro <viro@zeniv.linux.org.uk> Date: Tue, 8 Dec 2020 16:37:47 -0500 Subject: [regression fix] really dumb fuckup in sparc64 __csum_partial_copy() changes ~0U is -1, not 1 Reported-by: Anatoly Pugachev <matorola@gmail.com> Tested-by: Anatoly Pugachev <matorola@gmail.com> Fixes: fdf8bee96f9a "sparc64: propagate the calling convention changes down to __csum_partial_copy_...()" X-brown-paperbag: yes Signed-off-by: Al Viro <viro@zeniv.linux.org.uk> --- arch/sparc/lib/csum_copy.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/sparc/lib/csum_copy.S b/arch/sparc/lib/csum_copy.S index 0c0268e77155..d839956407a7 100644 --- a/arch/sparc/lib/csum_copy.S +++ b/arch/sparc/lib/csum_copy.S @@ -71,7 +71,7 @@ FUNC_NAME: /* %o0=src, %o1=dst, %o2=len */ LOAD(prefetch, %o0 + 0x000, #n_reads) xor %o0, %o1, %g1 - mov 1, %o3 + mov -1, %o3 clr %o4 andcc %g1, 0x3, %g0 bne,pn %icc, 95f -- cgit v1.2.3-70-g09d2 From a493d1ca1a03b532871f1da27f8dbda2b28b04c4 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski <luto@kernel.org> Date: Thu, 3 Dec 2020 21:07:03 -0800 Subject: x86/membarrier: Get rid of a dubious optimization sync_core_before_usermode() had an incorrect optimization. If the kernel returns from an interrupt, it can get to usermode without IRET. It just has to schedule to a different task in the same mm and do SYSRET. Fortunately, there were no callers of sync_core_before_usermode() that could have had in_irq() or in_nmi() equal to true, because it's only ever called from the scheduler. While at it, clarify a related comment. Fixes: 70216e18e519 ("membarrier: Provide core serializing command, *_SYNC_CORE") Signed-off-by: Andy Lutomirski <luto@kernel.org> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Reviewed-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com> Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/5afc7632be1422f91eaf7611aaaa1b5b8580a086.1607058304.git.luto@kernel.org --- arch/x86/include/asm/sync_core.h | 9 +++++---- arch/x86/mm/tlb.c | 10 ++++++++-- 2 files changed, 13 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/sync_core.h b/arch/x86/include/asm/sync_core.h index 0fd4a9dfb29c..ab7382f92aff 100644 --- a/arch/x86/include/asm/sync_core.h +++ b/arch/x86/include/asm/sync_core.h @@ -98,12 +98,13 @@ static inline void sync_core_before_usermode(void) /* With PTI, we unconditionally serialize before running user code. */ if (static_cpu_has(X86_FEATURE_PTI)) return; + /* - * Return from interrupt and NMI is done through iret, which is core - * serializing. + * Even if we're in an interrupt, we might reschedule before returning, + * in which case we could switch to a different thread in the same mm + * and return using SYSRET or SYSEXIT. Instead of trying to keep + * track of our need to sync the core, just sync right away. */ - if (in_irq() || in_nmi()) - return; sync_core(); } diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 11666ba19b62..569ac1d57f55 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -474,8 +474,14 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, /* * The membarrier system call requires a full memory barrier and * core serialization before returning to user-space, after - * storing to rq->curr. Writing to CR3 provides that full - * memory barrier and core serializing instruction. + * storing to rq->curr, when changing mm. This is because + * membarrier() sends IPIs to all CPUs that are in the target mm + * to make them issue memory barriers. However, if another CPU + * switches to/from the target mm concurrently with + * membarrier(), it can cause that CPU not to receive an IPI + * when it really should issue a memory barrier. Writing to CR3 + * provides that full memory barrier and core serializing + * instruction. */ if (real_prev == next) { VM_WARN_ON(this_cpu_read(cpu_tlbstate.ctxs[prev_asid].ctx_id) != -- cgit v1.2.3-70-g09d2 From 387270cb0b4035491c4812effd8b5af0e385a66c Mon Sep 17 00:00:00 2001 From: Zhen Lei <thunder.leizhen@huawei.com> Date: Mon, 7 Dec 2020 16:47:52 +0800 Subject: ARM: dts: mmp2-olpc-xo-1-75: clear the warnings when make dtbs The check_spi_bus_bridge() in scripts/dtc/checks.c requires that the node have "spi-slave" property must with "#address-cells = <0>" and "#size-cells = <0>". But currently both "#address-cells" and "#size-cells" properties are deleted, the corresponding default values are 2 and 1. As a result, the check fails and below warnings is displayed. arch/arm/boot/dts/mmp2.dtsi:472.23-480.6: Warning (spi_bus_bridge): \ /soc/apb@d4000000/spi@d4037000: incorrect #address-cells for SPI bus also defined at arch/arm/boot/dts/mmp2-olpc-xo-1-75.dts:225.7-237.3 arch/arm/boot/dts/mmp2.dtsi:472.23-480.6: Warning (spi_bus_bridge): \ /soc/apb@d4000000/spi@d4037000: incorrect #size-cells for SPI bus also defined at arch/arm/boot/dts/mmp2-olpc-xo-1-75.dts:225.7-237.3 arch/arm/boot/dts/mmp2-olpc-xo-1-75.dtb: Warning (spi_bus_reg): \ Failed prerequisite 'spi_bus_bridge' Because the value of "#size-cells" is already defined as zero in the node "ssp3: spi@d4037000" in arch/arm/boot/dts/mmp2.dtsi. So we only need to explicitly add "#address-cells = <0>" and keep "#size-cells" no change. Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com> Link: https://lore.kernel.org/r/20201207084752.1665-2-thunder.leizhen@huawei.com' Signed-off-by: Arnd Bergmann <arnd@arndb.de> --- arch/arm/boot/dts/mmp2-olpc-xo-1-75.dts | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/mmp2-olpc-xo-1-75.dts b/arch/arm/boot/dts/mmp2-olpc-xo-1-75.dts index adde62d6fce7..342304f5653a 100644 --- a/arch/arm/boot/dts/mmp2-olpc-xo-1-75.dts +++ b/arch/arm/boot/dts/mmp2-olpc-xo-1-75.dts @@ -223,8 +223,7 @@ }; &ssp3 { - /delete-property/ #address-cells; - /delete-property/ #size-cells; + #address-cells = <0>; spi-slave; status = "okay"; ready-gpios = <&gpio 125 GPIO_ACTIVE_HIGH>; -- cgit v1.2.3-70-g09d2 From 29ac40cbed2bc06fa218ca25d7f5e280d3d08a25 Mon Sep 17 00:00:00 2001 From: Arvind Sankar <nivedita@alum.mit.edu> Date: Wed, 11 Nov 2020 11:09:45 -0500 Subject: x86/mm/mem_encrypt: Fix definition of PMD_FLAGS_DEC_WP The PAT bit is in different locations for 4k and 2M/1G page table entries. Add a definition for _PAGE_LARGE_CACHE_MASK to represent the three caching bits (PWT, PCD, PAT), similar to _PAGE_CACHE_MASK for 4k pages, and use it in the definition of PMD_FLAGS_DEC_WP to get the correct PAT index for write-protected pages. Fixes: 6ebcb060713f ("x86/mm: Add support to encrypt the kernel in-place") Signed-off-by: Arvind Sankar <nivedita@alum.mit.edu> Signed-off-by: Borislav Petkov <bp@suse.de> Tested-by: Tom Lendacky <thomas.lendacky@amd.com> Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20201111160946.147341-1-nivedita@alum.mit.edu --- arch/x86/include/asm/pgtable_types.h | 1 + arch/x86/mm/mem_encrypt_identity.c | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index 816b31c68550..394757ee030a 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -155,6 +155,7 @@ enum page_cache_mode { #define _PAGE_ENC (_AT(pteval_t, sme_me_mask)) #define _PAGE_CACHE_MASK (_PAGE_PWT | _PAGE_PCD | _PAGE_PAT) +#define _PAGE_LARGE_CACHE_MASK (_PAGE_PWT | _PAGE_PCD | _PAGE_PAT_LARGE) #define _PAGE_NOCACHE (cachemode2protval(_PAGE_CACHE_MODE_UC)) #define _PAGE_CACHE_WP (cachemode2protval(_PAGE_CACHE_MODE_WP)) diff --git a/arch/x86/mm/mem_encrypt_identity.c b/arch/x86/mm/mem_encrypt_identity.c index 733b983f3a89..6c5eb6f3f14f 100644 --- a/arch/x86/mm/mem_encrypt_identity.c +++ b/arch/x86/mm/mem_encrypt_identity.c @@ -45,8 +45,8 @@ #define PMD_FLAGS_LARGE (__PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL) #define PMD_FLAGS_DEC PMD_FLAGS_LARGE -#define PMD_FLAGS_DEC_WP ((PMD_FLAGS_DEC & ~_PAGE_CACHE_MASK) | \ - (_PAGE_PAT | _PAGE_PWT)) +#define PMD_FLAGS_DEC_WP ((PMD_FLAGS_DEC & ~_PAGE_LARGE_CACHE_MASK) | \ + (_PAGE_PAT_LARGE | _PAGE_PWT)) #define PMD_FLAGS_ENC (PMD_FLAGS_LARGE | _PAGE_ENC) -- cgit v1.2.3-70-g09d2 From 06c5fe9b12dde1b62821f302f177c972bb1c81f9 Mon Sep 17 00:00:00 2001 From: Xiaochen Shen <xiaochen.shen@intel.com> Date: Fri, 4 Dec 2020 14:27:59 +0800 Subject: x86/resctrl: Fix incorrect local bandwidth when mba_sc is enabled The MBA software controller (mba_sc) is a feedback loop which periodically reads MBM counters and tries to restrict the bandwidth below a user-specified value. It tags along the MBM counter overflow handler to do the updates with 1s interval in mbm_update() and update_mba_bw(). The purpose of mbm_update() is to periodically read the MBM counters to make sure that the hardware counter doesn't wrap around more than once between user samplings. mbm_update() calls __mon_event_count() for local bandwidth updating when mba_sc is not enabled, but calls mbm_bw_count() instead when mba_sc is enabled. __mon_event_count() will not be called for local bandwidth updating in MBM counter overflow handler, but it is still called when reading MBM local bandwidth counter file 'mbm_local_bytes', the call path is as below: rdtgroup_mondata_show() mon_event_read() mon_event_count() __mon_event_count() In __mon_event_count(), m->chunks is updated by delta chunks which is calculated from previous MSR value (m->prev_msr) and current MSR value. When mba_sc is enabled, m->chunks is also updated in mbm_update() by mistake by the delta chunks which is calculated from m->prev_bw_msr instead of m->prev_msr. But m->chunks is not used in update_mba_bw() in the mba_sc feedback loop. When reading MBM local bandwidth counter file, m->chunks was changed unexpectedly by mbm_bw_count(). As a result, the incorrect local bandwidth counter which calculated from incorrect m->chunks is shown to the user. Fix this by removing incorrect m->chunks updating in mbm_bw_count() in MBM counter overflow handler, and always calling __mon_event_count() in mbm_update() to make sure that the hardware local bandwidth counter doesn't wrap around. Test steps: # Run workload with aggressive memory bandwidth (e.g., 10 GB/s) git clone https://github.com/intel/intel-cmt-cat && cd intel-cmt-cat && make ./tools/membw/membw -c 0 -b 10000 --read # Enable MBA software controller mount -t resctrl resctrl -o mba_MBps /sys/fs/resctrl # Create control group c1 mkdir /sys/fs/resctrl/c1 # Set MB throttle to 6 GB/s echo "MB:0=6000;1=6000" > /sys/fs/resctrl/c1/schemata # Write PID of the workload to tasks file echo `pidof membw` > /sys/fs/resctrl/c1/tasks # Read local bytes counters twice with 1s interval, the calculated # local bandwidth is not as expected (approaching to 6 GB/s): local_1=`cat /sys/fs/resctrl/c1/mon_data/mon_L3_00/mbm_local_bytes` sleep 1 local_2=`cat /sys/fs/resctrl/c1/mon_data/mon_L3_00/mbm_local_bytes` echo "local b/w (bytes/s):" `expr $local_2 - $local_1` Before fix: local b/w (bytes/s): 11076796416 After fix: local b/w (bytes/s): 5465014272 Fixes: ba0f26d8529c (x86/intel_rdt/mba_sc: Prepare for feedback loop) Signed-off-by: Xiaochen Shen <xiaochen.shen@intel.com> Signed-off-by: Borislav Petkov <bp@suse.de> Reviewed-by: Tony Luck <tony.luck@intel.com> Cc: <stable@vger.kernel.org> Link: https://lkml.kernel.org/r/1607063279-19437-1-git-send-email-xiaochen.shen@intel.com --- arch/x86/kernel/cpu/resctrl/monitor.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c index 54dffe574e67..a98519a3a2e6 100644 --- a/arch/x86/kernel/cpu/resctrl/monitor.c +++ b/arch/x86/kernel/cpu/resctrl/monitor.c @@ -279,7 +279,6 @@ static void mbm_bw_count(u32 rmid, struct rmid_read *rr) return; chunks = mbm_overflow_count(m->prev_bw_msr, tval, rr->r->mbm_width); - m->chunks += chunks; cur_bw = (chunks * r->mon_scale) >> 20; if (m->delta_comp) @@ -450,15 +449,14 @@ static void mbm_update(struct rdt_resource *r, struct rdt_domain *d, int rmid) } if (is_mbm_local_enabled()) { rr.evtid = QOS_L3_MBM_LOCAL_EVENT_ID; + __mon_event_count(rmid, &rr); /* * Call the MBA software controller only for the * control groups and when user has enabled * the software controller explicitly. */ - if (!is_mba_sc(NULL)) - __mon_event_count(rmid, &rr); - else + if (is_mba_sc(NULL)) mbm_bw_count(rmid, &rr); } } -- cgit v1.2.3-70-g09d2 From 190113b4c6531c8e09b31d5235f9b5175cbb0f72 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner <tglx@linutronix.de> Date: Thu, 10 Dec 2020 21:18:22 +0100 Subject: x86/apic/vector: Fix ordering in vector assignment Prarit reported that depending on the affinity setting the ' irq $N: Affinity broken due to vector space exhaustion.' message is showing up in dmesg, but the vector space on the CPUs in the affinity mask is definitely not exhausted. Shung-Hsi provided traces and analysis which pinpoints the problem: The ordering of trying to assign an interrupt vector in assign_irq_vector_any_locked() is simply wrong if the interrupt data has a valid node assigned. It does: 1) Try the intersection of affinity mask and node mask 2) Try the node mask 3) Try the full affinity mask 4) Try the full online mask Obviously #2 and #3 are in the wrong order as the requested affinity mask has to take precedence. In the observed cases #1 failed because the affinity mask did not contain CPUs from node 0. That made it allocate a vector from node 0, thereby breaking affinity and emitting the misleading message. Revert the order of #2 and #3 so the full affinity mask without the node intersection is tried before actually affinity is broken. If no node is assigned then only the full affinity mask and if that fails the full online mask is tried. Fixes: d6ffc6ac83b1 ("x86/vector: Respect affinity mask in irq descriptor") Reported-by: Prarit Bhargava <prarit@redhat.com> Reported-by: Shung-Hsi Yu <shung-hsi.yu@suse.com> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Tested-by: Shung-Hsi Yu <shung-hsi.yu@suse.com> Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/87ft4djtyp.fsf@nanos.tec.linutronix.de --- arch/x86/kernel/apic/vector.c | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index 1eac53632786..758bbf25ef74 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -273,20 +273,24 @@ static int assign_irq_vector_any_locked(struct irq_data *irqd) const struct cpumask *affmsk = irq_data_get_affinity_mask(irqd); int node = irq_data_get_node(irqd); - if (node == NUMA_NO_NODE) - goto all; - /* Try the intersection of @affmsk and node mask */ - cpumask_and(vector_searchmask, cpumask_of_node(node), affmsk); - if (!assign_vector_locked(irqd, vector_searchmask)) - return 0; - /* Try the node mask */ - if (!assign_vector_locked(irqd, cpumask_of_node(node))) - return 0; -all: + if (node != NUMA_NO_NODE) { + /* Try the intersection of @affmsk and node mask */ + cpumask_and(vector_searchmask, cpumask_of_node(node), affmsk); + if (!assign_vector_locked(irqd, vector_searchmask)) + return 0; + } + /* Try the full affinity mask */ cpumask_and(vector_searchmask, affmsk, cpu_online_mask); if (!assign_vector_locked(irqd, vector_searchmask)) return 0; + + if (node != NUMA_NO_NODE) { + /* Try the node mask */ + if (!assign_vector_locked(irqd, cpumask_of_node(node))) + return 0; + } + /* Try the full online mask */ return assign_vector_locked(irqd, cpu_online_mask); } -- cgit v1.2.3-70-g09d2 From ccbbfd1cbf365b38d014351d1482fedd26282041 Mon Sep 17 00:00:00 2001 From: Palmer Dabbelt <palmerdabbelt@google.com> Date: Wed, 25 Nov 2020 11:57:03 -0800 Subject: RISC-V: Define get_cycles64() regardless of M-mode The timer driver uses get_cycles64() unconditionally to obtain the current time. A recent refactoring lost the common definition for some configs, which is now the only one we need. Fixes: d5be89a8d118 ("RISC-V: Resurrect the MMIO timer implementation for M-mode systems") Reported-by: kernel test robot <lkp@intel.com> Signed-off-by: Palmer Dabbelt <palmerdabbelt@google.com> --- arch/riscv/include/asm/timex.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/riscv/include/asm/timex.h b/arch/riscv/include/asm/timex.h index ab104905d4db..81de51e6aa32 100644 --- a/arch/riscv/include/asm/timex.h +++ b/arch/riscv/include/asm/timex.h @@ -60,6 +60,8 @@ static inline u32 get_cycles_hi(void) } #define get_cycles_hi get_cycles_hi +#endif /* !CONFIG_RISCV_M_MODE */ + #ifdef CONFIG_64BIT static inline u64 get_cycles64(void) { @@ -79,8 +81,6 @@ static inline u64 get_cycles64(void) } #endif /* CONFIG_64BIT */ -#endif /* !CONFIG_RISCV_M_MODE */ - #define ARCH_HAS_READ_CURRENT_TIMER static inline int read_current_timer(unsigned long *timer_val) { -- cgit v1.2.3-70-g09d2 From 34c0f6f2695a2db81e09a3ab7bdb2853f45d4d3d Mon Sep 17 00:00:00 2001 From: "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com> Date: Sat, 5 Dec 2020 01:48:08 +0100 Subject: KVM: mmu: Fix SPTE encoding of MMIO generation upper half Commit cae7ed3c2cb0 ("KVM: x86: Refactor the MMIO SPTE generation handling") cleaned up the computation of MMIO generation SPTE masks, however it introduced a bug how the upper part was encoded: SPTE bits 52-61 were supposed to contain bits 10-19 of the current generation number, however a missing shift encoded bits 1-10 there instead (mostly duplicating the lower part of the encoded generation number that then consisted of bits 1-9). In the meantime, the upper part was shrunk by one bit and moved by subsequent commits to become an upper half of the encoded generation number (bits 9-17 of bits 0-17 encoded in a SPTE). In addition to the above, commit 56871d444bc4 ("KVM: x86: fix overlap between SPTE_MMIO_MASK and generation") has changed the SPTE bit range assigned to encode the generation number and the total number of bits encoded but did not update them in the comment attached to their defines, nor in the KVM MMU doc. Let's do it here, too, since it is too trivial thing to warrant a separate commit. Fixes: cae7ed3c2cb0 ("KVM: x86: Refactor the MMIO SPTE generation handling") Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com> Message-Id: <156700708db2a5296c5ed7a8b9ac71f1e9765c85.1607129096.git.maciej.szmigiero@oracle.com> Cc: stable@vger.kernel.org [Reorganize macros so that everything is computed from the bit ranges. - Paolo] Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> --- Documentation/virt/kvm/mmu.rst | 2 +- arch/x86/kvm/mmu/spte.c | 4 ++-- arch/x86/kvm/mmu/spte.h | 25 ++++++++++++++++++------- 3 files changed, 21 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/Documentation/virt/kvm/mmu.rst b/Documentation/virt/kvm/mmu.rst index 1c030dbac7c4..5bfe28b0728e 100644 --- a/Documentation/virt/kvm/mmu.rst +++ b/Documentation/virt/kvm/mmu.rst @@ -455,7 +455,7 @@ If the generation number of the spte does not equal the global generation number, it will ignore the cached MMIO information and handle the page fault through the slow path. -Since only 19 bits are used to store generation-number on mmio spte, all +Since only 18 bits are used to store generation-number on mmio spte, all pages are zapped when there is an overflow. Unfortunately, a single memory access might access kvm_memslots(kvm) multiple diff --git a/arch/x86/kvm/mmu/spte.c b/arch/x86/kvm/mmu/spte.c index fcac2cac78fe..c51ad544f25b 100644 --- a/arch/x86/kvm/mmu/spte.c +++ b/arch/x86/kvm/mmu/spte.c @@ -40,8 +40,8 @@ static u64 generation_mmio_spte_mask(u64 gen) WARN_ON(gen & ~MMIO_SPTE_GEN_MASK); BUILD_BUG_ON((MMIO_SPTE_GEN_HIGH_MASK | MMIO_SPTE_GEN_LOW_MASK) & SPTE_SPECIAL_MASK); - mask = (gen << MMIO_SPTE_GEN_LOW_START) & MMIO_SPTE_GEN_LOW_MASK; - mask |= (gen << MMIO_SPTE_GEN_HIGH_START) & MMIO_SPTE_GEN_HIGH_MASK; + mask = (gen << MMIO_SPTE_GEN_LOW_SHIFT) & MMIO_SPTE_GEN_LOW_MASK; + mask |= (gen << MMIO_SPTE_GEN_HIGH_SHIFT) & MMIO_SPTE_GEN_HIGH_MASK; return mask; } diff --git a/arch/x86/kvm/mmu/spte.h b/arch/x86/kvm/mmu/spte.h index 5c75a451c000..2b3a30bd38b0 100644 --- a/arch/x86/kvm/mmu/spte.h +++ b/arch/x86/kvm/mmu/spte.h @@ -56,11 +56,11 @@ #define SPTE_MMU_WRITEABLE (1ULL << (PT_FIRST_AVAIL_BITS_SHIFT + 1)) /* - * Due to limited space in PTEs, the MMIO generation is a 19 bit subset of + * Due to limited space in PTEs, the MMIO generation is a 18 bit subset of * the memslots generation and is derived as follows: * * Bits 0-8 of the MMIO generation are propagated to spte bits 3-11 - * Bits 9-18 of the MMIO generation are propagated to spte bits 52-61 + * Bits 9-17 of the MMIO generation are propagated to spte bits 54-62 * * The KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS flag is intentionally not included in * the MMIO generation number, as doing so would require stealing a bit from @@ -69,18 +69,29 @@ * requires a full MMU zap). The flag is instead explicitly queried when * checking for MMIO spte cache hits. */ -#define MMIO_SPTE_GEN_MASK GENMASK_ULL(17, 0) #define MMIO_SPTE_GEN_LOW_START 3 #define MMIO_SPTE_GEN_LOW_END 11 -#define MMIO_SPTE_GEN_LOW_MASK GENMASK_ULL(MMIO_SPTE_GEN_LOW_END, \ - MMIO_SPTE_GEN_LOW_START) #define MMIO_SPTE_GEN_HIGH_START PT64_SECOND_AVAIL_BITS_SHIFT #define MMIO_SPTE_GEN_HIGH_END 62 + +#define MMIO_SPTE_GEN_LOW_MASK GENMASK_ULL(MMIO_SPTE_GEN_LOW_END, \ + MMIO_SPTE_GEN_LOW_START) #define MMIO_SPTE_GEN_HIGH_MASK GENMASK_ULL(MMIO_SPTE_GEN_HIGH_END, \ MMIO_SPTE_GEN_HIGH_START) +#define MMIO_SPTE_GEN_LOW_BITS (MMIO_SPTE_GEN_LOW_END - MMIO_SPTE_GEN_LOW_START + 1) +#define MMIO_SPTE_GEN_HIGH_BITS (MMIO_SPTE_GEN_HIGH_END - MMIO_SPTE_GEN_HIGH_START + 1) + +/* remember to adjust the comment above as well if you change these */ +static_assert(MMIO_SPTE_GEN_LOW_BITS == 9 && MMIO_SPTE_GEN_HIGH_BITS == 9); + +#define MMIO_SPTE_GEN_LOW_SHIFT (MMIO_SPTE_GEN_LOW_START - 0) +#define MMIO_SPTE_GEN_HIGH_SHIFT (MMIO_SPTE_GEN_HIGH_START - MMIO_SPTE_GEN_LOW_BITS) + +#define MMIO_SPTE_GEN_MASK GENMASK_ULL(MMIO_SPTE_GEN_LOW_BITS + MMIO_SPTE_GEN_HIGH_BITS - 1, 0) + extern u64 __read_mostly shadow_nx_mask; extern u64 __read_mostly shadow_x_mask; /* mutual exclusive with nx_mask */ extern u64 __read_mostly shadow_user_mask; @@ -228,8 +239,8 @@ static inline u64 get_mmio_spte_generation(u64 spte) { u64 gen; - gen = (spte & MMIO_SPTE_GEN_LOW_MASK) >> MMIO_SPTE_GEN_LOW_START; - gen |= (spte & MMIO_SPTE_GEN_HIGH_MASK) >> MMIO_SPTE_GEN_HIGH_START; + gen = (spte & MMIO_SPTE_GEN_LOW_MASK) >> MMIO_SPTE_GEN_LOW_SHIFT; + gen |= (spte & MMIO_SPTE_GEN_HIGH_MASK) >> MMIO_SPTE_GEN_HIGH_SHIFT; return gen; } -- cgit v1.2.3-70-g09d2 From 0d07c0ec4381f630c801539c79ad8dcc627f6e4a Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu <mhiramat@kernel.org> Date: Fri, 11 Dec 2020 16:04:17 +0900 Subject: x86/kprobes: Fix optprobe to detect INT3 padding correctly Commit 7705dc855797 ("x86/vmlinux: Use INT3 instead of NOP for linker fill bytes") changed the padding bytes between functions from NOP to INT3. However, when optprobe decodes a target function it finds INT3 and gives up the jump optimization. Instead of giving up any INT3 detection, check whether the rest of the bytes to the end of the function are INT3. If all of them are INT3, those come from the linker. In that case, continue the optprobe jump optimization. [ bp: Massage commit message. ] Fixes: 7705dc855797 ("x86/vmlinux: Use INT3 instead of NOP for linker fill bytes") Reported-by: Adam Zabrocki <pi3@pi3.com.pl> Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org> Signed-off-by: Borislav Petkov <bp@suse.de> Reviewed-by: Steven Rostedt (VMware) <rostedt@goodmis.org> Reviewed-by: Kees Cook <keescook@chromium.org> Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/160767025681.3880685.16021570341428835411.stgit@devnote2 --- arch/x86/kernel/kprobes/opt.c | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c index 041f0b50bc27..08eb23074f92 100644 --- a/arch/x86/kernel/kprobes/opt.c +++ b/arch/x86/kernel/kprobes/opt.c @@ -272,6 +272,19 @@ static int insn_is_indirect_jump(struct insn *insn) return ret; } +static bool is_padding_int3(unsigned long addr, unsigned long eaddr) +{ + unsigned char ops; + + for (; addr < eaddr; addr++) { + if (get_kernel_nofault(ops, (void *)addr) < 0 || + ops != INT3_INSN_OPCODE) + return false; + } + + return true; +} + /* Decode whole function to ensure any instructions don't jump into target */ static int can_optimize(unsigned long paddr) { @@ -310,9 +323,14 @@ static int can_optimize(unsigned long paddr) return 0; kernel_insn_init(&insn, (void *)recovered_insn, MAX_INSN_SIZE); insn_get_length(&insn); - /* Another subsystem puts a breakpoint */ + /* + * In the case of detecting unknown breakpoint, this could be + * a padding INT3 between functions. Let's check that all the + * rest of the bytes are also INT3. + */ if (insn.opcode.bytes[0] == INT3_INSN_OPCODE) - return 0; + return is_padding_int3(addr, paddr - offset + size) ? 1 : 0; + /* Recover address */ insn.kaddr = (void *)addr; insn.next_byte = (void *)(addr + insn.length); -- cgit v1.2.3-70-g09d2 From 2aa078932ff6c66bf10cc5b3144440dbfa7d813d Mon Sep 17 00:00:00 2001 From: Sean Christopherson <seanjc@google.com> Date: Thu, 17 Dec 2020 16:31:36 -0800 Subject: KVM: x86/mmu: Use -1 to flag an undefined spte in get_mmio_spte() Return -1 from the get_walk() helpers if the shadow walk doesn't fill at least one spte, which can theoretically happen if the walk hits a not-present PDPTR. Returning the root level in such a case will cause get_mmio_spte() to return garbage (uninitialized stack data). In practice, such a scenario should be impossible as KVM shouldn't get a reserved-bit page fault with a not-present PDPTR. Note, using mmu->root_level in get_walk() is wrong for other reasons, too, but that's now a moot point. Fixes: 95fb5b0258b7 ("kvm: x86/mmu: Support MMIO in the TDP MMU") Cc: Ben Gardon <bgardon@google.com> Cc: stable@vger.kernel.org Signed-off-by: Sean Christopherson <seanjc@google.com> Message-Id: <20201218003139.2167891-2-seanjc@google.com> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> --- arch/x86/kvm/mmu/mmu.c | 7 ++++++- arch/x86/kvm/mmu/tdp_mmu.c | 2 +- 2 files changed, 7 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 7a6ae9e90bd7..a48cd12c01d7 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -3488,7 +3488,7 @@ static bool mmio_info_in_cache(struct kvm_vcpu *vcpu, u64 addr, bool direct) static int get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes) { struct kvm_shadow_walk_iterator iterator; - int leaf = vcpu->arch.mmu->root_level; + int leaf = -1; u64 spte; @@ -3532,6 +3532,11 @@ static bool get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep) else leaf = get_walk(vcpu, addr, sptes); + if (unlikely(leaf < 0)) { + *sptep = 0ull; + return reserved; + } + rsvd_check = &vcpu->arch.mmu->shadow_zero_check; for (level = root; level >= leaf; level--) { diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c index 84c8f06bec26..50cec7a15ddb 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.c +++ b/arch/x86/kvm/mmu/tdp_mmu.c @@ -1152,8 +1152,8 @@ int kvm_tdp_mmu_get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes) { struct tdp_iter iter; struct kvm_mmu *mmu = vcpu->arch.mmu; - int leaf = vcpu->arch.mmu->shadow_root_level; gfn_t gfn = addr >> PAGE_SHIFT; + int leaf = -1; tdp_mmu_for_each_pte(iter, mmu, gfn, gfn + 1) { leaf = iter.level; -- cgit v1.2.3-70-g09d2 From 39b4d43e6003cee51cd119596d3c33d0449eb44c Mon Sep 17 00:00:00 2001 From: Sean Christopherson <seanjc@google.com> Date: Thu, 17 Dec 2020 16:31:37 -0800 Subject: KVM: x86/mmu: Get root level from walkers when retrieving MMIO SPTE Get the so called "root" level from the low level shadow page table walkers instead of manually attempting to calculate it higher up the stack, e.g. in get_mmio_spte(). When KVM is using PAE shadow paging, the starting level of the walk, from the callers perspective, is not the CR3 root but rather the PDPTR "root". Checking for reserved bits from the CR3 root causes get_mmio_spte() to consume uninitialized stack data due to indexing into sptes[] for a level that was not filled by get_walk(). This can result in false positives and/or negatives depending on what garbage happens to be on the stack. Opportunistically nuke a few extra newlines. Fixes: 95fb5b0258b7 ("kvm: x86/mmu: Support MMIO in the TDP MMU") Reported-by: Richard Herbert <rherbert@sympatico.ca> Cc: Ben Gardon <bgardon@google.com> Cc: stable@vger.kernel.org Signed-off-by: Sean Christopherson <seanjc@google.com> Message-Id: <20201218003139.2167891-3-seanjc@google.com> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> --- arch/x86/kvm/mmu/mmu.c | 15 ++++++--------- arch/x86/kvm/mmu/tdp_mmu.c | 5 ++++- arch/x86/kvm/mmu/tdp_mmu.h | 4 +++- 3 files changed, 13 insertions(+), 11 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index a48cd12c01d7..52f36c879086 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -3485,16 +3485,16 @@ static bool mmio_info_in_cache(struct kvm_vcpu *vcpu, u64 addr, bool direct) * Return the level of the lowest level SPTE added to sptes. * That SPTE may be non-present. */ -static int get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes) +static int get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes, int *root_level) { struct kvm_shadow_walk_iterator iterator; int leaf = -1; u64 spte; - walk_shadow_page_lockless_begin(vcpu); - for (shadow_walk_init(&iterator, vcpu, addr); + for (shadow_walk_init(&iterator, vcpu, addr), + *root_level = iterator.level; shadow_walk_okay(&iterator); __shadow_walk_next(&iterator, spte)) { leaf = iterator.level; @@ -3504,7 +3504,6 @@ static int get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes) if (!is_shadow_present_pte(spte)) break; - } walk_shadow_page_lockless_end(vcpu); @@ -3517,9 +3516,7 @@ static bool get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep) { u64 sptes[PT64_ROOT_MAX_LEVEL]; struct rsvd_bits_validate *rsvd_check; - int root = vcpu->arch.mmu->shadow_root_level; - int leaf; - int level; + int root, leaf, level; bool reserved = false; if (!VALID_PAGE(vcpu->arch.mmu->root_hpa)) { @@ -3528,9 +3525,9 @@ static bool get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep) } if (is_tdp_mmu_root(vcpu->kvm, vcpu->arch.mmu->root_hpa)) - leaf = kvm_tdp_mmu_get_walk(vcpu, addr, sptes); + leaf = kvm_tdp_mmu_get_walk(vcpu, addr, sptes, &root); else - leaf = get_walk(vcpu, addr, sptes); + leaf = get_walk(vcpu, addr, sptes, &root); if (unlikely(leaf < 0)) { *sptep = 0ull; diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c index 50cec7a15ddb..a4f9447f8327 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.c +++ b/arch/x86/kvm/mmu/tdp_mmu.c @@ -1148,13 +1148,16 @@ bool kvm_tdp_mmu_write_protect_gfn(struct kvm *kvm, * Return the level of the lowest level SPTE added to sptes. * That SPTE may be non-present. */ -int kvm_tdp_mmu_get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes) +int kvm_tdp_mmu_get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes, + int *root_level) { struct tdp_iter iter; struct kvm_mmu *mmu = vcpu->arch.mmu; gfn_t gfn = addr >> PAGE_SHIFT; int leaf = -1; + *root_level = vcpu->arch.mmu->shadow_root_level; + tdp_mmu_for_each_pte(iter, mmu, gfn, gfn + 1) { leaf = iter.level; sptes[leaf - 1] = iter.old_spte; diff --git a/arch/x86/kvm/mmu/tdp_mmu.h b/arch/x86/kvm/mmu/tdp_mmu.h index 556e065503f6..cbbdbadd1526 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.h +++ b/arch/x86/kvm/mmu/tdp_mmu.h @@ -44,5 +44,7 @@ void kvm_tdp_mmu_zap_collapsible_sptes(struct kvm *kvm, bool kvm_tdp_mmu_write_protect_gfn(struct kvm *kvm, struct kvm_memory_slot *slot, gfn_t gfn); -int kvm_tdp_mmu_get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes); +int kvm_tdp_mmu_get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes, + int *root_level); + #endif /* __KVM_X86_MMU_TDP_MMU_H */ -- cgit v1.2.3-70-g09d2 From dde81f9477d018a96fba991c5928c6ab8cc109f8 Mon Sep 17 00:00:00 2001 From: Sean Christopherson <seanjc@google.com> Date: Thu, 17 Dec 2020 16:31:38 -0800 Subject: KVM: x86/mmu: Use raw level to index into MMIO walks' sptes array Bump the size of the sptes array by one and use the raw level of the SPTE to index into the sptes array. Using the SPTE level directly improves readability by eliminating the need to reason out why the level is being adjusted when indexing the array. The array is on the stack and is not explicitly initialized; bumping its size is nothing more than a superficial adjustment to the stack frame. Signed-off-by: Sean Christopherson <seanjc@google.com> Message-Id: <20201218003139.2167891-4-seanjc@google.com> Reviewed-by: Vitaly Kuznetsov <vkuznets@redhat.com> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> --- arch/x86/kvm/mmu/mmu.c | 15 +++++++-------- arch/x86/kvm/mmu/tdp_mmu.c | 2 +- 2 files changed, 8 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 52f36c879086..4798a4472066 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -3500,7 +3500,7 @@ static int get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes, int *root_level leaf = iterator.level; spte = mmu_spte_get_lockless(iterator.sptep); - sptes[leaf - 1] = spte; + sptes[leaf] = spte; if (!is_shadow_present_pte(spte)) break; @@ -3514,7 +3514,7 @@ static int get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes, int *root_level /* return true if reserved bit is detected on spte. */ static bool get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep) { - u64 sptes[PT64_ROOT_MAX_LEVEL]; + u64 sptes[PT64_ROOT_MAX_LEVEL + 1]; struct rsvd_bits_validate *rsvd_check; int root, leaf, level; bool reserved = false; @@ -3537,16 +3537,15 @@ static bool get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep) rsvd_check = &vcpu->arch.mmu->shadow_zero_check; for (level = root; level >= leaf; level--) { - if (!is_shadow_present_pte(sptes[level - 1])) + if (!is_shadow_present_pte(sptes[level])) break; /* * Use a bitwise-OR instead of a logical-OR to aggregate the * reserved bit and EPT's invalid memtype/XWR checks to avoid * adding a Jcc in the loop. */ - reserved |= __is_bad_mt_xwr(rsvd_check, sptes[level - 1]) | - __is_rsvd_bits_set(rsvd_check, sptes[level - 1], - level); + reserved |= __is_bad_mt_xwr(rsvd_check, sptes[level]) | + __is_rsvd_bits_set(rsvd_check, sptes[level], level); } if (reserved) { @@ -3554,10 +3553,10 @@ static bool get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep) __func__, addr); for (level = root; level >= leaf; level--) pr_err("------ spte 0x%llx level %d.\n", - sptes[level - 1], level); + sptes[level], level); } - *sptep = sptes[leaf - 1]; + *sptep = sptes[leaf]; return reserved; } diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c index a4f9447f8327..efef571806ad 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.c +++ b/arch/x86/kvm/mmu/tdp_mmu.c @@ -1160,7 +1160,7 @@ int kvm_tdp_mmu_get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes, tdp_mmu_for_each_pte(iter, mmu, gfn, gfn + 1) { leaf = iter.level; - sptes[leaf - 1] = iter.old_spte; + sptes[leaf] = iter.old_spte; } return leaf; -- cgit v1.2.3-70-g09d2 From 9aa418792f5f11ef5d6f72265e1f8ae07efd5784 Mon Sep 17 00:00:00 2001 From: Sean Christopherson <seanjc@google.com> Date: Thu, 17 Dec 2020 16:31:39 -0800 Subject: KVM: x86/mmu: Optimize not-present/MMIO SPTE check in get_mmio_spte() Check only the terminal leaf for a "!PRESENT || MMIO" SPTE when looking for reserved bits on valid, non-MMIO SPTEs. The get_walk() helpers terminate their walks if a not-present or MMIO SPTE is encountered, i.e. the non-terminal SPTEs have already been verified to be regular SPTEs. This eliminates an extra check-and-branch in a relatively hot loop. Signed-off-by: Sean Christopherson <seanjc@google.com> Message-Id: <20201218003139.2167891-5-seanjc@google.com> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> --- arch/x86/kvm/mmu/mmu.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 4798a4472066..769855f5f0a1 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -3511,7 +3511,7 @@ static int get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes, int *root_level return leaf; } -/* return true if reserved bit is detected on spte. */ +/* return true if reserved bit(s) are detected on a valid, non-MMIO SPTE. */ static bool get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep) { u64 sptes[PT64_ROOT_MAX_LEVEL + 1]; @@ -3534,11 +3534,20 @@ static bool get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep) return reserved; } + *sptep = sptes[leaf]; + + /* + * Skip reserved bits checks on the terminal leaf if it's not a valid + * SPTE. Note, this also (intentionally) skips MMIO SPTEs, which, by + * design, always have reserved bits set. The purpose of the checks is + * to detect reserved bits on non-MMIO SPTEs. i.e. buggy SPTEs. + */ + if (!is_shadow_present_pte(sptes[leaf])) + leaf++; + rsvd_check = &vcpu->arch.mmu->shadow_zero_check; - for (level = root; level >= leaf; level--) { - if (!is_shadow_present_pte(sptes[level])) - break; + for (level = root; level >= leaf; level--) /* * Use a bitwise-OR instead of a logical-OR to aggregate the * reserved bit and EPT's invalid memtype/XWR checks to avoid @@ -3546,7 +3555,6 @@ static bool get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep) */ reserved |= __is_bad_mt_xwr(rsvd_check, sptes[level]) | __is_rsvd_bits_set(rsvd_check, sptes[level], level); - } if (reserved) { pr_err("%s: detect reserved bits on spte, addr 0x%llx, dump hierarchy:\n", @@ -3556,8 +3564,6 @@ static bool get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep) sptes[level], level); } - *sptep = sptes[leaf]; - return reserved; } -- cgit v1.2.3-70-g09d2