From fbc81ec5b85d43a4b22e49ec0e643fa7dec2ea40 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ardb@kernel.org>
Date: Sat, 3 Oct 2020 17:28:27 +0200
Subject: efi/arm: set HSCTLR Thumb2 bit correctly for HVC calls from HYP

Commit

  db227c19e68db353 ("ARM: 8985/1: efi/decompressor: deal with HYP mode boot gracefully")

updated the EFI entry code to permit firmware to invoke the EFI stub
loader in HYP mode, with the MMU either enabled or disabled, neither
of which is permitted by the EFI spec, but which does happen in the
field.

In the MMU on case, we remain in HYP mode as configured by the firmware,
and rely on the fact that any HVC instruction issued in this mode will
be dispatched via the SVC slot in the HYP vector table. However, this
slot will point to a Thumb2 symbol if the kernel is built in Thumb2
mode, and so we have to configure HSCTLR to ensure that the exception
handlers are invoked in Thumb2 mode as well.

Fixes: db227c19e68db353 ("ARM: 8985/1: efi/decompressor: deal with HYP mode boot gracefully")
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
 arch/arm/boot/compressed/head.S | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'arch')

diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S
index 2e04ec5b5446..caa27322a0ab 100644
--- a/arch/arm/boot/compressed/head.S
+++ b/arch/arm/boot/compressed/head.S
@@ -1472,6 +1472,9 @@ ENTRY(efi_enter_kernel)
 		@ issued from HYP mode take us to the correct handler code. We
 		@ will disable the MMU before jumping to the kernel proper.
 		@
+ ARM(		bic	r1, r1, #(1 << 30)	) @ clear HSCTLR.TE
+ THUMB(		orr	r1, r1, #(1 << 30)	) @ set HSCTLR.TE
+		mcr	p15, 4, r1, c1, c0, 0
 		adr	r0, __hyp_reentry_vectors
 		mcr	p15, 4, r0, c12, c0, 0	@ set HYP vector base (HVBAR)
 		isb
-- 
cgit v1.2.3-70-g09d2


From c20782ad4eb9dfa7f41cb2d85f218d0940f7cef1 Mon Sep 17 00:00:00 2001
From: Tony Lindgren <tony@atomide.com>
Date: Mon, 26 Oct 2020 10:08:47 +0200
Subject: ARM: OMAP2+: Fix location for select PM_GENERIC_DOMAINS

I accidentally misplaced select PM_GENERIC_DOMAINS, it should be
selected for all the SoCs instead.

Fixes: 58cbff023bfa ("soc: ti: omap-prm: Add basic power domain support")
Signed-off-by: Tony Lindgren <tony@atomide.com>
---
 arch/arm/mach-omap2/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/arm/mach-omap2/Kconfig b/arch/arm/mach-omap2/Kconfig
index 3ee7bdff86b2..68ba5adfaa23 100644
--- a/arch/arm/mach-omap2/Kconfig
+++ b/arch/arm/mach-omap2/Kconfig
@@ -7,7 +7,6 @@ config ARCH_OMAP2
 	depends on ARCH_MULTI_V6
 	select ARCH_OMAP2PLUS
 	select CPU_V6
-	select PM_GENERIC_DOMAINS if PM
 	select SOC_HAS_OMAP2_SDRC
 
 config ARCH_OMAP3
@@ -106,6 +105,7 @@ config ARCH_OMAP2PLUS
 	select OMAP_DM_TIMER
 	select OMAP_GPMC
 	select PINCTRL
+	select PM_GENERIC_DOMAINS if PM
 	select RESET_CONTROLLER
 	select SOC_BUS
 	select TI_SYSC
-- 
cgit v1.2.3-70-g09d2


From b69fd00120f8e3348273323099669cb058668263 Mon Sep 17 00:00:00 2001
From: Tony Lindgren <tony@atomide.com>
Date: Mon, 26 Oct 2020 10:08:47 +0200
Subject: ARM: OMAP2+: Fix missing select PM_GENERIC_DOMAINS_OF

We should select also PM_GENERIC_DOMAINS_OF in addition to
PM_GENERIC_DOMAINS to have device tree based PM domain providers
enabled.

Fixes: 58cbff023bfa ("soc: ti: omap-prm: Add basic power domain support")
Signed-off-by: Tony Lindgren <tony@atomide.com>
---
 arch/arm/mach-omap2/Kconfig | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch')

diff --git a/arch/arm/mach-omap2/Kconfig b/arch/arm/mach-omap2/Kconfig
index 68ba5adfaa23..3f62a0c9450d 100644
--- a/arch/arm/mach-omap2/Kconfig
+++ b/arch/arm/mach-omap2/Kconfig
@@ -106,6 +106,7 @@ config ARCH_OMAP2PLUS
 	select OMAP_GPMC
 	select PINCTRL
 	select PM_GENERIC_DOMAINS if PM
+	select PM_GENERIC_DOMAINS_OF if PM
 	select RESET_CONTROLLER
 	select SOC_BUS
 	select TI_SYSC
-- 
cgit v1.2.3-70-g09d2


From a2089ac7f8dc682ef52ed74b52997d36cde76d05 Mon Sep 17 00:00:00 2001
From: Clément Péron <peron.clem@gmail.com>
Date: Sun, 11 Oct 2020 23:15:14 +0200
Subject: arm64: dts: allwinner: pinetab: Drop unnecessary address/size-cells
 information
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

make dtbs_check warm about unknown address/size-cells property in the
pinetab device-tree.

This is because these information are not necessary.

Drop them.

Signed-off-by: Clément Péron <peron.clem@gmail.com>
Signed-off-by: Maxime Ripard <maxime@cerno.tech>
Link: https://lore.kernel.org/r/20201011211514.155266-1-peron.clem@gmail.com
---
 arch/arm64/boot/dts/allwinner/sun50i-a64-pinetab.dts | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'arch')

diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-pinetab.dts b/arch/arm64/boot/dts/allwinner/sun50i-a64-pinetab.dts
index 3ab0f0347bc9..0494bfaf2ffa 100644
--- a/arch/arm64/boot/dts/allwinner/sun50i-a64-pinetab.dts
+++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-pinetab.dts
@@ -122,9 +122,6 @@
 	status = "okay";
 
 	port {
-		#address-cells = <1>;
-		#size-cells = <0>;
-
 		csi_ep: endpoint {
 			remote-endpoint = <&ov5640_ep>;
 			bus-width = <8>;
-- 
cgit v1.2.3-70-g09d2


From 97a38c1c213b162aa577299de698f39c18ba696b Mon Sep 17 00:00:00 2001
From: Clément Péron <peron.clem@gmail.com>
Date: Sun, 18 Oct 2020 19:24:09 +0200
Subject: arm64: dts: allwinner: beelink-gs1: Enable both RGMII RX/TX delay
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Before the commit bbc4d71d6354 ("net: phy: realtek: fix rtl8211e rx/tx
delay config"), the software overwrite for RX/TX delays of the RTL8211e
were not working properly and the Beelink GS1 had both RX/TX delay of RGMII
interface set using pull-up on the TXDLY and RXDLY pins.

Now that these delays are working properly they overwrite the HW
config and set this to 'rgmii' meaning no delay on both RX/TX.
This makes the ethernet of this board not working anymore.

Set the phy-mode to 'rgmii-id' meaning RGMII with RX/TX delays
in the device-tree to keep the correct configuration.

Fixes: 089bee8dd119 ("arm64: dts: allwinner: h6: Introduce Beelink GS1 board")
Signed-off-by: Clément Péron <peron.clem@gmail.com>
Signed-off-by: Maxime Ripard <maxime@cerno.tech>
Acked-by: Chen-Yu Tsai <wens@csie.org>
Link: https://lore.kernel.org/r/20201018172409.1754775-1-peron.clem@gmail.com
---
 arch/arm64/boot/dts/allwinner/sun50i-h6-beelink-gs1.dts | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h6-beelink-gs1.dts b/arch/arm64/boot/dts/allwinner/sun50i-h6-beelink-gs1.dts
index 3f7ceeb1a767..7c9dbde645b5 100644
--- a/arch/arm64/boot/dts/allwinner/sun50i-h6-beelink-gs1.dts
+++ b/arch/arm64/boot/dts/allwinner/sun50i-h6-beelink-gs1.dts
@@ -97,7 +97,7 @@
 &emac {
 	pinctrl-names = "default";
 	pinctrl-0 = <&ext_rgmii_pins>;
-	phy-mode = "rgmii";
+	phy-mode = "rgmii-id";
 	phy-handle = <&ext_rgmii_phy>;
 	phy-supply = <&reg_aldo2>;
 	status = "okay";
-- 
cgit v1.2.3-70-g09d2


From 419c65f5000a6c25597ea52488528d75b287cbd0 Mon Sep 17 00:00:00 2001
From: Corentin Labbe <clabbe@baylibre.com>
Date: Mon, 19 Oct 2020 06:34:49 +0000
Subject: arm64: dts: allwinner: Pine H64: Enable both RGMII RX/TX delay

Since commit bbc4d71d6354 ("net: phy: realtek: fix rtl8211e rx/tx delay config"),
the network is unusable on PineH64 model A.

This is due to phy-mode incorrectly set to rgmii instead of rgmii-id.

Fixes: 729e1ffcf47e ("arm64: allwinner: h6: add support for the Ethernet on Pine H64")
Signed-off-by: Corentin Labbe <clabbe@baylibre.com>
Signed-off-by: Maxime Ripard <maxime@cerno.tech>
Link: https://lore.kernel.org/r/20201019063449.33316-1-clabbe@baylibre.com
---
 arch/arm64/boot/dts/allwinner/sun50i-h6-pine-h64.dts | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h6-pine-h64.dts b/arch/arm64/boot/dts/allwinner/sun50i-h6-pine-h64.dts
index af85b2074867..961732c52aa0 100644
--- a/arch/arm64/boot/dts/allwinner/sun50i-h6-pine-h64.dts
+++ b/arch/arm64/boot/dts/allwinner/sun50i-h6-pine-h64.dts
@@ -100,7 +100,7 @@
 &emac {
 	pinctrl-names = "default";
 	pinctrl-0 = <&ext_rgmii_pins>;
-	phy-mode = "rgmii";
+	phy-mode = "rgmii-id";
 	phy-handle = <&ext_rgmii_phy>;
 	phy-supply = <&reg_gmac_3v3>;
 	allwinner,rx-delay-ps = <200>;
-- 
cgit v1.2.3-70-g09d2


From d7cdff444579e6659459b2fe04340ebb27628d5e Mon Sep 17 00:00:00 2001
From: Jernej Skrabec <jernej.skrabec@siol.net>
Date: Thu, 22 Oct 2020 20:58:39 +0200
Subject: arm64: dts: allwinner: a64: OrangePi Win: Fix ethernet node

RX/TX delay on OrangePi Win board is set on PHY. Reflect that in
ethernet node.

Fixes: 93d6a27cfcc0 ("arm64: dts: allwinner: a64: Orange Pi Win: Add Ethernet node")
Signed-off-by: Jernej Skrabec <jernej.skrabec@siol.net>
Signed-off-by: Maxime Ripard <maxime@cerno.tech>
Link: https://lore.kernel.org/r/20201022185839.2779245-1-jernej.skrabec@siol.net
---
 arch/arm64/boot/dts/allwinner/sun50i-a64-orangepi-win.dts | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-orangepi-win.dts b/arch/arm64/boot/dts/allwinner/sun50i-a64-orangepi-win.dts
index d894ec5fa8a1..70e31743f0ba 100644
--- a/arch/arm64/boot/dts/allwinner/sun50i-a64-orangepi-win.dts
+++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-orangepi-win.dts
@@ -120,7 +120,7 @@
 &emac {
 	pinctrl-names = "default";
 	pinctrl-0 = <&rgmii_pins>;
-	phy-mode = "rgmii";
+	phy-mode = "rgmii-id";
 	phy-handle = <&ext_rgmii_phy>;
 	phy-supply = <&reg_gmac_3v3>;
 	status = "okay";
-- 
cgit v1.2.3-70-g09d2


From 927f42fcc1b4f7d04a2ac5cf02f25612aa8923a4 Mon Sep 17 00:00:00 2001
From: Jernej Skrabec <jernej.skrabec@siol.net>
Date: Thu, 22 Oct 2020 23:13:01 +0200
Subject: arm64: dts: allwinner: a64: Pine64 Plus: Fix ethernet node

According to board schematic, PHY provides both, RX and TX delays.
However, according to "fix" Realtek provided for this board, only TX
delay should be provided by PHY.
Tests show that both variants work but TX only PHY delay works
slightly better.

Update ethernet node to reflect the fact that PHY provides TX delay.

Fixes: 94f442886711 ("arm64: dts: allwinner: A64: Restore EMAC changes")
Signed-off-by: Jernej Skrabec <jernej.skrabec@siol.net>
Signed-off-by: Maxime Ripard <maxime@cerno.tech>
Link: https://lore.kernel.org/r/20201022211301.3548422-1-jernej.skrabec@siol.net
---
 arch/arm64/boot/dts/allwinner/sun50i-a64-pine64-plus.dts | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64-plus.dts b/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64-plus.dts
index b26181cf9095..b54099b654c8 100644
--- a/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64-plus.dts
+++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64-plus.dts
@@ -13,7 +13,7 @@
 &emac {
 	pinctrl-names = "default";
 	pinctrl-0 = <&rgmii_pins>;
-	phy-mode = "rgmii";
+	phy-mode = "rgmii-txid";
 	phy-handle = <&ext_rgmii_phy>;
 	status = "okay";
 };
-- 
cgit v1.2.3-70-g09d2


From b34bf9f6a623ddb82600a5ed5c644224122395e1 Mon Sep 17 00:00:00 2001
From: Jernej Skrabec <jernej.skrabec@siol.net>
Date: Fri, 23 Oct 2020 20:48:58 +0200
Subject: arm64: dts: allwinner: h5: OrangePi PC2: Fix ethernet node

RX and TX delay are provided by ethernet PHY. Reflect that in ethernet
node.

Fixes: 44a94c7ef989 ("arm64: dts: allwinner: H5: Restore EMAC changes")
Signed-off-by: Jernej Skrabec <jernej.skrabec@siol.net>
Signed-off-by: Maxime Ripard <maxime@cerno.tech>
Link: https://lore.kernel.org/r/20201023184858.3272918-1-jernej.skrabec@siol.net
---
 arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-pc2.dts | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-pc2.dts b/arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-pc2.dts
index 7d7aad18f078..8bf2db9dcbda 100644
--- a/arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-pc2.dts
+++ b/arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-pc2.dts
@@ -123,7 +123,7 @@
 	pinctrl-0 = <&emac_rgmii_pins>;
 	phy-supply = <&reg_gmac_3v3>;
 	phy-handle = <&ext_rgmii_phy>;
-	phy-mode = "rgmii";
+	phy-mode = "rgmii-id";
 	status = "okay";
 };
 
-- 
cgit v1.2.3-70-g09d2


From b3eec3212e66ece33f69be0de98d54e67834e798 Mon Sep 17 00:00:00 2001
From: Jernej Skrabec <jernej.skrabec@siol.net>
Date: Sun, 25 Oct 2020 09:19:49 +0100
Subject: ARM: dts: sun8i: r40: bananapi-m2-ultra: Fix ethernet node

Ethernet PHY on BananaPi M2 Ultra provides RX and TX delays. Fix
ethernet node to reflect that fact.

Fixes: c36fd5a48bd2 ("ARM: dts: sun8i: r40: bananapi-m2-ultra: Enable GMAC ethernet controller")
Signed-off-by: Jernej Skrabec <jernej.skrabec@siol.net>
Signed-off-by: Maxime Ripard <maxime@cerno.tech>
Link: https://lore.kernel.org/r/20201025081949.783443-1-jernej.skrabec@siol.net
---
 arch/arm/boot/dts/sun8i-r40-bananapi-m2-ultra.dts | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/arm/boot/dts/sun8i-r40-bananapi-m2-ultra.dts b/arch/arm/boot/dts/sun8i-r40-bananapi-m2-ultra.dts
index 2fc62ef0cb3e..a6a1087a0c9b 100644
--- a/arch/arm/boot/dts/sun8i-r40-bananapi-m2-ultra.dts
+++ b/arch/arm/boot/dts/sun8i-r40-bananapi-m2-ultra.dts
@@ -129,7 +129,7 @@
 	pinctrl-names = "default";
 	pinctrl-0 = <&gmac_rgmii_pins>;
 	phy-handle = <&phy1>;
-	phy-mode = "rgmii";
+	phy-mode = "rgmii-id";
 	phy-supply = <&reg_dc1sw>;
 	status = "okay";
 };
-- 
cgit v1.2.3-70-g09d2


From 8d80e2f00a42ef10b54e1b2d9e97314f8fd046c0 Mon Sep 17 00:00:00 2001
From: Chen-Yu Tsai <wens@csie.org>
Date: Sun, 25 Oct 2020 00:25:06 +0800
Subject: Revert "arm: sun8i: orangepi-pc-plus: Set EMAC activity LEDs to
 active high"

This reverts commit 75ee680cbd2e4d0156b94f9fec50076361ab12f2.

Turns out the activity and link LEDs on the RJ45 port are active low,
just like on the Orange Pi PC.

Revert the commit that says otherwise.

Fixes: 75ee680cbd2e ("arm: sun8i: orangepi-pc-plus: Set EMAC activity LEDs to active high")
Fixes: 4904337fe34f ("ARM: dts: sunxi: Restore EMAC changes (boards)")
Signed-off-by: Chen-Yu Tsai <wens@csie.org>
Signed-off-by: Maxime Ripard <maxime@cerno.tech>
Tested-by: Jernej Skrabec <jernej.skrabec@siol.net>
Acked-by: Jernej Skrabec <jernej.skrabec@siol.net>
Link: https://lore.kernel.org/r/20201024162515.30032-1-wens@kernel.org
---
 arch/arm/boot/dts/sun8i-h3-orangepi-pc-plus.dts | 5 -----
 1 file changed, 5 deletions(-)

(limited to 'arch')

diff --git a/arch/arm/boot/dts/sun8i-h3-orangepi-pc-plus.dts b/arch/arm/boot/dts/sun8i-h3-orangepi-pc-plus.dts
index 71fb73208939..babf4cf1b2f6 100644
--- a/arch/arm/boot/dts/sun8i-h3-orangepi-pc-plus.dts
+++ b/arch/arm/boot/dts/sun8i-h3-orangepi-pc-plus.dts
@@ -53,11 +53,6 @@
 	};
 };
 
-&emac {
-	/* LEDs changed to active high on the plus */
-	/delete-property/ allwinner,leds-active-low;
-};
-
 &mmc1 {
 	vmmc-supply = <&reg_vcc3v3>;
 	bus-width = <4>;
-- 
cgit v1.2.3-70-g09d2


From e76724153f5b4539802cc21b2c6131058668a1c6 Mon Sep 17 00:00:00 2001
From: Chen-Yu Tsai <wens@csie.org>
Date: Sun, 25 Oct 2020 00:25:07 +0800
Subject: ARM: dts: sun6i: a31-hummingbird: Enable RGMII RX/TX delay on
 Ethernet PHY

The Ethernet PHY on the A31 Hummingbird has the RX and TX delays
enabled on the PHY, using pull-ups on the RXDLY and TXDLY pins.

Fix the phy-mode description to correct reflect this so that the
implementation doesn't reconfigure the delays incorrectly. This
happened with commit bbc4d71d6354 ("net: phy: realtek: fix rtl8211e
rx/tx delay config").

Fixes: c220aec2bb79 ("ARM: dts: sun6i: Add Merrii A31 Hummingbird support")
Signed-off-by: Chen-Yu Tsai <wens@csie.org>
Signed-off-by: Maxime Ripard <maxime@cerno.tech>
Acked-by: Jernej Skrabec <jernej.skrabec@siol.net>
Link: https://lore.kernel.org/r/20201024162515.30032-2-wens@kernel.org
---
 arch/arm/boot/dts/sun6i-a31-hummingbird.dts | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/arm/boot/dts/sun6i-a31-hummingbird.dts b/arch/arm/boot/dts/sun6i-a31-hummingbird.dts
index 049e6ab3cf56..73de34ae37fd 100644
--- a/arch/arm/boot/dts/sun6i-a31-hummingbird.dts
+++ b/arch/arm/boot/dts/sun6i-a31-hummingbird.dts
@@ -154,7 +154,7 @@
 	pinctrl-names = "default";
 	pinctrl-0 = <&gmac_rgmii_pins>;
 	phy-handle = <&phy1>;
-	phy-mode = "rgmii";
+	phy-mode = "rgmii-id";
 	status = "okay";
 };
 
-- 
cgit v1.2.3-70-g09d2


From 353c3de1303fc93032164402c0eb8550ecd6f154 Mon Sep 17 00:00:00 2001
From: Chen-Yu Tsai <wens@csie.org>
Date: Sun, 25 Oct 2020 00:25:08 +0800
Subject: ARM: dts: sun7i: cubietruck: Enable RGMII RX/TX delay on Ethernet PHY
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The Ethernet PHY on the Cubietruck has the RX and TX delays
enabled on the PHY, using pull-ups on the RXDLY and TXDLY pins.

Fix the phy-mode description to correct reflect this so that the
implementation doesn't reconfigure the delays incorrectly. This
happened with commit bbc4d71d6354 ("net: phy: realtek: fix rtl8211e
rx/tx delay config").

Fixes: 67073d97672d ("ARM: dts: sun7i: cubietruck: Enable the GMAC")
Signed-off-by: Chen-Yu Tsai <wens@csie.org>
Signed-off-by: Maxime Ripard <maxime@cerno.tech>
Tested-by: Emilio López <emilio@elopez.com.ar>
Reviewed-by: Emilio López <emilio@elopez.com.ar>
Link: https://lore.kernel.org/r/20201024162515.30032-3-wens@kernel.org
---
 arch/arm/boot/dts/sun7i-a20-cubietruck.dts | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/arm/boot/dts/sun7i-a20-cubietruck.dts b/arch/arm/boot/dts/sun7i-a20-cubietruck.dts
index 8c8dee6ea461..9109ca0919ad 100644
--- a/arch/arm/boot/dts/sun7i-a20-cubietruck.dts
+++ b/arch/arm/boot/dts/sun7i-a20-cubietruck.dts
@@ -151,7 +151,7 @@
 	pinctrl-names = "default";
 	pinctrl-0 = <&gmac_rgmii_pins>;
 	phy-handle = <&phy1>;
-	phy-mode = "rgmii";
+	phy-mode = "rgmii-id";
 	status = "okay";
 };
 
-- 
cgit v1.2.3-70-g09d2


From f94f78bd93f567c022f594589dbeecdf59931365 Mon Sep 17 00:00:00 2001
From: Chen-Yu Tsai <wens@csie.org>
Date: Sun, 25 Oct 2020 00:25:09 +0800
Subject: ARM: dts: sun7i: bananapi-m1-plus: Enable RGMII RX/TX delay on
 Ethernet PHY

The Ethernet PHY on the Bananapi M1+ has the RX and TX delays
enabled on the PHY, using pull-ups on the RXDLY and TXDLY pins.

Fix the phy-mode description to correct reflect this so that the
implementation doesn't reconfigure the delays incorrectly. This
happened with commit bbc4d71d6354 ("net: phy: realtek: fix rtl8211e
rx/tx delay config").

Fixes: 04c85ecad32a ("ARM: dts: sun7i: Add dts file for Bananapi M1 Plus board")
Signed-off-by: Chen-Yu Tsai <wens@csie.org>
Signed-off-by: Maxime Ripard <maxime@cerno.tech>
Acked-by: Jernej Skrabec <jernej.skrabec@siol.net>
Link: https://lore.kernel.org/r/20201024162515.30032-4-wens@kernel.org
---
 arch/arm/boot/dts/sun7i-a20-bananapi-m1-plus.dts | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/arm/boot/dts/sun7i-a20-bananapi-m1-plus.dts b/arch/arm/boot/dts/sun7i-a20-bananapi-m1-plus.dts
index 32d5d45a35c0..8945dbb114a2 100644
--- a/arch/arm/boot/dts/sun7i-a20-bananapi-m1-plus.dts
+++ b/arch/arm/boot/dts/sun7i-a20-bananapi-m1-plus.dts
@@ -130,7 +130,7 @@
 	pinctrl-names = "default";
 	pinctrl-0 = <&gmac_rgmii_pins>;
 	phy-handle = <&phy1>;
-	phy-mode = "rgmii";
+	phy-mode = "rgmii-id";
 	phy-supply = <&reg_gmac_3v3>;
 	status = "okay";
 };
-- 
cgit v1.2.3-70-g09d2


From e080ab31a0aa126b0a7e4f67f2b01b371b852c88 Mon Sep 17 00:00:00 2001
From: Chen-Yu Tsai <wens@csie.org>
Date: Sun, 25 Oct 2020 00:25:10 +0800
Subject: ARM: dts: sun8i: h3: orangepi-plus2e: Enable RGMII RX/TX delay on
 Ethernet PHY

The Ethernet PHY on the Orange Pi Plus 2E has the RX and TX delays
enabled on the PHY, using pull-ups on the RXDLY and TXDLY pins.

Fix the phy-mode description to correct reflect this so that the
implementation doesn't reconfigure the delays incorrectly. This
happened with commit bbc4d71d6354 ("net: phy: realtek: fix rtl8211e
rx/tx delay config").

Fixes: 4904337fe34f ("ARM: dts: sunxi: Restore EMAC changes (boards)")
Fixes: 7a78ef92cdc5 ("ARM: sun8i: h3: Enable EMAC with external PHY on Orange Pi Plus 2E")
Signed-off-by: Chen-Yu Tsai <wens@csie.org>
Signed-off-by: Maxime Ripard <maxime@cerno.tech>
Tested-by: Jernej Skrabec <jernej.skrabec@siol.net>
Acked-by: Jernej Skrabec <jernej.skrabec@siol.net>
Link: https://lore.kernel.org/r/20201024162515.30032-5-wens@kernel.org
---
 arch/arm/boot/dts/sun8i-h3-orangepi-plus2e.dts | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/arm/boot/dts/sun8i-h3-orangepi-plus2e.dts b/arch/arm/boot/dts/sun8i-h3-orangepi-plus2e.dts
index 6dbf7b2e0c13..b6ca45d18e51 100644
--- a/arch/arm/boot/dts/sun8i-h3-orangepi-plus2e.dts
+++ b/arch/arm/boot/dts/sun8i-h3-orangepi-plus2e.dts
@@ -67,7 +67,7 @@
 	pinctrl-0 = <&emac_rgmii_pins>;
 	phy-supply = <&reg_gmac_3v3>;
 	phy-handle = <&ext_rgmii_phy>;
-	phy-mode = "rgmii";
+	phy-mode = "rgmii-id";
 	status = "okay";
 };
 
-- 
cgit v1.2.3-70-g09d2


From 57dbe558457bf4042169bc1f334e3b53a8480a1c Mon Sep 17 00:00:00 2001
From: Chen-Yu Tsai <wens@csie.org>
Date: Sun, 25 Oct 2020 00:25:11 +0800
Subject: ARM: dts: sun8i: a83t: Enable both RGMII RX/TX delay on Ethernet PHY

The Ethernet PHY on the Bananapi M3 and Cubietruck Plus have the RX
and TX delays enabled on the PHY, using pull-ups on the RXDLY and
TXDLY pins.

Fix the phy-mode description to correct reflect this so that the
implementation doesn't reconfigure the delays incorrectly. This
happened with commit bbc4d71d6354 ("net: phy: realtek: fix rtl8211e
rx/tx delay config").

Fixes: 039359948a4b ("ARM: dts: sun8i: a83t: Enable Ethernet on two boards")
Signed-off-by: Chen-Yu Tsai <wens@csie.org>
Signed-off-by: Maxime Ripard <maxime@cerno.tech>
Acked-by: Jernej Skrabec <jernej.skrabec@siol.net>
Link: https://lore.kernel.org/r/20201024162515.30032-6-wens@kernel.org
---
 arch/arm/boot/dts/sun8i-a83t-bananapi-m3.dts     | 2 +-
 arch/arm/boot/dts/sun8i-a83t-cubietruck-plus.dts | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/arm/boot/dts/sun8i-a83t-bananapi-m3.dts b/arch/arm/boot/dts/sun8i-a83t-bananapi-m3.dts
index 9d34eabba121..431f70234d36 100644
--- a/arch/arm/boot/dts/sun8i-a83t-bananapi-m3.dts
+++ b/arch/arm/boot/dts/sun8i-a83t-bananapi-m3.dts
@@ -131,7 +131,7 @@
 	pinctrl-0 = <&emac_rgmii_pins>;
 	phy-supply = <&reg_sw>;
 	phy-handle = <&rgmii_phy>;
-	phy-mode = "rgmii";
+	phy-mode = "rgmii-id";
 	allwinner,rx-delay-ps = <700>;
 	allwinner,tx-delay-ps = <700>;
 	status = "okay";
diff --git a/arch/arm/boot/dts/sun8i-a83t-cubietruck-plus.dts b/arch/arm/boot/dts/sun8i-a83t-cubietruck-plus.dts
index d9be511f054f..d8326a5c681d 100644
--- a/arch/arm/boot/dts/sun8i-a83t-cubietruck-plus.dts
+++ b/arch/arm/boot/dts/sun8i-a83t-cubietruck-plus.dts
@@ -183,7 +183,7 @@
 	pinctrl-0 = <&emac_rgmii_pins>;
 	phy-supply = <&reg_dldo4>;
 	phy-handle = <&rgmii_phy>;
-	phy-mode = "rgmii";
+	phy-mode = "rgmii-id";
 	status = "okay";
 };
 
-- 
cgit v1.2.3-70-g09d2


From b1064037e8ecf09d587b7b4966eebe0c362908e5 Mon Sep 17 00:00:00 2001
From: Chen-Yu Tsai <wens@csie.org>
Date: Sun, 25 Oct 2020 00:25:12 +0800
Subject: ARM: dts: sun9i: Enable both RGMII RX/TX delay on Ethernet PHY

The Ethernet PHY on the Cubieboard 4 and A80 Optimus have the RX
and TX delays enabled on the PHY, using pull-ups on the RXDLY and
TXDLY pins.

Fix the phy-mode description to correct reflect this so that the
implementation doesn't reconfigure the delays incorrectly. This
happened with commit bbc4d71d6354 ("net: phy: realtek: fix rtl8211e
rx/tx delay config").

Fixes: 98048143b7f8 ("ARM: dts: sun9i: cubieboard4: Enable GMAC")
Fixes: bc9bd03a44f9 ("ARM: dts: sun9i: a80-optimus: Enable GMAC")
Signed-off-by: Chen-Yu Tsai <wens@csie.org>
Signed-off-by: Maxime Ripard <maxime@cerno.tech>
Acked-by: Jernej Skrabec <jernej.skrabec@siol.net>
Link: https://lore.kernel.org/r/20201024162515.30032-7-wens@kernel.org
---
 arch/arm/boot/dts/sun9i-a80-cubieboard4.dts | 2 +-
 arch/arm/boot/dts/sun9i-a80-optimus.dts     | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/arm/boot/dts/sun9i-a80-cubieboard4.dts b/arch/arm/boot/dts/sun9i-a80-cubieboard4.dts
index d3b337b043a1..484b93df20cb 100644
--- a/arch/arm/boot/dts/sun9i-a80-cubieboard4.dts
+++ b/arch/arm/boot/dts/sun9i-a80-cubieboard4.dts
@@ -129,7 +129,7 @@
 	pinctrl-names = "default";
 	pinctrl-0 = <&gmac_rgmii_pins>;
 	phy-handle = <&phy1>;
-	phy-mode = "rgmii";
+	phy-mode = "rgmii-id";
 	phy-supply = <&reg_cldo1>;
 	status = "okay";
 };
diff --git a/arch/arm/boot/dts/sun9i-a80-optimus.dts b/arch/arm/boot/dts/sun9i-a80-optimus.dts
index bbc6335e5631..5c3580d712e4 100644
--- a/arch/arm/boot/dts/sun9i-a80-optimus.dts
+++ b/arch/arm/boot/dts/sun9i-a80-optimus.dts
@@ -124,7 +124,7 @@
 	pinctrl-names = "default";
 	pinctrl-0 = <&gmac_rgmii_pins>;
 	phy-handle = <&phy1>;
-	phy-mode = "rgmii";
+	phy-mode = "rgmii-id";
 	phy-supply = <&reg_cldo1>;
 	status = "okay";
 };
-- 
cgit v1.2.3-70-g09d2


From 3914160ffc0bf762d6d605d4b27036b7b89367ea Mon Sep 17 00:00:00 2001
From: Chen-Yu Tsai <wens@csie.org>
Date: Sun, 25 Oct 2020 00:25:13 +0800
Subject: ARM: dts: sunxi: bananapi-m2-plus: Enable RGMII RX/TX delay on
 Ethernet PHY

The Ethernet PHY on the Bananapi M2+ has the RX and TX delays
enabled on the PHY, using pull-ups on the RXDLY and TXDLY pins.

Fix the phy-mode description to correct reflect this so that the
implementation doesn't reconfigure the delays incorrectly. This
happened with commit bbc4d71d6354 ("net: phy: realtek: fix rtl8211e
rx/tx delay config").

Fixes: 8c7ba536e709 ("ARM: sun8i: bananapi-m2-plus: Enable dwmac-sun8i")
Fixes: 4904337fe34f ("ARM: dts: sunxi: Restore EMAC changes (boards)")
Fixes: aa8fee415f46 ("ARM: dts: sun8i: h3: Split out non-SoC-specific parts of Bananapi M2 Plus")
Signed-off-by: Chen-Yu Tsai <wens@csie.org>
Signed-off-by: Maxime Ripard <maxime@cerno.tech>
Tested-by: Jernej Skrabec <jernej.skrabec@siol.net>
Acked-by: Jernej Skrabec <jernej.skrabec@siol.net>
Link: https://lore.kernel.org/r/20201024162515.30032-8-wens@kernel.org
---
 arch/arm/boot/dts/sunxi-bananapi-m2-plus.dtsi | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/arm/boot/dts/sunxi-bananapi-m2-plus.dtsi b/arch/arm/boot/dts/sunxi-bananapi-m2-plus.dtsi
index 39263e74fbb5..8e5cb3b3fd68 100644
--- a/arch/arm/boot/dts/sunxi-bananapi-m2-plus.dtsi
+++ b/arch/arm/boot/dts/sunxi-bananapi-m2-plus.dtsi
@@ -126,7 +126,7 @@
 	pinctrl-0 = <&emac_rgmii_pins>;
 	phy-supply = <&reg_gmac_3v3>;
 	phy-handle = <&ext_rgmii_phy>;
-	phy-mode = "rgmii";
+	phy-mode = "rgmii-id";
 
 	status = "okay";
 };
-- 
cgit v1.2.3-70-g09d2


From 2bd8570d20c88909b8be3251727a26476b02652c Mon Sep 17 00:00:00 2001
From: Chen-Yu Tsai <wens@csie.org>
Date: Sun, 25 Oct 2020 00:25:14 +0800
Subject: arm64: dts: allwinner: h5: libretech-all-h5-cc: Enable RGMII RX/TX
 delay on PHY

The Ethernet PHY on the Libre Computer ALL-H5-CC has the RX and TX
delays enabled on the PHY, using pull-ups on the RXDLY and TXDLY pins.

Fix the phy-mode description to correct reflect this so that the
implementation doesn't reconfigure the delays incorrectly. This
happened with commit bbc4d71d6354 ("net: phy: realtek: fix rtl8211e
rx/tx delay config").

Fixes: 60d0426d7603 ("arm64: dts: allwinner: h5: Add Libre Computer ALL-H5-CC H5 board")
Signed-off-by: Chen-Yu Tsai <wens@csie.org>
Signed-off-by: Maxime Ripard <maxime@cerno.tech>
Acked-by: Jernej Skrabec <jernej.skrabec@siol.net>
Link: https://lore.kernel.org/r/20201024162515.30032-9-wens@kernel.org
---
 arch/arm64/boot/dts/allwinner/sun50i-h5-libretech-all-h5-cc.dts | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h5-libretech-all-h5-cc.dts b/arch/arm64/boot/dts/allwinner/sun50i-h5-libretech-all-h5-cc.dts
index df1b9263ad0e..6e30a564c87f 100644
--- a/arch/arm64/boot/dts/allwinner/sun50i-h5-libretech-all-h5-cc.dts
+++ b/arch/arm64/boot/dts/allwinner/sun50i-h5-libretech-all-h5-cc.dts
@@ -36,7 +36,7 @@
 	pinctrl-0 = <&emac_rgmii_pins>;
 	phy-supply = <&reg_gmac_3v3>;
 	phy-handle = <&ext_rgmii_phy>;
-	phy-mode = "rgmii";
+	phy-mode = "rgmii-id";
 	/delete-property/ allwinner,leds-active-low;
 	status = "okay";
 };
-- 
cgit v1.2.3-70-g09d2


From 1a9a8910b2153cd3c4f3f2f8defcb853ead3b1fd Mon Sep 17 00:00:00 2001
From: Chen-Yu Tsai <wens@csie.org>
Date: Sun, 25 Oct 2020 00:25:15 +0800
Subject: arm64: dts: allwinner: a64: bananapi-m64: Enable RGMII RX/TX delay on
 PHY

The Ethernet PHY on the Bananapi M64 has the RX and TX delays
enabled on the PHY, using pull-ups on the RXDLY and TXDLY pins.

Fix the phy-mode description to correct reflect this so that the
implementation doesn't reconfigure the delays incorrectly. This
happened with commit bbc4d71d6354 ("net: phy: realtek: fix rtl8211e
rx/tx delay config").

Fixes: e7295499903d ("arm64: allwinner: bananapi-m64: Enable dwmac-sun8i")
Fixes: 94f442886711 ("arm64: dts: allwinner: A64: Restore EMAC changes")
Signed-off-by: Chen-Yu Tsai <wens@csie.org>
Signed-off-by: Maxime Ripard <maxime@cerno.tech>
Tested-by: Corentin Labbe <clabbe.montjoie@gmail.com>
Acked-by: Jernej Skrabec <jernej.skrabec@siol.net>
Link: https://lore.kernel.org/r/20201024162515.30032-10-wens@kernel.org
---
 arch/arm64/boot/dts/allwinner/sun50i-a64-bananapi-m64.dts | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-bananapi-m64.dts b/arch/arm64/boot/dts/allwinner/sun50i-a64-bananapi-m64.dts
index 3ea5182ca489..e5e840b9fbb4 100644
--- a/arch/arm64/boot/dts/allwinner/sun50i-a64-bananapi-m64.dts
+++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-bananapi-m64.dts
@@ -105,7 +105,7 @@
 &emac {
 	pinctrl-names = "default";
 	pinctrl-0 = <&rgmii_pins>;
-	phy-mode = "rgmii";
+	phy-mode = "rgmii-id";
 	phy-handle = <&ext_rgmii_phy>;
 	phy-supply = <&reg_dc1sw>;
 	status = "okay";
-- 
cgit v1.2.3-70-g09d2


From 294a3317bef52b189139c813b50dd14d344fa9ec Mon Sep 17 00:00:00 2001
From: Tony Lindgren <tony@atomide.com>
Date: Wed, 28 Oct 2020 08:03:17 +0200
Subject: ARM: OMAP2+: Manage MPU state properly for omap_enter_idle_coupled()

Based on more testing, commit 8ca5ee624b4c ("ARM: OMAP2+: Restore MPU
power domain if cpu_cluster_pm_enter() fails") is a poor fix for handling
cpu_cluster_pm_enter() returned errors.

We should not override the cpuidle states with a hardcoded PWRDM_POWER_ON
value. Instead, we should use a configured idle state that does not cause
the context to be lost. Otherwise we end up configuring a potentially
improper state for the MPUSS. We also want to update the returned state
index for the selected state.

Let's just select the highest power idle state C1 to ensure no context
loss is allowed on cpu_cluster_pm_enter() errors. With these changes we
can now unconditionally call omap4_enter_lowpower() for WFI like we did
earlier before commit 55be2f50336f ("ARM: OMAP2+: Handle errors for
cpu_pm"). And we can return the selected state index.

Fixes: 8f04aea048d5 ("ARM: OMAP2+: Restore MPU power domain if cpu_cluster_pm_enter() fails")
Fixes: 55be2f50336f ("ARM: OMAP2+: Handle errors for cpu_pm")
Signed-off-by: Tony Lindgren <tony@atomide.com>
---
 arch/arm/mach-omap2/cpuidle44xx.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

(limited to 'arch')

diff --git a/arch/arm/mach-omap2/cpuidle44xx.c b/arch/arm/mach-omap2/cpuidle44xx.c
index a92d277f81a0..c8d317fafe2e 100644
--- a/arch/arm/mach-omap2/cpuidle44xx.c
+++ b/arch/arm/mach-omap2/cpuidle44xx.c
@@ -175,8 +175,11 @@ static int omap_enter_idle_coupled(struct cpuidle_device *dev,
 		if (mpuss_can_lose_context) {
 			error = cpu_cluster_pm_enter();
 			if (error) {
-				omap_set_pwrdm_state(mpu_pd, PWRDM_POWER_ON);
-				goto cpu_cluster_pm_out;
+				index = 0;
+				cx = state_ptr + index;
+				pwrdm_set_logic_retst(mpu_pd, cx->mpu_logic_state);
+				omap_set_pwrdm_state(mpu_pd, cx->mpu_state);
+				mpuss_can_lose_context = 0;
 			}
 		}
 	}
@@ -184,7 +187,6 @@ static int omap_enter_idle_coupled(struct cpuidle_device *dev,
 	omap4_enter_lowpower(dev->cpu, cx->cpu_state);
 	cpu_done[dev->cpu] = true;
 
-cpu_cluster_pm_out:
 	/* Wakeup CPU1 only if it is not offlined */
 	if (dev->cpu == 0 && cpumask_test_cpu(1, cpu_online_mask)) {
 
-- 
cgit v1.2.3-70-g09d2


From 31b4d8e172f614adc53ddecb4b6b2f6411a49b84 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Fri, 23 Oct 2020 12:44:40 -0700
Subject: MIPS: export has_transparent_hugepage() for modules

MIPS should export its local version of "has_transparent_hugepage"
so that loadable modules (dax) can use it.

Fixes this build error:
ERROR: modpost: "has_transparent_hugepage" [drivers/dax/dax.ko] undefined!

Fixes: fd8cfd300019 ("arch: fix has_transparent_hugepage()")
Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Cc: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
Cc: linux-mips@vger.kernel.org
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Vishal Verma <vishal.l.verma@intel.com>
Cc: Dave Jiang <dave.jiang@intel.com>
Cc: linux-nvdimm@lists.01.org
Cc: Hugh Dickins <hughd@google.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
---
 arch/mips/mm/tlb-r4k.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch')

diff --git a/arch/mips/mm/tlb-r4k.c b/arch/mips/mm/tlb-r4k.c
index 38e2894d5fa3..1b939abbe4ca 100644
--- a/arch/mips/mm/tlb-r4k.c
+++ b/arch/mips/mm/tlb-r4k.c
@@ -438,6 +438,7 @@ int has_transparent_hugepage(void)
 	}
 	return mask == PM_HUGE_MASK;
 }
+EXPORT_SYMBOL(has_transparent_hugepage);
 
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE  */
 
-- 
cgit v1.2.3-70-g09d2


From 107954afc5df667da438644aa4982606663f9b17 Mon Sep 17 00:00:00 2001
From: Nenad Peric <nperic@gmail.com>
Date: Wed, 28 Oct 2020 12:58:17 +0100
Subject: arm64: dts: allwinner: h5: OrangePi Prime: Fix ethernet node

RX and TX delay are provided by ethernet PHY. Reflect that in ethernet
node.

Fixes: 44a94c7ef989 ("arm64: dts: allwinner: H5: Restore EMAC changes")
Signed-off-by: Nenad Peric <nperic@gmail.com>
Signed-off-by: Maxime Ripard <maxime@cerno.tech>
Acked-by: Jernej Skrabec <jernej.skrabec@siol.net>
Link: https://lore.kernel.org/r/20201028115817.68113-1-nperic@gmail.com
---
 arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-prime.dts | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-prime.dts b/arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-prime.dts
index cb44bfa5981f..33ab44072e6d 100644
--- a/arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-prime.dts
+++ b/arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-prime.dts
@@ -124,7 +124,7 @@
 	pinctrl-0 = <&emac_rgmii_pins>;
 	phy-supply = <&reg_gmac_3v3>;
 	phy-handle = <&ext_rgmii_phy>;
-	phy-mode = "rgmii";
+	phy-mode = "rgmii-id";
 	status = "okay";
 };
 
-- 
cgit v1.2.3-70-g09d2


From 00203737867c8b63ca247e71ada1b32bb0b0dd3d Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzk@kernel.org>
Date: Sun, 27 Sep 2020 18:59:42 +0200
Subject: arm64: dts: imx8mm-var-som: fix missing PMIC's interrupt line pull-up

The PMIC's interrupt is level low and should be pulled up.  The PMIC's
device node had pinctrl-0 property but it lacked pinctrl-names which
is required to apply the pin configuration.  The actual problem in DTS
was pointed out by Felix Radensky from Variscite.

Reported-by: Felix Radensky <felix.r@variscite.com>
Fixes: 5f67317bd967 ("arm64: dts: imx8mm: correct interrupt flags")
Signed-off-by: Krzysztof Kozlowski <krzk@kernel.org>
Reviewed-by: Robin Gong <yibin.gong@nxp.com>
Signed-off-by: Shawn Guo <shawnguo@kernel.org>
---
 arch/arm64/boot/dts/freescale/imx8mm-var-som.dtsi | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

(limited to 'arch')

diff --git a/arch/arm64/boot/dts/freescale/imx8mm-var-som.dtsi b/arch/arm64/boot/dts/freescale/imx8mm-var-som.dtsi
index 4107fe914d08..49082529764f 100644
--- a/arch/arm64/boot/dts/freescale/imx8mm-var-som.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8mm-var-som.dtsi
@@ -135,13 +135,10 @@
 	pmic@4b {
 		compatible = "rohm,bd71847";
 		reg = <0x4b>;
+		pinctrl-names = "default";
 		pinctrl-0 = <&pinctrl_pmic>;
 		interrupt-parent = <&gpio2>;
-		/*
-		 * The interrupt is not correct. It should be level low,
-		 * however with internal pull up this causes IRQ storm.
-		 */
-		interrupts = <8 IRQ_TYPE_EDGE_RISING>;
+		interrupts = <8 IRQ_TYPE_LEVEL_LOW>;
 		rohm,reset-snvs-powered;
 
 		#clock-cells = <0>;
@@ -398,7 +395,7 @@
 
 	pinctrl_pmic: pmicirqgrp {
 		fsl,pins = <
-			MX8MM_IOMUXC_SD1_DATA6_GPIO2_IO8	0x41
+			MX8MM_IOMUXC_SD1_DATA6_GPIO2_IO8	0x141
 		>;
 	};
 
-- 
cgit v1.2.3-70-g09d2


From 0710e4385c9c978952333393396061ed1672d145 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzk@kernel.org>
Date: Sun, 27 Sep 2020 18:59:43 +0200
Subject: arm64: dts: imx8mm-beacon-som: fix missing PMIC's interrupt line
 pull-up

The PMIC's interrupt is level low and should be pulled up.  The PMIC's
device node had pinctrl-0 property but it lacked pinctrl-names which
is required to apply the pin configuration.

Fixes: 5f67317bd967 ("arm64: dts: imx8mm: correct interrupt flags")
Fixes: 593816fa2f35 ("arm64: dts: imx: Add Beacon i.MX8m-Mini development kit")
Signed-off-by: Krzysztof Kozlowski <krzk@kernel.org>
Tested-by: Adam Ford <aford173@gmail.com>
Reviewed-by: Robin Gong <yibin.gong@nxp.com>
Signed-off-by: Shawn Guo <shawnguo@kernel.org>
---
 arch/arm64/boot/dts/freescale/imx8mm-beacon-som.dtsi | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch')

diff --git a/arch/arm64/boot/dts/freescale/imx8mm-beacon-som.dtsi b/arch/arm64/boot/dts/freescale/imx8mm-beacon-som.dtsi
index 6de86a4f0ec4..55b36bddd513 100644
--- a/arch/arm64/boot/dts/freescale/imx8mm-beacon-som.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8mm-beacon-som.dtsi
@@ -72,6 +72,7 @@
 	pmic@4b {
 		compatible = "rohm,bd71847";
 		reg = <0x4b>;
+		pinctrl-names = "default";
 		pinctrl-0 = <&pinctrl_pmic>;
 		interrupt-parent = <&gpio1>;
 		interrupts = <3 IRQ_TYPE_LEVEL_LOW>;
-- 
cgit v1.2.3-70-g09d2


From ce6fc31f388d45b9f7135169f911cd27f4d21126 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzk@kernel.org>
Date: Sun, 27 Sep 2020 18:59:44 +0200
Subject: arm64: dts: imx8mm-evk: fix missing PMIC's interrupt line pull-up

The PMIC's interrupt is level low and should be pulled up.  The PMIC's
device node had pinctrl-0 property but it lacked pinctrl-names which
is required to apply the pin configuration.

Fixes: 5f67317bd967 ("arm64: dts: imx8mm: correct interrupt flags")
Fixes: aa71d0648318 ("arm64: dts: imx8mm: Split the imx8mm evk board dts to a common dtsi")
Signed-off-by: Krzysztof Kozlowski <krzk@kernel.org>
Reviewed-by: Robin Gong <yibin.gong@nxp.com>
Signed-off-by: Shawn Guo <shawnguo@kernel.org>
---
 arch/arm64/boot/dts/freescale/imx8mm-evk.dtsi | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch')

diff --git a/arch/arm64/boot/dts/freescale/imx8mm-evk.dtsi b/arch/arm64/boot/dts/freescale/imx8mm-evk.dtsi
index f305a530ff6f..521eb3a5a12e 100644
--- a/arch/arm64/boot/dts/freescale/imx8mm-evk.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8mm-evk.dtsi
@@ -121,6 +121,7 @@
 	pmic@4b {
 		compatible = "rohm,bd71847";
 		reg = <0x4b>;
+		pinctrl-names = "default";
 		pinctrl-0 = <&pinctrl_pmic>;
 		interrupt-parent = <&gpio1>;
 		interrupts = <3 IRQ_TYPE_LEVEL_LOW>;
-- 
cgit v1.2.3-70-g09d2


From 34a1c5e39b670fd7a324b5620c9ad4ac80c2f018 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzk@kernel.org>
Date: Sun, 27 Sep 2020 18:59:45 +0200
Subject: arm64: dts: imx8mn-var-som: fix missing PMIC's interrupt line pull-up

The PMIC's interrupt is level low and should be pulled up.  The PMIC's
device node had pinctrl-0 property but it lacked pinctrl-names which
is required to apply the pin configuration.  The actual problem in DTS
was pointed out by Felix Radensky from Variscite.

Reported-by: Felix Radensky <felix.r@variscite.com>
Fixes: ade0176dd8a0 ("arm64: dts: imx8mn-var-som: Add Variscite VAR-SOM-MX8MN System on Module")
Signed-off-by: Krzysztof Kozlowski <krzk@kernel.org>
Reviewed-by: Robin Gong <yibin.gong@nxp.com>
Signed-off-by: Shawn Guo <shawnguo@kernel.org>
---
 arch/arm64/boot/dts/freescale/imx8mn-var-som.dtsi | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

(limited to 'arch')

diff --git a/arch/arm64/boot/dts/freescale/imx8mn-var-som.dtsi b/arch/arm64/boot/dts/freescale/imx8mn-var-som.dtsi
index a2d0190921e4..7f356edf9f91 100644
--- a/arch/arm64/boot/dts/freescale/imx8mn-var-som.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8mn-var-som.dtsi
@@ -116,13 +116,10 @@
 	pmic@4b {
 		compatible = "rohm,bd71847";
 		reg = <0x4b>;
+		pinctrl-names = "default";
 		pinctrl-0 = <&pinctrl_pmic>;
 		interrupt-parent = <&gpio2>;
-		/*
-		 * The interrupt is not correct. It should be level low,
-		 * however with internal pull up this causes IRQ storm.
-		 */
-		interrupts = <8 IRQ_TYPE_EDGE_RISING>;
+		interrupts = <8 IRQ_TYPE_LEVEL_LOW>;
 		rohm,reset-snvs-powered;
 
 		regulators {
@@ -388,7 +385,7 @@
 
 	pinctrl_pmic: pmicirqgrp {
 		fsl,pins = <
-			MX8MN_IOMUXC_SD1_DATA6_GPIO2_IO8	0x101
+			MX8MN_IOMUXC_SD1_DATA6_GPIO2_IO8	0x141
 		>;
 	};
 
-- 
cgit v1.2.3-70-g09d2


From 4d20fa1dac2e3cf5aa0cd317b3436f4fda680b04 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzk@kernel.org>
Date: Sun, 27 Sep 2020 18:59:46 +0200
Subject: arm64: dts: imx8mn-ddr4-evk: fix missing PMIC's interrupt line
 pull-up

The PMIC's interrupt is level low and should be pulled up.  The PMIC's
device node had pinctrl-0 property but it lacked pinctrl-names which
is required to apply the pin configuration.

Fixes: 4153f7811a9b ("arm64: dts: imx8mn: correct interrupt flags")
Fixes: 3e44dd09736d ("arm64: dts: imx8mn-ddr4-evk: Add rohm,bd71847 PMIC support")
Signed-off-by: Krzysztof Kozlowski <krzk@kernel.org>
Reviewed-by: Robin Gong <yibin.gong@nxp.com>
Signed-off-by: Shawn Guo <shawnguo@kernel.org>
---
 arch/arm64/boot/dts/freescale/imx8mn-ddr4-evk.dts | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch')

diff --git a/arch/arm64/boot/dts/freescale/imx8mn-ddr4-evk.dts b/arch/arm64/boot/dts/freescale/imx8mn-ddr4-evk.dts
index 46e76cf32b2f..7dfee715a2c4 100644
--- a/arch/arm64/boot/dts/freescale/imx8mn-ddr4-evk.dts
+++ b/arch/arm64/boot/dts/freescale/imx8mn-ddr4-evk.dts
@@ -53,6 +53,7 @@
 	pmic@4b {
 		compatible = "rohm,bd71847";
 		reg = <0x4b>;
+		pinctrl-names = "default";
 		pinctrl-0 = <&pinctrl_pmic>;
 		interrupt-parent = <&gpio1>;
 		interrupts = <3 IRQ_TYPE_LEVEL_LOW>;
-- 
cgit v1.2.3-70-g09d2


From 6efb099a1da4e954409e241b47257a637120e5c2 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzk@kernel.org>
Date: Sun, 27 Sep 2020 18:59:47 +0200
Subject: arm64: dts: imx8mn-evk: fix missing PMIC's interrupt line pull-up

The PMIC's interrupt is level low and should be pulled up.  The PMIC's
device node had pinctrl-0 property but it lacked pinctrl-names which
is required to apply the pin configuration.

Fixes: 4153f7811a9b ("arm64: dts: imx8mn: correct interrupt flags")
Fixes: 6386156eb279 ("arm64: dts: imx8mn-evk: add pca9450 for i.mx8mn-evk board")
Signed-off-by: Krzysztof Kozlowski <krzk@kernel.org>
Reviewed-by: Robin Gong <yibin.gong@nxp.com>
Signed-off-by: Shawn Guo <shawnguo@kernel.org>
---
 arch/arm64/boot/dts/freescale/imx8mn-evk.dts | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch')

diff --git a/arch/arm64/boot/dts/freescale/imx8mn-evk.dts b/arch/arm64/boot/dts/freescale/imx8mn-evk.dts
index 707d8486b4d8..8311b95dee49 100644
--- a/arch/arm64/boot/dts/freescale/imx8mn-evk.dts
+++ b/arch/arm64/boot/dts/freescale/imx8mn-evk.dts
@@ -18,6 +18,7 @@
 	pmic: pmic@25 {
 		compatible = "nxp,pca9450b";
 		reg = <0x25>;
+		pinctrl-names = "default";
 		pinctrl-0 = <&pinctrl_pmic>;
 		interrupt-parent = <&gpio1>;
 		interrupts = <3 IRQ_TYPE_LEVEL_LOW>;
-- 
cgit v1.2.3-70-g09d2


From d92454287ee25d78f1caac3734a1864f8a5a5275 Mon Sep 17 00:00:00 2001
From: Biwen Li <biwen.li@nxp.com>
Date: Tue, 29 Sep 2020 09:30:21 +0800
Subject: arm64: dts: fsl: fix endianness issue of rcpm
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add little-endian property to RCPM node (for ls1028a,ls1088a,ls208xa),
otherwise RCPM driver will program hardware with incorrect setting,
causing system (such as LS1028ARDB) failed to be waked by wakeup source.

Fixes: 791c88ca5713 (“arm64: dts: ls1028a: Add ftm_alarm0 DT node”)
Fixes: f4fe3a8665495 (“arm64: dts: layerscape: add ftm_alarm0 node”)
Signed-off-by: Biwen Li <biwen.li@nxp.com>
Signed-off-by: Ran Wang <ran.wang_1@nxp.com>
Acked-by: Li Yang <leoyang.li@nxp.com>
Signed-off-by: Shawn Guo <shawnguo@kernel.org>
---
 arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi | 1 +
 arch/arm64/boot/dts/freescale/fsl-ls1088a.dtsi | 1 +
 arch/arm64/boot/dts/freescale/fsl-ls208xa.dtsi | 1 +
 3 files changed, 3 insertions(+)

(limited to 'arch')

diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi
index 73e4f9466887..7a6fb7e1fb82 100644
--- a/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi
+++ b/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi
@@ -1012,6 +1012,7 @@
 			compatible = "fsl,ls1028a-rcpm", "fsl,qoriq-rcpm-2.1+";
 			reg = <0x0 0x1e34040 0x0 0x1c>;
 			#fsl,rcpm-wakeup-cells = <7>;
+			little-endian;
 		};
 
 		ftm_alarm0: timer@2800000 {
diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1088a.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls1088a.dtsi
index ff5805206a28..692d8f4a206d 100644
--- a/arch/arm64/boot/dts/freescale/fsl-ls1088a.dtsi
+++ b/arch/arm64/boot/dts/freescale/fsl-ls1088a.dtsi
@@ -805,6 +805,7 @@
 			compatible = "fsl,ls1088a-rcpm", "fsl,qoriq-rcpm-2.1+";
 			reg = <0x0 0x1e34040 0x0 0x18>;
 			#fsl,rcpm-wakeup-cells = <6>;
+			little-endian;
 		};
 
 		ftm_alarm0: timer@2800000 {
diff --git a/arch/arm64/boot/dts/freescale/fsl-ls208xa.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls208xa.dtsi
index bf72918fe545..e7abb74bd816 100644
--- a/arch/arm64/boot/dts/freescale/fsl-ls208xa.dtsi
+++ b/arch/arm64/boot/dts/freescale/fsl-ls208xa.dtsi
@@ -892,6 +892,7 @@
 			compatible = "fsl,ls208xa-rcpm", "fsl,qoriq-rcpm-2.1+";
 			reg = <0x0 0x1e34040 0x0 0x18>;
 			#fsl,rcpm-wakeup-cells = <6>;
+			little-endian;
 		};
 
 		ftm_alarm0: timer@2800000 {
-- 
cgit v1.2.3-70-g09d2


From 054b5d97448714ae4a0bcd6f36b0515ac7aed21e Mon Sep 17 00:00:00 2001
From: Madalin Bucur <madalin.bucur@oss.nxp.com>
Date: Mon, 5 Oct 2020 15:46:39 +0300
Subject: arm64: dts: fsl: DPAA FMan DMA operations are coherent

Although the DPAA 1 FMan operations are coherent, the device tree
node for the FMan does not indicate that, resulting in a needless
loss of performance. Adding the missing dma-coherent property.

Fixes: 1ffbecdd8321 ("arm64: dts: add DPAA FMan nodes")
Signed-off-by: Madalin Bucur <madalin.bucur@oss.nxp.com>
Tested-by: Camelia Groza <camelia.groza@oss.nxp.com>
Acked-by: Li Yang <leoyang.li@nxp.com>
Signed-off-by: Shawn Guo <shawnguo@kernel.org>
---
 arch/arm64/boot/dts/freescale/qoriq-fman3-0.dtsi | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch')

diff --git a/arch/arm64/boot/dts/freescale/qoriq-fman3-0.dtsi b/arch/arm64/boot/dts/freescale/qoriq-fman3-0.dtsi
index 8bc6caa9167d..4338db14c5da 100644
--- a/arch/arm64/boot/dts/freescale/qoriq-fman3-0.dtsi
+++ b/arch/arm64/boot/dts/freescale/qoriq-fman3-0.dtsi
@@ -19,6 +19,7 @@ fman0: fman@1a00000 {
 	clock-names = "fmanclk";
 	fsl,qman-channel-range = <0x800 0x10>;
 	ptimer-handle = <&ptp_timer0>;
+	dma-coherent;
 
 	muram@0 {
 		compatible = "fsl,fman-muram";
-- 
cgit v1.2.3-70-g09d2


From 587258edd94c305077923ec458e04c032fca83e6 Mon Sep 17 00:00:00 2001
From: Adam Ford <aford173@gmail.com>
Date: Wed, 7 Oct 2020 08:02:37 -0500
Subject: arm64: dts: imx8mm-beacon-som: Fix Choppy BT audio

When streaming bluetooth audio, the sound is choppy due to the
fact that the default baud rate of the HCI interface is too slow
to handle 16-bit stereo at 48KHz.

The Bluetooth chip is capable of up to 4M baud on the serial port,
so this patch sets the max-speed to 4000000 in order to properly
stream audio over the Bluetooth.

Fixes: 593816fa2f35 ("arm64: dts: imx: Add Beacon i.MX8m-Mini development kit")
Signed-off-by: Adam Ford <aford173@gmail.com>
Signed-off-by: Shawn Guo <shawnguo@kernel.org>
---
 arch/arm64/boot/dts/freescale/imx8mm-beacon-som.dtsi | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch')

diff --git a/arch/arm64/boot/dts/freescale/imx8mm-beacon-som.dtsi b/arch/arm64/boot/dts/freescale/imx8mm-beacon-som.dtsi
index 55b36bddd513..b88c3c99b007 100644
--- a/arch/arm64/boot/dts/freescale/imx8mm-beacon-som.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8mm-beacon-som.dtsi
@@ -211,6 +211,7 @@
 		host-wakeup-gpios = <&gpio2 8 GPIO_ACTIVE_HIGH>;
 		device-wakeup-gpios = <&gpio2 7 GPIO_ACTIVE_HIGH>;
 		clocks = <&osc_32k>;
+		max-speed = <4000000>;
 		clock-names = "extclk";
 	};
 };
-- 
cgit v1.2.3-70-g09d2


From cf5abb0132193767c07c83e06f91b777d22ba495 Mon Sep 17 00:00:00 2001
From: Adam Ford <aford173@gmail.com>
Date: Thu, 8 Oct 2020 13:33:00 -0500
Subject: arm64: dts imx8mn: Remove non-existent USB OTG2

According to the i.MX8MN TRM, there is only one OTG port.  The
address for OTG2 is reserved on Nano.

This patch removes the non-existent OTG2, usbphynop2, and the usbmisc2
nodes.

Fixes: 6c3debcbae47 ("arm64: dts: freescale: Add i.MX8MN dtsi support")
Signed-off-by: Adam Ford <aford173@gmail.com>
Reviewed-by: Krzysztof Kozlowski <krzk@kernel.org>
Signed-off-by: Shawn Guo <shawnguo@kernel.org>
---
 arch/arm64/boot/dts/freescale/imx8mn.dtsi | 30 ------------------------------
 1 file changed, 30 deletions(-)

(limited to 'arch')

diff --git a/arch/arm64/boot/dts/freescale/imx8mn.dtsi b/arch/arm64/boot/dts/freescale/imx8mn.dtsi
index 746faf1cf2fb..16c7202885d7 100644
--- a/arch/arm64/boot/dts/freescale/imx8mn.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8mn.dtsi
@@ -790,28 +790,6 @@
 				#index-cells = <1>;
 				reg = <0x32e40200 0x200>;
 			};
-
-			usbotg2: usb@32e50000 {
-				compatible = "fsl,imx8mn-usb", "fsl,imx7d-usb";
-				reg = <0x32e50000 0x200>;
-				interrupts = <GIC_SPI 41 IRQ_TYPE_LEVEL_HIGH>;
-				clocks = <&clk IMX8MN_CLK_USB1_CTRL_ROOT>;
-				clock-names = "usb1_ctrl_root_clk";
-				assigned-clocks = <&clk IMX8MN_CLK_USB_BUS>,
-						  <&clk IMX8MN_CLK_USB_CORE_REF>;
-				assigned-clock-parents = <&clk IMX8MN_SYS_PLL2_500M>,
-							 <&clk IMX8MN_SYS_PLL1_100M>;
-				fsl,usbphy = <&usbphynop2>;
-				fsl,usbmisc = <&usbmisc2 0>;
-				status = "disabled";
-			};
-
-			usbmisc2: usbmisc@32e50200 {
-				compatible = "fsl,imx8mn-usbmisc", "fsl,imx7d-usbmisc";
-				#index-cells = <1>;
-				reg = <0x32e50200 0x200>;
-			};
-
 		};
 
 		dma_apbh: dma-controller@33000000 {
@@ -876,12 +854,4 @@
 		assigned-clock-parents = <&clk IMX8MN_SYS_PLL1_100M>;
 		clock-names = "main_clk";
 	};
-
-	usbphynop2: usbphynop2 {
-		compatible = "usb-nop-xceiv";
-		clocks = <&clk IMX8MN_CLK_USB_PHY_REF>;
-		assigned-clocks = <&clk IMX8MN_CLK_USB_PHY_REF>;
-		assigned-clock-parents = <&clk IMX8MN_SYS_PLL1_100M>;
-		clock-names = "main_clk";
-	};
 };
-- 
cgit v1.2.3-70-g09d2


From b0c0aa7aa4b919e02e0a24aa3a46dfbf2bbc34dc Mon Sep 17 00:00:00 2001
From: David Bauer <mail@david-bauer.net>
Date: Mon, 26 Oct 2020 17:27:21 +0100
Subject: arm64: dts: rockchip: fix NanoPi R2S GMAC clock name

This commit fixes the name for the GMAC clock to gmac_clkin, as this is
the name of the clock provided by the rk3328-clk driver.

Without this commit, the GMAC will not work in TX direction.

Fixes: f1ec83f880db ("arm64: dts: rockchip: Add support for FriendlyARM NanoPi R2S")
Suggested-by: Tobias Waldvogel <tobias.waldvogel@gmail.com>
Signed-off-by: David Bauer <mail@david-bauer.net>
Link: https://lore.kernel.org/r/20201026162721.70672-1-mail@david-bauer.net
Signed-off-by: Heiko Stuebner <heiko@sntech.de>
---
 arch/arm64/boot/dts/rockchip/rk3328-nanopi-r2s.dts | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/arm64/boot/dts/rockchip/rk3328-nanopi-r2s.dts b/arch/arm64/boot/dts/rockchip/rk3328-nanopi-r2s.dts
index be7a31d81632..2ee07d15a6e3 100644
--- a/arch/arm64/boot/dts/rockchip/rk3328-nanopi-r2s.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3328-nanopi-r2s.dts
@@ -20,7 +20,7 @@
 	gmac_clk: gmac-clock {
 		compatible = "fixed-clock";
 		clock-frequency = <125000000>;
-		clock-output-names = "gmac_clk";
+		clock-output-names = "gmac_clkin";
 		#clock-cells = <0>;
 	};
 
-- 
cgit v1.2.3-70-g09d2


From 01fe332800d0d2f94337b45c1973f4cf28ae6195 Mon Sep 17 00:00:00 2001
From: Maciej Matuszczyk <maccraft123mc@gmail.com>
Date: Fri, 23 Oct 2020 20:16:29 +0200
Subject: arm64: dts: rockchip: Remove system-power-controller from pmic on
 Odroid Go Advance

This fixes a poweroff issue when this is supposed to happen
via PSCI.

Signed-off-by: Maciej Matuszczyk <maccraft123mc@gmail.com>
Link: https://lore.kernel.org/r/20201023181629.119727-1-maccraft123mc@gmail.com
Signed-off-by: Heiko Stuebner <heiko@sntech.de>
---
 arch/arm64/boot/dts/rockchip/rk3326-odroid-go2.dts | 1 -
 1 file changed, 1 deletion(-)

(limited to 'arch')

diff --git a/arch/arm64/boot/dts/rockchip/rk3326-odroid-go2.dts b/arch/arm64/boot/dts/rockchip/rk3326-odroid-go2.dts
index 35bd6b904b9c..337681038519 100644
--- a/arch/arm64/boot/dts/rockchip/rk3326-odroid-go2.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3326-odroid-go2.dts
@@ -243,7 +243,6 @@
 		interrupts = <RK_PB2 IRQ_TYPE_LEVEL_LOW>;
 		pinctrl-names = "default";
 		pinctrl-0 = <&pmic_int>;
-		rockchip,system-power-controller;
 		wakeup-source;
 		#clock-cells = <1>;
 		clock-output-names = "rk808-clkout1", "xin32k";
-- 
cgit v1.2.3-70-g09d2


From 7dd8f0ba88fce98e2953267a66af74c6f4792a56 Mon Sep 17 00:00:00 2001
From: Sergey Matyukevich <geomatsi@gmail.com>
Date: Sat, 24 Oct 2020 23:11:20 +0300
Subject: arm: dts: imx6qdl-udoo: fix rgmii phy-mode for ksz9031 phy

Commit bcf3440c6dd7 ("net: phy: micrel: add phy-mode support for the
KSZ9031 PHY") fixed micrel phy driver adding proper support for phy
modes. Adapt imx6q-udoo board phy settings : explicitly set required
delay configuration using "rgmii-id".

Fixes: cbd54fe0b2bc ("ARM: dts: imx6dl-udoo: Add board support based off imx6q-udoo")
Signed-off-by: Sergey Matyukevich <geomatsi@gmail.com>
Reviewed-by: Fabio Estevam <festevam@gmail.com>
Signed-off-by: Shawn Guo <shawnguo@kernel.org>
---
 arch/arm/boot/dts/imx6qdl-udoo.dtsi | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/arm/boot/dts/imx6qdl-udoo.dtsi b/arch/arm/boot/dts/imx6qdl-udoo.dtsi
index 828dd20cd27d..d07d8f83456d 100644
--- a/arch/arm/boot/dts/imx6qdl-udoo.dtsi
+++ b/arch/arm/boot/dts/imx6qdl-udoo.dtsi
@@ -98,7 +98,7 @@
 &fec {
 	pinctrl-names = "default";
 	pinctrl-0 = <&pinctrl_enet>;
-	phy-mode = "rgmii";
+	phy-mode = "rgmii-id";
 	status = "okay";
 };
 
-- 
cgit v1.2.3-70-g09d2


From f8b5a33707c9a19ec905d2826be0acd151997a09 Mon Sep 17 00:00:00 2001
From: Andrew Lunn <andrew@lunn.ch>
Date: Fri, 30 Oct 2020 01:55:28 +0100
Subject: ARM: dts: vf610-zii-dev-rev-b: Fix MDIO over clocking

The ZII devel B board has two generations of Marvell Switches.  The
mv88e6352 supports an MDIO clock of 12MHz. However the older 88e6185
does not like 12MHz, and often fails to probe.

Reduce the clock speed to 5MHz, which seems to work reliably.

Cc: Chris Healy <cphealy@gmail.com>
Fixes: b955387667ec ("ARM: dts: ZII: update MDIO speed and preamble")
Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: Chris Healy <cphealy@gmail.com>
Signed-off-by: Shawn Guo <shawnguo@kernel.org>
---
 arch/arm/boot/dts/vf610-zii-dev-rev-b.dts | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'arch')

diff --git a/arch/arm/boot/dts/vf610-zii-dev-rev-b.dts b/arch/arm/boot/dts/vf610-zii-dev-rev-b.dts
index e500911ce0a5..6f1e0f0d4f0a 100644
--- a/arch/arm/boot/dts/vf610-zii-dev-rev-b.dts
+++ b/arch/arm/boot/dts/vf610-zii-dev-rev-b.dts
@@ -406,6 +406,9 @@
 	};
 };
 
+&mdio1 {
+	clock-frequency = <5000000>;
+};
 
 &iomuxc {
 	pinctrl_gpio_e6185_eeprom_sel: pinctrl-gpio-e6185-eeprom-spi0 {
-- 
cgit v1.2.3-70-g09d2


From e402599e5e5e0b2758d7766fd9f6d7953d4ccd85 Mon Sep 17 00:00:00 2001
From: Oleksij Rempel <o.rempel@pengutronix.de>
Date: Mon, 12 Oct 2020 09:18:16 +0200
Subject: ARM: dts: imx6q-prti6q: fix PHY address

Due to bug in the bootloader, the PHY has floating address and may
randomly change on each PHY reset. To avoid it, the updated bootloader
with the following patch[0] should be used:

| ARM: protonic: disable on-die termination to fix PHY bootstrapping
|
| If on-die termination is enabled, the RXC pin of iMX6 will be pulled
| high. Since we already have an 10K pull-down on board, the RXC level on
| PHY reset will be ~800mV, which is mostly interpreted as 1. On some
| reboots we get 0 instead and kernel can't detect the PHY properly.
|
| Since the default 0x020e07ac value is 0, it is sufficient to remove this
| entry from the affected imxcfg files.
|
| Since we get stable 0 on pin PHYADDR[2], the PHY address is changed from
| 4 to 0.

With latest bootloader update, the PHY address will be fixed to "0".

[0] https://git.pengutronix.de/cgit/barebox/commit/?id=93f7dcf631edfcda19e7757b28d66017ea274b81

Fixes: 0d446a505592 ("ARM: dts: add Protonic PRTI6Q board")
Signed-off-by: Oleksij Rempel <o.rempel@pengutronix.de>
Signed-off-by: Shawn Guo <shawnguo@kernel.org>
---
 arch/arm/boot/dts/imx6q-prti6q.dts | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/arm/boot/dts/imx6q-prti6q.dts b/arch/arm/boot/dts/imx6q-prti6q.dts
index d112b50f8c5d..b4605edfd2ab 100644
--- a/arch/arm/boot/dts/imx6q-prti6q.dts
+++ b/arch/arm/boot/dts/imx6q-prti6q.dts
@@ -213,8 +213,8 @@
 		#size-cells = <0>;
 
 		/* Microchip KSZ9031RNX PHY */
-		rgmii_phy: ethernet-phy@4 {
-			reg = <4>;
+		rgmii_phy: ethernet-phy@0 {
+			reg = <0>;
 			interrupts-extended = <&gpio1 28 IRQ_TYPE_LEVEL_LOW>;
 			reset-gpios = <&gpio1 25 GPIO_ACTIVE_LOW>;
 			reset-assert-us = <10000>;
-- 
cgit v1.2.3-70-g09d2


From 544cc3f8573bf9a82e8f348741f2f68d2a8376fb Mon Sep 17 00:00:00 2001
From: Jernej Skrabec <jernej.skrabec@siol.net>
Date: Sun, 1 Nov 2020 08:26:09 +0100
Subject: arm64: dts: allwinner: h6: orangepi-one-plus: Fix ethernet

RX/TX delay on OrangePi One Plus board is set on PHY. Reflect that in
ethernet node.

Fixes: 7ee32a17e0d6 ("arm64: dts: allwinner: h6: orangepi-one-plus: Enable ethernet")
Signed-off-by: Jernej Skrabec <jernej.skrabec@siol.net>
Signed-off-by: Maxime Ripard <maxime@cerno.tech>
Tested-by: Marcus Cooper <codekipper@gmail.com>
Link: https://lore.kernel.org/r/20201101072609.1681891-1-jernej.skrabec@siol.net
---
 arch/arm64/boot/dts/allwinner/sun50i-h6-orangepi-one-plus.dts | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h6-orangepi-one-plus.dts b/arch/arm64/boot/dts/allwinner/sun50i-h6-orangepi-one-plus.dts
index fceb298bfd53..29a081e72a9b 100644
--- a/arch/arm64/boot/dts/allwinner/sun50i-h6-orangepi-one-plus.dts
+++ b/arch/arm64/boot/dts/allwinner/sun50i-h6-orangepi-one-plus.dts
@@ -27,7 +27,7 @@
 &emac {
 	pinctrl-names = "default";
 	pinctrl-0 = <&ext_rgmii_pins>;
-	phy-mode = "rgmii";
+	phy-mode = "rgmii-id";
 	phy-handle = <&ext_rgmii_phy>;
 	phy-supply = <&reg_gmac_3v3>;
 	allwinner,rx-delay-ps = <200>;
-- 
cgit v1.2.3-70-g09d2


From ad2091f893bd5dfe2824f0d6819600d120698e9f Mon Sep 17 00:00:00 2001
From: Paul Kocialkowski <contact@paulk.fr>
Date: Sat, 31 Oct 2020 19:21:29 +0100
Subject: ARM: sunxi: Add machine match for the Allwinner V3 SoC

The Allwinner V3 SoC shares the same base as the V3s but comes with
extra pins and features available. As a result, it has its dedicated
compatible string (already used in device trees), which is added here.

Signed-off-by: Paul Kocialkowski <contact@paulk.fr>
Signed-off-by: Maxime Ripard <maxime@cerno.tech>
Link: https://lore.kernel.org/r/20201031182137.1879521-2-contact@paulk.fr
---
 arch/arm/mach-sunxi/sunxi.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch')

diff --git a/arch/arm/mach-sunxi/sunxi.c b/arch/arm/mach-sunxi/sunxi.c
index 06da2747a90b..19635721013d 100644
--- a/arch/arm/mach-sunxi/sunxi.c
+++ b/arch/arm/mach-sunxi/sunxi.c
@@ -66,6 +66,7 @@ static const char * const sun8i_board_dt_compat[] = {
 	"allwinner,sun8i-h2-plus",
 	"allwinner,sun8i-h3",
 	"allwinner,sun8i-r40",
+	"allwinner,sun8i-v3",
 	"allwinner,sun8i-v3s",
 	NULL,
 };
-- 
cgit v1.2.3-70-g09d2


From 6ab48105aae79b9d8062e9bc922baaeff80918d7 Mon Sep 17 00:00:00 2001
From: Matteo Scordino <matteo.scordino@gmail.com>
Date: Fri, 30 Oct 2020 23:43:25 +0000
Subject: ARM: dts: s3: pinecube: align compatible property to other S3 boards

The compatible string in the Pine64 Pinecube dts diverges from the ones
used in other S3 based boards, like the LicheePi and the Elimo Impetus
and Initium. Discussion on LKML decided the PineCube should align to the
others.

Signed-off-by: Matteo Scordino <matteo.scordino@gmail.com>
Signed-off-by: Maxime Ripard <maxime@cerno.tech>
Link: https://lore.kernel.org/r/20201030234325.5865-7-matteo.scordino@gmail.com
---
 arch/arm/boot/dts/sun8i-s3-pinecube.dts | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/arm/boot/dts/sun8i-s3-pinecube.dts b/arch/arm/boot/dts/sun8i-s3-pinecube.dts
index 9bab6b7f4014..4aa0ee897a0a 100644
--- a/arch/arm/boot/dts/sun8i-s3-pinecube.dts
+++ b/arch/arm/boot/dts/sun8i-s3-pinecube.dts
@@ -10,7 +10,7 @@
 
 / {
 	model = "PineCube IP Camera";
-	compatible = "pine64,pinecube", "allwinner,sun8i-s3";
+	compatible = "pine64,pinecube", "sochip,s3", "allwinner,sun8i-v3";
 
 	aliases {
 		serial0 = &uart2;
-- 
cgit v1.2.3-70-g09d2


From 8c9cb4094ccf242eddd140efba13872c55f68a87 Mon Sep 17 00:00:00 2001
From: Pablo Greco <pgreco@centosproject.org>
Date: Mon, 2 Nov 2020 11:16:40 -0300
Subject: ARM: dts: sun7i: bananapi: Enable RGMII RX/TX delay on Ethernet PHY

The Ethernet PHY on the Bananapi M1 has the RX and TX delays enabled on
the PHY, using pull-ups on the RXDLY and TXDLY pins.

Fix the phy-mode description to correct reflect this so that the
implementation doesn't reconfigure the delays incorrectly. This
happened with commit bbc4d71d6354 ("net: phy: realtek: fix rtl8211e
rx/tx delay config").

Fixes: 8a5b272fbf44 ("ARM: dts: sun7i: Add Banana Pi board")
Signed-off-by: Pablo Greco <pgreco@centosproject.org>
Signed-off-by: Maxime Ripard <maxime@cerno.tech>
Link: https://lore.kernel.org/r/1604326600-39544-1-git-send-email-pgreco@centosproject.org
---
 arch/arm/boot/dts/sun7i-a20-bananapi.dts | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/arm/boot/dts/sun7i-a20-bananapi.dts b/arch/arm/boot/dts/sun7i-a20-bananapi.dts
index bb3987e101c2..0b3d9ae75650 100644
--- a/arch/arm/boot/dts/sun7i-a20-bananapi.dts
+++ b/arch/arm/boot/dts/sun7i-a20-bananapi.dts
@@ -132,7 +132,7 @@
 	pinctrl-names = "default";
 	pinctrl-0 = <&gmac_rgmii_pins>;
 	phy-handle = <&phy1>;
-	phy-mode = "rgmii";
+	phy-mode = "rgmii-id";
 	phy-supply = <&reg_gmac_3v3>;
 	status = "okay";
 };
-- 
cgit v1.2.3-70-g09d2


From bd5cdcdc66e1f7179ff6d172d1e5f55e43403aa8 Mon Sep 17 00:00:00 2001
From: Pablo Greco <pgreco@centosproject.org>
Date: Mon, 2 Nov 2020 11:19:14 -0300
Subject: ARM: dts: sun8i: r40: bananapi-m2-berry: Fix dcdc1 regulator

DCDC1 regulator powers many different subsystems. While some of them can
work at 3.0 V, some of them can not. For example, VCC-HDMI can only work
between 3.24 V and 3.36 V. According to OS images provided by the board
manufacturer this regulator should be set to 3.3 V.

Set DCDC1 and DCDC1SW to 3.3 V in order to fix this.

Fixes: 23edc168bd98 ("ARM: dts: sun8i: Add board dts file for Banana Pi M2 Berry")
Fixes: 27e81e1970a8 ("ARM: dts: sun8i: v40: bananapi-m2-berry: Enable GMAC ethernet controller")
Signed-off-by: Pablo Greco <pgreco@centosproject.org>
Signed-off-by: Maxime Ripard <maxime@cerno.tech>
Link: https://lore.kernel.org/r/1604326755-39742-1-git-send-email-pgreco@centosproject.org
---
 arch/arm/boot/dts/sun8i-v40-bananapi-m2-berry.dts | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'arch')

diff --git a/arch/arm/boot/dts/sun8i-v40-bananapi-m2-berry.dts b/arch/arm/boot/dts/sun8i-v40-bananapi-m2-berry.dts
index 15c22b06fc4b..84eb08295718 100644
--- a/arch/arm/boot/dts/sun8i-v40-bananapi-m2-berry.dts
+++ b/arch/arm/boot/dts/sun8i-v40-bananapi-m2-berry.dts
@@ -198,16 +198,16 @@
 };
 
 &reg_dc1sw {
-	regulator-min-microvolt = <3000000>;
-	regulator-max-microvolt = <3000000>;
+	regulator-min-microvolt = <3300000>;
+	regulator-max-microvolt = <3300000>;
 	regulator-name = "vcc-gmac-phy";
 };
 
 &reg_dcdc1 {
 	regulator-always-on;
-	regulator-min-microvolt = <3000000>;
-	regulator-max-microvolt = <3000000>;
-	regulator-name = "vcc-3v0";
+	regulator-min-microvolt = <3300000>;
+	regulator-max-microvolt = <3300000>;
+	regulator-name = "vcc-3v3";
 };
 
 &reg_dcdc2 {
-- 
cgit v1.2.3-70-g09d2


From 8a82d91fa275aaea49be06d7f5b1407ce1c0dd4b Mon Sep 17 00:00:00 2001
From: Pablo Greco <pgreco@centosproject.org>
Date: Mon, 2 Nov 2020 11:19:29 -0300
Subject: ARM: dts: sun8i: v40: bananapi-m2-berry: Fix ethernet node

Ethernet PHY on BananaPi M2 Berry provides RX and TX delays. Fix ethernet
node to reflect that fact.

Fixes: 27e81e1970a8 ("ARM: dts: sun8i: v40: bananapi-m2-berry: Enable GMAC ethernet controller")
Signed-off-by: Pablo Greco <pgreco@centosproject.org>
Signed-off-by: Maxime Ripard <maxime@cerno.tech>
Link: https://lore.kernel.org/r/1604326769-39802-1-git-send-email-pgreco@centosproject.org
---
 arch/arm/boot/dts/sun8i-v40-bananapi-m2-berry.dts | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/arm/boot/dts/sun8i-v40-bananapi-m2-berry.dts b/arch/arm/boot/dts/sun8i-v40-bananapi-m2-berry.dts
index 84eb08295718..47954551f573 100644
--- a/arch/arm/boot/dts/sun8i-v40-bananapi-m2-berry.dts
+++ b/arch/arm/boot/dts/sun8i-v40-bananapi-m2-berry.dts
@@ -120,7 +120,7 @@
 	pinctrl-names = "default";
 	pinctrl-0 = <&gmac_rgmii_pins>;
 	phy-handle = <&phy1>;
-	phy-mode = "rgmii";
+	phy-mode = "rgmii-id";
 	phy-supply = <&reg_dc1sw>;
 	status = "okay";
 };
-- 
cgit v1.2.3-70-g09d2


From f126b6702e7354d6247a36f20b9172457af5c15a Mon Sep 17 00:00:00 2001
From: Dinh Nguyen <dinguyen@kernel.org>
Date: Sun, 1 Nov 2020 14:02:56 -0600
Subject: arm64: dts: agilex/stratix10: Fix qspi node compatible

The QSPI flash node needs to have the required "jedec,spi-nor"
in the compatible string.

Fixes: 0cb140d07fc7 ("arm64: dts: stratix10: Add QSPI support for Stratix10")
Cc: stable@vger.kernel.org
Suggested-by: Vignesh Raghavendra <vigneshr@ti.com>
Signed-off-by: Dinh Nguyen <dinguyen@kernel.org>
---
 arch/arm64/boot/dts/altera/socfpga_stratix10_socdk.dts      | 2 +-
 arch/arm64/boot/dts/altera/socfpga_stratix10_socdk_nand.dts | 2 +-
 arch/arm64/boot/dts/intel/socfpga_agilex_socdk.dts          | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'arch')

diff --git a/arch/arm64/boot/dts/altera/socfpga_stratix10_socdk.dts b/arch/arm64/boot/dts/altera/socfpga_stratix10_socdk.dts
index feadd21bc0dc..46e558ab7729 100644
--- a/arch/arm64/boot/dts/altera/socfpga_stratix10_socdk.dts
+++ b/arch/arm64/boot/dts/altera/socfpga_stratix10_socdk.dts
@@ -159,7 +159,7 @@
 	flash@0 {
 		#address-cells = <1>;
 		#size-cells = <1>;
-		compatible = "n25q00a";
+		compatible = "micron,mt25qu02g", "jedec,spi-nor";
 		reg = <0>;
 		spi-max-frequency = <100000000>;
 
diff --git a/arch/arm64/boot/dts/altera/socfpga_stratix10_socdk_nand.dts b/arch/arm64/boot/dts/altera/socfpga_stratix10_socdk_nand.dts
index c07966740e14..f9b4a39683cf 100644
--- a/arch/arm64/boot/dts/altera/socfpga_stratix10_socdk_nand.dts
+++ b/arch/arm64/boot/dts/altera/socfpga_stratix10_socdk_nand.dts
@@ -192,7 +192,7 @@
 	flash@0 {
 		#address-cells = <1>;
 		#size-cells = <1>;
-		compatible = "n25q00a";
+		compatible = "micron,mt25qu02g", "jedec,spi-nor";
 		reg = <0>;
 		spi-max-frequency = <100000000>;
 
diff --git a/arch/arm64/boot/dts/intel/socfpga_agilex_socdk.dts b/arch/arm64/boot/dts/intel/socfpga_agilex_socdk.dts
index 96c50d48289d..a7a83f29f00b 100644
--- a/arch/arm64/boot/dts/intel/socfpga_agilex_socdk.dts
+++ b/arch/arm64/boot/dts/intel/socfpga_agilex_socdk.dts
@@ -110,7 +110,7 @@
 	flash@0 {
 		#address-cells = <1>;
 		#size-cells = <1>;
-		compatible = "mt25qu02g";
+		compatible = "micron,mt25qu02g", "jedec,spi-nor";
 		reg = <0>;
 		spi-max-frequency = <100000000>;
 
-- 
cgit v1.2.3-70-g09d2


From 0011c6d182774fc781fb9e115ebe8baa356029ae Mon Sep 17 00:00:00 2001
From: Markus Reichl <m.reichl@fivetechno.de>
Date: Wed, 4 Nov 2020 17:23:55 +0100
Subject: arm64: dts: rockchip: Assign a fixed index to mmc devices on rk3399
 boards.

Recently introduced async probe on mmc devices can shuffle block IDs.
Pin them to fixed values to ease booting in environments where UUIDs
are not practical. Use newly introduced aliases for mmcblk devices from [1].

[1]
https://patchwork.kernel.org/patch/11747669/

Signed-off-by: Markus Reichl <m.reichl@fivetechno.de>
Reviewed-by: Douglas Anderson <dianders@chromium.org>
Link: https://lore.kernel.org/r/20201104162356.1251-1-m.reichl@fivetechno.de
Signed-off-by: Heiko Stuebner <heiko@sntech.de>
---
 arch/arm64/boot/dts/rockchip/rk3399.dtsi | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'arch')

diff --git a/arch/arm64/boot/dts/rockchip/rk3399.dtsi b/arch/arm64/boot/dts/rockchip/rk3399.dtsi
index ada724b12f01..7a9a7aca86c6 100644
--- a/arch/arm64/boot/dts/rockchip/rk3399.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3399.dtsi
@@ -29,6 +29,9 @@
 		i2c6 = &i2c6;
 		i2c7 = &i2c7;
 		i2c8 = &i2c8;
+		mmc0 = &sdio0;
+		mmc1 = &sdmmc;
+		mmc2 = &sdhci;
 		serial0 = &uart0;
 		serial1 = &uart1;
 		serial2 = &uart2;
-- 
cgit v1.2.3-70-g09d2


From 7327c8b98e2e14c47021eea14d1ab268086a6408 Mon Sep 17 00:00:00 2001
From: Markus Reichl <m.reichl@fivetechno.de>
Date: Wed, 4 Nov 2020 20:29:31 +0100
Subject: arm64: dts: rockchip: Reorder LED triggers from mmc devices on
 rk3399-roc-pc.

After patch [1] SD-card becomes mmc1 and eMMC becomes mmc2.
Correct trigger of LEDs accordingly.

[1]
https://patchwork.kernel.org/patch/11881427

Signed-off-by: Markus Reichl <m.reichl@fivetechno.de>
Link: https://lore.kernel.org/r/20201104192933.1001-1-m.reichl@fivetechno.de
Signed-off-by: Heiko Stuebner <heiko@sntech.de>
---
 arch/arm64/boot/dts/rockchip/rk3399-roc-pc.dtsi | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/arm64/boot/dts/rockchip/rk3399-roc-pc.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-roc-pc.dtsi
index e7a459fa4322..20309076dbac 100644
--- a/arch/arm64/boot/dts/rockchip/rk3399-roc-pc.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3399-roc-pc.dtsi
@@ -74,14 +74,14 @@
 			label = "red:diy";
 			gpios = <&gpio0 RK_PB5 GPIO_ACTIVE_HIGH>;
 			default-state = "off";
-			linux,default-trigger = "mmc1";
+			linux,default-trigger = "mmc2";
 		};
 
 		yellow_led: led-2 {
 			label = "yellow:yellow-led";
 			gpios = <&gpio0 RK_PA2 GPIO_ACTIVE_HIGH>;
 			default-state = "off";
-			linux,default-trigger = "mmc0";
+			linux,default-trigger = "mmc1";
 		};
 	};
 
-- 
cgit v1.2.3-70-g09d2


From 01776f070ffcbf336be3bf1672bd3c589548d6c4 Mon Sep 17 00:00:00 2001
From: Christophe Leroy <christophe.leroy@csgroup.eu>
Date: Sat, 7 Nov 2020 09:07:40 +0000
Subject: powerpc/32s: Use relocation offset when setting early hash table

When calling early_hash_table(), the kernel hasn't been yet
relocated to its linking address, so data must be addressed
with relocation offset.

Add relocation offset to write into Hash in early_hash_table().

Fixes: 69a1593abdbc ("powerpc/32s: Setup the early hash table at all time.")
Reported-by: Erhard Furtner <erhard_f@mailbox.org>
Reported-by: Andreas Schwab <schwab@linux-m68k.org>
Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Tested-by: Serge Belyshev <belyshev@depni.sinp.msu.ru>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/9e225a856a8b22e0e77587ee22ab7a2f5bca8753.1604740029.git.christophe.leroy@csgroup.eu
---
 arch/powerpc/kernel/head_book3s_32.S | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/powerpc/kernel/head_book3s_32.S b/arch/powerpc/kernel/head_book3s_32.S
index 2aa16d5368e1..a0dda2a1f2df 100644
--- a/arch/powerpc/kernel/head_book3s_32.S
+++ b/arch/powerpc/kernel/head_book3s_32.S
@@ -156,6 +156,7 @@ __after_mmu_off:
 	bl	initial_bats
 	bl	load_segment_registers
 BEGIN_MMU_FTR_SECTION
+	bl	reloc_offset
 	bl	early_hash_table
 END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE)
 #if defined(CONFIG_BOOTX_TEXT)
@@ -920,7 +921,7 @@ early_hash_table:
 	ori	r6, r6, 3	/* 256kB table */
 	mtspr	SPRN_SDR1, r6
 	lis	r6, early_hash@h
-	lis	r3, Hash@ha
+	addis	r3, r3, Hash@ha
 	stw	r6, Hash@l(r3)
 	blr
 
-- 
cgit v1.2.3-70-g09d2


From ce9dfafe29bed86fe3cda330ac6072ce84e1ff81 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <hca@linux.ibm.com>
Date: Tue, 3 Nov 2020 16:55:43 +0100
Subject: s390: fix system call exit path

The system call exit path is running with interrupts enabled while
checking for TIF/PIF/CIF bits which require special handling. If all
bits have been checked interrupts are disabled and the kernel exits to
user space.
The problem is that after checking all bits and before interrupts are
disabled bits can be set already again, due to interrupt handling.

This means that the kernel can exit to user space with some
TIF/PIF/CIF bits set, which should never happen. E.g. TIF_NEED_RESCHED
might be set, which might lead to additional latencies, since that bit
will only be recognized with next exit to user space.

Fix this by checking the corresponding bits only when interrupts are
disabled.

Fixes: 0b0ed657fe00 ("s390: remove critical section cleanup from entry.S")
Cc: <stable@vger.kernel.org> # 5.8
Acked-by: Sven Schnelle <svens@linux.ibm.com>
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
---
 arch/s390/kernel/entry.S | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'arch')

diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index 86235919c2d1..5346545b9860 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -422,6 +422,7 @@ ENTRY(system_call)
 #endif
 	LOCKDEP_SYS_EXIT
 .Lsysc_tif:
+	DISABLE_INTS
 	TSTMSK	__PT_FLAGS(%r11),_PIF_WORK
 	jnz	.Lsysc_work
 	TSTMSK	__TI_flags(%r12),_TIF_WORK
@@ -444,6 +445,7 @@ ENTRY(system_call)
 # One of the work bits is on. Find out which one.
 #
 .Lsysc_work:
+	ENABLE_INTS
 	TSTMSK	__TI_flags(%r12),_TIF_NEED_RESCHED
 	jo	.Lsysc_reschedule
 	TSTMSK	__PT_FLAGS(%r11),_PIF_SYSCALL_RESTART
-- 
cgit v1.2.3-70-g09d2


From 7de8bfaa095fcbc2db2952d4b561be102a41c2a6 Mon Sep 17 00:00:00 2001
From: Lad Prabhakar <prabhakar.mahadev-lad.rj@bp.renesas.com>
Date: Wed, 4 Nov 2020 10:55:08 +0000
Subject: arm64: dts: renesas: r8a774e1: Add missing audio_clk_b

Add audio_clk_b configured as 0 Hz, this will be overridden by the
boards providing the audio clock.

Fixes: 8183a7938cfec ("arm64: dts: renesas: r8a774e1: Add audio support")
Reported-by: Nobuhiro Iwamatsu <nobuhiro1.iwamatsu@toshiba.co.jp>
Signed-off-by: Lad Prabhakar <prabhakar.mahadev-lad.rj@bp.renesas.com>
Link: https://lore.kernel.org/r/20201104105508.21197-1-prabhakar.mahadev-lad.rj@bp.renesas.com
Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
---
 arch/arm64/boot/dts/renesas/r8a774e1.dtsi | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'arch')

diff --git a/arch/arm64/boot/dts/renesas/r8a774e1.dtsi b/arch/arm64/boot/dts/renesas/r8a774e1.dtsi
index 9cbf963aa068..c29643442e91 100644
--- a/arch/arm64/boot/dts/renesas/r8a774e1.dtsi
+++ b/arch/arm64/boot/dts/renesas/r8a774e1.dtsi
@@ -28,6 +28,12 @@
 		clock-frequency = <0>;
 	};
 
+	audio_clk_b: audio_clk_b {
+		compatible = "fixed-clock";
+		#clock-cells = <0>;
+		clock-frequency = <0>;
+	};
+
 	audio_clk_c: audio_clk_c {
 		compatible = "fixed-clock";
 		#clock-cells = <0>;
-- 
cgit v1.2.3-70-g09d2


From 52d9edbe6efc5042cf57fae6a25d07572ddf398b Mon Sep 17 00:00:00 2001
From: Marek Vasut <marex@denx.de>
Date: Thu, 8 Oct 2020 21:35:38 +0200
Subject: ARM: dts: stm32: Fix TA3-GPIO-C key on STM32MP1 DHCOM PDK2

On the prototype DHCOM, the TA3-GPIO-C button was connected to pin PI11 of
the STM32MP15xx, however on the production SoM this was changed to pin PG0
to free up the IRQ line 11 for LAN8710i PHY IRQ. Update the connection in
the DT. Since the IRQ line 0 is used for PMIC as well and cannot be shared
with the button, make the button polled.

Fixes: 87cabf9405cb ("ARM: dts: stm32: Add GPIO keys for STM32MP1 DHCOM PDK2")
Signed-off-by: Marek Vasut <marex@denx.de>
Cc: Alexandre Torgue <alexandre.torgue@st.com>
Cc: Maxime Coquelin <mcoquelin.stm32@gmail.com>
Cc: Patrice Chotard <patrice.chotard@st.com>
Cc: Patrick Delaunay <patrick.delaunay@st.com>
Cc: linux-stm32@st-md-mailman.stormreply.com
To: linux-arm-kernel@lists.infradead.org
Signed-off-by: Alexandre Torgue <alexandre.torgue@st.com>
---
 arch/arm/boot/dts/stm32mp15xx-dhcom-pdk2.dtsi | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

(limited to 'arch')

diff --git a/arch/arm/boot/dts/stm32mp15xx-dhcom-pdk2.dtsi b/arch/arm/boot/dts/stm32mp15xx-dhcom-pdk2.dtsi
index 5dff24e39af8..9a0a59678097 100644
--- a/arch/arm/boot/dts/stm32mp15xx-dhcom-pdk2.dtsi
+++ b/arch/arm/boot/dts/stm32mp15xx-dhcom-pdk2.dtsi
@@ -46,6 +46,16 @@
 			linux,code = <KEY_A>;
 			gpios = <&gpiof 3 GPIO_ACTIVE_LOW>;
 		};
+
+		/*
+		 * The EXTi IRQ line 0 is shared with PMIC,
+		 * so mark this as polled GPIO key.
+		 */
+		button-2 {
+			label = "TA3-GPIO-C";
+			linux,code = <KEY_C>;
+			gpios = <&gpiog 0 GPIO_ACTIVE_LOW>;
+		};
 	};
 
 	gpio-keys {
@@ -59,13 +69,6 @@
 			wakeup-source;
 		};
 
-		button-2 {
-			label = "TA3-GPIO-C";
-			linux,code = <KEY_C>;
-			gpios = <&gpioi 11 GPIO_ACTIVE_LOW>;
-			wakeup-source;
-		};
-
 		button-3 {
 			label = "TA4-GPIO-D";
 			linux,code = <KEY_D>;
-- 
cgit v1.2.3-70-g09d2


From 7e5f3155dcbb4d724386b30cc232002d9b9d81f5 Mon Sep 17 00:00:00 2001
From: Marek Vasut <marex@denx.de>
Date: Wed, 28 Oct 2020 21:46:17 +0100
Subject: ARM: dts: stm32: Fix LED5 on STM32MP1 DHCOM PDK2

On the prototype DHCOM, the LED5 was connected to pin PG2 of the
STM32MP15xx, however on the production SoM this was changed to pin
PC6. Update the connection in the DT.

Fixes: 81d5fc719798 ("ARM: dts: stm32: Add GPIO LEDs for STM32MP1 DHCOM PDK2")
Signed-off-by: Marek Vasut <marex@denx.de>
Cc: Alexandre Torgue <alexandre.torgue@st.com>
Cc: Maxime Coquelin <mcoquelin.stm32@gmail.com>
Cc: Patrice Chotard <patrice.chotard@st.com>
Cc: Patrick Delaunay <patrick.delaunay@st.com>
Cc: linux-stm32@st-md-mailman.stormreply.com
To: linux-arm-kernel@lists.infradead.org
Signed-off-by: Alexandre Torgue <alexandre.torgue@st.com>
---
 arch/arm/boot/dts/stm32mp15xx-dhcom-pdk2.dtsi | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/arm/boot/dts/stm32mp15xx-dhcom-pdk2.dtsi b/arch/arm/boot/dts/stm32mp15xx-dhcom-pdk2.dtsi
index 9a0a59678097..8456f172d4b1 100644
--- a/arch/arm/boot/dts/stm32mp15xx-dhcom-pdk2.dtsi
+++ b/arch/arm/boot/dts/stm32mp15xx-dhcom-pdk2.dtsi
@@ -82,7 +82,7 @@
 
 		led-0 {
 			label = "green:led5";
-			gpios = <&gpiog 2 GPIO_ACTIVE_HIGH>;
+			gpios = <&gpioc 6 GPIO_ACTIVE_HIGH>;
 			default-state = "off";
 		};
 
-- 
cgit v1.2.3-70-g09d2


From 1f3d7fc279b1a299bb8b1b225d80309a2062ab8a Mon Sep 17 00:00:00 2001
From: Marek Vasut <marex@denx.de>
Date: Thu, 29 Oct 2020 20:46:17 +0100
Subject: ARM: dts: stm32: Define VIO regulator supply on DHCOM

The VIO regulator is supplied by PMIC Buck3, describe this in the DT.

Fixes: 34e0c7847dcf ("ARM: dts: stm32: Add DH Electronics DHCOM STM32MP1 SoM and PDK2 board")
Signed-off-by: Marek Vasut <marex@denx.de>
Cc: Alexandre Torgue <alexandre.torgue@st.com>
Cc: Maxime Coquelin <mcoquelin.stm32@gmail.com>
Cc: Patrice Chotard <patrice.chotard@st.com>
Cc: Patrick Delaunay <patrick.delaunay@st.com>
Cc: linux-stm32@st-md-mailman.stormreply.com
To: linux-arm-kernel@lists.infradead.org
Signed-off-by: Alexandre Torgue <alexandre.torgue@st.com>
---
 arch/arm/boot/dts/stm32mp15xx-dhcom-som.dtsi | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch')

diff --git a/arch/arm/boot/dts/stm32mp15xx-dhcom-som.dtsi b/arch/arm/boot/dts/stm32mp15xx-dhcom-som.dtsi
index b4b52cf634af..c89ebfaa115d 100644
--- a/arch/arm/boot/dts/stm32mp15xx-dhcom-som.dtsi
+++ b/arch/arm/boot/dts/stm32mp15xx-dhcom-som.dtsi
@@ -68,6 +68,7 @@
 		gpio = <&gpiog 3 GPIO_ACTIVE_LOW>;
 		regulator-always-on;
 		regulator-boot-on;
+		vin-supply = <&vdd>;
 	};
 };
 
-- 
cgit v1.2.3-70-g09d2


From e5ace7f62695656ef8a66ad5a4c3edd055894876 Mon Sep 17 00:00:00 2001
From: Marek Vasut <marex@denx.de>
Date: Thu, 24 Sep 2020 01:25:35 +0200
Subject: ARM: dts: stm32: Enable thermal sensor support on stm32mp15xx-dhcor

Enable STM32 Digital Thermal Sensor driver for stm32mp15xx-dhcor SoMs.

Fixes: 94cafe1b6482 ("ARM: dts: stm32: Add Avenger96 devicetree support based on STM32MP157A")
Signed-off-by: Marek Vasut <marex@denx.de>
Cc: Alexandre Torgue <alexandre.torgue@st.com>
Cc: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
Cc: linux-stm32@st-md-mailman.stormreply.com
To: linux-arm-kernel@lists.infradead.org
Reviewed-by: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
Signed-off-by: Alexandre Torgue <alexandre.torgue@st.com>
---
 arch/arm/boot/dts/stm32mp15xx-dhcor-som.dtsi | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'arch')

diff --git a/arch/arm/boot/dts/stm32mp15xx-dhcor-som.dtsi b/arch/arm/boot/dts/stm32mp15xx-dhcor-som.dtsi
index 04fbb324a541..803eb8bc9c85 100644
--- a/arch/arm/boot/dts/stm32mp15xx-dhcor-som.dtsi
+++ b/arch/arm/boot/dts/stm32mp15xx-dhcor-som.dtsi
@@ -21,6 +21,10 @@
 	};
 };
 
+&dts {
+	status = "okay";
+};
+
 &i2c4 {
 	pinctrl-names = "default";
 	pinctrl-0 = <&i2c4_pins_a>;
-- 
cgit v1.2.3-70-g09d2


From f4c7fa39415da6db1fa0bc26162ac23a0fbae8bb Mon Sep 17 00:00:00 2001
From: Marek Vasut <marex@denx.de>
Date: Thu, 29 Oct 2020 20:46:52 +0100
Subject: ARM: dts: stm32: Keep VDDA LDO1 always on on DHCOM

The VDDA LDO1 PMIC output supplies the analog VDDA input of the
STM32MP1 on DHCOM, keep it always on, otherwise there could be
leakage through the SoC.

Fixes: 34e0c7847dcf ("ARM: dts: stm32: Add DH Electronics DHCOM STM32MP1 SoM and PDK2 board")
Signed-off-by: Marek Vasut <marex@denx.de>
Cc: Alexandre Torgue <alexandre.torgue@st.com>
Cc: Maxime Coquelin <mcoquelin.stm32@gmail.com>
Cc: Patrice Chotard <patrice.chotard@st.com>
Cc: Patrick Delaunay <patrick.delaunay@st.com>
Cc: linux-stm32@st-md-mailman.stormreply.com
To: linux-arm-kernel@lists.infradead.org
Signed-off-by: Alexandre Torgue <alexandre.torgue@st.com>
---
 arch/arm/boot/dts/stm32mp15xx-dhcom-som.dtsi | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch')

diff --git a/arch/arm/boot/dts/stm32mp15xx-dhcom-som.dtsi b/arch/arm/boot/dts/stm32mp15xx-dhcom-som.dtsi
index c89ebfaa115d..f796a6150313 100644
--- a/arch/arm/boot/dts/stm32mp15xx-dhcom-som.dtsi
+++ b/arch/arm/boot/dts/stm32mp15xx-dhcom-som.dtsi
@@ -203,6 +203,7 @@
 
 			vdda: ldo1 {
 				regulator-name = "vdda";
+				regulator-always-on;
 				regulator-min-microvolt = <2900000>;
 				regulator-max-microvolt = <2900000>;
 				interrupts = <IT_CURLIM_LDO1 0>;
-- 
cgit v1.2.3-70-g09d2


From 65cae18882f943215d0505ddc7e70495877308e6 Mon Sep 17 00:00:00 2001
From: Brian Masney <bmasney@redhat.com>
Date: Fri, 6 Nov 2020 20:11:19 -0500
Subject: x86/xen: don't unbind uninitialized lock_kicker_irq

When booting a hyperthreaded system with the kernel parameter
'mitigations=auto,nosmt', the following warning occurs:

    WARNING: CPU: 0 PID: 1 at drivers/xen/events/events_base.c:1112 unbind_from_irqhandler+0x4e/0x60
    ...
    Hardware name: Xen HVM domU, BIOS 4.2.amazon 08/24/2006
    ...
    Call Trace:
     xen_uninit_lock_cpu+0x28/0x62
     xen_hvm_cpu_die+0x21/0x30
     takedown_cpu+0x9c/0xe0
     ? trace_suspend_resume+0x60/0x60
     cpuhp_invoke_callback+0x9a/0x530
     _cpu_up+0x11a/0x130
     cpu_up+0x7e/0xc0
     bringup_nonboot_cpus+0x48/0x50
     smp_init+0x26/0x79
     kernel_init_freeable+0xea/0x229
     ? rest_init+0xaa/0xaa
     kernel_init+0xa/0x106
     ret_from_fork+0x35/0x40

The secondary CPUs are not activated with the nosmt mitigations and only
the primary thread on each CPU core is used. In this situation,
xen_hvm_smp_prepare_cpus(), and more importantly xen_init_lock_cpu(), is
not called, so the lock_kicker_irq is not initialized for the secondary
CPUs. Let's fix this by exiting early in xen_uninit_lock_cpu() if the
irq is not set to avoid the warning from above for each secondary CPU.

Signed-off-by: Brian Masney <bmasney@redhat.com>
Link: https://lore.kernel.org/r/20201107011119.631442-1-bmasney@redhat.com
Reviewed-by: Juergen Gross <jgross@suse.com>
Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
---
 arch/x86/xen/spinlock.c | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c
index 799f4eba0a62..043c73dfd2c9 100644
--- a/arch/x86/xen/spinlock.c
+++ b/arch/x86/xen/spinlock.c
@@ -93,10 +93,20 @@ void xen_init_lock_cpu(int cpu)
 
 void xen_uninit_lock_cpu(int cpu)
 {
+	int irq;
+
 	if (!xen_pvspin)
 		return;
 
-	unbind_from_irqhandler(per_cpu(lock_kicker_irq, cpu), NULL);
+	/*
+	 * When booting the kernel with 'mitigations=auto,nosmt', the secondary
+	 * CPUs are not activated, and lock_kicker_irq is not initialized.
+	 */
+	irq = per_cpu(lock_kicker_irq, cpu);
+	if (irq == -1)
+		return;
+
+	unbind_from_irqhandler(irq, NULL);
 	per_cpu(lock_kicker_irq, cpu) = -1;
 	kfree(per_cpu(irq_name, cpu));
 	per_cpu(irq_name, cpu) = NULL;
-- 
cgit v1.2.3-70-g09d2


From d19d2152ca055baf20339cfacbf039c2cfb8d936 Mon Sep 17 00:00:00 2001
From: Lucas Stach <l.stach@pengutronix.de>
Date: Thu, 5 Nov 2020 18:06:12 +0100
Subject: arm64: dts: imx8mm: fix voltage for 1.6GHz CPU operating point

The datasheet for both the industrial and consumer variant of the
SoC lists a typical voltage of 0.95V for the 1.6GHz CPU operating
point.

Fixes: e85c9d0faa75 (arm64: dts: imx8mm: Add cpufreq properties)
Signed-off-by: Lucas Stach <l.stach@pengutronix.de>
Reviewed-by: Fabio Estevam <festevam@gmail.com>
Signed-off-by: Shawn Guo <shawnguo@kernel.org>
---
 arch/arm64/boot/dts/freescale/imx8mm.dtsi | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/arm64/boot/dts/freescale/imx8mm.dtsi b/arch/arm64/boot/dts/freescale/imx8mm.dtsi
index b83f400def8b..05ee062548e4 100644
--- a/arch/arm64/boot/dts/freescale/imx8mm.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8mm.dtsi
@@ -129,7 +129,7 @@
 
 		opp-1600000000 {
 			opp-hz = /bits/ 64 <1600000000>;
-			opp-microvolt = <900000>;
+			opp-microvolt = <950000>;
 			opp-supported-hw = <0xc>, <0x7>;
 			clock-latency-ns = <150000>;
 			opp-suspend;
-- 
cgit v1.2.3-70-g09d2


From 33d0d843872c5ddbe28457a92fc6f2487315fb9f Mon Sep 17 00:00:00 2001
From: Fabio Estevam <festevam@gmail.com>
Date: Thu, 5 Nov 2020 18:13:20 -0300
Subject: ARM: dts: imx50-evk: Fix the chip select 1 IOMUX

The SPI chip selects are represented as:

cs-gpios = <&gpio4 11 GPIO_ACTIVE_LOW>, <&gpio4 13 GPIO_ACTIVE_LOW>;

, which means that they are used in GPIO function instead of native
SPI mode.

Fix the IOMUX for the chip select 1 to use GPIO4_13 instead of
the native CSPI_SSI function.

Fixes: c605cbf5e135 ("ARM: dts: imx: add device tree support for Freescale imx50evk board")
Signed-off-by: Fabio Estevam <festevam@gmail.com>
Signed-off-by: Shawn Guo <shawnguo@kernel.org>
---
 arch/arm/boot/dts/imx50-evk.dts | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/arm/boot/dts/imx50-evk.dts b/arch/arm/boot/dts/imx50-evk.dts
index 878e89c20190..4ea5c23f181b 100644
--- a/arch/arm/boot/dts/imx50-evk.dts
+++ b/arch/arm/boot/dts/imx50-evk.dts
@@ -59,7 +59,7 @@
 				MX50_PAD_CSPI_MISO__CSPI_MISO		0x00
 				MX50_PAD_CSPI_MOSI__CSPI_MOSI		0x00
 				MX50_PAD_CSPI_SS0__GPIO4_11		0xc4
-				MX50_PAD_ECSPI1_MOSI__CSPI_SS1		0xf4
+				MX50_PAD_ECSPI1_MOSI__GPIO4_13		0x84
 			>;
 		};
 
-- 
cgit v1.2.3-70-g09d2


From c2fe61d8be491ff8188edaf22e838f819999146b Mon Sep 17 00:00:00 2001
From: Arvind Sankar <nivedita@alum.mit.edu>
Date: Tue, 10 Nov 2020 11:39:19 -0500
Subject: efi/x86: Free efi_pgd with free_pages()

Commit

  d9e9a6418065 ("x86/mm/pti: Allocate a separate user PGD")

changed the PGD allocation to allocate PGD_ALLOCATION_ORDER pages, so in
the error path it should be freed using free_pages() rather than
free_page().

Commit

    06ace26f4e6f ("x86/efi: Free efi_pgd with free_pages()")

fixed one instance of this, but missed another.

Move the freeing out-of-line to avoid code duplication and fix this bug.

Fixes: d9e9a6418065 ("x86/mm/pti: Allocate a separate user PGD")
Link: https://lore.kernel.org/r/20201110163919.1134431-1-nivedita@alum.mit.edu
Signed-off-by: Arvind Sankar <nivedita@alum.mit.edu>
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
 arch/x86/platform/efi/efi_64.c | 24 +++++++++++++-----------
 1 file changed, 13 insertions(+), 11 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
index 8f5759df7776..e1e8d4e3a213 100644
--- a/arch/x86/platform/efi/efi_64.c
+++ b/arch/x86/platform/efi/efi_64.c
@@ -78,28 +78,30 @@ int __init efi_alloc_page_tables(void)
 	gfp_mask = GFP_KERNEL | __GFP_ZERO;
 	efi_pgd = (pgd_t *)__get_free_pages(gfp_mask, PGD_ALLOCATION_ORDER);
 	if (!efi_pgd)
-		return -ENOMEM;
+		goto fail;
 
 	pgd = efi_pgd + pgd_index(EFI_VA_END);
 	p4d = p4d_alloc(&init_mm, pgd, EFI_VA_END);
-	if (!p4d) {
-		free_page((unsigned long)efi_pgd);
-		return -ENOMEM;
-	}
+	if (!p4d)
+		goto free_pgd;
 
 	pud = pud_alloc(&init_mm, p4d, EFI_VA_END);
-	if (!pud) {
-		if (pgtable_l5_enabled())
-			free_page((unsigned long) pgd_page_vaddr(*pgd));
-		free_pages((unsigned long)efi_pgd, PGD_ALLOCATION_ORDER);
-		return -ENOMEM;
-	}
+	if (!pud)
+		goto free_p4d;
 
 	efi_mm.pgd = efi_pgd;
 	mm_init_cpumask(&efi_mm);
 	init_new_context(NULL, &efi_mm);
 
 	return 0;
+
+free_p4d:
+	if (pgtable_l5_enabled())
+		free_page((unsigned long)pgd_page_vaddr(*pgd));
+free_pgd:
+	free_pages((unsigned long)efi_pgd, PGD_ALLOCATION_ORDER);
+fail:
+	return -ENOMEM;
 }
 
 /*
-- 
cgit v1.2.3-70-g09d2


From 99fba3205cd499255a36fd87f1d6064adc622a5b Mon Sep 17 00:00:00 2001
From: Grygorii Strashko <grygorii.strashko@ti.com>
Date: Thu, 1 Oct 2020 22:20:23 +0300
Subject: ARM: dts: am437x-l4: fix compatible for cpsw switch dt node

Fix compatible the new CPSW switchdev DT node to avoid probing of legacy
CPSW driver which fails:
[    2.781009] cpsw 4a100000.switch: invalid resource

Fixes: 7bf8f37aea82 ("ARM: dts: am437x-l4: add dt node for new cpsw switchdev driver")
Signed-off-by: Grygorii Strashko <grygorii.strashko@ti.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>
---
 arch/arm/boot/dts/am437x-l4.dtsi | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/arm/boot/dts/am437x-l4.dtsi b/arch/arm/boot/dts/am437x-l4.dtsi
index c220dc3c4e0f..243e35f7a56c 100644
--- a/arch/arm/boot/dts/am437x-l4.dtsi
+++ b/arch/arm/boot/dts/am437x-l4.dtsi
@@ -521,7 +521,7 @@
 			ranges = <0x0 0x100000 0x8000>;
 
 			mac_sw: switch@0 {
-				compatible = "ti,am4372-cpsw","ti,cpsw-switch";
+				compatible = "ti,am4372-cpsw-switch", "ti,cpsw-switch";
 				reg = <0x0 0x4000>;
 				ranges = <0 0 0x4000>;
 				clocks = <&cpsw_125mhz_gclk>, <&dpll_clksel_mac_clk>;
-- 
cgit v1.2.3-70-g09d2


From 1ed576a20cd5c93295f57d6b7400357bd8d01b21 Mon Sep 17 00:00:00 2001
From: Janosch Frank <frankja@linux.ibm.com>
Date: Tue, 20 Oct 2020 06:12:07 -0400
Subject: KVM: s390: pv: Mark mm as protected after the set secure parameters
 and improve cleanup

We can only have protected guest pages after a successful set secure
parameters call as only then the UV allows imports and unpacks.

By moving the test we can now also check for it in s390_reset_acc()
and do an early return if it is 0.

Signed-off-by: Janosch Frank <frankja@linux.ibm.com>
Fixes: 29b40f105ec8 ("KVM: s390: protvirt: Add initial vm and cpu lifecycle handling")
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
---
 arch/s390/kvm/kvm-s390.c | 2 +-
 arch/s390/kvm/pv.c       | 3 ++-
 arch/s390/mm/gmap.c      | 2 ++
 3 files changed, 5 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 6b74b92c1a58..08ea6c4735cd 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -2312,7 +2312,7 @@ static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
 		struct kvm_s390_pv_unp unp = {};
 
 		r = -EINVAL;
-		if (!kvm_s390_pv_is_protected(kvm))
+		if (!kvm_s390_pv_is_protected(kvm) || !mm_is_protected(kvm->mm))
 			break;
 
 		r = -EFAULT;
diff --git a/arch/s390/kvm/pv.c b/arch/s390/kvm/pv.c
index eb99e2f95ebe..f5847f9dec7c 100644
--- a/arch/s390/kvm/pv.c
+++ b/arch/s390/kvm/pv.c
@@ -208,7 +208,6 @@ int kvm_s390_pv_init_vm(struct kvm *kvm, u16 *rc, u16 *rrc)
 		return -EIO;
 	}
 	kvm->arch.gmap->guest_handle = uvcb.guest_handle;
-	atomic_set(&kvm->mm->context.is_protected, 1);
 	return 0;
 }
 
@@ -228,6 +227,8 @@ int kvm_s390_pv_set_sec_parms(struct kvm *kvm, void *hdr, u64 length, u16 *rc,
 	*rrc = uvcb.header.rrc;
 	KVM_UV_EVENT(kvm, 3, "PROTVIRT VM SET PARMS: rc %x rrc %x",
 		     *rc, *rrc);
+	if (!cc)
+		atomic_set(&kvm->mm->context.is_protected, 1);
 	return cc ? -EINVAL : 0;
 }
 
diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c
index cfb0017f33a7..64795d034926 100644
--- a/arch/s390/mm/gmap.c
+++ b/arch/s390/mm/gmap.c
@@ -2690,6 +2690,8 @@ static const struct mm_walk_ops reset_acc_walk_ops = {
 #include <linux/sched/mm.h>
 void s390_reset_acc(struct mm_struct *mm)
 {
+	if (!mm_is_protected(mm))
+		return;
 	/*
 	 * we might be called during
 	 * reset:                             we walk the pages and clear
-- 
cgit v1.2.3-70-g09d2


From 6cbf1e960fa52e4c63a6dfa4cda8736375b34ccc Mon Sep 17 00:00:00 2001
From: Collin Walling <walling@linux.ibm.com>
Date: Wed, 4 Nov 2020 13:10:32 -0500
Subject: KVM: s390: remove diag318 reset code

The diag318 data must be set to 0 by VM-wide reset events
triggered by diag308. As such, KVM should not handle
resetting this data via the VCPU ioctls.

Fixes: 23a60f834406 ("s390/kvm: diagnose 0x318 sync and reset")
Signed-off-by: Collin Walling <walling@linux.ibm.com>
Reviewed-by: Christian Borntraeger <borntraeger@de.ibm.com>
Reviewed-by: Janosch Frank <frankja@linux.ibm.com>
Acked-by: Cornelia Huck <cohuck@redhat.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Link: https://lore.kernel.org/r/20201104181032.109800-1-walling@linux.ibm.com
---
 arch/s390/kvm/kvm-s390.c | 2 --
 1 file changed, 2 deletions(-)

(limited to 'arch')

diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 08ea6c4735cd..425d3d75320b 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -3564,7 +3564,6 @@ static void kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
 		vcpu->arch.sie_block->pp = 0;
 		vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
 		vcpu->arch.sie_block->todpr = 0;
-		vcpu->arch.sie_block->cpnc = 0;
 	}
 }
 
@@ -3582,7 +3581,6 @@ static void kvm_arch_vcpu_ioctl_clear_reset(struct kvm_vcpu *vcpu)
 
 	regs->etoken = 0;
 	regs->etoken_extension = 0;
-	regs->diag318 = 0;
 }
 
 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
-- 
cgit v1.2.3-70-g09d2


From 966e7ea434484a006700c144bca629a14f93530c Mon Sep 17 00:00:00 2001
From: Heiko Carstens <hca@linux.ibm.com>
Date: Tue, 10 Nov 2020 14:46:55 +0100
Subject: s390: update defconfigs

Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
---
 arch/s390/configs/debug_defconfig | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch')

diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig
index a4d3c578fbd8..fe6f529ac82c 100644
--- a/arch/s390/configs/debug_defconfig
+++ b/arch/s390/configs/debug_defconfig
@@ -1,3 +1,4 @@
+CONFIG_UAPI_HEADER_TEST=y
 CONFIG_SYSVIPC=y
 CONFIG_POSIX_MQUEUE=y
 CONFIG_WATCH_QUEUE=y
-- 
cgit v1.2.3-70-g09d2


From 78d732e1f326f74f240d416af9484928303d9951 Mon Sep 17 00:00:00 2001
From: Thomas Richter <tmricht@linux.ibm.com>
Date: Wed, 11 Nov 2020 16:26:25 +0100
Subject: s390/cpum_sf.c: fix file permission for cpum_sfb_size

This file is installed by the s390 CPU Measurement sampling
facility device driver to export supported minimum and
maximum sample buffer sizes.
This file is read by lscpumf tool to display the details
of the device driver capabilities. The lscpumf tool might
be invoked by a non-root user. In this case it does not
print anything because the file contents can not be read.

Fix this by allowing read access for all users. Reading
the file contents is ok, changing the file contents is
left to the root user only.

For further reference and details see:
 [1] https://github.com/ibm-s390-tools/s390-tools/issues/97

Fixes: 69f239ed335a ("s390/cpum_sf: Dynamically extend the sampling buffer if overflows occur")
Cc: <stable@vger.kernel.org> # 3.14
Signed-off-by: Thomas Richter <tmricht@linux.ibm.com>
Acked-by: Sumanth Korikkar <sumanthk@linux.ibm.com>
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
---
 arch/s390/kernel/perf_cpum_sf.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c
index 4f9e4626df55..f100c9209743 100644
--- a/arch/s390/kernel/perf_cpum_sf.c
+++ b/arch/s390/kernel/perf_cpum_sf.c
@@ -2228,4 +2228,4 @@ out:
 }
 
 arch_initcall(init_cpum_sampling_pmu);
-core_param(cpum_sfb_size, CPUM_SF_MAX_SDB, sfb_size, 0640);
+core_param(cpum_sfb_size, CPUM_SF_MAX_SDB, sfb_size, 0644);
-- 
cgit v1.2.3-70-g09d2


From 53bf2776e31376f0b6a1fd7c9e1abc61241825a2 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzk@kernel.org>
Date: Thu, 5 Nov 2020 22:47:02 +0100
Subject: ARM: dts: exynos: revert "add input clock to CMU in Exynos4412
 Odroid"

This reverts commit eaf2d2f6895d676dda6c95a652b58594f2887720.

The commit eaf2d2f6895d ("ARM: dts: exynos: add input clock to CMU in
Exynos4412 Odroid") breaks probing of usb3503 USB hub on Odroid U3.

It changes the order of clock drivers probe: the clkout (Exynos PMU)
driver is probed before the main clk-exynos4 driver.  The clkout driver
on Exynos4412 depends on clk-exynos4 but it does not support deferred
probe, therefore this dependency and changed probe order causes probe
failure.

The usb3503 USB hub on Odroid U3 on the other hand requires clkout
clock.  This can be seen in logs:

    [    5.007442] usb3503 0-0008: unable to request refclk (-517)

Reported-by: Marek Szyprowski <m.szyprowski@samsung.com>
Signed-off-by: Krzysztof Kozlowski <krzk@kernel.org>
Link: https://lore.kernel.org/r/20200921174818.15525-1-krzk@kernel.org
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 arch/arm/boot/dts/exynos4412-odroid-common.dtsi | 1 -
 1 file changed, 1 deletion(-)

(limited to 'arch')

diff --git a/arch/arm/boot/dts/exynos4412-odroid-common.dtsi b/arch/arm/boot/dts/exynos4412-odroid-common.dtsi
index ab291cec650a..2983e91bc7dd 100644
--- a/arch/arm/boot/dts/exynos4412-odroid-common.dtsi
+++ b/arch/arm/boot/dts/exynos4412-odroid-common.dtsi
@@ -122,7 +122,6 @@
 };
 
 &clock {
-	clocks = <&clock CLK_XUSBXTI>;
 	assigned-clocks = <&clock CLK_FOUT_EPLL>;
 	assigned-clock-rates = <45158401>;
 };
-- 
cgit v1.2.3-70-g09d2


From 575cba20c421ecb6b563ae352e4e0468e4ca8b3c Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Sat, 14 Nov 2020 21:47:43 +1000
Subject: powerpc/64s: Fix KVM system reset handling when CONFIG_PPC_PSERIES=y

pseries guest kernels have a FWNMI handler for SRESET and MCE NMIs,
which is basically the same as the regular handlers for those
interrupts.

The system reset FWNMI handler did not have a KVM guest test in it,
although it probably should have because the guest can itself run
guests.

Commit 4f50541f6703b ("powerpc/64s/exception: Move all interrupt
handlers to new style code gen macros") convert the handler faithfully
to avoid a KVM test with a "clever" trick to modify the IKVM_REAL
setting to 0 when the fwnmi handler is to be generated (PPC_PSERIES=y).
This worked when the KVM test was generated in the interrupt entry
handlers, but a later patch moved the KVM test to the common handler,
and the common handler macro is expanded below the fwnmi entry. This
prevents the KVM test from being generated even for the 0x100 entry
point as well.

The result is NMI IPIs in the host kernel when a guest is running will
use gest registers. This goes particularly badly when an HPT guest is
running and the MMU is set to guest mode.

Remove this trickery and just generate the test always.

Fixes: 9600f261acaa ("powerpc/64s/exception: Move KVM test to common code")
Cc: stable@vger.kernel.org # v5.7+
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20201114114743.3306283-1-npiggin@gmail.com
---
 arch/powerpc/kernel/exceptions-64s.S | 2 --
 1 file changed, 2 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index f7d748b88705..07d64883c0b5 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -1000,8 +1000,6 @@ TRAMP_REAL_BEGIN(system_reset_idle_wake)
  * Vectors for the FWNMI option.  Share common code.
  */
 TRAMP_REAL_BEGIN(system_reset_fwnmi)
-	/* XXX: fwnmi guest could run a nested/PR guest, so why no test?  */
-	__IKVM_REAL(system_reset)=0
 	GEN_INT_ENTRY system_reset, virt=0
 
 #endif /* CONFIG_PPC_PSERIES */
-- 
cgit v1.2.3-70-g09d2


From 7bab16a6075b7b94999666355ab532c3dabb94f9 Mon Sep 17 00:00:00 2001
From: Jamie Iles <jamie@nuviainc.com>
Date: Fri, 13 Nov 2020 15:04:06 +0000
Subject: KVM: arm64: Correctly align nVHE percpu data

The nVHE percpu data is partially linked but the nVHE linker script did
not align the percpu section.  The PERCPU_INPUT macro would then align
the data to a page boundary:

  #define PERCPU_INPUT(cacheline)					\
  	__per_cpu_start = .;						\
  	*(.data..percpu..first)						\
  	. = ALIGN(PAGE_SIZE);						\
  	*(.data..percpu..page_aligned)					\
  	. = ALIGN(cacheline);						\
  	*(.data..percpu..read_mostly)					\
  	. = ALIGN(cacheline);						\
  	*(.data..percpu)						\
  	*(.data..percpu..shared_aligned)				\
  	PERCPU_DECRYPTED_SECTION					\
  	__per_cpu_end = .;

but then when the final vmlinux linking happens the hypervisor percpu
data is included after page alignment and so the offsets potentially
don't match.  On my build I saw that the .hyp.data..percpu section was
at address 0x20 and then the percpu data would begin at 0x1000 (because
of the page alignment in PERCPU_INPUT), but when linked into vmlinux,
everything would be shifted down by 0x20 bytes.

This manifests as one of the CPUs getting lost when running
kvm-unit-tests or starting any VM and subsequent soft lockup on a Cortex
A72 device.

Fixes: 30c953911c43 ("kvm: arm64: Set up hyp percpu data for nVHE")
Signed-off-by: Jamie Iles <jamie@nuviainc.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Acked-by: David Brazdil <dbrazdil@google.com>
Cc: David Brazdil <dbrazdil@google.com>
Cc: Marc Zyngier <maz@kernel.org>
Cc: Will Deacon <will@kernel.org>
Link: https://lore.kernel.org/r/20201113150406.14314-1-jamie@nuviainc.com
---
 arch/arm64/kvm/hyp/nvhe/hyp.lds.S | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'arch')

diff --git a/arch/arm64/kvm/hyp/nvhe/hyp.lds.S b/arch/arm64/kvm/hyp/nvhe/hyp.lds.S
index bb2d986ff696..a797abace13f 100644
--- a/arch/arm64/kvm/hyp/nvhe/hyp.lds.S
+++ b/arch/arm64/kvm/hyp/nvhe/hyp.lds.S
@@ -13,6 +13,11 @@
 
 SECTIONS {
 	HYP_SECTION(.text)
+	/*
+	 * .hyp..data..percpu needs to be page aligned to maintain the same
+	 * alignment for when linking into vmlinux.
+	 */
+	. = ALIGN(PAGE_SIZE);
 	HYP_SECTION_NAME(.data..percpu) : {
 		PERCPU_INPUT(L1_CACHE_BYTES)
 	}
-- 
cgit v1.2.3-70-g09d2


From 481535c5b41d191b22775a6873de5ec0e1cdced1 Mon Sep 17 00:00:00 2001
From: Max Filippov <jcmvbkbc@gmail.com>
Date: Mon, 16 Nov 2020 01:25:56 -0800
Subject: xtensa: fix TLBTEMP area placement

fast_second_level_miss handler for the TLBTEMP area has an assumption
that page table directory entry for the TLBTEMP address range is 0. For
it to be true the TLBTEMP area must be aligned to 4MB boundary and not
share its 4MB region with anything that may use a page table. This is
not true currently: TLBTEMP shares space with vmalloc space which
results in the following kinds of runtime errors when
fast_second_level_miss loads page table directory entry for the vmalloc
space instead of fixing up the TLBTEMP area:

 Unable to handle kernel paging request at virtual address c7ff0e00
  pc = d0009275, ra = 90009478
 Oops: sig: 9 [#1] PREEMPT
 CPU: 1 PID: 61 Comm: kworker/u9:2 Not tainted 5.10.0-rc3-next-20201110-00007-g1fe4962fa983-dirty #58
 Workqueue: xprtiod xs_stream_data_receive_workfn
 a00: 90009478 d11e1dc0 c7ff0e00 00000020 c7ff0000 00000001 7f8b8107 00000000
 a08: 900c5992 d11e1d90 d0cc88b8 5506e97c 00000000 5506e97c d06c8074 d11e1d90
 pc: d0009275, ps: 00060310, depc: 00000014, excvaddr: c7ff0e00
 lbeg: d0009275, lend: d0009287 lcount: 00000003, sar: 00000010
 Call Trace:
   xs_stream_data_receive_workfn+0x43c/0x770
   process_one_work+0x1a1/0x324
   worker_thread+0x1cc/0x3c0
   kthread+0x10d/0x124
   ret_from_kernel_thread+0xc/0x18

Cc: stable@vger.kernel.org
Signed-off-by: Max Filippov <jcmvbkbc@gmail.com>
---
 Documentation/xtensa/mmu.rst      | 9 ++++++---
 arch/xtensa/include/asm/pgtable.h | 2 +-
 2 files changed, 7 insertions(+), 4 deletions(-)

(limited to 'arch')

diff --git a/Documentation/xtensa/mmu.rst b/Documentation/xtensa/mmu.rst
index e52a12960fdc..450573afa31a 100644
--- a/Documentation/xtensa/mmu.rst
+++ b/Documentation/xtensa/mmu.rst
@@ -82,7 +82,8 @@ Default MMUv2-compatible layout::
   +------------------+
   | VMALLOC area     |  VMALLOC_START            0xc0000000  128MB - 64KB
   +------------------+  VMALLOC_END
-  | Cache aliasing   |  TLBTEMP_BASE_1           0xc7ff0000  DCACHE_WAY_SIZE
+  +------------------+
+  | Cache aliasing   |  TLBTEMP_BASE_1           0xc8000000  DCACHE_WAY_SIZE
   | remap area 1     |
   +------------------+
   | Cache aliasing   |  TLBTEMP_BASE_2                       DCACHE_WAY_SIZE
@@ -124,7 +125,8 @@ Default MMUv2-compatible layout::
   +------------------+
   | VMALLOC area     |  VMALLOC_START            0xa0000000  128MB - 64KB
   +------------------+  VMALLOC_END
-  | Cache aliasing   |  TLBTEMP_BASE_1           0xa7ff0000  DCACHE_WAY_SIZE
+  +------------------+
+  | Cache aliasing   |  TLBTEMP_BASE_1           0xa8000000  DCACHE_WAY_SIZE
   | remap area 1     |
   +------------------+
   | Cache aliasing   |  TLBTEMP_BASE_2                       DCACHE_WAY_SIZE
@@ -167,7 +169,8 @@ Default MMUv2-compatible layout::
   +------------------+
   | VMALLOC area     |  VMALLOC_START            0x90000000  128MB - 64KB
   +------------------+  VMALLOC_END
-  | Cache aliasing   |  TLBTEMP_BASE_1           0x97ff0000  DCACHE_WAY_SIZE
+  +------------------+
+  | Cache aliasing   |  TLBTEMP_BASE_1           0x98000000  DCACHE_WAY_SIZE
   | remap area 1     |
   +------------------+
   | Cache aliasing   |  TLBTEMP_BASE_2                       DCACHE_WAY_SIZE
diff --git a/arch/xtensa/include/asm/pgtable.h b/arch/xtensa/include/asm/pgtable.h
index fa054a1772e1..4dc04e6c01d7 100644
--- a/arch/xtensa/include/asm/pgtable.h
+++ b/arch/xtensa/include/asm/pgtable.h
@@ -69,7 +69,7 @@
  */
 #define VMALLOC_START		(XCHAL_KSEG_CACHED_VADDR - 0x10000000)
 #define VMALLOC_END		(VMALLOC_START + 0x07FEFFFF)
-#define TLBTEMP_BASE_1		(VMALLOC_END + 1)
+#define TLBTEMP_BASE_1		(VMALLOC_START + 0x08000000)
 #define TLBTEMP_BASE_2		(TLBTEMP_BASE_1 + DCACHE_WAY_SIZE)
 #if 2 * DCACHE_WAY_SIZE > ICACHE_WAY_SIZE
 #define TLBTEMP_SIZE		(2 * DCACHE_WAY_SIZE)
-- 
cgit v1.2.3-70-g09d2


From 3a860d165eb5f4d7cf0bf81ef6a5b5c5e1754422 Mon Sep 17 00:00:00 2001
From: Max Filippov <jcmvbkbc@gmail.com>
Date: Mon, 16 Nov 2020 01:38:59 -0800
Subject: xtensa: disable preemption around cache alias management calls

Although cache alias management calls set up and tear down TLB entries
and fast_second_level_miss is able to restore TLB entry should it be
evicted they absolutely cannot preempt each other because they use the
same TLBTEMP area for different purposes.
Disable preemption around all cache alias management calls to enforce
that.

Cc: stable@vger.kernel.org
Signed-off-by: Max Filippov <jcmvbkbc@gmail.com>
---
 arch/xtensa/mm/cache.c | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

(limited to 'arch')

diff --git a/arch/xtensa/mm/cache.c b/arch/xtensa/mm/cache.c
index 5835406b3cec..085b8c77b9d9 100644
--- a/arch/xtensa/mm/cache.c
+++ b/arch/xtensa/mm/cache.c
@@ -70,8 +70,10 @@ static inline void kmap_invalidate_coherent(struct page *page,
 			kvaddr = TLBTEMP_BASE_1 +
 				(page_to_phys(page) & DCACHE_ALIAS_MASK);
 
+			preempt_disable();
 			__invalidate_dcache_page_alias(kvaddr,
 						       page_to_phys(page));
+			preempt_enable();
 		}
 	}
 }
@@ -156,6 +158,7 @@ void flush_dcache_page(struct page *page)
 		if (!alias && !mapping)
 			return;
 
+		preempt_disable();
 		virt = TLBTEMP_BASE_1 + (phys & DCACHE_ALIAS_MASK);
 		__flush_invalidate_dcache_page_alias(virt, phys);
 
@@ -166,6 +169,7 @@ void flush_dcache_page(struct page *page)
 
 		if (mapping)
 			__invalidate_icache_page_alias(virt, phys);
+		preempt_enable();
 	}
 
 	/* There shouldn't be an entry in the cache for this page anymore. */
@@ -199,8 +203,10 @@ void local_flush_cache_page(struct vm_area_struct *vma, unsigned long address,
 	unsigned long phys = page_to_phys(pfn_to_page(pfn));
 	unsigned long virt = TLBTEMP_BASE_1 + (address & DCACHE_ALIAS_MASK);
 
+	preempt_disable();
 	__flush_invalidate_dcache_page_alias(virt, phys);
 	__invalidate_icache_page_alias(virt, phys);
+	preempt_enable();
 }
 EXPORT_SYMBOL(local_flush_cache_page);
 
@@ -227,11 +233,13 @@ update_mmu_cache(struct vm_area_struct * vma, unsigned long addr, pte_t *ptep)
 		unsigned long phys = page_to_phys(page);
 		unsigned long tmp;
 
+		preempt_disable();
 		tmp = TLBTEMP_BASE_1 + (phys & DCACHE_ALIAS_MASK);
 		__flush_invalidate_dcache_page_alias(tmp, phys);
 		tmp = TLBTEMP_BASE_1 + (addr & DCACHE_ALIAS_MASK);
 		__flush_invalidate_dcache_page_alias(tmp, phys);
 		__invalidate_icache_page_alias(tmp, phys);
+		preempt_enable();
 
 		clear_bit(PG_arch_1, &page->flags);
 	}
@@ -265,7 +273,9 @@ void copy_to_user_page(struct vm_area_struct *vma, struct page *page,
 
 	if (alias) {
 		unsigned long t = TLBTEMP_BASE_1 + (vaddr & DCACHE_ALIAS_MASK);
+		preempt_disable();
 		__flush_invalidate_dcache_page_alias(t, phys);
+		preempt_enable();
 	}
 
 	/* Copy data */
@@ -280,9 +290,11 @@ void copy_to_user_page(struct vm_area_struct *vma, struct page *page,
 	if (alias) {
 		unsigned long t = TLBTEMP_BASE_1 + (vaddr & DCACHE_ALIAS_MASK);
 
+		preempt_disable();
 		__flush_invalidate_dcache_range((unsigned long) dst, len);
 		if ((vma->vm_flags & VM_EXEC) != 0)
 			__invalidate_icache_page_alias(t, phys);
+		preempt_enable();
 
 	} else if ((vma->vm_flags & VM_EXEC) != 0) {
 		__flush_dcache_range((unsigned long)dst,len);
@@ -304,7 +316,9 @@ extern void copy_from_user_page(struct vm_area_struct *vma, struct page *page,
 
 	if (alias) {
 		unsigned long t = TLBTEMP_BASE_1 + (vaddr & DCACHE_ALIAS_MASK);
+		preempt_disable();
 		__flush_invalidate_dcache_page_alias(t, phys);
+		preempt_enable();
 	}
 
 	memcpy(dst, src, len);
-- 
cgit v1.2.3-70-g09d2


From 75b49620267c700f0a07fec7f27f69852db70e46 Mon Sep 17 00:00:00 2001
From: Cédric Le Goater <clg@kaod.org>
Date: Thu, 5 Nov 2020 14:47:13 +0100
Subject: KVM: PPC: Book3S HV: XIVE: Fix possible oops when accessing ESB page
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When accessing the ESB page of a source interrupt, the fault handler
will retrieve the page address from the XIVE interrupt 'xive_irq_data'
structure. If the associated KVM XIVE interrupt is not valid, that is
not allocated at the HW level for some reason, the fault handler will
dereference a NULL pointer leading to the oops below :

  WARNING: CPU: 40 PID: 59101 at arch/powerpc/kvm/book3s_xive_native.c:259 xive_native_esb_fault+0xe4/0x240 [kvm]
  CPU: 40 PID: 59101 Comm: qemu-system-ppc Kdump: loaded Tainted: G        W        --------- -  - 4.18.0-240.el8.ppc64le #1
  NIP:  c00800000e949fac LR: c00000000044b164 CTR: c00800000e949ec8
  REGS: c000001f69617840 TRAP: 0700   Tainted: G        W        --------- -  -  (4.18.0-240.el8.ppc64le)
  MSR:  9000000000029033 <SF,HV,EE,ME,IR,DR,RI,LE>  CR: 44044282  XER: 00000000
  CFAR: c00000000044b160 IRQMASK: 0
  GPR00: c00000000044b164 c000001f69617ac0 c00800000e96e000 c000001f69617c10
  GPR04: 05faa2b21e000080 0000000000000000 0000000000000005 ffffffffffffffff
  GPR08: 0000000000000000 0000000000000001 0000000000000000 0000000000000001
  GPR12: c00800000e949ec8 c000001ffffd3400 0000000000000000 0000000000000000
  GPR16: 0000000000000000 0000000000000000 0000000000000000 0000000000000000
  GPR20: 0000000000000000 0000000000000000 c000001f5c065160 c000000001c76f90
  GPR24: c000001f06f20000 c000001f5c065100 0000000000000008 c000001f0eb98c78
  GPR28: c000001dcab40000 c000001dcab403d8 c000001f69617c10 0000000000000011
  NIP [c00800000e949fac] xive_native_esb_fault+0xe4/0x240 [kvm]
  LR [c00000000044b164] __do_fault+0x64/0x220
  Call Trace:
  [c000001f69617ac0] [0000000137a5dc20] 0x137a5dc20 (unreliable)
  [c000001f69617b50] [c00000000044b164] __do_fault+0x64/0x220
  [c000001f69617b90] [c000000000453838] do_fault+0x218/0x930
  [c000001f69617bf0] [c000000000456f50] __handle_mm_fault+0x350/0xdf0
  [c000001f69617cd0] [c000000000457b1c] handle_mm_fault+0x12c/0x310
  [c000001f69617d10] [c00000000007ef44] __do_page_fault+0x264/0xbb0
  [c000001f69617df0] [c00000000007f8c8] do_page_fault+0x38/0xd0
  [c000001f69617e30] [c00000000000a714] handle_page_fault+0x18/0x38
  Instruction dump:
  40c2fff0 7c2004ac 2fa90000 409e0118 73e90001 41820080 e8bd0008 7c2004ac
  7ca90074 39400000 915c0000 7929d182 <0b090000> 2fa50000 419e0080 e89e0018
  ---[ end trace 66c6ff034c53f64f ]---
  xive-kvm: xive_native_esb_fault: accessing invalid ESB page for source 8 !

Fix that by checking the validity of the KVM XIVE interrupt structure.

Fixes: 6520ca64cde7 ("KVM: PPC: Book3S HV: XIVE: Add a mapping for the source ESB pages")
Cc: stable@vger.kernel.org # v5.2+
Reported-by: Greg Kurz <groug@kaod.org>
Signed-off-by: Cédric Le Goater <clg@kaod.org>
Tested-by: Greg Kurz <groug@kaod.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20201105134713.656160-1-clg@kaod.org
---
 arch/powerpc/kvm/book3s_xive_native.c | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'arch')

diff --git a/arch/powerpc/kvm/book3s_xive_native.c b/arch/powerpc/kvm/book3s_xive_native.c
index d0c2db0e07fa..a59a94f02733 100644
--- a/arch/powerpc/kvm/book3s_xive_native.c
+++ b/arch/powerpc/kvm/book3s_xive_native.c
@@ -251,6 +251,13 @@ static vm_fault_t xive_native_esb_fault(struct vm_fault *vmf)
 	}
 
 	state = &sb->irq_state[src];
+
+	/* Some sanity checking */
+	if (!state->valid) {
+		pr_devel("%s: source %lx invalid !\n", __func__, irq);
+		return VM_FAULT_SIGBUS;
+	}
+
 	kvmppc_xive_select_irq(state, &hw_num, &xd);
 
 	arch_spin_lock(&sb->lock);
-- 
cgit v1.2.3-70-g09d2


From cef397038167ac15d085914493d6c86385773709 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 11 Nov 2020 17:52:58 +0100
Subject: arch: pgtable: define MAX_POSSIBLE_PHYSMEM_BITS where needed

Stefan Agner reported a bug when using zsram on 32-bit Arm machines
with RAM above the 4GB address boundary:

  Unable to handle kernel NULL pointer dereference at virtual address 00000000
  pgd = a27bd01c
  [00000000] *pgd=236a0003, *pmd=1ffa64003
  Internal error: Oops: 207 [#1] SMP ARM
  Modules linked in: mdio_bcm_unimac(+) brcmfmac cfg80211 brcmutil raspberrypi_hwmon hci_uart crc32_arm_ce bcm2711_thermal phy_generic genet
  CPU: 0 PID: 123 Comm: mkfs.ext4 Not tainted 5.9.6 #1
  Hardware name: BCM2711
  PC is at zs_map_object+0x94/0x338
  LR is at zram_bvec_rw.constprop.0+0x330/0xa64
  pc : [<c0602b38>]    lr : [<c0bda6a0>]    psr: 60000013
  sp : e376bbe0  ip : 00000000  fp : c1e2921c
  r10: 00000002  r9 : c1dda730  r8 : 00000000
  r7 : e8ff7a00  r6 : 00000000  r5 : 02f9ffa0  r4 : e3710000
  r3 : 000fdffe  r2 : c1e0ce80  r1 : ebf979a0  r0 : 00000000
  Flags: nZCv  IRQs on  FIQs on  Mode SVC_32  ISA ARM  Segment user
  Control: 30c5383d  Table: 235c2a80  DAC: fffffffd
  Process mkfs.ext4 (pid: 123, stack limit = 0x495a22e6)
  Stack: (0xe376bbe0 to 0xe376c000)

As it turns out, zsram needs to know the maximum memory size, which
is defined in MAX_PHYSMEM_BITS when CONFIG_SPARSEMEM is set, or in
MAX_POSSIBLE_PHYSMEM_BITS on the x86 architecture.

The same problem will be hit on all 32-bit architectures that have a
physical address space larger than 4GB and happen to not enable sparsemem
and include asm/sparsemem.h from asm/pgtable.h.

After the initial discussion, I suggested just always defining
MAX_POSSIBLE_PHYSMEM_BITS whenever CONFIG_PHYS_ADDR_T_64BIT is
set, or provoking a build error otherwise. This addresses all
configurations that can currently have this runtime bug, but
leaves all other configurations unchanged.

I looked up the possible number of bits in source code and
datasheets, here is what I found:

 - on ARC, CONFIG_ARC_HAS_PAE40 controls whether 32 or 40 bits are used
 - on ARM, CONFIG_LPAE enables 40 bit addressing, without it we never
   support more than 32 bits, even though supersections in theory allow
   up to 40 bits as well.
 - on MIPS, some MIPS32r1 or later chips support 36 bits, and MIPS32r5
   XPA supports up to 60 bits in theory, but 40 bits are more than
   anyone will ever ship
 - On PowerPC, there are three different implementations of 36 bit
   addressing, but 32-bit is used without CONFIG_PTE_64BIT
 - On RISC-V, the normal page table format can support 34 bit
   addressing. There is no highmem support on RISC-V, so anything
   above 2GB is unused, but it might be useful to eventually support
   CONFIG_ZRAM for high pages.

Fixes: 61989a80fb3a ("staging: zsmalloc: zsmalloc memory allocation library")
Fixes: 02390b87a945 ("mm/zsmalloc: Prepare to variable MAX_PHYSMEM_BITS")
Acked-by: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
Reviewed-by: Stefan Agner <stefan@agner.ch>
Tested-by: Stefan Agner <stefan@agner.ch>
Acked-by: Mike Rapoport <rppt@linux.ibm.com>
Link: https://lore.kernel.org/linux-mm/bdfa44bf1c570b05d6c70898e2bbb0acf234ecdf.1604762181.git.stefan@agner.ch/
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 arch/arc/include/asm/pgtable.h               |  2 ++
 arch/arm/include/asm/pgtable-2level.h        |  2 ++
 arch/arm/include/asm/pgtable-3level.h        |  2 ++
 arch/mips/include/asm/pgtable-32.h           |  3 +++
 arch/powerpc/include/asm/book3s/32/pgtable.h |  2 ++
 arch/powerpc/include/asm/nohash/32/pgtable.h |  2 ++
 arch/riscv/include/asm/pgtable-32.h          |  2 ++
 include/linux/pgtable.h                      | 13 +++++++++++++
 8 files changed, 28 insertions(+)

(limited to 'arch')

diff --git a/arch/arc/include/asm/pgtable.h b/arch/arc/include/asm/pgtable.h
index f1ed17edb085..163641726a2b 100644
--- a/arch/arc/include/asm/pgtable.h
+++ b/arch/arc/include/asm/pgtable.h
@@ -134,8 +134,10 @@
 
 #ifdef CONFIG_ARC_HAS_PAE40
 #define PTE_BITS_NON_RWX_IN_PD1	(0xff00000000 | PAGE_MASK | _PAGE_CACHEABLE)
+#define MAX_POSSIBLE_PHYSMEM_BITS 40
 #else
 #define PTE_BITS_NON_RWX_IN_PD1	(PAGE_MASK | _PAGE_CACHEABLE)
+#define MAX_POSSIBLE_PHYSMEM_BITS 32
 #endif
 
 /**************************************************************************
diff --git a/arch/arm/include/asm/pgtable-2level.h b/arch/arm/include/asm/pgtable-2level.h
index 3502c2f746ca..baf7d0204eb5 100644
--- a/arch/arm/include/asm/pgtable-2level.h
+++ b/arch/arm/include/asm/pgtable-2level.h
@@ -75,6 +75,8 @@
 #define PTE_HWTABLE_OFF		(PTE_HWTABLE_PTRS * sizeof(pte_t))
 #define PTE_HWTABLE_SIZE	(PTRS_PER_PTE * sizeof(u32))
 
+#define MAX_POSSIBLE_PHYSMEM_BITS	32
+
 /*
  * PMD_SHIFT determines the size of the area a second-level page table can map
  * PGDIR_SHIFT determines what a third-level page table entry can map
diff --git a/arch/arm/include/asm/pgtable-3level.h b/arch/arm/include/asm/pgtable-3level.h
index fbb6693c3352..2b85d175e999 100644
--- a/arch/arm/include/asm/pgtable-3level.h
+++ b/arch/arm/include/asm/pgtable-3level.h
@@ -25,6 +25,8 @@
 #define PTE_HWTABLE_OFF		(0)
 #define PTE_HWTABLE_SIZE	(PTRS_PER_PTE * sizeof(u64))
 
+#define MAX_POSSIBLE_PHYSMEM_BITS 40
+
 /*
  * PGDIR_SHIFT determines the size a top-level page table entry can map.
  */
diff --git a/arch/mips/include/asm/pgtable-32.h b/arch/mips/include/asm/pgtable-32.h
index a950fc1ddb4d..6c0532d7b211 100644
--- a/arch/mips/include/asm/pgtable-32.h
+++ b/arch/mips/include/asm/pgtable-32.h
@@ -154,6 +154,7 @@ static inline void pmd_clear(pmd_t *pmdp)
 
 #if defined(CONFIG_XPA)
 
+#define MAX_POSSIBLE_PHYSMEM_BITS 40
 #define pte_pfn(x)		(((unsigned long)((x).pte_high >> _PFN_SHIFT)) | (unsigned long)((x).pte_low << _PAGE_PRESENT_SHIFT))
 static inline pte_t
 pfn_pte(unsigned long pfn, pgprot_t prot)
@@ -169,6 +170,7 @@ pfn_pte(unsigned long pfn, pgprot_t prot)
 
 #elif defined(CONFIG_PHYS_ADDR_T_64BIT) && defined(CONFIG_CPU_MIPS32)
 
+#define MAX_POSSIBLE_PHYSMEM_BITS 36
 #define pte_pfn(x)		((unsigned long)((x).pte_high >> 6))
 
 static inline pte_t pfn_pte(unsigned long pfn, pgprot_t prot)
@@ -183,6 +185,7 @@ static inline pte_t pfn_pte(unsigned long pfn, pgprot_t prot)
 
 #else
 
+#define MAX_POSSIBLE_PHYSMEM_BITS 32
 #ifdef CONFIG_CPU_VR41XX
 #define pte_pfn(x)		((unsigned long)((x).pte >> (PAGE_SHIFT + 2)))
 #define pfn_pte(pfn, prot)	__pte(((pfn) << (PAGE_SHIFT + 2)) | pgprot_val(prot))
diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h
index 36443cda8dcf..1376be95e975 100644
--- a/arch/powerpc/include/asm/book3s/32/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/32/pgtable.h
@@ -36,8 +36,10 @@ static inline bool pte_user(pte_t pte)
  */
 #ifdef CONFIG_PTE_64BIT
 #define PTE_RPN_MASK	(~((1ULL << PTE_RPN_SHIFT) - 1))
+#define MAX_POSSIBLE_PHYSMEM_BITS 36
 #else
 #define PTE_RPN_MASK	(~((1UL << PTE_RPN_SHIFT) - 1))
+#define MAX_POSSIBLE_PHYSMEM_BITS 32
 #endif
 
 /*
diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h
index ee2243ba96cf..96522f7f0618 100644
--- a/arch/powerpc/include/asm/nohash/32/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/32/pgtable.h
@@ -153,8 +153,10 @@ int map_kernel_page(unsigned long va, phys_addr_t pa, pgprot_t prot);
  */
 #if defined(CONFIG_PPC32) && defined(CONFIG_PTE_64BIT)
 #define PTE_RPN_MASK	(~((1ULL << PTE_RPN_SHIFT) - 1))
+#define MAX_POSSIBLE_PHYSMEM_BITS 36
 #else
 #define PTE_RPN_MASK	(~((1UL << PTE_RPN_SHIFT) - 1))
+#define MAX_POSSIBLE_PHYSMEM_BITS 32
 #endif
 
 /*
diff --git a/arch/riscv/include/asm/pgtable-32.h b/arch/riscv/include/asm/pgtable-32.h
index b0ab66e5fdb1..5b2e79e5bfa5 100644
--- a/arch/riscv/include/asm/pgtable-32.h
+++ b/arch/riscv/include/asm/pgtable-32.h
@@ -14,4 +14,6 @@
 #define PGDIR_SIZE      (_AC(1, UL) << PGDIR_SHIFT)
 #define PGDIR_MASK      (~(PGDIR_SIZE - 1))
 
+#define MAX_POSSIBLE_PHYSMEM_BITS 34
+
 #endif /* _ASM_RISCV_PGTABLE_32_H */
diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
index 71125a4676c4..e237004d498d 100644
--- a/include/linux/pgtable.h
+++ b/include/linux/pgtable.h
@@ -1427,6 +1427,19 @@ typedef unsigned int pgtbl_mod_mask;
 
 #endif /* !__ASSEMBLY__ */
 
+#if !defined(MAX_POSSIBLE_PHYSMEM_BITS) && !defined(CONFIG_64BIT)
+#ifdef CONFIG_PHYS_ADDR_T_64BIT
+/*
+ * ZSMALLOC needs to know the highest PFN on 32-bit architectures
+ * with physical address space extension, but falls back to
+ * BITS_PER_LONG otherwise.
+ */
+#error Missing MAX_POSSIBLE_PHYSMEM_BITS definition
+#else
+#define MAX_POSSIBLE_PHYSMEM_BITS 32
+#endif
+#endif
+
 #ifndef has_transparent_hugepage
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 #define has_transparent_hugepage() 1
-- 
cgit v1.2.3-70-g09d2


From 854c57f02bc718b0653bc467073b4541b8155a36 Mon Sep 17 00:00:00 2001
From: Ashish Kalra <ashish.kalra@amd.com>
Date: Tue, 10 Nov 2020 22:42:05 +0000
Subject: KVM: SVM: Fix offset computation bug in __sev_dbg_decrypt().

Fix offset computation in __sev_dbg_decrypt() to include the
source paddr before it is rounded down to be aligned to 16 bytes
as required by SEV API. This fixes incorrect guest memory dumps
observed when using qemu monitor.

Signed-off-by: Ashish Kalra <ashish.kalra@amd.com>
Message-Id: <20201110224205.29444-1-Ashish.Kalra@amd.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/svm/sev.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index c0b14106258a..566f4d18185b 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -642,8 +642,8 @@ static int __sev_dbg_decrypt(struct kvm *kvm, unsigned long src_paddr,
 	 * Its safe to read more than we are asked, caller should ensure that
 	 * destination has enough space.
 	 */
-	src_paddr = round_down(src_paddr, 16);
 	offset = src_paddr & 15;
+	src_paddr = round_down(src_paddr, 16);
 	sz = round_up(sz + offset, 16);
 
 	return __sev_issue_dbg_cmd(kvm, src_paddr, dst_paddr, sz, err, false);
-- 
cgit v1.2.3-70-g09d2


From 054409ab253d9f31bec5760105144166b4b71e22 Mon Sep 17 00:00:00 2001
From: Chen Zhou <chenzhou10@huawei.com>
Date: Tue, 17 Nov 2020 10:54:26 +0800
Subject: KVM: SVM: fix error return code in svm_create_vcpu()

Fix to return a negative error code from the error handling case
instead of 0 in function svm_create_vcpu(), as done elsewhere in this
function.

Fixes: f4c847a95654 ("KVM: SVM: refactor msr permission bitmap allocation")
Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Chen Zhou <chenzhou10@huawei.com>
Message-Id: <20201117025426.167824-1-chenzhou10@huawei.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/svm/svm.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 1e81cfebd491..79b3a564f1c9 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -1309,8 +1309,10 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu)
 		svm->avic_is_running = true;
 
 	svm->msrpm = svm_vcpu_alloc_msrpm();
-	if (!svm->msrpm)
+	if (!svm->msrpm) {
+		err = -ENOMEM;
 		goto error_free_vmcb_page;
+	}
 
 	svm_vcpu_init_msrpm(vcpu, svm->msrpm);
 
-- 
cgit v1.2.3-70-g09d2


From 61a2f1aecf6052f7bcf900829ca2b9d74437ec07 Mon Sep 17 00:00:00 2001
From: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
Date: Mon, 16 Nov 2020 18:45:15 +0100
Subject: MIPS: kernel: Fix for_each_memblock conversion

The loop over all memblocks works with PFNs and not physical
addresses, so we need for_each_mem_pfn_range().

Fixes: b10d6bca8720 ("arch, drivers: replace for_each_membock() with for_each_mem_range()")
Signed-off-by: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
Reviewed-by: Mike Rapoport <rppt@linux.ibm.com>
Reviewed-by: Serge Semin <fancer.lancer@gmail.com>
---
 arch/mips/kernel/setup.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'arch')

diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c
index 0d4253208bde..ca579deef939 100644
--- a/arch/mips/kernel/setup.c
+++ b/arch/mips/kernel/setup.c
@@ -262,8 +262,8 @@ static void __init bootmem_init(void)
 static void __init bootmem_init(void)
 {
 	phys_addr_t ramstart, ramend;
-	phys_addr_t start, end;
-	u64 i;
+	unsigned long start, end;
+	int i;
 
 	ramstart = memblock_start_of_DRAM();
 	ramend = memblock_end_of_DRAM();
@@ -300,7 +300,7 @@ static void __init bootmem_init(void)
 
 	min_low_pfn = ARCH_PFN_OFFSET;
 	max_pfn = PFN_DOWN(ramend);
-	for_each_mem_range(i, &start, &end) {
+	for_each_mem_pfn_range(i, MAX_NUMNODES, &start, &end, NULL) {
 		/*
 		 * Skip highmem here so we get an accurate max_low_pfn if low
 		 * memory stops short of high memory.
-- 
cgit v1.2.3-70-g09d2


From 1a371e67dc77125736cc56d3a0893f06b75855b6 Mon Sep 17 00:00:00 2001
From: Chen Yu <yu.c.chen@intel.com>
Date: Fri, 13 Nov 2020 09:59:23 +0800
Subject: x86/microcode/intel: Check patch signature before saving microcode
 for early loading

Currently, scan_microcode() leverages microcode_matches() to check
if the microcode matches the CPU by comparing the family and model.
However, the processor stepping and flags of the microcode signature
should also be considered when saving a microcode patch for early
update.

Use find_matching_signature() in scan_microcode() and get rid of the
now-unused microcode_matches() which is a good cleanup in itself.

Complete the verification of the patch being saved for early loading in
save_microcode_patch() directly. This needs to be done there too because
save_mc_for_early() will call save_microcode_patch() too.

The second reason why this needs to be done is because the loader still
tries to support, at least hypothetically, mixed-steppings systems and
thus adds all patches to the cache that belong to the same CPU model
albeit with different steppings.

For example:

  microcode: CPU: sig=0x906ec, pf=0x2, rev=0xd6
  microcode: mc_saved[0]: sig=0x906e9, pf=0x2a, rev=0xd6, total size=0x19400, date = 2020-04-23
  microcode: mc_saved[1]: sig=0x906ea, pf=0x22, rev=0xd6, total size=0x19000, date = 2020-04-27
  microcode: mc_saved[2]: sig=0x906eb, pf=0x2, rev=0xd6, total size=0x19400, date = 2020-04-23
  microcode: mc_saved[3]: sig=0x906ec, pf=0x22, rev=0xd6, total size=0x19000, date = 2020-04-27
  microcode: mc_saved[4]: sig=0x906ed, pf=0x22, rev=0xd6, total size=0x19400, date = 2020-04-23

The patch which is being saved for early loading, however, can only be
the one which fits the CPU this runs on so do the signature verification
before saving.

 [ bp: Do signature verification in save_microcode_patch()
       and rewrite commit message. ]

Fixes: ec400ddeff20 ("x86/microcode_intel_early.c: Early update ucode on Intel's CPU")
Signed-off-by: Chen Yu <yu.c.chen@intel.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Cc: stable@vger.kernel.org
Link: https://bugzilla.kernel.org/show_bug.cgi?id=208535
Link: https://lkml.kernel.org/r/20201113015923.13960-1-yu.c.chen@intel.com
---
 arch/x86/kernel/cpu/microcode/intel.c | 63 ++++++-----------------------------
 1 file changed, 10 insertions(+), 53 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
index 6a99535d7f37..7e8e07bddd5f 100644
--- a/arch/x86/kernel/cpu/microcode/intel.c
+++ b/arch/x86/kernel/cpu/microcode/intel.c
@@ -100,53 +100,6 @@ static int has_newer_microcode(void *mc, unsigned int csig, int cpf, int new_rev
 	return find_matching_signature(mc, csig, cpf);
 }
 
-/*
- * Given CPU signature and a microcode patch, this function finds if the
- * microcode patch has matching family and model with the CPU.
- *
- * %true - if there's a match
- * %false - otherwise
- */
-static bool microcode_matches(struct microcode_header_intel *mc_header,
-			      unsigned long sig)
-{
-	unsigned long total_size = get_totalsize(mc_header);
-	unsigned long data_size = get_datasize(mc_header);
-	struct extended_sigtable *ext_header;
-	unsigned int fam_ucode, model_ucode;
-	struct extended_signature *ext_sig;
-	unsigned int fam, model;
-	int ext_sigcount, i;
-
-	fam   = x86_family(sig);
-	model = x86_model(sig);
-
-	fam_ucode   = x86_family(mc_header->sig);
-	model_ucode = x86_model(mc_header->sig);
-
-	if (fam == fam_ucode && model == model_ucode)
-		return true;
-
-	/* Look for ext. headers: */
-	if (total_size <= data_size + MC_HEADER_SIZE)
-		return false;
-
-	ext_header   = (void *) mc_header + data_size + MC_HEADER_SIZE;
-	ext_sig      = (void *)ext_header + EXT_HEADER_SIZE;
-	ext_sigcount = ext_header->count;
-
-	for (i = 0; i < ext_sigcount; i++) {
-		fam_ucode   = x86_family(ext_sig->sig);
-		model_ucode = x86_model(ext_sig->sig);
-
-		if (fam == fam_ucode && model == model_ucode)
-			return true;
-
-		ext_sig++;
-	}
-	return false;
-}
-
 static struct ucode_patch *memdup_patch(void *data, unsigned int size)
 {
 	struct ucode_patch *p;
@@ -164,7 +117,7 @@ static struct ucode_patch *memdup_patch(void *data, unsigned int size)
 	return p;
 }
 
-static void save_microcode_patch(void *data, unsigned int size)
+static void save_microcode_patch(struct ucode_cpu_info *uci, void *data, unsigned int size)
 {
 	struct microcode_header_intel *mc_hdr, *mc_saved_hdr;
 	struct ucode_patch *iter, *tmp, *p = NULL;
@@ -210,6 +163,9 @@ static void save_microcode_patch(void *data, unsigned int size)
 	if (!p)
 		return;
 
+	if (!find_matching_signature(p->data, uci->cpu_sig.sig, uci->cpu_sig.pf))
+		return;
+
 	/*
 	 * Save for early loading. On 32-bit, that needs to be a physical
 	 * address as the APs are running from physical addresses, before
@@ -344,13 +300,14 @@ scan_microcode(void *data, size_t size, struct ucode_cpu_info *uci, bool save)
 
 		size -= mc_size;
 
-		if (!microcode_matches(mc_header, uci->cpu_sig.sig)) {
+		if (!find_matching_signature(data, uci->cpu_sig.sig,
+					     uci->cpu_sig.pf)) {
 			data += mc_size;
 			continue;
 		}
 
 		if (save) {
-			save_microcode_patch(data, mc_size);
+			save_microcode_patch(uci, data, mc_size);
 			goto next;
 		}
 
@@ -483,14 +440,14 @@ static void show_saved_mc(void)
  * Save this microcode patch. It will be loaded early when a CPU is
  * hot-added or resumes.
  */
-static void save_mc_for_early(u8 *mc, unsigned int size)
+static void save_mc_for_early(struct ucode_cpu_info *uci, u8 *mc, unsigned int size)
 {
 	/* Synchronization during CPU hotplug. */
 	static DEFINE_MUTEX(x86_cpu_microcode_mutex);
 
 	mutex_lock(&x86_cpu_microcode_mutex);
 
-	save_microcode_patch(mc, size);
+	save_microcode_patch(uci, mc, size);
 	show_saved_mc();
 
 	mutex_unlock(&x86_cpu_microcode_mutex);
@@ -935,7 +892,7 @@ static enum ucode_state generic_load_microcode(int cpu, struct iov_iter *iter)
 	 * permanent memory. So it will be loaded early when a CPU is hot added
 	 * or resumes.
 	 */
-	save_mc_for_early(new_mc, new_mc_size);
+	save_mc_for_early(uci, new_mc, new_mc_size);
 
 	pr_debug("CPU%d found a matching microcode update with version 0x%x (current=0x%x)\n",
 		 cpu, new_rev, uci->cpu_sig.rev);
-- 
cgit v1.2.3-70-g09d2


From e02152ba2810f7c88cb54e71cda096268dfa9241 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Mon, 16 Nov 2020 23:09:13 +1100
Subject: powerpc: Drop -me200 addition to build flags
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Currently a build with CONFIG_E200=y will fail with:

  Error: invalid switch -me200
  Error: unrecognized option -me200

Upstream binutils has never supported an -me200 option. Presumably it
was supported at some point by either a fork or Freescale internal
binutils.

We can't support code that we can't even build test, so drop the
addition of -me200 to the build flags, so we can at least build with
CONFIG_E200=y.

Reported-by: Németh Márton <nm127@freemail.hu>
Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Reviewed-by: Nick Desaulniers <ndesaulniers@google.com>
Acked-by: Scott Wood <oss@buserror.net>
Link: https://lore.kernel.org/r/20201116120913.165317-1-mpe@ellerman.id.au
---
 arch/powerpc/Makefile | 1 -
 1 file changed, 1 deletion(-)

(limited to 'arch')

diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile
index a4d56f0a41d9..16b8336f91dd 100644
--- a/arch/powerpc/Makefile
+++ b/arch/powerpc/Makefile
@@ -248,7 +248,6 @@ KBUILD_CFLAGS		+= $(call cc-option,-mno-string)
 cpu-as-$(CONFIG_40x)		+= -Wa,-m405
 cpu-as-$(CONFIG_44x)		+= -Wa,-m440
 cpu-as-$(CONFIG_ALTIVEC)	+= $(call as-option,-Wa$(comma)-maltivec)
-cpu-as-$(CONFIG_E200)		+= -Wa,-me200
 cpu-as-$(CONFIG_E500)		+= -Wa,-me500
 
 # When using '-many -mpower4' gas will first try and find a matching power4
-- 
cgit v1.2.3-70-g09d2


From ac3b57adf87ad9bac7e33ca26bbbb13fae1ed62b Mon Sep 17 00:00:00 2001
From: Zhang Qilong <zhangqilong3@huawei.com>
Date: Fri, 13 Nov 2020 21:18:56 +0800
Subject: MIPS: Alchemy: Fix memleak in alchemy_clk_setup_cpu

If the clk_register fails, we should free h before
function returns to prevent memleak.

Fixes: 474402291a0ad ("MIPS: Alchemy: clock framework integration of onchip clocks")
Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Zhang Qilong <zhangqilong3@huawei.com>
Signed-off-by: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
---
 arch/mips/alchemy/common/clock.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/mips/alchemy/common/clock.c b/arch/mips/alchemy/common/clock.c
index a95a894aceaf..f0c830337104 100644
--- a/arch/mips/alchemy/common/clock.c
+++ b/arch/mips/alchemy/common/clock.c
@@ -152,6 +152,7 @@ static struct clk __init *alchemy_clk_setup_cpu(const char *parent_name,
 {
 	struct clk_init_data id;
 	struct clk_hw *h;
+	struct clk *clk;
 
 	h = kzalloc(sizeof(*h), GFP_KERNEL);
 	if (!h)
@@ -164,7 +165,13 @@ static struct clk __init *alchemy_clk_setup_cpu(const char *parent_name,
 	id.ops = &alchemy_clkops_cpu;
 	h->init = &id;
 
-	return clk_register(NULL, h);
+	clk = clk_register(NULL, h);
+	if (IS_ERR(clk)) {
+		pr_err("failed to register clock\n");
+		kfree(h);
+	}
+
+	return clk;
 }
 
 /* AUXPLLs ************************************************************/
-- 
cgit v1.2.3-70-g09d2


From ebd19fc372e3e78bf165f230e7c084e304441c08 Mon Sep 17 00:00:00 2001
From: Sami Tolvanen <samitolvanen@google.com>
Date: Fri, 13 Nov 2020 10:31:26 -0800
Subject: perf/x86: fix sysfs type mismatches

This change switches rapl to use PMU_FORMAT_ATTR, and fixes two other
macros to use device_attribute instead of kobj_attribute to avoid
callback type mismatches that trip indirect call checking with Clang's
Control-Flow Integrity (CFI).

Reported-by: Sedat Dilek <sedat.dilek@gmail.com>
Signed-off-by: Sami Tolvanen <samitolvanen@google.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Kees Cook <keescook@chromium.org>
Link: https://lkml.kernel.org/r/20201113183126.1239404-1-samitolvanen@google.com
---
 arch/x86/events/intel/cstate.c |  6 +++---
 arch/x86/events/intel/uncore.c |  4 ++--
 arch/x86/events/intel/uncore.h | 12 ++++++------
 arch/x86/events/rapl.c         | 14 +-------------
 4 files changed, 12 insertions(+), 24 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c
index 442e1ed4acd4..4eb7ee5fed72 100644
--- a/arch/x86/events/intel/cstate.c
+++ b/arch/x86/events/intel/cstate.c
@@ -107,14 +107,14 @@
 MODULE_LICENSE("GPL");
 
 #define DEFINE_CSTATE_FORMAT_ATTR(_var, _name, _format)		\
-static ssize_t __cstate_##_var##_show(struct kobject *kobj,	\
-				struct kobj_attribute *attr,	\
+static ssize_t __cstate_##_var##_show(struct device *dev,	\
+				struct device_attribute *attr,	\
 				char *page)			\
 {								\
 	BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE);		\
 	return sprintf(page, _format "\n");			\
 }								\
-static struct kobj_attribute format_attr_##_var =		\
+static struct device_attribute format_attr_##_var =		\
 	__ATTR(_name, 0444, __cstate_##_var##_show, NULL)
 
 static ssize_t cstate_get_attr_cpumask(struct device *dev,
diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c
index 86d012b3e0b4..80d52cbe2fde 100644
--- a/arch/x86/events/intel/uncore.c
+++ b/arch/x86/events/intel/uncore.c
@@ -94,8 +94,8 @@ end:
 	return map;
 }
 
-ssize_t uncore_event_show(struct kobject *kobj,
-			  struct kobj_attribute *attr, char *buf)
+ssize_t uncore_event_show(struct device *dev,
+			  struct device_attribute *attr, char *buf)
 {
 	struct uncore_event_desc *event =
 		container_of(attr, struct uncore_event_desc, attr);
diff --git a/arch/x86/events/intel/uncore.h b/arch/x86/events/intel/uncore.h
index 83d2a7d490e0..9efea154349d 100644
--- a/arch/x86/events/intel/uncore.h
+++ b/arch/x86/events/intel/uncore.h
@@ -157,7 +157,7 @@ struct intel_uncore_box {
 #define UNCORE_BOX_FLAG_CFL8_CBOX_MSR_OFFS	2
 
 struct uncore_event_desc {
-	struct kobj_attribute attr;
+	struct device_attribute attr;
 	const char *config;
 };
 
@@ -179,8 +179,8 @@ struct pci2phy_map {
 struct pci2phy_map *__find_pci2phy_map(int segment);
 int uncore_pcibus_to_physid(struct pci_bus *bus);
 
-ssize_t uncore_event_show(struct kobject *kobj,
-			  struct kobj_attribute *attr, char *buf);
+ssize_t uncore_event_show(struct device *dev,
+			  struct device_attribute *attr, char *buf);
 
 static inline struct intel_uncore_pmu *dev_to_uncore_pmu(struct device *dev)
 {
@@ -201,14 +201,14 @@ extern int __uncore_max_dies;
 }
 
 #define DEFINE_UNCORE_FORMAT_ATTR(_var, _name, _format)			\
-static ssize_t __uncore_##_var##_show(struct kobject *kobj,		\
-				struct kobj_attribute *attr,		\
+static ssize_t __uncore_##_var##_show(struct device *dev,		\
+				struct device_attribute *attr,		\
 				char *page)				\
 {									\
 	BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE);			\
 	return sprintf(page, _format "\n");				\
 }									\
-static struct kobj_attribute format_attr_##_var =			\
+static struct device_attribute format_attr_##_var =			\
 	__ATTR(_name, 0444, __uncore_##_var##_show, NULL)
 
 static inline bool uncore_pmc_fixed(int idx)
diff --git a/arch/x86/events/rapl.c b/arch/x86/events/rapl.c
index 7c0120e2e957..7dbbeaacd995 100644
--- a/arch/x86/events/rapl.c
+++ b/arch/x86/events/rapl.c
@@ -93,18 +93,6 @@ static const char *const rapl_domain_names[NR_RAPL_DOMAINS] __initconst = {
  * any other bit is reserved
  */
 #define RAPL_EVENT_MASK	0xFFULL
-
-#define DEFINE_RAPL_FORMAT_ATTR(_var, _name, _format)		\
-static ssize_t __rapl_##_var##_show(struct kobject *kobj,	\
-				struct kobj_attribute *attr,	\
-				char *page)			\
-{								\
-	BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE);		\
-	return sprintf(page, _format "\n");			\
-}								\
-static struct kobj_attribute format_attr_##_var =		\
-	__ATTR(_name, 0444, __rapl_##_var##_show, NULL)
-
 #define RAPL_CNTR_WIDTH 32
 
 #define RAPL_EVENT_ATTR_STR(_name, v, str)					\
@@ -441,7 +429,7 @@ static struct attribute_group rapl_pmu_events_group = {
 	.attrs = attrs_empty,
 };
 
-DEFINE_RAPL_FORMAT_ATTR(event, event, "config:0-7");
+PMU_FORMAT_ATTR(event, "config:0-7");
 static struct attribute *rapl_formats_attr[] = {
 	&format_attr_event.attr,
 	NULL,
-- 
cgit v1.2.3-70-g09d2


From dc293f2106903ab9c24e9cea18c276e32c394c33 Mon Sep 17 00:00:00 2001
From: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Date: Tue, 1 Sep 2020 00:09:37 +0300
Subject: xtensa: uaccess: Add missing __user to strncpy_from_user() prototype

When adding __user annotations in commit 2adf5352a34a, the
strncpy_from_user() function declaration for the
CONFIG_GENERIC_STRNCPY_FROM_USER case was missed. Fix it.

Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Message-Id: <20200831210937.17938-1-laurent.pinchart@ideasonboard.com>
Signed-off-by: Max Filippov <jcmvbkbc@gmail.com>
---
 arch/xtensa/include/asm/uaccess.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/xtensa/include/asm/uaccess.h b/arch/xtensa/include/asm/uaccess.h
index b9758119feca..5c9fb8005aa8 100644
--- a/arch/xtensa/include/asm/uaccess.h
+++ b/arch/xtensa/include/asm/uaccess.h
@@ -302,7 +302,7 @@ strncpy_from_user(char *dst, const char __user *src, long count)
 	return -EFAULT;
 }
 #else
-long strncpy_from_user(char *dst, const char *src, long count);
+long strncpy_from_user(char *dst, const char __user *src, long count);
 #endif
 
 /*
-- 
cgit v1.2.3-70-g09d2


From 2013a4b684b6eb614ee5c9a3c07b0ae6f5ca96d9 Mon Sep 17 00:00:00 2001
From: Zhen Lei <thunder.leizhen@huawei.com>
Date: Fri, 16 Oct 2020 17:08:32 +0800
Subject: arm64: dts: broadcom: clear the warnings caused by empty dma-ranges

The scripts/dtc/checks.c requires that the node have empty "dma-ranges"
property must have the same "#address-cells" and "#size-cells" values as
the parent node. Otherwise, the following warnings is reported:

arch/arm64/boot/dts/broadcom/stingray/stingray-usb.dtsi:7.3-14: Warning \
(dma_ranges_format): /usb:dma-ranges: empty "dma-ranges" property but \
its #address-cells (1) differs from / (2)
arch/arm64/boot/dts/broadcom/stingray/stingray-usb.dtsi:7.3-14: Warning \
(dma_ranges_format): /usb:dma-ranges: empty "dma-ranges" property but \
its #size-cells (1) differs from / (2)

Arnd Bergmann figured out why it's necessary:
Also note that the #address-cells=<1> means that any device under
this bus is assumed to only support 32-bit addressing, and DMA will
have to go through a slow swiotlb in the absence of an IOMMU.

Suggested-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
Link: https://lore.kernel.org/r/20201016090833.1892-2-thunder.leizhen@huawei.com'
Acked-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 .../boot/dts/broadcom/stingray/stingray-usb.dtsi     | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

(limited to 'arch')

diff --git a/arch/arm64/boot/dts/broadcom/stingray/stingray-usb.dtsi b/arch/arm64/boot/dts/broadcom/stingray/stingray-usb.dtsi
index 55259f973b5a..aef8f2b00778 100644
--- a/arch/arm64/boot/dts/broadcom/stingray/stingray-usb.dtsi
+++ b/arch/arm64/boot/dts/broadcom/stingray/stingray-usb.dtsi
@@ -5,20 +5,20 @@
 	usb {
 		compatible = "simple-bus";
 		dma-ranges;
-		#address-cells = <1>;
-		#size-cells = <1>;
-		ranges = <0x0 0x0 0x68500000 0x00400000>;
+		#address-cells = <2>;
+		#size-cells = <2>;
+		ranges = <0x0 0x0 0x0 0x68500000 0x0 0x00400000>;
 
 		usbphy0: usb-phy@0 {
 			compatible = "brcm,sr-usb-combo-phy";
-			reg = <0x00000000 0x100>;
+			reg = <0x0 0x00000000 0x0 0x100>;
 			#phy-cells = <1>;
 			status = "disabled";
 		};
 
 		xhci0: usb@1000 {
 			compatible = "generic-xhci";
-			reg = <0x00001000 0x1000>;
+			reg = <0x0 0x00001000 0x0 0x1000>;
 			interrupts = <GIC_SPI 256 IRQ_TYPE_LEVEL_HIGH>;
 			phys = <&usbphy0 1>, <&usbphy0 0>;
 			phy-names = "phy0", "phy1";
@@ -28,7 +28,7 @@
 
 		bdc0: usb@2000 {
 			compatible = "brcm,bdc-v0.16";
-			reg = <0x00002000 0x1000>;
+			reg = <0x0 0x00002000 0x0 0x1000>;
 			interrupts = <GIC_SPI 259 IRQ_TYPE_LEVEL_HIGH>;
 			phys = <&usbphy0 0>, <&usbphy0 1>;
 			phy-names = "phy0", "phy1";
@@ -38,21 +38,21 @@
 
 		usbphy1: usb-phy@10000 {
 			compatible = "brcm,sr-usb-combo-phy";
-			reg = <0x00010000 0x100>;
+			reg = <0x0 0x00010000 0x0 0x100>;
 			#phy-cells = <1>;
 			status = "disabled";
 		};
 
 		usbphy2: usb-phy@20000 {
 			compatible = "brcm,sr-usb-hs-phy";
-			reg = <0x00020000 0x100>;
+			reg = <0x0 0x00020000 0x0 0x100>;
 			#phy-cells = <0>;
 			status = "disabled";
 		};
 
 		xhci1: usb@11000 {
 			compatible = "generic-xhci";
-			reg = <0x00011000 0x1000>;
+			reg = <0x0 0x00011000 0x0 0x1000>;
 			interrupts = <GIC_SPI 263 IRQ_TYPE_LEVEL_HIGH>;
 			phys = <&usbphy1 1>, <&usbphy2>, <&usbphy1 0>;
 			phy-names = "phy0", "phy1", "phy2";
@@ -62,7 +62,7 @@
 
 		bdc1: usb@21000 {
 			compatible = "brcm,bdc-v0.16";
-			reg = <0x00021000 0x1000>;
+			reg = <0x0 0x00021000 0x0 0x1000>;
 			interrupts = <GIC_SPI 266 IRQ_TYPE_LEVEL_HIGH>;
 			phys = <&usbphy2>;
 			phy-names = "phy0";
-- 
cgit v1.2.3-70-g09d2


From e3389b0a14952aac7f2998bb98f633afb21eaa92 Mon Sep 17 00:00:00 2001
From: Zhen Lei <thunder.leizhen@huawei.com>
Date: Fri, 16 Oct 2020 17:08:33 +0800
Subject: arm64: dts: qcom: clear the warnings caused by empty dma-ranges

The scripts/dtc/checks.c requires that the node have empty "dma-ranges"
property must have the same "#address-cells" and "#size-cells" values as
the parent node. Otherwise, the following warnings is reported:

arch/arm64/boot/dts/qcom/ipq6018.dtsi:185.3-14: Warning \
(dma_ranges_format): /soc:dma-ranges: empty "dma-ranges" property but \
its #address-cells (1) differs from / (2)
arch/arm64/boot/dts/qcom/ipq6018.dtsi:185.3-14: Warning \
(dma_ranges_format): /soc:dma-ranges: empty "dma-ranges" property but \
its #size-cells (1) differs from / (2)

Arnd Bergmann figured out why it's necessary:
Also note that the #address-cells=<1> means that any device under
this bus is assumed to only support 32-bit addressing, and DMA will
have to go through a slow swiotlb in the absence of an IOMMU.

Suggested-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
Reviewed-by: Bjorn Andersson <bjorn.andersson@linaro.org>
Link: https://lore.kernel.org/r/20201016090833.1892-3-thunder.leizhen@huawei.com'
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 arch/arm64/boot/dts/qcom/ipq6018.dtsi | 72 +++++++++++++++++------------------
 1 file changed, 36 insertions(+), 36 deletions(-)

(limited to 'arch')

diff --git a/arch/arm64/boot/dts/qcom/ipq6018.dtsi b/arch/arm64/boot/dts/qcom/ipq6018.dtsi
index a94dac76bf3f..59e0cbfa2214 100644
--- a/arch/arm64/boot/dts/qcom/ipq6018.dtsi
+++ b/arch/arm64/boot/dts/qcom/ipq6018.dtsi
@@ -179,22 +179,22 @@
 	};
 
 	soc: soc {
-		#address-cells = <1>;
-		#size-cells = <1>;
-		ranges = <0 0 0 0xffffffff>;
+		#address-cells = <2>;
+		#size-cells = <2>;
+		ranges = <0 0 0 0 0x0 0xffffffff>;
 		dma-ranges;
 		compatible = "simple-bus";
 
 		prng: qrng@e1000 {
 			compatible = "qcom,prng-ee";
-			reg = <0xe3000 0x1000>;
+			reg = <0x0 0xe3000 0x0 0x1000>;
 			clocks = <&gcc GCC_PRNG_AHB_CLK>;
 			clock-names = "core";
 		};
 
 		cryptobam: dma@704000 {
 			compatible = "qcom,bam-v1.7.0";
-			reg = <0x00704000 0x20000>;
+			reg = <0x0 0x00704000 0x0 0x20000>;
 			interrupts = <GIC_SPI 207 IRQ_TYPE_LEVEL_HIGH>;
 			clocks = <&gcc GCC_CRYPTO_AHB_CLK>;
 			clock-names = "bam_clk";
@@ -206,7 +206,7 @@
 
 		crypto: crypto@73a000 {
 			compatible = "qcom,crypto-v5.1";
-			reg = <0x0073a000 0x6000>;
+			reg = <0x0 0x0073a000 0x0 0x6000>;
 			clocks = <&gcc GCC_CRYPTO_AHB_CLK>,
 				<&gcc GCC_CRYPTO_AXI_CLK>,
 				<&gcc GCC_CRYPTO_CLK>;
@@ -217,7 +217,7 @@
 
 		tlmm: pinctrl@1000000 {
 			compatible = "qcom,ipq6018-pinctrl";
-			reg = <0x01000000 0x300000>;
+			reg = <0x0 0x01000000 0x0 0x300000>;
 			interrupts = <GIC_SPI 208 IRQ_TYPE_LEVEL_HIGH>;
 			gpio-controller;
 			#gpio-cells = <2>;
@@ -235,7 +235,7 @@
 
 		gcc: gcc@1800000 {
 			compatible = "qcom,gcc-ipq6018";
-			reg = <0x01800000 0x80000>;
+			reg = <0x0 0x01800000 0x0 0x80000>;
 			clocks = <&xo>, <&sleep_clk>;
 			clock-names = "xo", "sleep_clk";
 			#clock-cells = <1>;
@@ -244,17 +244,17 @@
 
 		tcsr_mutex_regs: syscon@1905000 {
 			compatible = "syscon";
-			reg = <0x01905000 0x8000>;
+			reg = <0x0 0x01905000 0x0 0x8000>;
 		};
 
 		tcsr_q6: syscon@1945000 {
 			compatible = "syscon";
-			reg = <0x01945000 0xe000>;
+			reg = <0x0 0x01945000 0x0 0xe000>;
 		};
 
 		blsp_dma: dma@7884000 {
 			compatible = "qcom,bam-v1.7.0";
-			reg = <0x07884000 0x2b000>;
+			reg = <0x0 0x07884000 0x0 0x2b000>;
 			interrupts = <GIC_SPI 238 IRQ_TYPE_LEVEL_HIGH>;
 			clocks = <&gcc GCC_BLSP1_AHB_CLK>;
 			clock-names = "bam_clk";
@@ -264,7 +264,7 @@
 
 		blsp1_uart3: serial@78b1000 {
 			compatible = "qcom,msm-uartdm-v1.4", "qcom,msm-uartdm";
-			reg = <0x078b1000 0x200>;
+			reg = <0x0 0x078b1000 0x0 0x200>;
 			interrupts = <GIC_SPI 306 IRQ_TYPE_LEVEL_HIGH>;
 			clocks = <&gcc GCC_BLSP1_UART3_APPS_CLK>,
 				<&gcc GCC_BLSP1_AHB_CLK>;
@@ -276,7 +276,7 @@
 			compatible = "qcom,spi-qup-v2.2.1";
 			#address-cells = <1>;
 			#size-cells = <0>;
-			reg = <0x078b5000 0x600>;
+			reg = <0x0 0x078b5000 0x0 0x600>;
 			interrupts = <GIC_SPI 95 IRQ_TYPE_LEVEL_HIGH>;
 			spi-max-frequency = <50000000>;
 			clocks = <&gcc GCC_BLSP1_QUP1_SPI_APPS_CLK>,
@@ -291,7 +291,7 @@
 			compatible = "qcom,spi-qup-v2.2.1";
 			#address-cells = <1>;
 			#size-cells = <0>;
-			reg = <0x078b6000 0x600>;
+			reg = <0x0 0x078b6000 0x0 0x600>;
 			interrupts = <GIC_SPI 96 IRQ_TYPE_LEVEL_HIGH>;
 			spi-max-frequency = <50000000>;
 			clocks = <&gcc GCC_BLSP1_QUP2_SPI_APPS_CLK>,
@@ -306,7 +306,7 @@
 			compatible = "qcom,i2c-qup-v2.2.1";
 			#address-cells = <1>;
 			#size-cells = <0>;
-			reg = <0x078b6000 0x600>;
+			reg = <0x0 0x078b6000 0x0 0x600>;
 			interrupts = <GIC_SPI 96 IRQ_TYPE_LEVEL_HIGH>;
 			clocks = <&gcc GCC_BLSP1_AHB_CLK>,
 				<&gcc GCC_BLSP1_QUP2_I2C_APPS_CLK>;
@@ -321,7 +321,7 @@
 			compatible = "qcom,i2c-qup-v2.2.1";
 			#address-cells = <1>;
 			#size-cells = <0>;
-			reg = <0x078b7000 0x600>;
+			reg = <0x0 0x078b7000 0x0 0x600>;
 			interrupts = <GIC_SPI 97 IRQ_TYPE_LEVEL_HIGH>;
 			clocks = <&gcc GCC_BLSP1_AHB_CLK>,
 				<&gcc GCC_BLSP1_QUP3_I2C_APPS_CLK>;
@@ -336,24 +336,24 @@
 			compatible = "qcom,msm-qgic2";
 			interrupt-controller;
 			#interrupt-cells = <0x3>;
-			reg =   <0x0b000000 0x1000>,  /*GICD*/
-				<0x0b002000 0x1000>,  /*GICC*/
-				<0x0b001000 0x1000>,  /*GICH*/
-				<0x0b004000 0x1000>;  /*GICV*/
+			reg =   <0x0 0x0b000000 0x0 0x1000>,  /*GICD*/
+				<0x0 0x0b002000 0x0 0x1000>,  /*GICC*/
+				<0x0 0x0b001000 0x0 0x1000>,  /*GICH*/
+				<0x0 0x0b004000 0x0 0x1000>;  /*GICV*/
 			interrupts = <GIC_PPI 9 IRQ_TYPE_LEVEL_HIGH>;
 		};
 
 		watchdog@b017000 {
 			compatible = "qcom,kpss-wdt";
 			interrupts = <GIC_SPI 3 IRQ_TYPE_EDGE_RISING>;
-			reg = <0x0b017000 0x40>;
+			reg = <0x0 0x0b017000 0x0 0x40>;
 			clocks = <&sleep_clk>;
 			timeout-sec = <10>;
 		};
 
 		apcs_glb: mailbox@b111000 {
 			compatible = "qcom,ipq6018-apcs-apps-global";
-			reg = <0x0b111000 0x1000>;
+			reg = <0x0 0x0b111000 0x0 0x1000>;
 			#clock-cells = <1>;
 			clocks = <&a53pll>, <&xo>;
 			clock-names = "pll", "xo";
@@ -362,7 +362,7 @@
 
 		a53pll: clock@b116000 {
 			compatible = "qcom,ipq6018-a53pll";
-			reg = <0x0b116000 0x40>;
+			reg = <0x0 0x0b116000 0x0 0x40>;
 			#clock-cells = <0>;
 			clocks = <&xo>;
 			clock-names = "xo";
@@ -377,68 +377,68 @@
 		};
 
 		timer@b120000 {
-			#address-cells = <1>;
-			#size-cells = <1>;
+			#address-cells = <2>;
+			#size-cells = <2>;
 			ranges;
 			compatible = "arm,armv7-timer-mem";
-			reg = <0x0b120000 0x1000>;
+			reg = <0x0 0x0b120000 0x0 0x1000>;
 			clock-frequency = <19200000>;
 
 			frame@b120000 {
 				frame-number = <0>;
 				interrupts = <GIC_SPI 8 IRQ_TYPE_LEVEL_HIGH>,
 					     <GIC_SPI 7 IRQ_TYPE_LEVEL_HIGH>;
-				reg = <0x0b121000 0x1000>,
-				      <0x0b122000 0x1000>;
+				reg = <0x0 0x0b121000 0x0 0x1000>,
+				      <0x0 0x0b122000 0x0 0x1000>;
 			};
 
 			frame@b123000 {
 				frame-number = <1>;
 				interrupts = <GIC_SPI 9 IRQ_TYPE_LEVEL_HIGH>;
-				reg = <0xb123000 0x1000>;
+				reg = <0x0 0xb123000 0x0 0x1000>;
 				status = "disabled";
 			};
 
 			frame@b124000 {
 				frame-number = <2>;
 				interrupts = <GIC_SPI 10 IRQ_TYPE_LEVEL_HIGH>;
-				reg = <0x0b124000 0x1000>;
+				reg = <0x0 0x0b124000 0x0 0x1000>;
 				status = "disabled";
 			};
 
 			frame@b125000 {
 				frame-number = <3>;
 				interrupts = <GIC_SPI 11 IRQ_TYPE_LEVEL_HIGH>;
-				reg = <0x0b125000 0x1000>;
+				reg = <0x0 0x0b125000 0x0 0x1000>;
 				status = "disabled";
 			};
 
 			frame@b126000 {
 				frame-number = <4>;
 				interrupts = <GIC_SPI 12 IRQ_TYPE_LEVEL_HIGH>;
-				reg = <0x0b126000 0x1000>;
+				reg = <0x0 0x0b126000 0x0 0x1000>;
 				status = "disabled";
 			};
 
 			frame@b127000 {
 				frame-number = <5>;
 				interrupts = <GIC_SPI 13 IRQ_TYPE_LEVEL_HIGH>;
-				reg = <0x0b127000 0x1000>;
+				reg = <0x0 0x0b127000 0x0 0x1000>;
 				status = "disabled";
 			};
 
 			frame@b128000 {
 				frame-number = <6>;
 				interrupts = <GIC_SPI 14 IRQ_TYPE_LEVEL_HIGH>;
-				reg = <0x0b128000 0x1000>;
+				reg = <0x0 0x0b128000 0x0 0x1000>;
 				status = "disabled";
 			};
 		};
 
 		q6v5_wcss: remoteproc@cd00000 {
 			compatible = "qcom,ipq8074-wcss-pil";
-			reg = <0x0cd00000 0x4040>,
-				<0x004ab000 0x20>;
+			reg = <0x0 0x0cd00000 0x0 0x4040>,
+			      <0x0 0x004ab000 0x0 0x20>;
 			reg-names = "qdsp6",
 				    "rmb";
 			interrupts-extended = <&intc GIC_SPI 325 IRQ_TYPE_EDGE_RISING>,
-- 
cgit v1.2.3-70-g09d2


From 23bde34771f1ea92fb5e6682c0d8c04304d34b3b Mon Sep 17 00:00:00 2001
From: Zenghui Yu <yuzenghui@huawei.com>
Date: Tue, 17 Nov 2020 23:16:29 +0800
Subject: KVM: arm64: vgic-v3: Drop the reporting of GICR_TYPER.Last for
 userspace

It was recently reported that if GICR_TYPER is accessed before the RD base
address is set, we'll suffer from the unset @rdreg dereferencing. Oops...

	gpa_t last_rdist_typer = rdreg->base + GICR_TYPER +
			(rdreg->free_index - 1) * KVM_VGIC_V3_REDIST_SIZE;

It's "expected" that users will access registers in the redistributor if
the RD has been properly configured (e.g., the RD base address is set). But
it hasn't yet been covered by the existing documentation.

Per discussion on the list [1], the reporting of the GICR_TYPER.Last bit
for userspace never actually worked. And it's difficult for us to emulate
it correctly given that userspace has the flexibility to access it any
time. Let's just drop the reporting of the Last bit for userspace for now
(userspace should have full knowledge about it anyway) and it at least
prevents kernel from panic ;-)

[1] https://lore.kernel.org/kvmarm/c20865a267e44d1e2c0d52ce4e012263@kernel.org/

Fixes: ba7b3f1275fd ("KVM: arm/arm64: Revisit Redistributor TYPER last bit computation")
Reported-by: Keqian Zhu <zhukeqian1@huawei.com>
Signed-off-by: Zenghui Yu <yuzenghui@huawei.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Reviewed-by: Eric Auger <eric.auger@redhat.com>
Link: https://lore.kernel.org/r/20201117151629.1738-1-yuzenghui@huawei.com
Cc: stable@vger.kernel.org
---
 arch/arm64/kvm/vgic/vgic-mmio-v3.c | 22 ++++++++++++++++++++--
 1 file changed, 20 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/arm64/kvm/vgic/vgic-mmio-v3.c b/arch/arm64/kvm/vgic/vgic-mmio-v3.c
index 52d6f24f65dc..15a6c98ee92f 100644
--- a/arch/arm64/kvm/vgic/vgic-mmio-v3.c
+++ b/arch/arm64/kvm/vgic/vgic-mmio-v3.c
@@ -273,6 +273,23 @@ static unsigned long vgic_mmio_read_v3r_typer(struct kvm_vcpu *vcpu,
 	return extract_bytes(value, addr & 7, len);
 }
 
+static unsigned long vgic_uaccess_read_v3r_typer(struct kvm_vcpu *vcpu,
+						 gpa_t addr, unsigned int len)
+{
+	unsigned long mpidr = kvm_vcpu_get_mpidr_aff(vcpu);
+	int target_vcpu_id = vcpu->vcpu_id;
+	u64 value;
+
+	value = (u64)(mpidr & GENMASK(23, 0)) << 32;
+	value |= ((target_vcpu_id & 0xffff) << 8);
+
+	if (vgic_has_its(vcpu->kvm))
+		value |= GICR_TYPER_PLPIS;
+
+	/* reporting of the Last bit is not supported for userspace */
+	return extract_bytes(value, addr & 7, len);
+}
+
 static unsigned long vgic_mmio_read_v3r_iidr(struct kvm_vcpu *vcpu,
 					     gpa_t addr, unsigned int len)
 {
@@ -593,8 +610,9 @@ static const struct vgic_register_region vgic_v3_rd_registers[] = {
 	REGISTER_DESC_WITH_LENGTH(GICR_IIDR,
 		vgic_mmio_read_v3r_iidr, vgic_mmio_write_wi, 4,
 		VGIC_ACCESS_32bit),
-	REGISTER_DESC_WITH_LENGTH(GICR_TYPER,
-		vgic_mmio_read_v3r_typer, vgic_mmio_write_wi, 8,
+	REGISTER_DESC_WITH_LENGTH_UACCESS(GICR_TYPER,
+		vgic_mmio_read_v3r_typer, vgic_mmio_write_wi,
+		vgic_uaccess_read_v3r_typer, vgic_mmio_uaccess_write_wi, 8,
 		VGIC_ACCESS_64bit | VGIC_ACCESS_32bit),
 	REGISTER_DESC_WITH_LENGTH(GICR_WAKER,
 		vgic_mmio_read_raz, vgic_mmio_write_wi, 4,
-- 
cgit v1.2.3-70-g09d2


From cd81acc600a9684ea4b4d25a47900d38a3890eab Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Tue, 17 Nov 2020 23:56:17 +1000
Subject: powerpc/64s/exception: KVM Fix for host DSI being taken in HPT guest
 MMU context

Commit 2284ffea8f0c ("powerpc/64s/exception: Only test KVM in SRR
interrupts when PR KVM is supported") removed KVM guest tests from
interrupts that do not set HV=1, when PR-KVM is not configured.

This is wrong for HV-KVM HPT guest MMIO emulation case which attempts
to load the faulting instruction word with MSR[DR]=1 and MSR[HV]=1 with
the guest MMU context loaded. This can cause host DSI, DSLB interrupts
which must test for KVM guest. Restore this and add a comment.

Fixes: 2284ffea8f0c ("powerpc/64s/exception: Only test KVM in SRR interrupts when PR KVM is supported")
Cc: stable@vger.kernel.org # v5.7+
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20201117135617.3521127-1-npiggin@gmail.com
---
 arch/powerpc/kernel/exceptions-64s.S | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 07d64883c0b5..8e6b2cc8db67 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -1410,6 +1410,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
  *   If none is found, do a Linux page fault. Linux page faults can happen in
  *   kernel mode due to user copy operations of course.
  *
+ *   KVM: The KVM HDSI handler may perform a load with MSR[DR]=1 in guest
+ *   MMU context, which may cause a DSI in the host, which must go to the
+ *   KVM handler. MSR[IR] is not enabled, so the real-mode handler will
+ *   always be used regardless of AIL setting.
+ *
  * - Radix MMU
  *   The hardware loads from the Linux page table directly, so a fault goes
  *   immediately to Linux page fault.
@@ -1420,10 +1425,8 @@ INT_DEFINE_BEGIN(data_access)
 	IVEC=0x300
 	IDAR=1
 	IDSISR=1
-#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
 	IKVM_SKIP=1
 	IKVM_REAL=1
-#endif
 INT_DEFINE_END(data_access)
 
 EXC_REAL_BEGIN(data_access, 0x300, 0x80)
@@ -1462,6 +1465,8 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
  *   ppc64_bolted_size (first segment). The kernel handler must avoid stomping
  *   on user-handler data structures.
  *
+ *   KVM: Same as 0x300, DSLB must test for KVM guest.
+ *
  * A dedicated save area EXSLB is used (XXX: but it actually need not be
  * these days, we could use EXGEN).
  */
@@ -1470,10 +1475,8 @@ INT_DEFINE_BEGIN(data_access_slb)
 	IAREA=PACA_EXSLB
 	IRECONCILE=0
 	IDAR=1
-#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
 	IKVM_SKIP=1
 	IKVM_REAL=1
-#endif
 INT_DEFINE_END(data_access_slb)
 
 EXC_REAL_BEGIN(data_access_slb, 0x380, 0x80)
-- 
cgit v1.2.3-70-g09d2


From 78aec9bb1f3c79e4570eb50260d6320063f823a2 Mon Sep 17 00:00:00 2001
From: Gustavo Pimentel <gustavo.pimentel@synopsys.com>
Date: Wed, 21 Oct 2020 23:12:20 +0200
Subject: ARC: bitops: Remove unecessary operation and value

The 1-bit shift rotation to the left on x variable located on
4   last if statement can be removed because the computed value is will
not be used afront.

Signed-off-by: Gustavo Pimentel <gustavo.pimentel@synopsys.com>
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
---
 arch/arc/include/asm/bitops.h | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'arch')

diff --git a/arch/arc/include/asm/bitops.h b/arch/arc/include/asm/bitops.h
index c6606f4d20d6..fb98440c0bd4 100644
--- a/arch/arc/include/asm/bitops.h
+++ b/arch/arc/include/asm/bitops.h
@@ -243,10 +243,8 @@ static inline int constant_fls(unsigned int x)
 		x <<= 2;
 		r -= 2;
 	}
-	if (!(x & 0x80000000u)) {
-		x <<= 1;
+	if (!(x & 0x80000000u))
 		r -= 1;
-	}
 	return r;
 }
 
-- 
cgit v1.2.3-70-g09d2


From 5f840df591a9554e4e1355ef1f8946bc2120ca9f Mon Sep 17 00:00:00 2001
From: Flavio Suligoi <f.suligoi@asem.it>
Date: Mon, 9 Nov 2020 14:21:30 +0100
Subject: ARC: mm: fix spelling mistakes

Signed-off-by: Flavio Suligoi <f.suligoi@asem.it>
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
---
 arch/arc/mm/tlb.c | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

(limited to 'arch')

diff --git a/arch/arc/mm/tlb.c b/arch/arc/mm/tlb.c
index c340acd989a0..9bb3c24f3677 100644
--- a/arch/arc/mm/tlb.c
+++ b/arch/arc/mm/tlb.c
@@ -30,14 +30,14 @@
  *  -Changes related to MMU v2 (Rel 4.8)
  *
  * Vineetg: Aug 29th 2008
- *  -In TLB Flush operations (Metal Fix MMU) there is a explict command to
+ *  -In TLB Flush operations (Metal Fix MMU) there is a explicit command to
  *    flush Micro-TLBS. If TLB Index Reg is invalid prior to TLBIVUTLB cmd,
  *    it fails. Thus need to load it with ANY valid value before invoking
  *    TLBIVUTLB cmd
  *
  * Vineetg: Aug 21th 2008:
  *  -Reduced the duration of IRQ lockouts in TLB Flush routines
- *  -Multiple copies of TLB erase code seperated into a "single" function
+ *  -Multiple copies of TLB erase code separated into a "single" function
  *  -In TLB Flush routines, interrupt disabling moved UP to retrieve ASID
  *       in interrupt-safe region.
  *
@@ -66,7 +66,7 @@
  *
  * Although J-TLB is 2 way set assoc, ARC700 caches J-TLB into uTLBS which has
  * much higher associativity. u-D-TLB is 8 ways, u-I-TLB is 4 ways.
- * Given this, the thrasing problem should never happen because once the 3
+ * Given this, the thrashing problem should never happen because once the 3
  * J-TLB entries are created (even though 3rd will knock out one of the prev
  * two), the u-D-TLB and u-I-TLB will have what is required to accomplish memcpy
  *
@@ -127,7 +127,7 @@ static void utlb_invalidate(void)
 	 * There was however an obscure hardware bug, where uTLB flush would
 	 * fail when a prior probe for J-TLB (both totally unrelated) would
 	 * return lkup err - because the entry didn't exist in MMU.
-	 * The Workround was to set Index reg with some valid value, prior to
+	 * The Workaround was to set Index reg with some valid value, prior to
 	 * flush. This was fixed in MMU v3
 	 */
 	unsigned int idx;
@@ -272,7 +272,7 @@ noinline void local_flush_tlb_all(void)
 }
 
 /*
- * Flush the entrie MM for userland. The fastest way is to move to Next ASID
+ * Flush the entire MM for userland. The fastest way is to move to Next ASID
  */
 noinline void local_flush_tlb_mm(struct mm_struct *mm)
 {
@@ -303,7 +303,7 @@ noinline void local_flush_tlb_mm(struct mm_struct *mm)
  * Difference between this and Kernel Range Flush is
  *  -Here the fastest way (if range is too large) is to move to next ASID
  *      without doing any explicit Shootdown
- *  -In case of kernel Flush, entry has to be shot down explictly
+ *  -In case of kernel Flush, entry has to be shot down explicitly
  */
 void local_flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
 			   unsigned long end)
@@ -620,7 +620,7 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long vaddr_unaligned,
  * Super Page size is configurable in hardware (4K to 16M), but fixed once
  * RTL builds.
  *
- * The exact THP size a Linx configuration will support is a function of:
+ * The exact THP size a Linux configuration will support is a function of:
  *  - MMU page size (typical 8K, RTL fixed)
  *  - software page walker address split between PGD:PTE:PFN (typical
  *    11:8:13, but can be changed with 1 line)
@@ -698,7 +698,7 @@ void local_flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start,
 
 #endif
 
-/* Read the Cache Build Confuration Registers, Decode them and save into
+/* Read the Cache Build Configuration Registers, Decode them and save into
  * the cpuinfo structure for later use.
  * No Validation is done here, simply read/convert the BCRs
  */
@@ -803,13 +803,13 @@ void arc_mmu_init(void)
 	pr_info("%s", arc_mmu_mumbojumbo(0, str, sizeof(str)));
 
 	/*
-	 * Can't be done in processor.h due to header include depenedencies
+	 * Can't be done in processor.h due to header include dependencies
 	 */
 	BUILD_BUG_ON(!IS_ALIGNED((CONFIG_ARC_KVADDR_SIZE << 20), PMD_SIZE));
 
 	/*
 	 * stack top size sanity check,
-	 * Can't be done in processor.h due to header include depenedencies
+	 * Can't be done in processor.h due to header include dependencies
 	 */
 	BUILD_BUG_ON(!IS_ALIGNED(STACK_TOP, PMD_SIZE));
 
@@ -881,7 +881,7 @@ void arc_mmu_init(void)
  *      the duplicate one.
  * -Knob to be verbose abt it.(TODO: hook them up to debugfs)
  */
-volatile int dup_pd_silent; /* Be slient abt it or complain (default) */
+volatile int dup_pd_silent; /* Be silent abt it or complain (default) */
 
 void do_tlb_overlap_fault(unsigned long cause, unsigned long address,
 			  struct pt_regs *regs)
@@ -948,7 +948,7 @@ void do_tlb_overlap_fault(unsigned long cause, unsigned long address,
 
 /***********************************************************************
  * Diagnostic Routines
- *  -Called from Low Level TLB Hanlders if things don;t look good
+ *  -Called from Low Level TLB Handlers if things don;t look good
  **********************************************************************/
 
 #ifdef CONFIG_ARC_DBG_TLB_PARANOIA
-- 
cgit v1.2.3-70-g09d2


From e42404fa10fd11fe72d0a0e149a321d10e577715 Mon Sep 17 00:00:00 2001
From: Vineet Gupta <vgupta@synopsys.com>
Date: Fri, 6 Nov 2020 16:59:27 -0800
Subject: ARC: stack unwinding: don't assume non-current task is sleeping

To start stack unwinding (SP, PC and BLINK) are needed. When the
explicit execution context (pt_regs etc) is not available, unwinder
assumes the task is sleeping (in __switch_to()) and fetches SP and BLINK
from kernel mode stack.

But this assumption is not true, specially in a SMP system, when top
runs on 1 core, there may be active running processes on all cores.

So when unwinding non courrent tasks, ensure they are NOT running.

And while at it, handle the self unwinding case explicitly.

This came out of investigation of a customer reported hang with
rcutorture+top

Link: https://github.com/foss-for-synopsys-dwc-arc-processors/linux/issues/31
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
---
 arch/arc/kernel/stacktrace.c | 23 +++++++++++++++--------
 1 file changed, 15 insertions(+), 8 deletions(-)

(limited to 'arch')

diff --git a/arch/arc/kernel/stacktrace.c b/arch/arc/kernel/stacktrace.c
index b23986f98450..b2557f581ea8 100644
--- a/arch/arc/kernel/stacktrace.c
+++ b/arch/arc/kernel/stacktrace.c
@@ -38,15 +38,15 @@
 
 #ifdef CONFIG_ARC_DW2_UNWIND
 
-static void seed_unwind_frame_info(struct task_struct *tsk,
-				   struct pt_regs *regs,
-				   struct unwind_frame_info *frame_info)
+static int
+seed_unwind_frame_info(struct task_struct *tsk, struct pt_regs *regs,
+		       struct unwind_frame_info *frame_info)
 {
 	/*
 	 * synchronous unwinding (e.g. dump_stack)
 	 *  - uses current values of SP and friends
 	 */
-	if (tsk == NULL && regs == NULL) {
+	if (regs == NULL && (tsk == NULL || tsk == current)) {
 		unsigned long fp, sp, blink, ret;
 		frame_info->task = current;
 
@@ -65,11 +65,15 @@ static void seed_unwind_frame_info(struct task_struct *tsk,
 		frame_info->call_frame = 0;
 	} else if (regs == NULL) {
 		/*
-		 * Asynchronous unwinding of sleeping task
-		 *  - Gets SP etc from task's pt_regs (saved bottom of kernel
-		 *    mode stack of task)
+		 * Asynchronous unwinding of a likely sleeping task
+		 *  - first ensure it is actually sleeping
+		 *  - if so, it will be in __switch_to, kernel mode SP of task
+		 *    is safe-kept and BLINK at a well known location in there
 		 */
 
+		if (tsk->state == TASK_RUNNING)
+			return -1;
+
 		frame_info->task = tsk;
 
 		frame_info->regs.r27 = TSK_K_FP(tsk);
@@ -103,6 +107,8 @@ static void seed_unwind_frame_info(struct task_struct *tsk,
 		frame_info->regs.r63 = regs->ret;
 		frame_info->call_frame = 0;
 	}
+
+	return 0;
 }
 
 #endif
@@ -116,7 +122,8 @@ arc_unwind_core(struct task_struct *tsk, struct pt_regs *regs,
 	unsigned int address;
 	struct unwind_frame_info frame_info;
 
-	seed_unwind_frame_info(tsk, regs, &frame_info);
+	if (seed_unwind_frame_info(tsk, regs, &frame_info))
+		return 0;
 
 	while (1) {
 		address = UNW_PC(&frame_info);
-- 
cgit v1.2.3-70-g09d2


From f737561c709667013d832316dd3198a7fe3d1260 Mon Sep 17 00:00:00 2001
From: Vineet Gupta <vgupta@synopsys.com>
Date: Fri, 6 Nov 2020 17:37:34 -0800
Subject: ARC: stack unwinding: reorganize how initial register state setup

This is a non-functional change, if anything a better fall-back
handling.

Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
---
 arch/arc/kernel/stacktrace.c | 37 ++++++++++++++++++-------------------
 1 file changed, 18 insertions(+), 19 deletions(-)

(limited to 'arch')

diff --git a/arch/arc/kernel/stacktrace.c b/arch/arc/kernel/stacktrace.c
index b2557f581ea8..f73da203b170 100644
--- a/arch/arc/kernel/stacktrace.c
+++ b/arch/arc/kernel/stacktrace.c
@@ -42,11 +42,23 @@ static int
 seed_unwind_frame_info(struct task_struct *tsk, struct pt_regs *regs,
 		       struct unwind_frame_info *frame_info)
 {
-	/*
-	 * synchronous unwinding (e.g. dump_stack)
-	 *  - uses current values of SP and friends
-	 */
-	if (regs == NULL && (tsk == NULL || tsk == current)) {
+	if (regs) {
+		/*
+		 * Asynchronous unwinding of intr/exception
+		 *  - Just uses the pt_regs passed
+		 */
+		frame_info->task = tsk;
+
+		frame_info->regs.r27 = regs->fp;
+		frame_info->regs.r28 = regs->sp;
+		frame_info->regs.r31 = regs->blink;
+		frame_info->regs.r63 = regs->ret;
+		frame_info->call_frame = 0;
+	} else if (tsk == NULL || tsk == current) {
+		/*
+		 * synchronous unwinding (e.g. dump_stack)
+		 *  - uses current values of SP and friends
+		 */
 		unsigned long fp, sp, blink, ret;
 		frame_info->task = current;
 
@@ -63,7 +75,7 @@ seed_unwind_frame_info(struct task_struct *tsk, struct pt_regs *regs,
 		frame_info->regs.r31 = blink;
 		frame_info->regs.r63 = ret;
 		frame_info->call_frame = 0;
-	} else if (regs == NULL) {
+	} else {
 		/*
 		 * Asynchronous unwinding of a likely sleeping task
 		 *  - first ensure it is actually sleeping
@@ -94,20 +106,7 @@ seed_unwind_frame_info(struct task_struct *tsk, struct pt_regs *regs,
 		frame_info->regs.r28 += 60;
 		frame_info->call_frame = 0;
 
-	} else {
-		/*
-		 * Asynchronous unwinding of intr/exception
-		 *  - Just uses the pt_regs passed
-		 */
-		frame_info->task = tsk;
-
-		frame_info->regs.r27 = regs->fp;
-		frame_info->regs.r28 = regs->sp;
-		frame_info->regs.r31 = regs->blink;
-		frame_info->regs.r63 = regs->ret;
-		frame_info->call_frame = 0;
 	}
-
 	return 0;
 }
 
-- 
cgit v1.2.3-70-g09d2


From 860aaabac8235cfde10fe556aa82abbbe3117888 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 17 Nov 2020 21:23:34 +0100
Subject: x86/dumpstack: Do not try to access user space code of other tasks

sysrq-t ends up invoking show_opcodes() for each task which tries to access
the user space code of other processes, which is obviously bogus.

It either manages to dump where the foreign task's regs->ip points to in a
valid mapping of the current task or triggers a pagefault and prints "Code:
Bad RIP value.". Both is just wrong.

Add a safeguard in copy_code() and check whether the @regs pointer matches
currents pt_regs. If not, do not even try to access it.

While at it, add commentary why using copy_from_user_nmi() is safe in
copy_code() even if the function name suggests otherwise.

Reported-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Borislav Petkov <bp@suse.de>
Acked-by: Oleg Nesterov <oleg@redhat.com>
Tested-by: Borislav Petkov <bp@suse.de>
Link: https://lkml.kernel.org/r/20201117202753.667274723@linutronix.de
---
 arch/x86/kernel/dumpstack.c | 23 +++++++++++++++++++----
 1 file changed, 19 insertions(+), 4 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index 25c06b67e7e0..97aa900386cb 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -78,6 +78,9 @@ static int copy_code(struct pt_regs *regs, u8 *buf, unsigned long src,
 	if (!user_mode(regs))
 		return copy_from_kernel_nofault(buf, (u8 *)src, nbytes);
 
+	/* The user space code from other tasks cannot be accessed. */
+	if (regs != task_pt_regs(current))
+		return -EPERM;
 	/*
 	 * Make sure userspace isn't trying to trick us into dumping kernel
 	 * memory by pointing the userspace instruction pointer at it.
@@ -85,6 +88,12 @@ static int copy_code(struct pt_regs *regs, u8 *buf, unsigned long src,
 	if (__chk_range_not_ok(src, nbytes, TASK_SIZE_MAX))
 		return -EINVAL;
 
+	/*
+	 * Even if named copy_from_user_nmi() this can be invoked from
+	 * other contexts and will not try to resolve a pagefault, which is
+	 * the correct thing to do here as this code can be called from any
+	 * context.
+	 */
 	return copy_from_user_nmi(buf, (void __user *)src, nbytes);
 }
 
@@ -115,13 +124,19 @@ void show_opcodes(struct pt_regs *regs, const char *loglvl)
 	u8 opcodes[OPCODE_BUFSIZE];
 	unsigned long prologue = regs->ip - PROLOGUE_SIZE;
 
-	if (copy_code(regs, opcodes, prologue, sizeof(opcodes))) {
-		printk("%sCode: Unable to access opcode bytes at RIP 0x%lx.\n",
-		       loglvl, prologue);
-	} else {
+	switch (copy_code(regs, opcodes, prologue, sizeof(opcodes))) {
+	case 0:
 		printk("%sCode: %" __stringify(PROLOGUE_SIZE) "ph <%02x> %"
 		       __stringify(EPILOGUE_SIZE) "ph\n", loglvl, opcodes,
 		       opcodes[PROLOGUE_SIZE], opcodes + PROLOGUE_SIZE + 1);
+		break;
+	case -EPERM:
+		/* No access to the user space stack of other tasks. Ignore. */
+		break;
+	default:
+		printk("%sCode: Unable to access opcode bytes at RIP 0x%lx.\n",
+		       loglvl, prologue);
+		break;
 	}
 }
 
-- 
cgit v1.2.3-70-g09d2


From 4c80d05714d347405865802b7098f1c97362cbef Mon Sep 17 00:00:00 2001
From: Christian Borntraeger <borntraeger@de.ibm.com>
Date: Tue, 17 Nov 2020 20:00:33 +0100
Subject: s390/uv: handle destroy page legacy interface

Older firmware can return rc=0x107 rrc=0xd for destroy page if the
page is already non-secure. This should be handled like a success
as already done by newer firmware.

Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Fixes: 1a80b54d1ce1 ("s390/uv: add destroy page call")
Reviewed-by: David Hildenbrand <david@redhat.com>
Acked-by: Cornelia Huck <cohuck@redhat.com>
Reviewed-by: Janosch Frank <frankja@linux.ibm.com>
---
 arch/s390/kernel/uv.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/s390/kernel/uv.c b/arch/s390/kernel/uv.c
index 14bd9d58edc9..883bfed9f5c2 100644
--- a/arch/s390/kernel/uv.c
+++ b/arch/s390/kernel/uv.c
@@ -129,8 +129,15 @@ int uv_destroy_page(unsigned long paddr)
 		.paddr = paddr
 	};
 
-	if (uv_call(0, (u64)&uvcb))
+	if (uv_call(0, (u64)&uvcb)) {
+		/*
+		 * Older firmware uses 107/d as an indication of a non secure
+		 * page. Let us emulate the newer variant (no-op).
+		 */
+		if (uvcb.header.rc == 0x107 && uvcb.header.rrc == 0xd)
+			return 0;
 		return -EINVAL;
+	}
 	return 0;
 }
 
-- 
cgit v1.2.3-70-g09d2


From 4d213e76a359e540ca786ee937da7f35faa8e5f8 Mon Sep 17 00:00:00 2001
From: Zhenzhong Duan <zhenzhong.duan@gmail.com>
Date: Tue, 10 Nov 2020 15:19:08 +0800
Subject: iommu/vt-d: Avoid panic if iommu init fails in tboot system

"intel_iommu=off" command line is used to disable iommu but iommu is force
enabled in a tboot system for security reason.

However for better performance on high speed network device, a new option
"intel_iommu=tboot_noforce" is introduced to disable the force on.

By default kernel should panic if iommu init fail in tboot for security
reason, but it's unnecessory if we use "intel_iommu=tboot_noforce,off".

Fix the code setting force_on and move intel_iommu_tboot_noforce
from tboot code to intel iommu code.

Fixes: 7304e8f28bb2 ("iommu/vt-d: Correctly disable Intel IOMMU force on")
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@gmail.com>
Tested-by: Lukasz Hawrylko <lukasz.hawrylko@linux.intel.com>
Acked-by: Lu Baolu <baolu.lu@linux.intel.com>
Link: https://lore.kernel.org/r/20201110071908.3133-1-zhenzhong.duan@gmail.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/x86/kernel/tboot.c     | 3 ---
 drivers/iommu/intel/iommu.c | 5 +++--
 include/linux/intel-iommu.h | 1 -
 3 files changed, 3 insertions(+), 6 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c
index 992fb1415c0f..420be871d9d4 100644
--- a/arch/x86/kernel/tboot.c
+++ b/arch/x86/kernel/tboot.c
@@ -514,9 +514,6 @@ int tboot_force_iommu(void)
 	if (!tboot_enabled())
 		return 0;
 
-	if (intel_iommu_tboot_noforce)
-		return 1;
-
 	if (no_iommu || swiotlb || dmar_disabled)
 		pr_warn("Forcing Intel-IOMMU to enabled\n");
 
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 1b1ca63e6bbe..4d9b298002f0 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -179,7 +179,7 @@ static int rwbf_quirk;
  * (used when kernel is launched w/ TXT)
  */
 static int force_on = 0;
-int intel_iommu_tboot_noforce;
+static int intel_iommu_tboot_noforce;
 static int no_platform_optin;
 
 #define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
@@ -4885,7 +4885,8 @@ int __init intel_iommu_init(void)
 	 * Intel IOMMU is required for a TXT/tboot launch or platform
 	 * opt in, so enforce that.
 	 */
-	force_on = tboot_force_iommu() || platform_optin_force_iommu();
+	force_on = (!intel_iommu_tboot_noforce && tboot_force_iommu()) ||
+		    platform_optin_force_iommu();
 
 	if (iommu_init_mempool()) {
 		if (force_on)
diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index fbf5b3e7707e..d956987ed032 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -798,7 +798,6 @@ extern int iommu_calculate_agaw(struct intel_iommu *iommu);
 extern int iommu_calculate_max_sagaw(struct intel_iommu *iommu);
 extern int dmar_disabled;
 extern int intel_iommu_enabled;
-extern int intel_iommu_tboot_noforce;
 extern int intel_iommu_gfx_mapped;
 #else
 static inline int iommu_calculate_agaw(struct intel_iommu *iommu)
-- 
cgit v1.2.3-70-g09d2


From 05d5de6ba7dbe490dd413b5ca11d0875bd2bc006 Mon Sep 17 00:00:00 2001
From: Marc Kleine-Budde <mkl@pengutronix.de>
Date: Wed, 11 Nov 2020 15:12:11 +0100
Subject: ARM: dts: dra76x: m_can: fix order of clocks

According to the bosch,m_can.yaml bindings the first clock shall be the "hclk",
while the second clock "cclk".

This patch fixes the order accordingly.

Fixes: 0adbe832f21a ("ARM: dts: dra76x: Add MCAN node")
Cc: Faiz Abbas <faiz_abbas@ti.com>
Cc: Tony Lindgren <tony@atomide.com>
Cc: linux-omap@vger.kernel.org
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
Signed-off-by: Tony Lindgren <tony@atomide.com>
---
 arch/arm/boot/dts/dra76x.dtsi | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/arm/boot/dts/dra76x.dtsi b/arch/arm/boot/dts/dra76x.dtsi
index b69c7d40f5d8..2f326151116b 100644
--- a/arch/arm/boot/dts/dra76x.dtsi
+++ b/arch/arm/boot/dts/dra76x.dtsi
@@ -32,8 +32,8 @@
 				interrupts = <GIC_SPI 67 IRQ_TYPE_LEVEL_HIGH>,
 					     <GIC_SPI 68 IRQ_TYPE_LEVEL_HIGH>;
 				interrupt-names = "int0", "int1";
-				clocks = <&mcan_clk>, <&l3_iclk_div>;
-				clock-names = "cclk", "hclk";
+				clocks = <&l3_iclk_div>, <&mcan_clk>;
+				clock-names = "hclk", "cclk";
 				bosch,mram-cfg = <0x0 0 0 32 0 0 1 1>;
 			};
 		};
-- 
cgit v1.2.3-70-g09d2


From f79643787e0a0762d2409b7b8334e83f22d85695 Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Tue, 17 Nov 2020 16:59:12 +1100
Subject: powerpc/64s: flush L1D on kernel entry

IBM Power9 processors can speculatively operate on data in the L1 cache
before it has been completely validated, via a way-prediction mechanism. It
is not possible for an attacker to determine the contents of impermissible
memory using this method, since these systems implement a combination of
hardware and software security measures to prevent scenarios where
protected data could be leaked.

However these measures don't address the scenario where an attacker induces
the operating system to speculatively execute instructions using data that
the attacker controls. This can be used for example to speculatively bypass
"kernel user access prevention" techniques, as discovered by Anthony
Steinhauser of Google's Safeside Project. This is not an attack by itself,
but there is a possibility it could be used in conjunction with
side-channels or other weaknesses in the privileged code to construct an
attack.

This issue can be mitigated by flushing the L1 cache between privilege
boundaries of concern. This patch flushes the L1 cache on kernel entry.

This is part of the fix for CVE-2020-4788.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Daniel Axtens <dja@axtens.net>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 Documentation/admin-guide/kernel-parameters.txt |  3 ++
 arch/powerpc/include/asm/exception-64s.h        |  9 +++-
 arch/powerpc/include/asm/feature-fixups.h       | 10 +++++
 arch/powerpc/include/asm/security_features.h    |  4 ++
 arch/powerpc/include/asm/setup.h                |  3 ++
 arch/powerpc/kernel/exceptions-64s.S            | 37 +++++++++++++++
 arch/powerpc/kernel/setup_64.c                  | 60 ++++++++++++++++++++++++-
 arch/powerpc/kernel/vmlinux.lds.S               |  7 +++
 arch/powerpc/lib/feature-fixups.c               | 54 ++++++++++++++++++++++
 arch/powerpc/platforms/powernv/setup.c          | 11 +++++
 arch/powerpc/platforms/pseries/setup.c          |  4 ++
 11 files changed, 200 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 526d65d8573a..383889fe771f 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -2858,6 +2858,7 @@
 					       mds=off [X86]
 					       tsx_async_abort=off [X86]
 					       kvm.nx_huge_pages=off [X86]
+					       no_entry_flush [PPC]
 
 				Exceptions:
 					       This does not have any effect on
@@ -3186,6 +3187,8 @@
 
 	noefi		Disable EFI runtime services support.
 
+	no_entry_flush  [PPC] Don't flush the L1-D cache when entering the kernel.
+
 	noexec		[IA-64]
 
 	noexec		[X86]
diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h
index ebe95aa04d53..83fa88bc9935 100644
--- a/arch/powerpc/include/asm/exception-64s.h
+++ b/arch/powerpc/include/asm/exception-64s.h
@@ -57,11 +57,18 @@
 	nop;								\
 	nop
 
+#define ENTRY_FLUSH_SLOT						\
+	ENTRY_FLUSH_FIXUP_SECTION;					\
+	nop;								\
+	nop;								\
+	nop;
+
 /*
  * r10 must be free to use, r13 must be paca
  */
 #define INTERRUPT_TO_KERNEL						\
-	STF_ENTRY_BARRIER_SLOT
+	STF_ENTRY_BARRIER_SLOT;						\
+	ENTRY_FLUSH_SLOT
 
 /*
  * Macros for annotating the expected destination of (h)rfid
diff --git a/arch/powerpc/include/asm/feature-fixups.h b/arch/powerpc/include/asm/feature-fixups.h
index b0af97add751..06a48219bbf2 100644
--- a/arch/powerpc/include/asm/feature-fixups.h
+++ b/arch/powerpc/include/asm/feature-fixups.h
@@ -205,6 +205,14 @@ label##3:					       	\
 	FTR_ENTRY_OFFSET 955b-956b;			\
 	.popsection;
 
+#define ENTRY_FLUSH_FIXUP_SECTION			\
+957:							\
+	.pushsection __entry_flush_fixup,"a";		\
+	.align 2;					\
+958:							\
+	FTR_ENTRY_OFFSET 957b-958b;			\
+	.popsection;
+
 #define RFI_FLUSH_FIXUP_SECTION				\
 951:							\
 	.pushsection __rfi_flush_fixup,"a";		\
@@ -237,8 +245,10 @@ label##3:					       	\
 #include <linux/types.h>
 
 extern long stf_barrier_fallback;
+extern long entry_flush_fallback;
 extern long __start___stf_entry_barrier_fixup, __stop___stf_entry_barrier_fixup;
 extern long __start___stf_exit_barrier_fixup, __stop___stf_exit_barrier_fixup;
+extern long __start___entry_flush_fixup, __stop___entry_flush_fixup;
 extern long __start___rfi_flush_fixup, __stop___rfi_flush_fixup;
 extern long __start___barrier_nospec_fixup, __stop___barrier_nospec_fixup;
 extern long __start__btb_flush_fixup, __stop__btb_flush_fixup;
diff --git a/arch/powerpc/include/asm/security_features.h b/arch/powerpc/include/asm/security_features.h
index fbb8fa32150f..9e7459d2edca 100644
--- a/arch/powerpc/include/asm/security_features.h
+++ b/arch/powerpc/include/asm/security_features.h
@@ -86,12 +86,16 @@ static inline bool security_ftr_enabled(u64 feature)
 // Software required to flush link stack on context switch
 #define SEC_FTR_FLUSH_LINK_STACK	0x0000000000001000ull
 
+// The L1-D cache should be flushed when entering the kernel
+#define SEC_FTR_L1D_FLUSH_ENTRY		0x0000000000004000ull
+
 
 // Features enabled by default
 #define SEC_FTR_DEFAULT \
 	(SEC_FTR_L1D_FLUSH_HV | \
 	 SEC_FTR_L1D_FLUSH_PR | \
 	 SEC_FTR_BNDS_CHK_SPEC_BAR | \
+	 SEC_FTR_L1D_FLUSH_ENTRY | \
 	 SEC_FTR_FAVOUR_SECURITY)
 
 #endif /* _ASM_POWERPC_SECURITY_FEATURES_H */
diff --git a/arch/powerpc/include/asm/setup.h b/arch/powerpc/include/asm/setup.h
index 9efbddee2bca..e2fcd3e874f8 100644
--- a/arch/powerpc/include/asm/setup.h
+++ b/arch/powerpc/include/asm/setup.h
@@ -52,12 +52,15 @@ enum l1d_flush_type {
 };
 
 void setup_rfi_flush(enum l1d_flush_type, bool enable);
+void setup_entry_flush(bool enable);
+void setup_uaccess_flush(bool enable);
 void do_rfi_flush_fixups(enum l1d_flush_type types);
 #ifdef CONFIG_PPC_BARRIER_NOSPEC
 void setup_barrier_nospec(void);
 #else
 static inline void setup_barrier_nospec(void) { };
 #endif
+void do_entry_flush_fixups(enum l1d_flush_type types);
 void do_barrier_nospec_fixups(bool enable);
 extern bool barrier_nospec_enabled;
 
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index f7d748b88705..5577dd887d37 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -2951,6 +2951,43 @@ TRAMP_REAL_BEGIN(stf_barrier_fallback)
 	.endr
 	blr
 
+TRAMP_REAL_BEGIN(entry_flush_fallback)
+	std	r9,PACA_EXRFI+EX_R9(r13)
+	std	r10,PACA_EXRFI+EX_R10(r13)
+	std	r11,PACA_EXRFI+EX_R11(r13)
+	mfctr	r9
+	ld	r10,PACA_RFI_FLUSH_FALLBACK_AREA(r13)
+	ld	r11,PACA_L1D_FLUSH_SIZE(r13)
+	srdi	r11,r11,(7 + 3) /* 128 byte lines, unrolled 8x */
+	mtctr	r11
+	DCBT_BOOK3S_STOP_ALL_STREAM_IDS(r11) /* Stop prefetch streams */
+
+	/* order ld/st prior to dcbt stop all streams with flushing */
+	sync
+
+	/*
+	 * The load addresses are at staggered offsets within cachelines,
+	 * which suits some pipelines better (on others it should not
+	 * hurt).
+	 */
+1:
+	ld	r11,(0x80 + 8)*0(r10)
+	ld	r11,(0x80 + 8)*1(r10)
+	ld	r11,(0x80 + 8)*2(r10)
+	ld	r11,(0x80 + 8)*3(r10)
+	ld	r11,(0x80 + 8)*4(r10)
+	ld	r11,(0x80 + 8)*5(r10)
+	ld	r11,(0x80 + 8)*6(r10)
+	ld	r11,(0x80 + 8)*7(r10)
+	addi	r10,r10,0x80*8
+	bdnz	1b
+
+	mtctr	r9
+	ld	r9,PACA_EXRFI+EX_R9(r13)
+	ld	r10,PACA_EXRFI+EX_R10(r13)
+	ld	r11,PACA_EXRFI+EX_R11(r13)
+	blr
+
 TRAMP_REAL_BEGIN(rfi_flush_fallback)
 	SET_SCRATCH0(r13);
 	GET_PACA(r13);
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index bb9cab3641d7..7d447b43ad13 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -945,7 +945,9 @@ early_initcall(disable_hardlockup_detector);
 static enum l1d_flush_type enabled_flush_types;
 static void *l1d_flush_fallback_area;
 static bool no_rfi_flush;
+static bool no_entry_flush;
 bool rfi_flush;
+bool entry_flush;
 
 static int __init handle_no_rfi_flush(char *p)
 {
@@ -955,6 +957,14 @@ static int __init handle_no_rfi_flush(char *p)
 }
 early_param("no_rfi_flush", handle_no_rfi_flush);
 
+static int __init handle_no_entry_flush(char *p)
+{
+	pr_info("entry-flush: disabled on command line.");
+	no_entry_flush = true;
+	return 0;
+}
+early_param("no_entry_flush", handle_no_entry_flush);
+
 /*
  * The RFI flush is not KPTI, but because users will see doco that says to use
  * nopti we hijack that option here to also disable the RFI flush.
@@ -986,6 +996,18 @@ void rfi_flush_enable(bool enable)
 	rfi_flush = enable;
 }
 
+void entry_flush_enable(bool enable)
+{
+	if (enable) {
+		do_entry_flush_fixups(enabled_flush_types);
+		on_each_cpu(do_nothing, NULL, 1);
+	} else {
+		do_entry_flush_fixups(L1D_FLUSH_NONE);
+	}
+
+	entry_flush = enable;
+}
+
 static void __ref init_fallback_flush(void)
 {
 	u64 l1d_size, limit;
@@ -1044,10 +1066,19 @@ void setup_rfi_flush(enum l1d_flush_type types, bool enable)
 
 	enabled_flush_types = types;
 
-	if (!no_rfi_flush && !cpu_mitigations_off())
+	if (!cpu_mitigations_off() && !no_rfi_flush)
 		rfi_flush_enable(enable);
 }
 
+void setup_entry_flush(bool enable)
+{
+	if (cpu_mitigations_off())
+		return;
+
+	if (!no_entry_flush)
+		entry_flush_enable(enable);
+}
+
 #ifdef CONFIG_DEBUG_FS
 static int rfi_flush_set(void *data, u64 val)
 {
@@ -1075,9 +1106,36 @@ static int rfi_flush_get(void *data, u64 *val)
 
 DEFINE_SIMPLE_ATTRIBUTE(fops_rfi_flush, rfi_flush_get, rfi_flush_set, "%llu\n");
 
+static int entry_flush_set(void *data, u64 val)
+{
+	bool enable;
+
+	if (val == 1)
+		enable = true;
+	else if (val == 0)
+		enable = false;
+	else
+		return -EINVAL;
+
+	/* Only do anything if we're changing state */
+	if (enable != entry_flush)
+		entry_flush_enable(enable);
+
+	return 0;
+}
+
+static int entry_flush_get(void *data, u64 *val)
+{
+	*val = entry_flush ? 1 : 0;
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_entry_flush, entry_flush_get, entry_flush_set, "%llu\n");
+
 static __init int rfi_flush_debugfs_init(void)
 {
 	debugfs_create_file("rfi_flush", 0600, powerpc_debugfs_root, NULL, &fops_rfi_flush);
+	debugfs_create_file("entry_flush", 0600, powerpc_debugfs_root, NULL, &fops_entry_flush);
 	return 0;
 }
 device_initcall(rfi_flush_debugfs_init);
diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S
index e0548b4950de..715ec4d4614a 100644
--- a/arch/powerpc/kernel/vmlinux.lds.S
+++ b/arch/powerpc/kernel/vmlinux.lds.S
@@ -131,6 +131,13 @@ SECTIONS
 		__stop___stf_entry_barrier_fixup = .;
 	}
 
+	. = ALIGN(8);
+	__entry_flush_fixup : AT(ADDR(__entry_flush_fixup) - LOAD_OFFSET) {
+		__start___entry_flush_fixup = .;
+		*(__entry_flush_fixup)
+		__stop___entry_flush_fixup = .;
+	}
+
 	. = ALIGN(8);
 	__stf_exit_barrier_fixup : AT(ADDR(__stf_exit_barrier_fixup) - LOAD_OFFSET) {
 		__start___stf_exit_barrier_fixup = .;
diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c
index 4c0a7ee9fa00..70e83cfd74aa 100644
--- a/arch/powerpc/lib/feature-fixups.c
+++ b/arch/powerpc/lib/feature-fixups.c
@@ -234,6 +234,60 @@ void do_stf_barrier_fixups(enum stf_barrier_type types)
 	do_stf_exit_barrier_fixups(types);
 }
 
+void do_entry_flush_fixups(enum l1d_flush_type types)
+{
+	unsigned int instrs[3], *dest;
+	long *start, *end;
+	int i;
+
+	start = PTRRELOC(&__start___entry_flush_fixup);
+	end = PTRRELOC(&__stop___entry_flush_fixup);
+
+	instrs[0] = 0x60000000; /* nop */
+	instrs[1] = 0x60000000; /* nop */
+	instrs[2] = 0x60000000; /* nop */
+
+	i = 0;
+	if (types == L1D_FLUSH_FALLBACK) {
+		instrs[i++] = 0x7d4802a6; /* mflr r10		*/
+		instrs[i++] = 0x60000000; /* branch patched below */
+		instrs[i++] = 0x7d4803a6; /* mtlr r10		*/
+	}
+
+	if (types & L1D_FLUSH_ORI) {
+		instrs[i++] = 0x63ff0000; /* ori 31,31,0 speculation barrier */
+		instrs[i++] = 0x63de0000; /* ori 30,30,0 L1d flush*/
+	}
+
+	if (types & L1D_FLUSH_MTTRIG)
+		instrs[i++] = 0x7c12dba6; /* mtspr TRIG2,r0 (SPR #882) */
+
+	for (i = 0; start < end; start++, i++) {
+		dest = (void *)start + *start;
+
+		pr_devel("patching dest %lx\n", (unsigned long)dest);
+
+		patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0]));
+
+		if (types == L1D_FLUSH_FALLBACK)
+			patch_branch((struct ppc_inst *)(dest + 1), (unsigned long)&entry_flush_fallback,
+				     BRANCH_SET_LINK);
+		else
+			patch_instruction((struct ppc_inst *)(dest + 1), ppc_inst(instrs[1]));
+
+		patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2]));
+	}
+
+	printk(KERN_DEBUG "entry-flush: patched %d locations (%s flush)\n", i,
+		(types == L1D_FLUSH_NONE)       ? "no" :
+		(types == L1D_FLUSH_FALLBACK)   ? "fallback displacement" :
+		(types &  L1D_FLUSH_ORI)        ? (types & L1D_FLUSH_MTTRIG)
+							? "ori+mttrig type"
+							: "ori type" :
+		(types &  L1D_FLUSH_MTTRIG)     ? "mttrig type"
+						: "unknown");
+}
+
 void do_rfi_flush_fixups(enum l1d_flush_type types)
 {
 	unsigned int instrs[3], *dest;
diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c
index 9acaa0f131b9..d04a085c423d 100644
--- a/arch/powerpc/platforms/powernv/setup.c
+++ b/arch/powerpc/platforms/powernv/setup.c
@@ -122,12 +122,23 @@ static void pnv_setup_rfi_flush(void)
 			type = L1D_FLUSH_ORI;
 	}
 
+	/*
+	 * If we are non-Power9 bare metal, we don't need to flush on kernel
+	 * entry: it fixes a P9 specific vulnerability.
+	 */
+	if (!pvr_version_is(PVR_POWER9))
+		security_ftr_clear(SEC_FTR_L1D_FLUSH_ENTRY);
+
 	enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) && \
 		 (security_ftr_enabled(SEC_FTR_L1D_FLUSH_PR)   || \
 		  security_ftr_enabled(SEC_FTR_L1D_FLUSH_HV));
 
 	setup_rfi_flush(type, enable);
 	setup_count_cache_flush();
+
+	enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) &&
+		 security_ftr_enabled(SEC_FTR_L1D_FLUSH_ENTRY);
+	setup_entry_flush(enable);
 }
 
 static void __init pnv_check_guarded_cores(void)
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index 633c45ec406d..8136c5368ee4 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -579,6 +579,10 @@ void pseries_setup_rfi_flush(void)
 
 	setup_rfi_flush(types, enable);
 	setup_count_cache_flush();
+
+	enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) &&
+		 security_ftr_enabled(SEC_FTR_L1D_FLUSH_ENTRY);
+	setup_entry_flush(enable);
 }
 
 #ifdef CONFIG_PCI_IOV
-- 
cgit v1.2.3-70-g09d2


From 9a32a7e78bd0cd9a9b6332cbdc345ee5ffd0c5de Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Tue, 17 Nov 2020 16:59:13 +1100
Subject: powerpc/64s: flush L1D after user accesses

IBM Power9 processors can speculatively operate on data in the L1 cache
before it has been completely validated, via a way-prediction mechanism. It
is not possible for an attacker to determine the contents of impermissible
memory using this method, since these systems implement a combination of
hardware and software security measures to prevent scenarios where
protected data could be leaked.

However these measures don't address the scenario where an attacker induces
the operating system to speculatively execute instructions using data that
the attacker controls. This can be used for example to speculatively bypass
"kernel user access prevention" techniques, as discovered by Anthony
Steinhauser of Google's Safeside Project. This is not an attack by itself,
but there is a possibility it could be used in conjunction with
side-channels or other weaknesses in the privileged code to construct an
attack.

This issue can be mitigated by flushing the L1 cache between privilege
boundaries of concern. This patch flushes the L1 cache after user accesses.

This is part of the fix for CVE-2020-4788.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Daniel Axtens <dja@axtens.net>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 Documentation/admin-guide/kernel-parameters.txt |  4 ++
 arch/powerpc/include/asm/book3s/64/kup-radix.h  | 66 ++++++++++++-------
 arch/powerpc/include/asm/exception-64s.h        |  3 +
 arch/powerpc/include/asm/feature-fixups.h       |  9 +++
 arch/powerpc/include/asm/kup.h                  | 19 ++++--
 arch/powerpc/include/asm/security_features.h    |  3 +
 arch/powerpc/include/asm/setup.h                |  1 +
 arch/powerpc/kernel/exceptions-64s.S            | 85 ++++++++-----------------
 arch/powerpc/kernel/setup_64.c                  | 62 ++++++++++++++++++
 arch/powerpc/kernel/vmlinux.lds.S               |  7 ++
 arch/powerpc/lib/feature-fixups.c               | 50 +++++++++++++++
 arch/powerpc/platforms/powernv/setup.c          | 10 ++-
 arch/powerpc/platforms/pseries/setup.c          |  4 ++
 13 files changed, 233 insertions(+), 90 deletions(-)

(limited to 'arch')

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 383889fe771f..44fde25bb221 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -2859,6 +2859,7 @@
 					       tsx_async_abort=off [X86]
 					       kvm.nx_huge_pages=off [X86]
 					       no_entry_flush [PPC]
+					       no_uaccess_flush [PPC]
 
 				Exceptions:
 					       This does not have any effect on
@@ -3238,6 +3239,9 @@
 	nospec_store_bypass_disable
 			[HW] Disable all mitigations for the Speculative Store Bypass vulnerability
 
+	no_uaccess_flush
+	                [PPC] Don't flush the L1-D cache after accessing user data.
+
 	noxsave		[BUGS=X86] Disables x86 extended register state save
 			and restore using xsave. The kernel will fallback to
 			enabling legacy floating-point and sse state.
diff --git a/arch/powerpc/include/asm/book3s/64/kup-radix.h b/arch/powerpc/include/asm/book3s/64/kup-radix.h
index 3ee1ec60be84..97c2394e7dea 100644
--- a/arch/powerpc/include/asm/book3s/64/kup-radix.h
+++ b/arch/powerpc/include/asm/book3s/64/kup-radix.h
@@ -61,6 +61,8 @@
 
 #else /* !__ASSEMBLY__ */
 
+DECLARE_STATIC_KEY_FALSE(uaccess_flush_key);
+
 #ifdef CONFIG_PPC_KUAP
 
 #include <asm/mmu.h>
@@ -103,8 +105,16 @@ static inline void kuap_check_amr(void)
 
 static inline unsigned long get_kuap(void)
 {
+	/*
+	 * We return AMR_KUAP_BLOCKED when we don't support KUAP because
+	 * prevent_user_access_return needs to return AMR_KUAP_BLOCKED to
+	 * cause restore_user_access to do a flush.
+	 *
+	 * This has no effect in terms of actually blocking things on hash,
+	 * so it doesn't break anything.
+	 */
 	if (!early_mmu_has_feature(MMU_FTR_RADIX_KUAP))
-		return 0;
+		return AMR_KUAP_BLOCKED;
 
 	return mfspr(SPRN_AMR);
 }
@@ -123,6 +133,31 @@ static inline void set_kuap(unsigned long value)
 	isync();
 }
 
+static inline bool
+bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write)
+{
+	return WARN(mmu_has_feature(MMU_FTR_RADIX_KUAP) &&
+		    (regs->kuap & (is_write ? AMR_KUAP_BLOCK_WRITE : AMR_KUAP_BLOCK_READ)),
+		    "Bug: %s fault blocked by AMR!", is_write ? "Write" : "Read");
+}
+#else /* CONFIG_PPC_KUAP */
+static inline void kuap_restore_amr(struct pt_regs *regs, unsigned long amr) { }
+
+static inline unsigned long kuap_get_and_check_amr(void)
+{
+	return 0UL;
+}
+
+static inline void kuap_check_amr(void) { }
+
+static inline unsigned long get_kuap(void)
+{
+	return AMR_KUAP_BLOCKED;
+}
+
+static inline void set_kuap(unsigned long value) { }
+#endif /* !CONFIG_PPC_KUAP */
+
 static __always_inline void allow_user_access(void __user *to, const void __user *from,
 					      unsigned long size, unsigned long dir)
 {
@@ -142,6 +177,8 @@ static inline void prevent_user_access(void __user *to, const void __user *from,
 				       unsigned long size, unsigned long dir)
 {
 	set_kuap(AMR_KUAP_BLOCKED);
+	if (static_branch_unlikely(&uaccess_flush_key))
+		do_uaccess_flush();
 }
 
 static inline unsigned long prevent_user_access_return(void)
@@ -149,6 +186,8 @@ static inline unsigned long prevent_user_access_return(void)
 	unsigned long flags = get_kuap();
 
 	set_kuap(AMR_KUAP_BLOCKED);
+	if (static_branch_unlikely(&uaccess_flush_key))
+		do_uaccess_flush();
 
 	return flags;
 }
@@ -156,30 +195,9 @@ static inline unsigned long prevent_user_access_return(void)
 static inline void restore_user_access(unsigned long flags)
 {
 	set_kuap(flags);
+	if (static_branch_unlikely(&uaccess_flush_key) && flags == AMR_KUAP_BLOCKED)
+		do_uaccess_flush();
 }
-
-static inline bool
-bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write)
-{
-	return WARN(mmu_has_feature(MMU_FTR_RADIX_KUAP) &&
-		    (regs->kuap & (is_write ? AMR_KUAP_BLOCK_WRITE : AMR_KUAP_BLOCK_READ)),
-		    "Bug: %s fault blocked by AMR!", is_write ? "Write" : "Read");
-}
-#else /* CONFIG_PPC_KUAP */
-static inline void kuap_restore_amr(struct pt_regs *regs, unsigned long amr)
-{
-}
-
-static inline void kuap_check_amr(void)
-{
-}
-
-static inline unsigned long kuap_get_and_check_amr(void)
-{
-	return 0;
-}
-#endif /* CONFIG_PPC_KUAP */
-
 #endif /* __ASSEMBLY__ */
 
 #endif /* _ASM_POWERPC_BOOK3S_64_KUP_RADIX_H */
diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h
index 83fa88bc9935..1d32b174ab6a 100644
--- a/arch/powerpc/include/asm/exception-64s.h
+++ b/arch/powerpc/include/asm/exception-64s.h
@@ -144,6 +144,9 @@
 	RFSCV;								\
 	b	rfscv_flush_fallback
 
+#else /* __ASSEMBLY__ */
+/* Prototype for function defined in exceptions-64s.S */
+void do_uaccess_flush(void);
 #endif /* __ASSEMBLY__ */
 
 #endif	/* _ASM_POWERPC_EXCEPTION_H */
diff --git a/arch/powerpc/include/asm/feature-fixups.h b/arch/powerpc/include/asm/feature-fixups.h
index 06a48219bbf2..fbd406cd6916 100644
--- a/arch/powerpc/include/asm/feature-fixups.h
+++ b/arch/powerpc/include/asm/feature-fixups.h
@@ -205,6 +205,14 @@ label##3:					       	\
 	FTR_ENTRY_OFFSET 955b-956b;			\
 	.popsection;
 
+#define UACCESS_FLUSH_FIXUP_SECTION			\
+959:							\
+	.pushsection __uaccess_flush_fixup,"a";		\
+	.align 2;					\
+960:							\
+	FTR_ENTRY_OFFSET 959b-960b;			\
+	.popsection;
+
 #define ENTRY_FLUSH_FIXUP_SECTION			\
 957:							\
 	.pushsection __entry_flush_fixup,"a";		\
@@ -248,6 +256,7 @@ extern long stf_barrier_fallback;
 extern long entry_flush_fallback;
 extern long __start___stf_entry_barrier_fixup, __stop___stf_entry_barrier_fixup;
 extern long __start___stf_exit_barrier_fixup, __stop___stf_exit_barrier_fixup;
+extern long __start___uaccess_flush_fixup, __stop___uaccess_flush_fixup;
 extern long __start___entry_flush_fixup, __stop___entry_flush_fixup;
 extern long __start___rfi_flush_fixup, __stop___rfi_flush_fixup;
 extern long __start___barrier_nospec_fixup, __stop___barrier_nospec_fixup;
diff --git a/arch/powerpc/include/asm/kup.h b/arch/powerpc/include/asm/kup.h
index 1d0f7d838b2e..0f5c606ae057 100644
--- a/arch/powerpc/include/asm/kup.h
+++ b/arch/powerpc/include/asm/kup.h
@@ -53,17 +53,26 @@ static inline void setup_kuep(bool disabled) { }
 void setup_kuap(bool disabled);
 #else
 static inline void setup_kuap(bool disabled) { }
+
+static inline bool
+bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write)
+{
+	return false;
+}
+
+/*
+ * book3s/64/kup-radix.h defines these functions for the !KUAP case to flush
+ * the L1D cache after user accesses. Only include the empty stubs for other
+ * platforms.
+ */
+#ifndef CONFIG_PPC64
 static inline void allow_user_access(void __user *to, const void __user *from,
 				     unsigned long size, unsigned long dir) { }
 static inline void prevent_user_access(void __user *to, const void __user *from,
 				       unsigned long size, unsigned long dir) { }
 static inline unsigned long prevent_user_access_return(void) { return 0UL; }
 static inline void restore_user_access(unsigned long flags) { }
-static inline bool
-bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write)
-{
-	return false;
-}
+#endif /* CONFIG_PPC64 */
 #endif /* CONFIG_PPC_KUAP */
 
 static inline void allow_read_from_user(const void __user *from, unsigned long size)
diff --git a/arch/powerpc/include/asm/security_features.h b/arch/powerpc/include/asm/security_features.h
index 9e7459d2edca..b774a4477d5f 100644
--- a/arch/powerpc/include/asm/security_features.h
+++ b/arch/powerpc/include/asm/security_features.h
@@ -89,6 +89,8 @@ static inline bool security_ftr_enabled(u64 feature)
 // The L1-D cache should be flushed when entering the kernel
 #define SEC_FTR_L1D_FLUSH_ENTRY		0x0000000000004000ull
 
+// The L1-D cache should be flushed after user accesses from the kernel
+#define SEC_FTR_L1D_FLUSH_UACCESS	0x0000000000008000ull
 
 // Features enabled by default
 #define SEC_FTR_DEFAULT \
@@ -96,6 +98,7 @@ static inline bool security_ftr_enabled(u64 feature)
 	 SEC_FTR_L1D_FLUSH_PR | \
 	 SEC_FTR_BNDS_CHK_SPEC_BAR | \
 	 SEC_FTR_L1D_FLUSH_ENTRY | \
+	 SEC_FTR_L1D_FLUSH_UACCESS | \
 	 SEC_FTR_FAVOUR_SECURITY)
 
 #endif /* _ASM_POWERPC_SECURITY_FEATURES_H */
diff --git a/arch/powerpc/include/asm/setup.h b/arch/powerpc/include/asm/setup.h
index e2fcd3e874f8..a466749703f1 100644
--- a/arch/powerpc/include/asm/setup.h
+++ b/arch/powerpc/include/asm/setup.h
@@ -60,6 +60,7 @@ void setup_barrier_nospec(void);
 #else
 static inline void setup_barrier_nospec(void) { };
 #endif
+void do_uaccess_flush_fixups(enum l1d_flush_type types);
 void do_entry_flush_fixups(enum l1d_flush_type types);
 void do_barrier_nospec_fixups(bool enable);
 extern bool barrier_nospec_enabled;
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 5577dd887d37..f63a3d3bca3d 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -2951,11 +2951,8 @@ TRAMP_REAL_BEGIN(stf_barrier_fallback)
 	.endr
 	blr
 
-TRAMP_REAL_BEGIN(entry_flush_fallback)
-	std	r9,PACA_EXRFI+EX_R9(r13)
-	std	r10,PACA_EXRFI+EX_R10(r13)
-	std	r11,PACA_EXRFI+EX_R11(r13)
-	mfctr	r9
+/* Clobbers r10, r11, ctr */
+.macro L1D_DISPLACEMENT_FLUSH
 	ld	r10,PACA_RFI_FLUSH_FALLBACK_AREA(r13)
 	ld	r11,PACA_L1D_FLUSH_SIZE(r13)
 	srdi	r11,r11,(7 + 3) /* 128 byte lines, unrolled 8x */
@@ -2981,7 +2978,14 @@ TRAMP_REAL_BEGIN(entry_flush_fallback)
 	ld	r11,(0x80 + 8)*7(r10)
 	addi	r10,r10,0x80*8
 	bdnz	1b
+.endm
 
+TRAMP_REAL_BEGIN(entry_flush_fallback)
+	std	r9,PACA_EXRFI+EX_R9(r13)
+	std	r10,PACA_EXRFI+EX_R10(r13)
+	std	r11,PACA_EXRFI+EX_R11(r13)
+	mfctr	r9
+	L1D_DISPLACEMENT_FLUSH
 	mtctr	r9
 	ld	r9,PACA_EXRFI+EX_R9(r13)
 	ld	r10,PACA_EXRFI+EX_R10(r13)
@@ -2997,32 +3001,7 @@ TRAMP_REAL_BEGIN(rfi_flush_fallback)
 	std	r10,PACA_EXRFI+EX_R10(r13)
 	std	r11,PACA_EXRFI+EX_R11(r13)
 	mfctr	r9
-	ld	r10,PACA_RFI_FLUSH_FALLBACK_AREA(r13)
-	ld	r11,PACA_L1D_FLUSH_SIZE(r13)
-	srdi	r11,r11,(7 + 3) /* 128 byte lines, unrolled 8x */
-	mtctr	r11
-	DCBT_BOOK3S_STOP_ALL_STREAM_IDS(r11) /* Stop prefetch streams */
-
-	/* order ld/st prior to dcbt stop all streams with flushing */
-	sync
-
-	/*
-	 * The load adresses are at staggered offsets within cachelines,
-	 * which suits some pipelines better (on others it should not
-	 * hurt).
-	 */
-1:
-	ld	r11,(0x80 + 8)*0(r10)
-	ld	r11,(0x80 + 8)*1(r10)
-	ld	r11,(0x80 + 8)*2(r10)
-	ld	r11,(0x80 + 8)*3(r10)
-	ld	r11,(0x80 + 8)*4(r10)
-	ld	r11,(0x80 + 8)*5(r10)
-	ld	r11,(0x80 + 8)*6(r10)
-	ld	r11,(0x80 + 8)*7(r10)
-	addi	r10,r10,0x80*8
-	bdnz	1b
-
+	L1D_DISPLACEMENT_FLUSH
 	mtctr	r9
 	ld	r9,PACA_EXRFI+EX_R9(r13)
 	ld	r10,PACA_EXRFI+EX_R10(r13)
@@ -3040,32 +3019,7 @@ TRAMP_REAL_BEGIN(hrfi_flush_fallback)
 	std	r10,PACA_EXRFI+EX_R10(r13)
 	std	r11,PACA_EXRFI+EX_R11(r13)
 	mfctr	r9
-	ld	r10,PACA_RFI_FLUSH_FALLBACK_AREA(r13)
-	ld	r11,PACA_L1D_FLUSH_SIZE(r13)
-	srdi	r11,r11,(7 + 3) /* 128 byte lines, unrolled 8x */
-	mtctr	r11
-	DCBT_BOOK3S_STOP_ALL_STREAM_IDS(r11) /* Stop prefetch streams */
-
-	/* order ld/st prior to dcbt stop all streams with flushing */
-	sync
-
-	/*
-	 * The load adresses are at staggered offsets within cachelines,
-	 * which suits some pipelines better (on others it should not
-	 * hurt).
-	 */
-1:
-	ld	r11,(0x80 + 8)*0(r10)
-	ld	r11,(0x80 + 8)*1(r10)
-	ld	r11,(0x80 + 8)*2(r10)
-	ld	r11,(0x80 + 8)*3(r10)
-	ld	r11,(0x80 + 8)*4(r10)
-	ld	r11,(0x80 + 8)*5(r10)
-	ld	r11,(0x80 + 8)*6(r10)
-	ld	r11,(0x80 + 8)*7(r10)
-	addi	r10,r10,0x80*8
-	bdnz	1b
-
+	L1D_DISPLACEMENT_FLUSH
 	mtctr	r9
 	ld	r9,PACA_EXRFI+EX_R9(r13)
 	ld	r10,PACA_EXRFI+EX_R10(r13)
@@ -3116,8 +3070,21 @@ TRAMP_REAL_BEGIN(rfscv_flush_fallback)
 	RFSCV
 
 USE_TEXT_SECTION()
-	MASKED_INTERRUPT
-	MASKED_INTERRUPT hsrr=1
+
+_GLOBAL(do_uaccess_flush)
+	UACCESS_FLUSH_FIXUP_SECTION
+	nop
+	nop
+	nop
+	blr
+	L1D_DISPLACEMENT_FLUSH
+	blr
+_ASM_NOKPROBE_SYMBOL(do_uaccess_flush)
+EXPORT_SYMBOL(do_uaccess_flush)
+
+
+MASKED_INTERRUPT
+MASKED_INTERRUPT hsrr=1
 
 #ifdef CONFIG_KVM_BOOK3S_64_HANDLER
 kvmppc_skip_interrupt:
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index 7d447b43ad13..74fd47f46fa5 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -946,8 +946,12 @@ static enum l1d_flush_type enabled_flush_types;
 static void *l1d_flush_fallback_area;
 static bool no_rfi_flush;
 static bool no_entry_flush;
+static bool no_uaccess_flush;
 bool rfi_flush;
 bool entry_flush;
+bool uaccess_flush;
+DEFINE_STATIC_KEY_FALSE(uaccess_flush_key);
+EXPORT_SYMBOL(uaccess_flush_key);
 
 static int __init handle_no_rfi_flush(char *p)
 {
@@ -965,6 +969,14 @@ static int __init handle_no_entry_flush(char *p)
 }
 early_param("no_entry_flush", handle_no_entry_flush);
 
+static int __init handle_no_uaccess_flush(char *p)
+{
+	pr_info("uaccess-flush: disabled on command line.");
+	no_uaccess_flush = true;
+	return 0;
+}
+early_param("no_uaccess_flush", handle_no_uaccess_flush);
+
 /*
  * The RFI flush is not KPTI, but because users will see doco that says to use
  * nopti we hijack that option here to also disable the RFI flush.
@@ -1008,6 +1020,20 @@ void entry_flush_enable(bool enable)
 	entry_flush = enable;
 }
 
+void uaccess_flush_enable(bool enable)
+{
+	if (enable) {
+		do_uaccess_flush_fixups(enabled_flush_types);
+		static_branch_enable(&uaccess_flush_key);
+		on_each_cpu(do_nothing, NULL, 1);
+	} else {
+		static_branch_disable(&uaccess_flush_key);
+		do_uaccess_flush_fixups(L1D_FLUSH_NONE);
+	}
+
+	uaccess_flush = enable;
+}
+
 static void __ref init_fallback_flush(void)
 {
 	u64 l1d_size, limit;
@@ -1079,6 +1105,15 @@ void setup_entry_flush(bool enable)
 		entry_flush_enable(enable);
 }
 
+void setup_uaccess_flush(bool enable)
+{
+	if (cpu_mitigations_off())
+		return;
+
+	if (!no_uaccess_flush)
+		uaccess_flush_enable(enable);
+}
+
 #ifdef CONFIG_DEBUG_FS
 static int rfi_flush_set(void *data, u64 val)
 {
@@ -1132,10 +1167,37 @@ static int entry_flush_get(void *data, u64 *val)
 
 DEFINE_SIMPLE_ATTRIBUTE(fops_entry_flush, entry_flush_get, entry_flush_set, "%llu\n");
 
+static int uaccess_flush_set(void *data, u64 val)
+{
+	bool enable;
+
+	if (val == 1)
+		enable = true;
+	else if (val == 0)
+		enable = false;
+	else
+		return -EINVAL;
+
+	/* Only do anything if we're changing state */
+	if (enable != uaccess_flush)
+		uaccess_flush_enable(enable);
+
+	return 0;
+}
+
+static int uaccess_flush_get(void *data, u64 *val)
+{
+	*val = uaccess_flush ? 1 : 0;
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_uaccess_flush, uaccess_flush_get, uaccess_flush_set, "%llu\n");
+
 static __init int rfi_flush_debugfs_init(void)
 {
 	debugfs_create_file("rfi_flush", 0600, powerpc_debugfs_root, NULL, &fops_rfi_flush);
 	debugfs_create_file("entry_flush", 0600, powerpc_debugfs_root, NULL, &fops_entry_flush);
+	debugfs_create_file("uaccess_flush", 0600, powerpc_debugfs_root, NULL, &fops_uaccess_flush);
 	return 0;
 }
 device_initcall(rfi_flush_debugfs_init);
diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S
index 715ec4d4614a..6db90cdf11da 100644
--- a/arch/powerpc/kernel/vmlinux.lds.S
+++ b/arch/powerpc/kernel/vmlinux.lds.S
@@ -131,6 +131,13 @@ SECTIONS
 		__stop___stf_entry_barrier_fixup = .;
 	}
 
+	. = ALIGN(8);
+	__uaccess_flush_fixup : AT(ADDR(__uaccess_flush_fixup) - LOAD_OFFSET) {
+		__start___uaccess_flush_fixup = .;
+		*(__uaccess_flush_fixup)
+		__stop___uaccess_flush_fixup = .;
+	}
+
 	. = ALIGN(8);
 	__entry_flush_fixup : AT(ADDR(__entry_flush_fixup) - LOAD_OFFSET) {
 		__start___entry_flush_fixup = .;
diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c
index 70e83cfd74aa..321c12a9ef6b 100644
--- a/arch/powerpc/lib/feature-fixups.c
+++ b/arch/powerpc/lib/feature-fixups.c
@@ -234,6 +234,56 @@ void do_stf_barrier_fixups(enum stf_barrier_type types)
 	do_stf_exit_barrier_fixups(types);
 }
 
+void do_uaccess_flush_fixups(enum l1d_flush_type types)
+{
+	unsigned int instrs[4], *dest;
+	long *start, *end;
+	int i;
+
+	start = PTRRELOC(&__start___uaccess_flush_fixup);
+	end = PTRRELOC(&__stop___uaccess_flush_fixup);
+
+	instrs[0] = 0x60000000; /* nop */
+	instrs[1] = 0x60000000; /* nop */
+	instrs[2] = 0x60000000; /* nop */
+	instrs[3] = 0x4e800020; /* blr */
+
+	i = 0;
+	if (types == L1D_FLUSH_FALLBACK) {
+		instrs[3] = 0x60000000; /* nop */
+		/* fallthrough to fallback flush */
+	}
+
+	if (types & L1D_FLUSH_ORI) {
+		instrs[i++] = 0x63ff0000; /* ori 31,31,0 speculation barrier */
+		instrs[i++] = 0x63de0000; /* ori 30,30,0 L1d flush*/
+	}
+
+	if (types & L1D_FLUSH_MTTRIG)
+		instrs[i++] = 0x7c12dba6; /* mtspr TRIG2,r0 (SPR #882) */
+
+	for (i = 0; start < end; start++, i++) {
+		dest = (void *)start + *start;
+
+		pr_devel("patching dest %lx\n", (unsigned long)dest);
+
+		patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0]));
+
+		patch_instruction((struct ppc_inst *)(dest + 1), ppc_inst(instrs[1]));
+		patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2]));
+		patch_instruction((struct ppc_inst *)(dest + 3), ppc_inst(instrs[3]));
+	}
+
+	printk(KERN_DEBUG "uaccess-flush: patched %d locations (%s flush)\n", i,
+		(types == L1D_FLUSH_NONE)       ? "no" :
+		(types == L1D_FLUSH_FALLBACK)   ? "fallback displacement" :
+		(types &  L1D_FLUSH_ORI)        ? (types & L1D_FLUSH_MTTRIG)
+							? "ori+mttrig type"
+							: "ori type" :
+		(types &  L1D_FLUSH_MTTRIG)     ? "mttrig type"
+						: "unknown");
+}
+
 void do_entry_flush_fixups(enum l1d_flush_type types)
 {
 	unsigned int instrs[3], *dest;
diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c
index d04a085c423d..087ec92acfc4 100644
--- a/arch/powerpc/platforms/powernv/setup.c
+++ b/arch/powerpc/platforms/powernv/setup.c
@@ -124,10 +124,12 @@ static void pnv_setup_rfi_flush(void)
 
 	/*
 	 * If we are non-Power9 bare metal, we don't need to flush on kernel
-	 * entry: it fixes a P9 specific vulnerability.
+	 * entry or after user access: they fix a P9 specific vulnerability.
 	 */
-	if (!pvr_version_is(PVR_POWER9))
+	if (!pvr_version_is(PVR_POWER9)) {
 		security_ftr_clear(SEC_FTR_L1D_FLUSH_ENTRY);
+		security_ftr_clear(SEC_FTR_L1D_FLUSH_UACCESS);
+	}
 
 	enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) && \
 		 (security_ftr_enabled(SEC_FTR_L1D_FLUSH_PR)   || \
@@ -139,6 +141,10 @@ static void pnv_setup_rfi_flush(void)
 	enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) &&
 		 security_ftr_enabled(SEC_FTR_L1D_FLUSH_ENTRY);
 	setup_entry_flush(enable);
+
+	enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) &&
+		 security_ftr_enabled(SEC_FTR_L1D_FLUSH_UACCESS);
+	setup_uaccess_flush(enable);
 }
 
 static void __init pnv_check_guarded_cores(void)
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index 8136c5368ee4..3617cdb079f6 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -583,6 +583,10 @@ void pseries_setup_rfi_flush(void)
 	enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) &&
 		 security_ftr_enabled(SEC_FTR_L1D_FLUSH_ENTRY);
 	setup_entry_flush(enable);
+
+	enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) &&
+		 security_ftr_enabled(SEC_FTR_L1D_FLUSH_UACCESS);
+	setup_uaccess_flush(enable);
 }
 
 #ifdef CONFIG_PCI_IOV
-- 
cgit v1.2.3-70-g09d2


From 178d52c6e89c38d0553b0ac8b99927b11eb995b0 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Thu, 19 Nov 2020 23:43:53 +1100
Subject: powerpc: Only include kup-radix.h for 64-bit Book3S

In kup.h we currently include kup-radix.h for all 64-bit builds, which
includes Book3S and Book3E. The latter doesn't make sense, Book3E
never uses the Radix MMU.

This has worked up until now, but almost by accident, and the recent
uaccess flush changes introduced a build breakage on Book3E because of
the bad structure of the code.

So disentangle things so that we only use kup-radix.h for Book3S. This
requires some more stubs in kup.h and fixing an include in
syscall_64.c.

Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/book3s/64/kup-radix.h |  4 ++--
 arch/powerpc/include/asm/kup.h                 | 11 ++++++++---
 arch/powerpc/kernel/syscall_64.c               |  2 +-
 3 files changed, 11 insertions(+), 6 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/include/asm/book3s/64/kup-radix.h b/arch/powerpc/include/asm/book3s/64/kup-radix.h
index 97c2394e7dea..28716e2f13e3 100644
--- a/arch/powerpc/include/asm/book3s/64/kup-radix.h
+++ b/arch/powerpc/include/asm/book3s/64/kup-radix.h
@@ -27,6 +27,7 @@
 #endif
 .endm
 
+#ifdef CONFIG_PPC_KUAP
 .macro kuap_check_amr gpr1, gpr2
 #ifdef CONFIG_PPC_KUAP_DEBUG
 	BEGIN_MMU_FTR_SECTION_NESTED(67)
@@ -38,6 +39,7 @@
 	END_MMU_FTR_SECTION_NESTED_IFSET(MMU_FTR_RADIX_KUAP, 67)
 #endif
 .endm
+#endif
 
 .macro kuap_save_amr_and_lock gpr1, gpr2, use_cr, msr_pr_cr
 #ifdef CONFIG_PPC_KUAP
@@ -148,8 +150,6 @@ static inline unsigned long kuap_get_and_check_amr(void)
 	return 0UL;
 }
 
-static inline void kuap_check_amr(void) { }
-
 static inline unsigned long get_kuap(void)
 {
 	return AMR_KUAP_BLOCKED;
diff --git a/arch/powerpc/include/asm/kup.h b/arch/powerpc/include/asm/kup.h
index 0f5c606ae057..0d93331d0fab 100644
--- a/arch/powerpc/include/asm/kup.h
+++ b/arch/powerpc/include/asm/kup.h
@@ -14,7 +14,7 @@
 #define KUAP_CURRENT_WRITE	8
 #define KUAP_CURRENT		(KUAP_CURRENT_READ | KUAP_CURRENT_WRITE)
 
-#ifdef CONFIG_PPC64
+#ifdef CONFIG_PPC_BOOK3S_64
 #include <asm/book3s/64/kup-radix.h>
 #endif
 #ifdef CONFIG_PPC_8xx
@@ -35,6 +35,9 @@
 .macro kuap_check	current, gpr
 .endm
 
+.macro kuap_check_amr	gpr1, gpr2
+.endm
+
 #endif
 
 #else /* !__ASSEMBLY__ */
@@ -60,19 +63,21 @@ bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write)
 	return false;
 }
 
+static inline void kuap_check_amr(void) { }
+
 /*
  * book3s/64/kup-radix.h defines these functions for the !KUAP case to flush
  * the L1D cache after user accesses. Only include the empty stubs for other
  * platforms.
  */
-#ifndef CONFIG_PPC64
+#ifndef CONFIG_PPC_BOOK3S_64
 static inline void allow_user_access(void __user *to, const void __user *from,
 				     unsigned long size, unsigned long dir) { }
 static inline void prevent_user_access(void __user *to, const void __user *from,
 				       unsigned long size, unsigned long dir) { }
 static inline unsigned long prevent_user_access_return(void) { return 0UL; }
 static inline void restore_user_access(unsigned long flags) { }
-#endif /* CONFIG_PPC64 */
+#endif /* CONFIG_PPC_BOOK3S_64 */
 #endif /* CONFIG_PPC_KUAP */
 
 static inline void allow_read_from_user(const void __user *from, unsigned long size)
diff --git a/arch/powerpc/kernel/syscall_64.c b/arch/powerpc/kernel/syscall_64.c
index 8e50818aa50b..310bcd768cd5 100644
--- a/arch/powerpc/kernel/syscall_64.c
+++ b/arch/powerpc/kernel/syscall_64.c
@@ -2,7 +2,7 @@
 
 #include <linux/err.h>
 #include <asm/asm-prototypes.h>
-#include <asm/book3s/64/kup-radix.h>
+#include <asm/kup.h>
 #include <asm/cputime.h>
 #include <asm/hw_irq.h>
 #include <asm/kprobes.h>
-- 
cgit v1.2.3-70-g09d2


From da631f7fd623b6c180c8d93a93040d1e0d61291f Mon Sep 17 00:00:00 2001
From: Daniel Axtens <dja@axtens.net>
Date: Tue, 17 Nov 2020 16:59:16 +1100
Subject: powerpc/64s: rename pnv|pseries_setup_rfi_flush to
 _setup_security_mitigations

pseries|pnv_setup_rfi_flush already does the count cache flush setup, and
we just added entry and uaccess flushes. So the name is not very accurate
any more. In both platforms we then also immediately setup the STF flush.

Rename them to _setup_security_mitigations and fold the STF flush in.

Signed-off-by: Daniel Axtens <dja@axtens.net>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/platforms/powernv/setup.c    | 7 ++++---
 arch/powerpc/platforms/pseries/mobility.c | 4 ++--
 arch/powerpc/platforms/pseries/pseries.h  | 2 +-
 arch/powerpc/platforms/pseries/setup.c    | 7 ++++---
 4 files changed, 11 insertions(+), 9 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c
index 087ec92acfc4..46115231a3b2 100644
--- a/arch/powerpc/platforms/powernv/setup.c
+++ b/arch/powerpc/platforms/powernv/setup.c
@@ -98,7 +98,7 @@ static void init_fw_feat_flags(struct device_node *np)
 		security_ftr_clear(SEC_FTR_BNDS_CHK_SPEC_BAR);
 }
 
-static void pnv_setup_rfi_flush(void)
+static void pnv_setup_security_mitigations(void)
 {
 	struct device_node *np, *fw_features;
 	enum l1d_flush_type type;
@@ -145,6 +145,8 @@ static void pnv_setup_rfi_flush(void)
 	enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) &&
 		 security_ftr_enabled(SEC_FTR_L1D_FLUSH_UACCESS);
 	setup_uaccess_flush(enable);
+
+	setup_stf_barrier();
 }
 
 static void __init pnv_check_guarded_cores(void)
@@ -173,8 +175,7 @@ static void __init pnv_setup_arch(void)
 {
 	set_arch_panic_timeout(10, ARCH_PANIC_TIMEOUT);
 
-	pnv_setup_rfi_flush();
-	setup_stf_barrier();
+	pnv_setup_security_mitigations();
 
 	/* Initialize SMP */
 	pnv_smp_init();
diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c
index d6f4162478a5..2f73cb5bf12d 100644
--- a/arch/powerpc/platforms/pseries/mobility.c
+++ b/arch/powerpc/platforms/pseries/mobility.c
@@ -349,8 +349,8 @@ void post_mobility_fixup(void)
 
 	cpus_read_unlock();
 
-	/* Possibly switch to a new RFI flush type */
-	pseries_setup_rfi_flush();
+	/* Possibly switch to a new L1 flush type */
+	pseries_setup_security_mitigations();
 
 	/* Reinitialise system information for hv-24x7 */
 	read_24x7_sys_info();
diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h
index 13fa370a87e4..593840847cd3 100644
--- a/arch/powerpc/platforms/pseries/pseries.h
+++ b/arch/powerpc/platforms/pseries/pseries.h
@@ -111,7 +111,7 @@ static inline unsigned long cmo_get_page_size(void)
 
 int dlpar_workqueue_init(void);
 
-void pseries_setup_rfi_flush(void);
+void pseries_setup_security_mitigations(void);
 void pseries_lpar_read_hblkrm_characteristics(void);
 
 #endif /* _PSERIES_PSERIES_H */
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index 3617cdb079f6..090c13f6c881 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -542,7 +542,7 @@ static void init_cpu_char_feature_flags(struct h_cpu_char_result *result)
 		security_ftr_clear(SEC_FTR_BNDS_CHK_SPEC_BAR);
 }
 
-void pseries_setup_rfi_flush(void)
+void pseries_setup_security_mitigations(void)
 {
 	struct h_cpu_char_result result;
 	enum l1d_flush_type types;
@@ -587,6 +587,8 @@ void pseries_setup_rfi_flush(void)
 	enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) &&
 		 security_ftr_enabled(SEC_FTR_L1D_FLUSH_UACCESS);
 	setup_uaccess_flush(enable);
+
+	setup_stf_barrier();
 }
 
 #ifdef CONFIG_PCI_IOV
@@ -776,8 +778,7 @@ static void __init pSeries_setup_arch(void)
 
 	fwnmi_init();
 
-	pseries_setup_rfi_flush();
-	setup_stf_barrier();
+	pseries_setup_security_mitigations();
 	pseries_lpar_read_hblkrm_characteristics();
 
 	/* By default, only probe PCI (can be overridden by rtas_pci) */
-- 
cgit v1.2.3-70-g09d2


From a98fd117a2553ab1a6d2fe3c7acae88c1eca4372 Mon Sep 17 00:00:00 2001
From: Icenowy Zheng <icenowy@aosc.io>
Date: Fri, 20 Nov 2020 13:08:51 +0800
Subject: ARM: dts: sun8i: v3s: fix GIC node memory range

Currently the GIC node in V3s DTSI follows some old DT examples, and
being broken. This leads a warning at boot.

Fix this.

Fixes: f989086ccbc6 ("ARM: dts: sunxi: add dtsi file for V3s SoC")
Signed-off-by: Icenowy Zheng <icenowy@aosc.io>
Signed-off-by: Maxime Ripard <maxime@cerno.tech>
Link: https://lore.kernel.org/r/20201120050851.4123759-1-icenowy@aosc.io
---
 arch/arm/boot/dts/sun8i-v3s.dtsi | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/arm/boot/dts/sun8i-v3s.dtsi b/arch/arm/boot/dts/sun8i-v3s.dtsi
index 0c7341676921..89abd4cc7e23 100644
--- a/arch/arm/boot/dts/sun8i-v3s.dtsi
+++ b/arch/arm/boot/dts/sun8i-v3s.dtsi
@@ -539,7 +539,7 @@
 		gic: interrupt-controller@1c81000 {
 			compatible = "arm,gic-400";
 			reg = <0x01c81000 0x1000>,
-			      <0x01c82000 0x1000>,
+			      <0x01c82000 0x2000>,
 			      <0x01c84000 0x2000>,
 			      <0x01c86000 0x2000>;
 			interrupt-controller;
-- 
cgit v1.2.3-70-g09d2


From a927bd6ba952d13c52b8b385030943032f659a3e Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Sat, 21 Nov 2020 22:17:05 -0800
Subject: mm: fix phys_to_target_node() and memory_add_physaddr_to_nid()
 exports

The core-mm has a default __weak implementation of phys_to_target_node()
to mirror the weak definition of memory_add_physaddr_to_nid().  That
symbol is exported for modules.  However, while the export in
mm/memory_hotplug.c exported the symbol in the configuration cases of:

	CONFIG_NUMA_KEEP_MEMINFO=y
	CONFIG_MEMORY_HOTPLUG=y

...and:

	CONFIG_NUMA_KEEP_MEMINFO=n
	CONFIG_MEMORY_HOTPLUG=y

...it failed to export the symbol in the case of:

	CONFIG_NUMA_KEEP_MEMINFO=y
	CONFIG_MEMORY_HOTPLUG=n

Not only is that broken, but Christoph points out that the kernel should
not be exporting any __weak symbol, which means that
memory_add_physaddr_to_nid() example that phys_to_target_node() copied
is broken too.

Rework the definition of phys_to_target_node() and
memory_add_physaddr_to_nid() to not require weak symbols.  Move to the
common arch override design-pattern of an asm header defining a symbol
to replace the default implementation.

The only common header that all memory_add_physaddr_to_nid() producing
architectures implement is asm/sparsemem.h.  In fact, powerpc already
defines its memory_add_physaddr_to_nid() helper in sparsemem.h.
Double-down on that observation and define phys_to_target_node() where
necessary in asm/sparsemem.h.  An alternate consideration that was
discarded was to put this override in asm/numa.h, but that entangles
with the definition of MAX_NUMNODES relative to the inclusion of
linux/nodemask.h, and requires powerpc to grow a new header.

The dependency on NUMA_KEEP_MEMINFO for DEV_DAX_HMEM_DEVICES is invalid
now that the symbol is properly exported / stubbed in all combinations
of CONFIG_NUMA_KEEP_MEMINFO and CONFIG_MEMORY_HOTPLUG.

[dan.j.williams@intel.com: v4]
  Link: https://lkml.kernel.org/r/160461461867.1505359.5301571728749534585.stgit@dwillia2-desk3.amr.corp.intel.com
[dan.j.williams@intel.com: powerpc: fix create_section_mapping compile warning]
  Link: https://lkml.kernel.org/r/160558386174.2948926.2740149041249041764.stgit@dwillia2-desk3.amr.corp.intel.com

Fixes: a035b6bf863e ("mm/memory_hotplug: introduce default phys_to_target_node() implementation")
Reported-by: Randy Dunlap <rdunlap@infradead.org>
Reported-by: Thomas Gleixner <tglx@linutronix.de>
Reported-by: kernel test robot <lkp@intel.com>
Reported-by: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Tested-by: Randy Dunlap <rdunlap@infradead.org>
Tested-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Cc: Joao Martins <joao.m.martins@oracle.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Vishal Verma <vishal.l.verma@intel.com>
Cc: Stephen Rothwell <sfr@canb.auug.org.au>
Link: https://lkml.kernel.org/r/160447639846.1133764.7044090803980177548.stgit@dwillia2-desk3.amr.corp.intel.com
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/ia64/include/asm/sparsemem.h    |  6 ++++++
 arch/powerpc/include/asm/mmzone.h    |  5 +++++
 arch/powerpc/include/asm/sparsemem.h |  5 ++---
 arch/powerpc/mm/mem.c                |  1 +
 arch/x86/include/asm/sparsemem.h     | 10 ++++++++++
 arch/x86/mm/numa.c                   |  2 ++
 drivers/dax/Kconfig                  |  1 -
 include/linux/memory_hotplug.h       | 14 --------------
 include/linux/numa.h                 | 30 +++++++++++++++++++++++++++++-
 mm/memory_hotplug.c                  | 18 ------------------
 10 files changed, 55 insertions(+), 37 deletions(-)

(limited to 'arch')

diff --git a/arch/ia64/include/asm/sparsemem.h b/arch/ia64/include/asm/sparsemem.h
index 336d0570e1fa..dd8c166ffd7b 100644
--- a/arch/ia64/include/asm/sparsemem.h
+++ b/arch/ia64/include/asm/sparsemem.h
@@ -18,4 +18,10 @@
 #endif
 
 #endif /* CONFIG_SPARSEMEM */
+
+#ifdef CONFIG_MEMORY_HOTPLUG
+int memory_add_physaddr_to_nid(u64 addr);
+#define memory_add_physaddr_to_nid memory_add_physaddr_to_nid
+#endif
+
 #endif /* _ASM_IA64_SPARSEMEM_H */
diff --git a/arch/powerpc/include/asm/mmzone.h b/arch/powerpc/include/asm/mmzone.h
index 91c69ff53a8a..6cda76b57c5d 100644
--- a/arch/powerpc/include/asm/mmzone.h
+++ b/arch/powerpc/include/asm/mmzone.h
@@ -46,5 +46,10 @@ u64 memory_hotplug_max(void);
 #define __HAVE_ARCH_RESERVED_KERNEL_PAGES
 #endif
 
+#ifdef CONFIG_MEMORY_HOTPLUG
+extern int create_section_mapping(unsigned long start, unsigned long end,
+				  int nid, pgprot_t prot);
+#endif
+
 #endif /* __KERNEL__ */
 #endif /* _ASM_MMZONE_H_ */
diff --git a/arch/powerpc/include/asm/sparsemem.h b/arch/powerpc/include/asm/sparsemem.h
index 1e6fa371cc38..d072866842e4 100644
--- a/arch/powerpc/include/asm/sparsemem.h
+++ b/arch/powerpc/include/asm/sparsemem.h
@@ -13,9 +13,9 @@
 #endif /* CONFIG_SPARSEMEM */
 
 #ifdef CONFIG_MEMORY_HOTPLUG
-extern int create_section_mapping(unsigned long start, unsigned long end,
-				  int nid, pgprot_t prot);
 extern int remove_section_mapping(unsigned long start, unsigned long end);
+extern int memory_add_physaddr_to_nid(u64 start);
+#define memory_add_physaddr_to_nid memory_add_physaddr_to_nid
 
 #ifdef CONFIG_NUMA
 extern int hot_add_scn_to_nid(unsigned long scn_addr);
@@ -26,6 +26,5 @@ static inline int hot_add_scn_to_nid(unsigned long scn_addr)
 }
 #endif /* CONFIG_NUMA */
 #endif /* CONFIG_MEMORY_HOTPLUG */
-
 #endif /* __KERNEL__ */
 #endif /* _ASM_POWERPC_SPARSEMEM_H */
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 01ec2a252f09..3fc325bebe4d 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -50,6 +50,7 @@
 #include <asm/rtas.h>
 #include <asm/kasan.h>
 #include <asm/svm.h>
+#include <asm/mmzone.h>
 
 #include <mm/mmu_decl.h>
 
diff --git a/arch/x86/include/asm/sparsemem.h b/arch/x86/include/asm/sparsemem.h
index 6bfc878f6771..6a9ccc1b2be5 100644
--- a/arch/x86/include/asm/sparsemem.h
+++ b/arch/x86/include/asm/sparsemem.h
@@ -28,4 +28,14 @@
 #endif
 
 #endif /* CONFIG_SPARSEMEM */
+
+#ifndef __ASSEMBLY__
+#ifdef CONFIG_NUMA_KEEP_MEMINFO
+extern int phys_to_target_node(phys_addr_t start);
+#define phys_to_target_node phys_to_target_node
+extern int memory_add_physaddr_to_nid(u64 start);
+#define memory_add_physaddr_to_nid memory_add_physaddr_to_nid
+#endif
+#endif /* __ASSEMBLY__ */
+
 #endif /* _ASM_X86_SPARSEMEM_H */
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index 44148691d78b..5eb4dc2b97da 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -938,6 +938,7 @@ int phys_to_target_node(phys_addr_t start)
 
 	return meminfo_to_nid(&numa_reserved_meminfo, start);
 }
+EXPORT_SYMBOL_GPL(phys_to_target_node);
 
 int memory_add_physaddr_to_nid(u64 start)
 {
@@ -947,4 +948,5 @@ int memory_add_physaddr_to_nid(u64 start)
 		nid = numa_meminfo.blk[0].nid;
 	return nid;
 }
+EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
 #endif
diff --git a/drivers/dax/Kconfig b/drivers/dax/Kconfig
index 567428e10b7b..d2834c2cfa10 100644
--- a/drivers/dax/Kconfig
+++ b/drivers/dax/Kconfig
@@ -50,7 +50,6 @@ config DEV_DAX_HMEM
 	  Say M if unsure.
 
 config DEV_DAX_HMEM_DEVICES
-	depends on NUMA_KEEP_MEMINFO # for phys_to_target_node()
 	depends on DEV_DAX_HMEM && DAX=y
 	def_bool y
 
diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index d65c6fdc5cfc..551093b74596 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -281,20 +281,6 @@ static inline bool movable_node_is_enabled(void)
 }
 #endif /* ! CONFIG_MEMORY_HOTPLUG */
 
-#ifdef CONFIG_NUMA
-extern int memory_add_physaddr_to_nid(u64 start);
-extern int phys_to_target_node(u64 start);
-#else
-static inline int memory_add_physaddr_to_nid(u64 start)
-{
-	return 0;
-}
-static inline int phys_to_target_node(u64 start)
-{
-	return 0;
-}
-#endif
-
 #if defined(CONFIG_MEMORY_HOTPLUG) || defined(CONFIG_DEFERRED_STRUCT_PAGE_INIT)
 /*
  * pgdat resizing functions
diff --git a/include/linux/numa.h b/include/linux/numa.h
index 8cb33ccfb671..cb44cfe2b725 100644
--- a/include/linux/numa.h
+++ b/include/linux/numa.h
@@ -21,13 +21,41 @@
 #endif
 
 #ifdef CONFIG_NUMA
+#include <linux/printk.h>
+#include <asm/sparsemem.h>
+
 /* Generic implementation available */
 int numa_map_to_online_node(int node);
-#else
+
+#ifndef memory_add_physaddr_to_nid
+static inline int memory_add_physaddr_to_nid(u64 start)
+{
+	pr_info_once("Unknown online node for memory at 0x%llx, assuming node 0\n",
+			start);
+	return 0;
+}
+#endif
+#ifndef phys_to_target_node
+static inline int phys_to_target_node(u64 start)
+{
+	pr_info_once("Unknown target node for memory at 0x%llx, assuming node 0\n",
+			start);
+	return 0;
+}
+#endif
+#else /* !CONFIG_NUMA */
 static inline int numa_map_to_online_node(int node)
 {
 	return NUMA_NO_NODE;
 }
+static inline int memory_add_physaddr_to_nid(u64 start)
+{
+	return 0;
+}
+static inline int phys_to_target_node(u64 start)
+{
+	return 0;
+}
 #endif
 
 #endif /* _LINUX_NUMA_H */
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index b44d4c7ba73b..63b2e46b6555 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -350,24 +350,6 @@ int __ref __add_pages(int nid, unsigned long pfn, unsigned long nr_pages,
 	return err;
 }
 
-#ifdef CONFIG_NUMA
-int __weak memory_add_physaddr_to_nid(u64 start)
-{
-	pr_info_once("Unknown online node for memory at 0x%llx, assuming node 0\n",
-			start);
-	return 0;
-}
-EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
-
-int __weak phys_to_target_node(u64 start)
-{
-	pr_info_once("Unknown target node for memory at 0x%llx, assuming node 0\n",
-			start);
-	return 0;
-}
-EXPORT_SYMBOL_GPL(phys_to_target_node);
-#endif
-
 /* find the smallest valid pfn in the range [start_pfn, end_pfn) */
 static unsigned long find_smallest_section_pfn(int nid, struct zone *zone,
 				     unsigned long start_pfn,
-- 
cgit v1.2.3-70-g09d2


From b6b79dd53082db11070b4368d85dd6699ff0b063 Mon Sep 17 00:00:00 2001
From: Stephen Rothwell <sfr@canb.auug.org.au>
Date: Mon, 23 Nov 2020 18:40:16 +1100
Subject: powerpc/64s: Fix allnoconfig build since uaccess flush

Using DECLARE_STATIC_KEY_FALSE needs linux/jump_table.h.

Otherwise the build fails with eg:

  arch/powerpc/include/asm/book3s/64/kup-radix.h:66:1: warning: data definition has no type or storage class
     66 | DECLARE_STATIC_KEY_FALSE(uaccess_flush_key);

Fixes: 9a32a7e78bd0 ("powerpc/64s: flush L1D after user accesses")
Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
[mpe: Massage change log]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20201123184016.693fe464@canb.auug.org.au
---
 arch/powerpc/include/asm/book3s/64/kup-radix.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'arch')

diff --git a/arch/powerpc/include/asm/book3s/64/kup-radix.h b/arch/powerpc/include/asm/book3s/64/kup-radix.h
index 28716e2f13e3..a39e2d193fdc 100644
--- a/arch/powerpc/include/asm/book3s/64/kup-radix.h
+++ b/arch/powerpc/include/asm/book3s/64/kup-radix.h
@@ -63,6 +63,8 @@
 
 #else /* !__ASSEMBLY__ */
 
+#include <linux/jump_label.h>
+
 DECLARE_STATIC_KEY_FALSE(uaccess_flush_key);
 
 #ifdef CONFIG_PPC_KUAP
-- 
cgit v1.2.3-70-g09d2


From 1179f170b6f0af7bb0b3b7628136eaac450ddf31 Mon Sep 17 00:00:00 2001
From: Sven Schnelle <svens@linux.ibm.com>
Date: Fri, 20 Nov 2020 14:17:52 +0100
Subject: s390: fix fpu restore in entry.S

We need to disable interrupts in load_fpu_regs(). Otherwise an
interrupt might come in after the registers are loaded, but before
CIF_FPU is cleared in load_fpu_regs(). When the interrupt returns,
CIF_FPU will be cleared and the registers will never be restored.

The entry.S code usually saves the interrupt state in __SF_EMPTY on the
stack when disabling/restoring interrupts. sie64a however saves the pointer
to the sie control block in __SF_SIE_CONTROL, which references the same
location.  This is non-obvious to the reader. To avoid thrashing the sie
control block pointer in load_fpu_regs(), move the __SIE_* offsets eight
bytes after __SF_EMPTY on the stack.

Cc: <stable@vger.kernel.org> # 5.8
Fixes: 0b0ed657fe00 ("s390: remove critical section cleanup from entry.S")
Reported-by: Pierre Morel <pmorel@linux.ibm.com>
Signed-off-by: Sven Schnelle <svens@linux.ibm.com>
Acked-by: Christian Borntraeger <borntraeger@de.ibm.com>
Reviewed-by: Heiko Carstens <hca@linux.ibm.com>
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
---
 arch/s390/kernel/asm-offsets.c | 10 +++++-----
 arch/s390/kernel/entry.S       |  2 ++
 2 files changed, 7 insertions(+), 5 deletions(-)

(limited to 'arch')

diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c
index 2012c1cf0853..483051e10db3 100644
--- a/arch/s390/kernel/asm-offsets.c
+++ b/arch/s390/kernel/asm-offsets.c
@@ -53,11 +53,11 @@ int main(void)
 	/* stack_frame offsets */
 	OFFSET(__SF_BACKCHAIN, stack_frame, back_chain);
 	OFFSET(__SF_GPRS, stack_frame, gprs);
-	OFFSET(__SF_EMPTY, stack_frame, empty1);
-	OFFSET(__SF_SIE_CONTROL, stack_frame, empty1[0]);
-	OFFSET(__SF_SIE_SAVEAREA, stack_frame, empty1[1]);
-	OFFSET(__SF_SIE_REASON, stack_frame, empty1[2]);
-	OFFSET(__SF_SIE_FLAGS, stack_frame, empty1[3]);
+	OFFSET(__SF_EMPTY, stack_frame, empty1[0]);
+	OFFSET(__SF_SIE_CONTROL, stack_frame, empty1[1]);
+	OFFSET(__SF_SIE_SAVEAREA, stack_frame, empty1[2]);
+	OFFSET(__SF_SIE_REASON, stack_frame, empty1[3]);
+	OFFSET(__SF_SIE_FLAGS, stack_frame, empty1[4]);
 	BLANK();
 	OFFSET(__VDSO_GETCPU_VAL, vdso_per_cpu_data, getcpu_val);
 	BLANK();
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index 5346545b9860..26bb0603c5a1 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -1068,6 +1068,7 @@ EXPORT_SYMBOL(save_fpu_regs)
  *	%r4
  */
 load_fpu_regs:
+	stnsm	__SF_EMPTY(%r15),0xfc
 	lg	%r4,__LC_CURRENT
 	aghi	%r4,__TASK_thread
 	TSTMSK	__LC_CPU_FLAGS,_CIF_FPU
@@ -1099,6 +1100,7 @@ load_fpu_regs:
 .Lload_fpu_regs_done:
 	ni	__LC_CPU_FLAGS+7,255-_CIF_FPU
 .Lload_fpu_regs_exit:
+	ssm	__SF_EMPTY(%r15)
 	BR_EX	%r14
 .Lload_fpu_regs_end:
 ENDPROC(load_fpu_regs)
-- 
cgit v1.2.3-70-g09d2


From 03659efe4287230b1d65b31c993708f335c8de82 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Sun, 22 Nov 2020 20:45:10 -0800
Subject: arm64/fpsimd: add <asm/insn.h> to <asm/kprobes.h> to fix fpsimd build

Adding <asm/exception.h> brought in <asm/kprobes.h> which uses
<asm/probes.h>, which uses 'pstate_check_t' so the latter needs to
#include <asm/insn.h> for this typedef.

Fixes this build error:

  In file included from arch/arm64/include/asm/kprobes.h:24,
                    from arch/arm64/include/asm/exception.h:11,
                    from arch/arm64/kernel/fpsimd.c:35:
  arch/arm64/include/asm/probes.h:16:2: error: unknown type name 'pstate_check_t'
      16 |  pstate_check_t *pstate_cc;

Fixes: c6b90d5cf637 ("arm64/fpsimd: Fix missing-prototypes in fpsimd.c")
Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Cc: Will Deacon <will@kernel.org>
Cc: Tian Tao <tiantao6@hisilicon.com>
Link: https://lore.kernel.org/r/20201123044510.9942-1-rdunlap@infradead.org
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/probes.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'arch')

diff --git a/arch/arm64/include/asm/probes.h b/arch/arm64/include/asm/probes.h
index 4266262101fe..006946745352 100644
--- a/arch/arm64/include/asm/probes.h
+++ b/arch/arm64/include/asm/probes.h
@@ -7,6 +7,8 @@
 #ifndef _ARM_PROBES_H
 #define _ARM_PROBES_H
 
+#include <asm/insn.h>
+
 typedef u32 probe_opcode_t;
 typedef void (probes_handler_t) (u32 opcode, long addr, struct pt_regs *);
 
-- 
cgit v1.2.3-70-g09d2


From 07509e10dcc77627f8b6a57381e878fe269958d3 Mon Sep 17 00:00:00 2001
From: Will Deacon <will@kernel.org>
Date: Fri, 20 Nov 2020 13:28:01 +0000
Subject: arm64: pgtable: Fix pte_accessible()

pte_accessible() is used by ptep_clear_flush() to figure out whether TLB
invalidation is necessary when unmapping pages for reclaim. Although our
implementation is correct according to the architecture, returning true
only for valid, young ptes in the absence of racing page-table
modifications, this is in fact flawed due to lazy invalidation of old
ptes in ptep_clear_flush_young() where we elide the expensive DSB
instruction for completing the TLB invalidation.

Rather than penalise the aging path, adjust pte_accessible() to return
true for any valid pte, even if the access flag is cleared.

Cc: <stable@vger.kernel.org>
Fixes: 76c714be0e5e ("arm64: pgtable: implement pte_accessible()")
Reported-by: Yu Zhao <yuzhao@google.com>
Acked-by: Yu Zhao <yuzhao@google.com>
Reviewed-by: Minchan Kim <minchan@kernel.org>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Link: https://lore.kernel.org/r/20201120143557.6715-2-will@kernel.org
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/pgtable.h | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'arch')

diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 4ff12a7adcfd..326c34677e86 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -115,8 +115,6 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)];
 #define pte_valid(pte)		(!!(pte_val(pte) & PTE_VALID))
 #define pte_valid_not_user(pte) \
 	((pte_val(pte) & (PTE_VALID | PTE_USER)) == PTE_VALID)
-#define pte_valid_young(pte) \
-	((pte_val(pte) & (PTE_VALID | PTE_AF)) == (PTE_VALID | PTE_AF))
 #define pte_valid_user(pte) \
 	((pte_val(pte) & (PTE_VALID | PTE_USER)) == (PTE_VALID | PTE_USER))
 
@@ -124,9 +122,12 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)];
  * Could the pte be present in the TLB? We must check mm_tlb_flush_pending
  * so that we don't erroneously return false for pages that have been
  * remapped as PROT_NONE but are yet to be flushed from the TLB.
+ * Note that we can't make any assumptions based on the state of the access
+ * flag, since ptep_clear_flush_young() elides a DSB when invalidating the
+ * TLB.
  */
 #define pte_accessible(mm, pte)	\
-	(mm_tlb_flush_pending(mm) ? pte_present(pte) : pte_valid_young(pte))
+	(mm_tlb_flush_pending(mm) ? pte_present(pte) : pte_valid(pte))
 
 /*
  * p??_access_permitted() is true for valid user mappings (subject to the
-- 
cgit v1.2.3-70-g09d2


From ff1712f953e27f0b0718762ec17d0adb15c9fd0b Mon Sep 17 00:00:00 2001
From: Will Deacon <will@kernel.org>
Date: Fri, 20 Nov 2020 13:57:48 +0000
Subject: arm64: pgtable: Ensure dirty bit is preserved across pte_wrprotect()

With hardware dirty bit management, calling pte_wrprotect() on a writable,
dirty PTE will lose the dirty state and return a read-only, clean entry.

Move the logic from ptep_set_wrprotect() into pte_wrprotect() to ensure that
the dirty bit is preserved for writable entries, as this is required for
soft-dirty bit management if we enable it in the future.

Cc: <stable@vger.kernel.org>
Fixes: 2f4b829c625e ("arm64: Add support for hardware updates of the access and dirty pte bits")
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Link: https://lore.kernel.org/r/20201120143557.6715-3-will@kernel.org
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/pgtable.h | 27 ++++++++++++++-------------
 1 file changed, 14 insertions(+), 13 deletions(-)

(limited to 'arch')

diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 326c34677e86..5628289b9d5e 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -165,13 +165,6 @@ static inline pmd_t set_pmd_bit(pmd_t pmd, pgprot_t prot)
 	return pmd;
 }
 
-static inline pte_t pte_wrprotect(pte_t pte)
-{
-	pte = clear_pte_bit(pte, __pgprot(PTE_WRITE));
-	pte = set_pte_bit(pte, __pgprot(PTE_RDONLY));
-	return pte;
-}
-
 static inline pte_t pte_mkwrite(pte_t pte)
 {
 	pte = set_pte_bit(pte, __pgprot(PTE_WRITE));
@@ -197,6 +190,20 @@ static inline pte_t pte_mkdirty(pte_t pte)
 	return pte;
 }
 
+static inline pte_t pte_wrprotect(pte_t pte)
+{
+	/*
+	 * If hardware-dirty (PTE_WRITE/DBM bit set and PTE_RDONLY
+	 * clear), set the PTE_DIRTY bit.
+	 */
+	if (pte_hw_dirty(pte))
+		pte = pte_mkdirty(pte);
+
+	pte = clear_pte_bit(pte, __pgprot(PTE_WRITE));
+	pte = set_pte_bit(pte, __pgprot(PTE_RDONLY));
+	return pte;
+}
+
 static inline pte_t pte_mkold(pte_t pte)
 {
 	return clear_pte_bit(pte, __pgprot(PTE_AF));
@@ -846,12 +853,6 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addres
 	pte = READ_ONCE(*ptep);
 	do {
 		old_pte = pte;
-		/*
-		 * If hardware-dirty (PTE_WRITE/DBM bit set and PTE_RDONLY
-		 * clear), set the PTE_DIRTY bit.
-		 */
-		if (pte_hw_dirty(pte))
-			pte = pte_mkdirty(pte);
 		pte = pte_wrprotect(pte);
 		pte_val(pte) = cmpxchg_relaxed(&pte_val(*ptep),
 					       pte_val(old_pte), pte_val(pte));
-- 
cgit v1.2.3-70-g09d2


From fd8d9db3559a29fd737bcdb7c4fcbe1940caae34 Mon Sep 17 00:00:00 2001
From: Xiaochen Shen <xiaochen.shen@intel.com>
Date: Sat, 31 Oct 2020 03:10:53 +0800
Subject: x86/resctrl: Remove superfluous kernfs_get() calls to prevent
 refcount leak

Willem reported growing of kernfs_node_cache entries in slabtop when
repeatedly creating and removing resctrl subdirectories as well as when
repeatedly mounting and unmounting the resctrl filesystem.

On resource group (control as well as monitoring) creation via a mkdir
an extra kernfs_node reference is obtained to ensure that the rdtgroup
structure remains accessible for the rdtgroup_kn_unlock() calls where it
is removed on deletion. The kernfs_node reference count is dropped by
kernfs_put() in rdtgroup_kn_unlock().

With the above explaining the need for one kernfs_get()/kernfs_put()
pair in resctrl there are more places where a kernfs_node reference is
obtained without a corresponding release. The excessive amount of
reference count on kernfs nodes will never be dropped to 0 and the
kernfs nodes will never be freed in the call paths of rmdir and umount.
It leads to reference count leak and kernfs_node_cache memory leak.

Remove the superfluous kernfs_get() calls and expand the existing
comments surrounding the remaining kernfs_get()/kernfs_put() pair that
remains in use.

Superfluous kernfs_get() calls are removed from two areas:

  (1) In call paths of mount and mkdir, when kernfs nodes for "info",
  "mon_groups" and "mon_data" directories and sub-directories are
  created, the reference count of newly created kernfs node is set to 1.
  But after kernfs_create_dir() returns, superfluous kernfs_get() are
  called to take an additional reference.

  (2) kernfs_get() calls in rmdir call paths.

Fixes: 17eafd076291 ("x86/intel_rdt: Split resource group removal in two")
Fixes: 4af4a88e0c92 ("x86/intel_rdt/cqm: Add mount,umount support")
Fixes: f3cbeacaa06e ("x86/intel_rdt/cqm: Add rmdir support")
Fixes: d89b7379015f ("x86/intel_rdt/cqm: Add mon_data")
Fixes: c7d9aac61311 ("x86/intel_rdt/cqm: Add mkdir support for RDT monitoring")
Fixes: 5dc1d5c6bac2 ("x86/intel_rdt: Simplify info and base file lists")
Fixes: 60cf5e101fd4 ("x86/intel_rdt: Add mkdir to resctrl file system")
Fixes: 4e978d06dedb ("x86/intel_rdt: Add "info" files to resctrl file system")
Reported-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: Xiaochen Shen <xiaochen.shen@intel.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Reinette Chatre <reinette.chatre@intel.com>
Tested-by: Willem de Bruijn <willemb@google.com>
Cc: stable@vger.kernel.org
Link: https://lkml.kernel.org/r/1604085053-31639-1-git-send-email-xiaochen.shen@intel.com
---
 arch/x86/kernel/cpu/resctrl/rdtgroup.c | 35 ++--------------------------------
 1 file changed, 2 insertions(+), 33 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
index af323e2e3100..2ab1266a5f14 100644
--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
+++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
@@ -1769,7 +1769,6 @@ static int rdtgroup_mkdir_info_resdir(struct rdt_resource *r, char *name,
 	if (IS_ERR(kn_subdir))
 		return PTR_ERR(kn_subdir);
 
-	kernfs_get(kn_subdir);
 	ret = rdtgroup_kn_set_ugid(kn_subdir);
 	if (ret)
 		return ret;
@@ -1792,7 +1791,6 @@ static int rdtgroup_create_info_dir(struct kernfs_node *parent_kn)
 	kn_info = kernfs_create_dir(parent_kn, "info", parent_kn->mode, NULL);
 	if (IS_ERR(kn_info))
 		return PTR_ERR(kn_info);
-	kernfs_get(kn_info);
 
 	ret = rdtgroup_add_files(kn_info, RF_TOP_INFO);
 	if (ret)
@@ -1813,12 +1811,6 @@ static int rdtgroup_create_info_dir(struct kernfs_node *parent_kn)
 			goto out_destroy;
 	}
 
-	/*
-	 * This extra ref will be put in kernfs_remove() and guarantees
-	 * that @rdtgrp->kn is always accessible.
-	 */
-	kernfs_get(kn_info);
-
 	ret = rdtgroup_kn_set_ugid(kn_info);
 	if (ret)
 		goto out_destroy;
@@ -1847,12 +1839,6 @@ mongroup_create_dir(struct kernfs_node *parent_kn, struct rdtgroup *prgrp,
 	if (dest_kn)
 		*dest_kn = kn;
 
-	/*
-	 * This extra ref will be put in kernfs_remove() and guarantees
-	 * that @rdtgrp->kn is always accessible.
-	 */
-	kernfs_get(kn);
-
 	ret = rdtgroup_kn_set_ugid(kn);
 	if (ret)
 		goto out_destroy;
@@ -2139,13 +2125,11 @@ static int rdt_get_tree(struct fs_context *fc)
 					  &kn_mongrp);
 		if (ret < 0)
 			goto out_info;
-		kernfs_get(kn_mongrp);
 
 		ret = mkdir_mondata_all(rdtgroup_default.kn,
 					&rdtgroup_default, &kn_mondata);
 		if (ret < 0)
 			goto out_mongrp;
-		kernfs_get(kn_mondata);
 		rdtgroup_default.mon.mon_data_kn = kn_mondata;
 	}
 
@@ -2499,11 +2483,6 @@ static int mkdir_mondata_subdir(struct kernfs_node *parent_kn,
 	if (IS_ERR(kn))
 		return PTR_ERR(kn);
 
-	/*
-	 * This extra ref will be put in kernfs_remove() and guarantees
-	 * that kn is always accessible.
-	 */
-	kernfs_get(kn);
 	ret = rdtgroup_kn_set_ugid(kn);
 	if (ret)
 		goto out_destroy;
@@ -2838,8 +2817,8 @@ static int mkdir_rdt_prepare(struct kernfs_node *parent_kn,
 	/*
 	 * kernfs_remove() will drop the reference count on "kn" which
 	 * will free it. But we still need it to stick around for the
-	 * rdtgroup_kn_unlock(kn} call below. Take one extra reference
-	 * here, which will be dropped inside rdtgroup_kn_unlock().
+	 * rdtgroup_kn_unlock(kn) call. Take one extra reference here,
+	 * which will be dropped inside rdtgroup_kn_unlock().
 	 */
 	kernfs_get(kn);
 
@@ -3049,11 +3028,6 @@ static int rdtgroup_rmdir_mon(struct kernfs_node *kn, struct rdtgroup *rdtgrp,
 	WARN_ON(list_empty(&prdtgrp->mon.crdtgrp_list));
 	list_del(&rdtgrp->mon.crdtgrp_list);
 
-	/*
-	 * one extra hold on this, will drop when we kfree(rdtgrp)
-	 * in rdtgroup_kn_unlock()
-	 */
-	kernfs_get(kn);
 	kernfs_remove(rdtgrp->kn);
 
 	return 0;
@@ -3065,11 +3039,6 @@ static int rdtgroup_ctrl_remove(struct kernfs_node *kn,
 	rdtgrp->flags = RDT_DELETED;
 	list_del(&rdtgrp->rdtgroup_list);
 
-	/*
-	 * one extra hold on this, will drop when we kfree(rdtgrp)
-	 * in rdtgroup_kn_unlock()
-	 */
-	kernfs_get(kn);
 	kernfs_remove(rdtgrp->kn);
 	return 0;
 }
-- 
cgit v1.2.3-70-g09d2


From 758999246965eeb8b253d47e72f7bfe508804b16 Mon Sep 17 00:00:00 2001
From: Xiaochen Shen <xiaochen.shen@intel.com>
Date: Sat, 31 Oct 2020 03:11:28 +0800
Subject: x86/resctrl: Add necessary kernfs_put() calls to prevent refcount
 leak

On resource group creation via a mkdir an extra kernfs_node reference is
obtained by kernfs_get() to ensure that the rdtgroup structure remains
accessible for the rdtgroup_kn_unlock() calls where it is removed on
deletion. Currently the extra kernfs_node reference count is only
dropped by kernfs_put() in rdtgroup_kn_unlock() while the rdtgroup
structure is removed in a few other locations that lack the matching
reference drop.

In call paths of rmdir and umount, when a control group is removed,
kernfs_remove() is called to remove the whole kernfs nodes tree of the
control group (including the kernfs nodes trees of all child monitoring
groups), and then rdtgroup structure is freed by kfree(). The rdtgroup
structures of all child monitoring groups under the control group are
freed by kfree() in free_all_child_rdtgrp().

Before calling kfree() to free the rdtgroup structures, the kernfs node
of the control group itself as well as the kernfs nodes of all child
monitoring groups still take the extra references which will never be
dropped to 0 and the kernfs nodes will never be freed. It leads to
reference count leak and kernfs_node_cache memory leak.

For example, reference count leak is observed in these two cases:
  (1) mount -t resctrl resctrl /sys/fs/resctrl
      mkdir /sys/fs/resctrl/c1
      mkdir /sys/fs/resctrl/c1/mon_groups/m1
      umount /sys/fs/resctrl

  (2) mkdir /sys/fs/resctrl/c1
      mkdir /sys/fs/resctrl/c1/mon_groups/m1
      rmdir /sys/fs/resctrl/c1

The same reference count leak issue also exists in the error exit paths
of mkdir in mkdir_rdt_prepare() and rdtgroup_mkdir_ctrl_mon().

Fix this issue by following changes to make sure the extra kernfs_node
reference on rdtgroup is dropped before freeing the rdtgroup structure.
  (1) Introduce rdtgroup removal helper rdtgroup_remove() to wrap up
  kernfs_put() and kfree().

  (2) Call rdtgroup_remove() in rdtgroup removal path where the rdtgroup
  structure is about to be freed by kfree().

  (3) Call rdtgroup_remove() or kernfs_put() as appropriate in the error
  exit paths of mkdir where an extra reference is taken by kernfs_get().

Fixes: f3cbeacaa06e ("x86/intel_rdt/cqm: Add rmdir support")
Fixes: e02737d5b826 ("x86/intel_rdt: Add tasks files")
Fixes: 60cf5e101fd4 ("x86/intel_rdt: Add mkdir to resctrl file system")
Reported-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: Xiaochen Shen <xiaochen.shen@intel.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Reinette Chatre <reinette.chatre@intel.com>
Cc: stable@vger.kernel.org
Link: https://lkml.kernel.org/r/1604085088-31707-1-git-send-email-xiaochen.shen@intel.com
---
 arch/x86/kernel/cpu/resctrl/rdtgroup.c | 32 +++++++++++++++++++++++++-------
 1 file changed, 25 insertions(+), 7 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
index 2ab1266a5f14..6f4ca4bea625 100644
--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
+++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
@@ -507,6 +507,24 @@ unlock:
 	return ret ?: nbytes;
 }
 
+/**
+ * rdtgroup_remove - the helper to remove resource group safely
+ * @rdtgrp: resource group to remove
+ *
+ * On resource group creation via a mkdir, an extra kernfs_node reference is
+ * taken to ensure that the rdtgroup structure remains accessible for the
+ * rdtgroup_kn_unlock() calls where it is removed.
+ *
+ * Drop the extra reference here, then free the rdtgroup structure.
+ *
+ * Return: void
+ */
+static void rdtgroup_remove(struct rdtgroup *rdtgrp)
+{
+	kernfs_put(rdtgrp->kn);
+	kfree(rdtgrp);
+}
+
 struct task_move_callback {
 	struct callback_head	work;
 	struct rdtgroup		*rdtgrp;
@@ -529,7 +547,7 @@ static void move_myself(struct callback_head *head)
 	    (rdtgrp->flags & RDT_DELETED)) {
 		current->closid = 0;
 		current->rmid = 0;
-		kfree(rdtgrp);
+		rdtgroup_remove(rdtgrp);
 	}
 
 	if (unlikely(current->flags & PF_EXITING))
@@ -2065,8 +2083,7 @@ void rdtgroup_kn_unlock(struct kernfs_node *kn)
 		    rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED)
 			rdtgroup_pseudo_lock_remove(rdtgrp);
 		kernfs_unbreak_active_protection(kn);
-		kernfs_put(rdtgrp->kn);
-		kfree(rdtgrp);
+		rdtgroup_remove(rdtgrp);
 	} else {
 		kernfs_unbreak_active_protection(kn);
 	}
@@ -2341,7 +2358,7 @@ static void free_all_child_rdtgrp(struct rdtgroup *rdtgrp)
 		if (atomic_read(&sentry->waitcount) != 0)
 			sentry->flags = RDT_DELETED;
 		else
-			kfree(sentry);
+			rdtgroup_remove(sentry);
 	}
 }
 
@@ -2383,7 +2400,7 @@ static void rmdir_all_sub(void)
 		if (atomic_read(&rdtgrp->waitcount) != 0)
 			rdtgrp->flags = RDT_DELETED;
 		else
-			kfree(rdtgrp);
+			rdtgroup_remove(rdtgrp);
 	}
 	/* Notify online CPUs to update per cpu storage and PQR_ASSOC MSR */
 	update_closid_rmid(cpu_online_mask, &rdtgroup_default);
@@ -2818,7 +2835,7 @@ static int mkdir_rdt_prepare(struct kernfs_node *parent_kn,
 	 * kernfs_remove() will drop the reference count on "kn" which
 	 * will free it. But we still need it to stick around for the
 	 * rdtgroup_kn_unlock(kn) call. Take one extra reference here,
-	 * which will be dropped inside rdtgroup_kn_unlock().
+	 * which will be dropped by kernfs_put() in rdtgroup_remove().
 	 */
 	kernfs_get(kn);
 
@@ -2859,6 +2876,7 @@ static int mkdir_rdt_prepare(struct kernfs_node *parent_kn,
 out_idfree:
 	free_rmid(rdtgrp->mon.rmid);
 out_destroy:
+	kernfs_put(rdtgrp->kn);
 	kernfs_remove(rdtgrp->kn);
 out_free_rgrp:
 	kfree(rdtgrp);
@@ -2871,7 +2889,7 @@ static void mkdir_rdt_prepare_clean(struct rdtgroup *rgrp)
 {
 	kernfs_remove(rgrp->kn);
 	free_rmid(rgrp->mon.rmid);
-	kfree(rgrp);
+	rdtgroup_remove(rgrp);
 }
 
 /*
-- 
cgit v1.2.3-70-g09d2


From a7361b9c4615951f52ffd2b1afa09a1384c7b4e4 Mon Sep 17 00:00:00 2001
From: Adam Sampson <ats@offog.org>
Date: Mon, 23 Nov 2020 17:47:39 +0000
Subject: ARM: dts: sun7i: pcduino3-nano: enable RGMII RX/TX delay on PHY

The RX/TX delays for the Ethernet PHY on the Linksprite pcDuino 3 Nano
are configured in hardware, using resistors that are populated to pull
the RTL8211E's RXDLY/TXDLY pins low or high as needed.

phy-mode should be set to rgmii-id to reflect this. Previously it was
set to rgmii, which used to work but now results in the delays being
disabled again as a result of the bugfix in commit bbc4d71d6354 ("net:
phy: realtek: fix rtl8211e rx/tx delay config").

Tested on two pcDuino 3 Nano boards purchased in 2015. Without this fix,
Ethernet works unreliably on one board and doesn't work at all on the
other.

Fixes: 061035d456c9 ("ARM: dts: sun7i: Add dts file for pcDuino 3 Nano board")
Signed-off-by: Adam Sampson <ats@offog.org>
Signed-off-by: Maxime Ripard <maxime@cerno.tech>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Acked-by: Chen-Yu Tsai <wens@csie.org>
Link: https://lore.kernel.org/r/20201123174739.6809-1-ats@offog.org
---
 arch/arm/boot/dts/sun7i-a20-pcduino3-nano.dts | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/arm/boot/dts/sun7i-a20-pcduino3-nano.dts b/arch/arm/boot/dts/sun7i-a20-pcduino3-nano.dts
index fce2f7fcd084..bf38c66c1815 100644
--- a/arch/arm/boot/dts/sun7i-a20-pcduino3-nano.dts
+++ b/arch/arm/boot/dts/sun7i-a20-pcduino3-nano.dts
@@ -1,5 +1,5 @@
 /*
- * Copyright 2015 Adam Sampson <ats@offog.org>
+ * Copyright 2015-2020 Adam Sampson <ats@offog.org>
  *
  * This file is dual-licensed: you can use it either under the terms
  * of the GPL or the X11 license, at your option. Note that this dual
@@ -115,7 +115,7 @@
 	pinctrl-names = "default";
 	pinctrl-0 = <&gmac_rgmii_pins>;
 	phy-handle = <&phy1>;
-	phy-mode = "rgmii";
+	phy-mode = "rgmii-id";
 	status = "okay";
 };
 
-- 
cgit v1.2.3-70-g09d2


From 58c644ba512cfbc2e39b758dd979edd1d6d00e27 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Fri, 20 Nov 2020 11:50:35 +0100
Subject: sched/idle: Fix arch_cpu_idle() vs tracing

We call arch_cpu_idle() with RCU disabled, but then use
local_irq_{en,dis}able(), which invokes tracing, which relies on RCU.

Switch all arch_cpu_idle() implementations to use
raw_local_irq_{en,dis}able() and carefully manage the
lockdep,rcu,tracing state like we do in entry.

(XXX: we really should change arch_cpu_idle() to not return with
interrupts enabled)

Reported-by: Sven Schnelle <svens@linux.ibm.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Mark Rutland <mark.rutland@arm.com>
Tested-by: Mark Rutland <mark.rutland@arm.com>
Link: https://lkml.kernel.org/r/20201120114925.594122626@infradead.org
---
 arch/alpha/kernel/process.c      |  2 +-
 arch/arm/kernel/process.c        |  2 +-
 arch/arm64/kernel/process.c      |  2 +-
 arch/csky/kernel/process.c       |  2 +-
 arch/h8300/kernel/process.c      |  2 +-
 arch/hexagon/kernel/process.c    |  2 +-
 arch/ia64/kernel/process.c       |  2 +-
 arch/microblaze/kernel/process.c |  2 +-
 arch/mips/kernel/idle.c          | 12 ++++++------
 arch/nios2/kernel/process.c      |  2 +-
 arch/openrisc/kernel/process.c   |  2 +-
 arch/parisc/kernel/process.c     |  2 +-
 arch/powerpc/kernel/idle.c       |  4 ++--
 arch/riscv/kernel/process.c      |  2 +-
 arch/s390/kernel/idle.c          |  6 +++---
 arch/sh/kernel/idle.c            |  2 +-
 arch/sparc/kernel/leon_pmc.c     |  4 ++--
 arch/sparc/kernel/process_32.c   |  2 +-
 arch/sparc/kernel/process_64.c   |  4 ++--
 arch/um/kernel/process.c         |  2 +-
 arch/x86/include/asm/mwait.h     |  2 --
 arch/x86/kernel/process.c        | 12 +++++++-----
 kernel/sched/idle.c              | 28 +++++++++++++++++++++++++++-
 23 files changed, 64 insertions(+), 38 deletions(-)

(limited to 'arch')

diff --git a/arch/alpha/kernel/process.c b/arch/alpha/kernel/process.c
index 7462a7911002..4c7b0414a3ff 100644
--- a/arch/alpha/kernel/process.c
+++ b/arch/alpha/kernel/process.c
@@ -57,7 +57,7 @@ EXPORT_SYMBOL(pm_power_off);
 void arch_cpu_idle(void)
 {
 	wtint(0);
-	local_irq_enable();
+	raw_local_irq_enable();
 }
 
 void arch_cpu_idle_dead(void)
diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c
index 8e6ace03e960..9f199b1e8383 100644
--- a/arch/arm/kernel/process.c
+++ b/arch/arm/kernel/process.c
@@ -71,7 +71,7 @@ void arch_cpu_idle(void)
 		arm_pm_idle();
 	else
 		cpu_do_idle();
-	local_irq_enable();
+	raw_local_irq_enable();
 }
 
 void arch_cpu_idle_prepare(void)
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index 4784011cecac..9ebe02574127 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -126,7 +126,7 @@ void arch_cpu_idle(void)
 	 * tricks
 	 */
 	cpu_do_idle();
-	local_irq_enable();
+	raw_local_irq_enable();
 }
 
 #ifdef CONFIG_HOTPLUG_CPU
diff --git a/arch/csky/kernel/process.c b/arch/csky/kernel/process.c
index f730869e21ee..69af6bc87e64 100644
--- a/arch/csky/kernel/process.c
+++ b/arch/csky/kernel/process.c
@@ -102,6 +102,6 @@ void arch_cpu_idle(void)
 #ifdef CONFIG_CPU_PM_STOP
 	asm volatile("stop\n");
 #endif
-	local_irq_enable();
+	raw_local_irq_enable();
 }
 #endif
diff --git a/arch/h8300/kernel/process.c b/arch/h8300/kernel/process.c
index aea0a40b77a9..bc1364db58fe 100644
--- a/arch/h8300/kernel/process.c
+++ b/arch/h8300/kernel/process.c
@@ -57,7 +57,7 @@ asmlinkage void ret_from_kernel_thread(void);
  */
 void arch_cpu_idle(void)
 {
-	local_irq_enable();
+	raw_local_irq_enable();
 	__asm__("sleep");
 }
 
diff --git a/arch/hexagon/kernel/process.c b/arch/hexagon/kernel/process.c
index 5a0a95d93ddb..67767c5ed98c 100644
--- a/arch/hexagon/kernel/process.c
+++ b/arch/hexagon/kernel/process.c
@@ -44,7 +44,7 @@ void arch_cpu_idle(void)
 {
 	__vmwait();
 	/*  interrupts wake us up, but irqs are still disabled */
-	local_irq_enable();
+	raw_local_irq_enable();
 }
 
 /*
diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c
index 6b61a703bcf5..c9ff8796b509 100644
--- a/arch/ia64/kernel/process.c
+++ b/arch/ia64/kernel/process.c
@@ -239,7 +239,7 @@ void arch_cpu_idle(void)
 	if (mark_idle)
 		(*mark_idle)(1);
 
-	safe_halt();
+	raw_safe_halt();
 
 	if (mark_idle)
 		(*mark_idle)(0);
diff --git a/arch/microblaze/kernel/process.c b/arch/microblaze/kernel/process.c
index a9e46e525cd0..f99860771ff4 100644
--- a/arch/microblaze/kernel/process.c
+++ b/arch/microblaze/kernel/process.c
@@ -149,5 +149,5 @@ int dump_fpu(struct pt_regs *regs, elf_fpregset_t *fpregs)
 
 void arch_cpu_idle(void)
 {
-       local_irq_enable();
+       raw_local_irq_enable();
 }
diff --git a/arch/mips/kernel/idle.c b/arch/mips/kernel/idle.c
index 5bc3b04693c7..18e69ebf5691 100644
--- a/arch/mips/kernel/idle.c
+++ b/arch/mips/kernel/idle.c
@@ -33,19 +33,19 @@ static void __cpuidle r3081_wait(void)
 {
 	unsigned long cfg = read_c0_conf();
 	write_c0_conf(cfg | R30XX_CONF_HALT);
-	local_irq_enable();
+	raw_local_irq_enable();
 }
 
 static void __cpuidle r39xx_wait(void)
 {
 	if (!need_resched())
 		write_c0_conf(read_c0_conf() | TX39_CONF_HALT);
-	local_irq_enable();
+	raw_local_irq_enable();
 }
 
 void __cpuidle r4k_wait(void)
 {
-	local_irq_enable();
+	raw_local_irq_enable();
 	__r4k_wait();
 }
 
@@ -64,7 +64,7 @@ void __cpuidle r4k_wait_irqoff(void)
 		"	.set	arch=r4000	\n"
 		"	wait			\n"
 		"	.set	pop		\n");
-	local_irq_enable();
+	raw_local_irq_enable();
 }
 
 /*
@@ -84,7 +84,7 @@ static void __cpuidle rm7k_wait_irqoff(void)
 		"	wait						\n"
 		"	mtc0	$1, $12		# stalls until W stage	\n"
 		"	.set	pop					\n");
-	local_irq_enable();
+	raw_local_irq_enable();
 }
 
 /*
@@ -257,7 +257,7 @@ void arch_cpu_idle(void)
 	if (cpu_wait)
 		cpu_wait();
 	else
-		local_irq_enable();
+		raw_local_irq_enable();
 }
 
 #ifdef CONFIG_CPU_IDLE
diff --git a/arch/nios2/kernel/process.c b/arch/nios2/kernel/process.c
index 4ffe857e6ada..50b4eb19a6cc 100644
--- a/arch/nios2/kernel/process.c
+++ b/arch/nios2/kernel/process.c
@@ -33,7 +33,7 @@ EXPORT_SYMBOL(pm_power_off);
 
 void arch_cpu_idle(void)
 {
-	local_irq_enable();
+	raw_local_irq_enable();
 }
 
 /*
diff --git a/arch/openrisc/kernel/process.c b/arch/openrisc/kernel/process.c
index 0ff391f00334..3c98728cce24 100644
--- a/arch/openrisc/kernel/process.c
+++ b/arch/openrisc/kernel/process.c
@@ -79,7 +79,7 @@ void machine_power_off(void)
  */
 void arch_cpu_idle(void)
 {
-	local_irq_enable();
+	raw_local_irq_enable();
 	if (mfspr(SPR_UPR) & SPR_UPR_PMP)
 		mtspr(SPR_PMR, mfspr(SPR_PMR) | SPR_PMR_DME);
 }
diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c
index f196d96e2f9f..a92a23d6acd9 100644
--- a/arch/parisc/kernel/process.c
+++ b/arch/parisc/kernel/process.c
@@ -169,7 +169,7 @@ void __cpuidle arch_cpu_idle_dead(void)
 
 void __cpuidle arch_cpu_idle(void)
 {
-	local_irq_enable();
+	raw_local_irq_enable();
 
 	/* nop on real hardware, qemu will idle sleep. */
 	asm volatile("or %%r10,%%r10,%%r10\n":::);
diff --git a/arch/powerpc/kernel/idle.c b/arch/powerpc/kernel/idle.c
index ae0e2632393d..1f835539fda4 100644
--- a/arch/powerpc/kernel/idle.c
+++ b/arch/powerpc/kernel/idle.c
@@ -52,9 +52,9 @@ void arch_cpu_idle(void)
 		 * interrupts enabled, some don't.
 		 */
 		if (irqs_disabled())
-			local_irq_enable();
+			raw_local_irq_enable();
 	} else {
-		local_irq_enable();
+		raw_local_irq_enable();
 		/*
 		 * Go into low thread priority and possibly
 		 * low power mode.
diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c
index 19225ec65db6..dd5f985b1f40 100644
--- a/arch/riscv/kernel/process.c
+++ b/arch/riscv/kernel/process.c
@@ -36,7 +36,7 @@ extern asmlinkage void ret_from_kernel_thread(void);
 void arch_cpu_idle(void)
 {
 	wait_for_interrupt();
-	local_irq_enable();
+	raw_local_irq_enable();
 }
 
 void show_regs(struct pt_regs *regs)
diff --git a/arch/s390/kernel/idle.c b/arch/s390/kernel/idle.c
index f7f1e64e0d98..2b85096964f8 100644
--- a/arch/s390/kernel/idle.c
+++ b/arch/s390/kernel/idle.c
@@ -33,10 +33,10 @@ void enabled_wait(void)
 		PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK;
 	clear_cpu_flag(CIF_NOHZ_DELAY);
 
-	local_irq_save(flags);
+	raw_local_irq_save(flags);
 	/* Call the assembler magic in entry.S */
 	psw_idle(idle, psw_mask);
-	local_irq_restore(flags);
+	raw_local_irq_restore(flags);
 
 	/* Account time spent with enabled wait psw loaded as idle time. */
 	raw_write_seqcount_begin(&idle->seqcount);
@@ -123,7 +123,7 @@ void arch_cpu_idle_enter(void)
 void arch_cpu_idle(void)
 {
 	enabled_wait();
-	local_irq_enable();
+	raw_local_irq_enable();
 }
 
 void arch_cpu_idle_exit(void)
diff --git a/arch/sh/kernel/idle.c b/arch/sh/kernel/idle.c
index 0dc0f52f9bb8..f59814983bd5 100644
--- a/arch/sh/kernel/idle.c
+++ b/arch/sh/kernel/idle.c
@@ -22,7 +22,7 @@ static void (*sh_idle)(void);
 void default_idle(void)
 {
 	set_bl_bit();
-	local_irq_enable();
+	raw_local_irq_enable();
 	/* Isn't this racy ? */
 	cpu_sleep();
 	clear_bl_bit();
diff --git a/arch/sparc/kernel/leon_pmc.c b/arch/sparc/kernel/leon_pmc.c
index 065e2d4b7290..396f46bca52e 100644
--- a/arch/sparc/kernel/leon_pmc.c
+++ b/arch/sparc/kernel/leon_pmc.c
@@ -50,7 +50,7 @@ static void pmc_leon_idle_fixup(void)
 	register unsigned int address = (unsigned int)leon3_irqctrl_regs;
 
 	/* Interrupts need to be enabled to not hang the CPU */
-	local_irq_enable();
+	raw_local_irq_enable();
 
 	__asm__ __volatile__ (
 		"wr	%%g0, %%asr19\n"
@@ -66,7 +66,7 @@ static void pmc_leon_idle_fixup(void)
 static void pmc_leon_idle(void)
 {
 	/* Interrupts need to be enabled to not hang the CPU */
-	local_irq_enable();
+	raw_local_irq_enable();
 
 	/* For systems without power-down, this will be no-op */
 	__asm__ __volatile__ ("wr	%g0, %asr19\n\t");
diff --git a/arch/sparc/kernel/process_32.c b/arch/sparc/kernel/process_32.c
index adfcaeab3ddc..a02363735915 100644
--- a/arch/sparc/kernel/process_32.c
+++ b/arch/sparc/kernel/process_32.c
@@ -74,7 +74,7 @@ void arch_cpu_idle(void)
 {
 	if (sparc_idle)
 		(*sparc_idle)();
-	local_irq_enable();
+	raw_local_irq_enable();
 }
 
 /* XXX cli/sti -> local_irq_xxx here, check this works once SMP is fixed. */
diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c
index a75093b993f9..6f8c7822fc06 100644
--- a/arch/sparc/kernel/process_64.c
+++ b/arch/sparc/kernel/process_64.c
@@ -62,11 +62,11 @@ void arch_cpu_idle(void)
 {
 	if (tlb_type != hypervisor) {
 		touch_nmi_watchdog();
-		local_irq_enable();
+		raw_local_irq_enable();
 	} else {
 		unsigned long pstate;
 
-		local_irq_enable();
+		raw_local_irq_enable();
 
                 /* The sun4v sleeping code requires that we have PSTATE.IE cleared over
                  * the cpu sleep hypervisor call.
diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c
index 3bed09538dd9..9505a7e87396 100644
--- a/arch/um/kernel/process.c
+++ b/arch/um/kernel/process.c
@@ -217,7 +217,7 @@ void arch_cpu_idle(void)
 {
 	cpu_tasks[current_thread_info()->cpu].pid = os_getpid();
 	um_idle_sleep();
-	local_irq_enable();
+	raw_local_irq_enable();
 }
 
 int __cant_sleep(void) {
diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h
index e039a933aca3..29dd27b5a339 100644
--- a/arch/x86/include/asm/mwait.h
+++ b/arch/x86/include/asm/mwait.h
@@ -88,8 +88,6 @@ static inline void __mwaitx(unsigned long eax, unsigned long ebx,
 
 static inline void __sti_mwait(unsigned long eax, unsigned long ecx)
 {
-	trace_hardirqs_on();
-
 	mds_idle_clear_cpu_buffers();
 	/* "mwait %eax, %ecx;" */
 	asm volatile("sti; .byte 0x0f, 0x01, 0xc9;"
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index ba4593a913fa..145a7ac0c19a 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -685,7 +685,7 @@ void arch_cpu_idle(void)
  */
 void __cpuidle default_idle(void)
 {
-	safe_halt();
+	raw_safe_halt();
 }
 #if defined(CONFIG_APM_MODULE) || defined(CONFIG_HALTPOLL_CPUIDLE_MODULE)
 EXPORT_SYMBOL(default_idle);
@@ -736,6 +736,8 @@ void stop_this_cpu(void *dummy)
 /*
  * AMD Erratum 400 aware idle routine. We handle it the same way as C3 power
  * states (local apic timer and TSC stop).
+ *
+ * XXX this function is completely buggered vs RCU and tracing.
  */
 static void amd_e400_idle(void)
 {
@@ -757,9 +759,9 @@ static void amd_e400_idle(void)
 	 * The switch back from broadcast mode needs to be called with
 	 * interrupts disabled.
 	 */
-	local_irq_disable();
+	raw_local_irq_disable();
 	tick_broadcast_exit();
-	local_irq_enable();
+	raw_local_irq_enable();
 }
 
 /*
@@ -801,9 +803,9 @@ static __cpuidle void mwait_idle(void)
 		if (!need_resched())
 			__sti_mwait(0, 0);
 		else
-			local_irq_enable();
+			raw_local_irq_enable();
 	} else {
-		local_irq_enable();
+		raw_local_irq_enable();
 	}
 	__current_clr_polling();
 }
diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
index 24d0ee26377d..c6932b8f4467 100644
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -78,7 +78,7 @@ void __weak arch_cpu_idle_dead(void) { }
 void __weak arch_cpu_idle(void)
 {
 	cpu_idle_force_poll = 1;
-	local_irq_enable();
+	raw_local_irq_enable();
 }
 
 /**
@@ -94,9 +94,35 @@ void __cpuidle default_idle_call(void)
 
 		trace_cpu_idle(1, smp_processor_id());
 		stop_critical_timings();
+
+		/*
+		 * arch_cpu_idle() is supposed to enable IRQs, however
+		 * we can't do that because of RCU and tracing.
+		 *
+		 * Trace IRQs enable here, then switch off RCU, and have
+		 * arch_cpu_idle() use raw_local_irq_enable(). Note that
+		 * rcu_idle_enter() relies on lockdep IRQ state, so switch that
+		 * last -- this is very similar to the entry code.
+		 */
+		trace_hardirqs_on_prepare();
+		lockdep_hardirqs_on_prepare(_THIS_IP_);
 		rcu_idle_enter();
+		lockdep_hardirqs_on(_THIS_IP_);
+
 		arch_cpu_idle();
+
+		/*
+		 * OK, so IRQs are enabled here, but RCU needs them disabled to
+		 * turn itself back on.. funny thing is that disabling IRQs
+		 * will cause tracing, which needs RCU. Jump through hoops to
+		 * make it 'work'.
+		 */
+		raw_local_irq_disable();
+		lockdep_hardirqs_off(_THIS_IP_);
 		rcu_idle_exit();
+		lockdep_hardirqs_on(_THIS_IP_);
+		raw_local_irq_enable();
+
 		start_critical_timings();
 		trace_cpu_idle(PWR_EVENT_EXIT, smp_processor_id());
 	}
-- 
cgit v1.2.3-70-g09d2


From e2be2a833ab5338fa5b8b99ba622b911d96f1795 Mon Sep 17 00:00:00 2001
From: Lu Baolu <baolu.lu@linux.intel.com>
Date: Wed, 25 Nov 2020 09:41:24 +0800
Subject: x86/tboot: Don't disable swiotlb when iommu is forced on

After commit 327d5b2fee91c ("iommu/vt-d: Allow 32bit devices to uses DMA
domain"), swiotlb could also be used for direct memory access if IOMMU
is enabled but a device is configured to pass through the DMA translation.
Keep swiotlb when IOMMU is forced on, otherwise, some devices won't work
if "iommu=pt" kernel parameter is used.

Fixes: 327d5b2fee91 ("iommu/vt-d: Allow 32bit devices to uses DMA domain")
Reported-and-tested-by: Adrian Huang <ahuang12@lenovo.com>
Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
Link: https://lore.kernel.org/r/20201125014124.4070776-1-baolu.lu@linux.intel.com
Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=210237
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/x86/kernel/tboot.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c
index 420be871d9d4..ae64f98ec2ab 100644
--- a/arch/x86/kernel/tboot.c
+++ b/arch/x86/kernel/tboot.c
@@ -514,13 +514,10 @@ int tboot_force_iommu(void)
 	if (!tboot_enabled())
 		return 0;
 
-	if (no_iommu || swiotlb || dmar_disabled)
+	if (no_iommu || dmar_disabled)
 		pr_warn("Forcing Intel-IOMMU to enabled\n");
 
 	dmar_disabled = 0;
-#ifdef CONFIG_SWIOTLB
-	swiotlb = 0;
-#endif
 	no_iommu = 0;
 
 	return 1;
-- 
cgit v1.2.3-70-g09d2


From fb319496935b7475a863a00c76895e8bb3216704 Mon Sep 17 00:00:00 2001
From: Jon Hunter <jonathanh@nvidia.com>
Date: Mon, 16 Nov 2020 16:20:26 +0000
Subject: arm64: tegra: Disable the ACONNECT for Jetson TX2

Commit ff4c371d2bc0 ("arm64: defconfig: Build ADMA and ACONNECT driver")
enable the Tegra ADMA and ACONNECT drivers and this is causing resume
from system suspend to fail on Jetson TX2. Resume is failing because the
ACONNECT driver is being resumed before the BPMP driver, and the ACONNECT
driver is attempting to power on a power-domain that is provided by the
BPMP. While a proper fix for the resume sequencing problem is identified,
disable the ACONNECT for Jetson TX2 temporarily to avoid breaking system
suspend.

Please note that ACONNECT driver is used by the Audio Processing Engine
(APE) on Tegra, but because there is no mainline support for APE on
Jetson TX2 currently, disabling the ACONNECT does not disable any useful
feature at the moment.

Signed-off-by: Jon Hunter <jonathanh@nvidia.com>
Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 arch/arm64/boot/dts/nvidia/tegra186-p2771-0000.dts | 12 ------------
 1 file changed, 12 deletions(-)

(limited to 'arch')

diff --git a/arch/arm64/boot/dts/nvidia/tegra186-p2771-0000.dts b/arch/arm64/boot/dts/nvidia/tegra186-p2771-0000.dts
index 381a84912ba8..c28d51cc5797 100644
--- a/arch/arm64/boot/dts/nvidia/tegra186-p2771-0000.dts
+++ b/arch/arm64/boot/dts/nvidia/tegra186-p2771-0000.dts
@@ -10,18 +10,6 @@
 	model = "NVIDIA Jetson TX2 Developer Kit";
 	compatible = "nvidia,p2771-0000", "nvidia,tegra186";
 
-	aconnect {
-		status = "okay";
-
-		dma-controller@2930000 {
-			status = "okay";
-		};
-
-		interrupt-controller@2a40000 {
-			status = "okay";
-		};
-	};
-
 	i2c@3160000 {
 		power-monitor@42 {
 			compatible = "ti,ina3221";
-- 
cgit v1.2.3-70-g09d2


From 476e23f4c540949ac5ea4fad4f6f6fa0e2d41f42 Mon Sep 17 00:00:00 2001
From: Jon Hunter <jonathanh@nvidia.com>
Date: Wed, 11 Nov 2020 10:41:17 +0000
Subject: arm64: tegra: Correct the UART for Jetson Xavier NX

The Jetson Xavier NX board routes UARTA to the 40-pin header and UARTC
to a 12-pin debug header. The UARTs can be used by either the Tegra
Combined UART (TCU) driver or the Tegra 8250 driver. By default, the
TCU will use UARTC on Jetson Xavier NX. Currently, device-tree for
Xavier NX enables the TCU and the Tegra 8250 node for UARTC. Fix this
by disabling the Tegra 8250 node for UARTC and enabling the Tegra 8250
node for UARTA.

Fixes: 3f9efbbe57bc ("arm64: tegra: Add support for Jetson Xavier NX")
Cc: stable@vger.kernel.org
Signed-off-by: Jon Hunter <jonathanh@nvidia.com>
Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 arch/arm64/boot/dts/nvidia/tegra194-p3668-0000.dtsi | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/arm64/boot/dts/nvidia/tegra194-p3668-0000.dtsi b/arch/arm64/boot/dts/nvidia/tegra194-p3668-0000.dtsi
index a2893be80507..0dc8304a2edd 100644
--- a/arch/arm64/boot/dts/nvidia/tegra194-p3668-0000.dtsi
+++ b/arch/arm64/boot/dts/nvidia/tegra194-p3668-0000.dtsi
@@ -54,7 +54,7 @@
 			status = "okay";
 		};
 
-		serial@c280000 {
+		serial@3100000 {
 			status = "okay";
 		};
 
-- 
cgit v1.2.3-70-g09d2


From f24a2acc15bcc7bbd295f9759efc873b88fbe429 Mon Sep 17 00:00:00 2001
From: JC Kuo <jckuo@nvidia.com>
Date: Thu, 19 Nov 2020 15:23:45 +0800
Subject: arm64: tegra: Fix USB_VBUS_EN0 regulator on Jetson TX1

USB host mode is broken on the OTG port of Jetson TX1 platform because
the USB_VBUS_EN0 regulator (regulator@11) is being overwritten by the
vdd-cam-1v2 regulator. This commit rearranges USB_VBUS_EN0 to be
regulator@14.

Fixes: 257c8047be44 ("arm64: tegra: jetson-tx1: Add camera supplies")
Cc: stable@vger.kernel.org
Signed-off-by: JC Kuo <jckuo@nvidia.com>
Reviewed-by: Jon Hunter <jonathanh@nvidia.com>
Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 arch/arm64/boot/dts/nvidia/tegra210-p2597.dtsi | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

(limited to 'arch')

diff --git a/arch/arm64/boot/dts/nvidia/tegra210-p2597.dtsi b/arch/arm64/boot/dts/nvidia/tegra210-p2597.dtsi
index e18e1a9a3011..a9caaf7c0d67 100644
--- a/arch/arm64/boot/dts/nvidia/tegra210-p2597.dtsi
+++ b/arch/arm64/boot/dts/nvidia/tegra210-p2597.dtsi
@@ -1663,16 +1663,6 @@
 		vin-supply = <&vdd_5v0_sys>;
 	};
 
-	vdd_usb_vbus_otg: regulator@11 {
-		compatible = "regulator-fixed";
-		regulator-name = "USB_VBUS_EN0";
-		regulator-min-microvolt = <5000000>;
-		regulator-max-microvolt = <5000000>;
-		gpio = <&gpio TEGRA_GPIO(CC, 4) GPIO_ACTIVE_HIGH>;
-		enable-active-high;
-		vin-supply = <&vdd_5v0_sys>;
-	};
-
 	vdd_hdmi: regulator@10 {
 		compatible = "regulator-fixed";
 		regulator-name = "VDD_HDMI_5V0";
@@ -1712,4 +1702,14 @@
 		enable-active-high;
 		vin-supply = <&vdd_3v3_sys>;
 	};
+
+	vdd_usb_vbus_otg: regulator@14 {
+		compatible = "regulator-fixed";
+		regulator-name = "USB_VBUS_EN0";
+		regulator-min-microvolt = <5000000>;
+		regulator-max-microvolt = <5000000>;
+		gpio = <&gpio TEGRA_GPIO(CC, 4) GPIO_ACTIVE_HIGH>;
+		enable-active-high;
+		vin-supply = <&vdd_5v0_sys>;
+	};
 };
-- 
cgit v1.2.3-70-g09d2


From 1741e18737948c140ccc4cc643e8126d95ee6e79 Mon Sep 17 00:00:00 2001
From: Dipen Patel <dipenp@nvidia.com>
Date: Fri, 11 Sep 2020 19:26:45 -0700
Subject: arm64: tegra: Wrong AON HSP reg property size

The AON HSP node's "reg" property size 0xa0000 will overlap with other
resources. This patch fixes that wrong value with correct size 0x90000.

Reviewed-by: Mikko Perttunen <mperttunen@nvidia.com>
Signed-off-by: Dipen Patel <dipenp@nvidia.com>
Fixes: a38570c22e9d ("arm64: tegra: Add nodes for TCU on Tegra194")
Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 arch/arm64/boot/dts/nvidia/tegra194.dtsi | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/arm64/boot/dts/nvidia/tegra194.dtsi b/arch/arm64/boot/dts/nvidia/tegra194.dtsi
index e9c90f0f44ff..93438d2b9469 100644
--- a/arch/arm64/boot/dts/nvidia/tegra194.dtsi
+++ b/arch/arm64/boot/dts/nvidia/tegra194.dtsi
@@ -1161,7 +1161,7 @@
 
 		hsp_aon: hsp@c150000 {
 			compatible = "nvidia,tegra194-hsp", "nvidia,tegra186-hsp";
-			reg = <0x0c150000 0xa0000>;
+			reg = <0x0c150000 0x90000>;
 			interrupts = <GIC_SPI 133 IRQ_TYPE_LEVEL_HIGH>,
 			             <GIC_SPI 134 IRQ_TYPE_LEVEL_HIGH>,
 			             <GIC_SPI 135 IRQ_TYPE_LEVEL_HIGH>,
-- 
cgit v1.2.3-70-g09d2


From d98bccf10dd0f36cabee71a425381fce0908de3b Mon Sep 17 00:00:00 2001
From: Jon Hunter <jonathanh@nvidia.com>
Date: Wed, 18 Nov 2020 16:04:58 +0000
Subject: arm64: tegra: Fix Tegra234 VDK node names

When the device-tree board file was added for the Tegra234 VDK simulator
it incorrectly used the names 'cbb' and 'sdhci' instead of 'bus' and
'mmc', respectively. The names 'bus' and 'mmc' are required by the
device-tree json-schema validation tools. Therefore, fix this by
renaming these nodes accordingly.

Fixes: 639448912ba1 ("arm64: tegra: Initial Tegra234 VDK support")
Reported-by: Ashish Singhal <ashishsingha@nvidia.com>
Signed-off-by: Jon Hunter <jonathanh@nvidia.com>
Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 arch/arm64/boot/dts/nvidia/tegra234-sim-vdk.dts | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'arch')

diff --git a/arch/arm64/boot/dts/nvidia/tegra234-sim-vdk.dts b/arch/arm64/boot/dts/nvidia/tegra234-sim-vdk.dts
index f6e6a24829af..b5d9a5526272 100644
--- a/arch/arm64/boot/dts/nvidia/tegra234-sim-vdk.dts
+++ b/arch/arm64/boot/dts/nvidia/tegra234-sim-vdk.dts
@@ -8,7 +8,7 @@
 	compatible = "nvidia,tegra234-vdk", "nvidia,tegra234";
 
 	aliases {
-		sdhci3 = "/cbb@0/sdhci@3460000";
+		mmc3 = "/bus@0/mmc@3460000";
 		serial0 = &uarta;
 	};
 
@@ -17,12 +17,12 @@
 		stdout-path = "serial0:115200n8";
 	};
 
-	cbb@0 {
+	bus@0 {
 		serial@3100000 {
 			status = "okay";
 		};
 
-		sdhci@3460000 {
+		mmc@3460000 {
 			status = "okay";
 			bus-width = <8>;
 			non-removable;
-- 
cgit v1.2.3-70-g09d2


From e553fdc8105ac2ef3f321739da3908bb6673f7de Mon Sep 17 00:00:00 2001
From: Nathan Chancellor <natechancellor@gmail.com>
Date: Sun, 8 Nov 2020 13:37:37 -0700
Subject: riscv: Explicitly specify the build id style in vDSO Makefile again

Commit a96843372331 ("kbuild: explicitly specify the build id style")
explicitly set the build ID style to SHA1. Commit c2c81bb2f691 ("RISC-V:
Fix the VDSO symbol generaton for binutils-2.35+") undid this change,
likely unintentionally.

Restore it so that the build ID style stays consistent across the tree
regardless of linker.

Fixes: c2c81bb2f691 ("RISC-V: Fix the VDSO symbol generaton for binutils-2.35+")
Signed-off-by: Nathan Chancellor <natechancellor@gmail.com>
Reviewed-by: Nick Desaulniers <ndesaulniers@google.com>
Reviewed-by: Bill Wendling <morbo@google.com>
Signed-off-by: Palmer Dabbelt <palmerdabbelt@google.com>
---
 arch/riscv/kernel/vdso/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/riscv/kernel/vdso/Makefile b/arch/riscv/kernel/vdso/Makefile
index cb8f9e4cfcbf..0cfd6da784f8 100644
--- a/arch/riscv/kernel/vdso/Makefile
+++ b/arch/riscv/kernel/vdso/Makefile
@@ -44,7 +44,7 @@ SYSCFLAGS_vdso.so.dbg = $(c_flags)
 $(obj)/vdso.so.dbg: $(src)/vdso.lds $(obj-vdso) FORCE
 	$(call if_changed,vdsold)
 SYSCFLAGS_vdso.so.dbg = -shared -s -Wl,-soname=linux-vdso.so.1 \
-	-Wl,--build-id -Wl,--hash-style=both
+	-Wl,--build-id=sha1 -Wl,--hash-style=both
 
 # We also create a special relocatable object that should mirror the symbol
 # table and layout of the linked DSO. With ld --just-symbols we can then
-- 
cgit v1.2.3-70-g09d2


From 6134b110f97178d6919441a82dc91a7f3664b4e0 Mon Sep 17 00:00:00 2001
From: Anup Patel <anup.patel@wdc.com>
Date: Fri, 6 Nov 2020 13:23:59 +0530
Subject: RISC-V: Add missing jump label initialization

The jump_label_init() should be called from setup_arch() very
early for proper functioning of jump label support.

Fixes: ebc00dde8a97 ("riscv: Add jump-label implementation")
Signed-off-by: Anup Patel <anup.patel@wdc.com>
Signed-off-by: Palmer Dabbelt <palmerdabbelt@google.com>
---
 arch/riscv/kernel/setup.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch')

diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
index c424cc6dd833..117f3212a8e4 100644
--- a/arch/riscv/kernel/setup.c
+++ b/arch/riscv/kernel/setup.c
@@ -75,6 +75,7 @@ void __init setup_arch(char **cmdline_p)
 	*cmdline_p = boot_command_line;
 
 	early_ioremap_setup();
+	jump_label_init();
 	parse_early_param();
 
 	efi_init();
-- 
cgit v1.2.3-70-g09d2


From 30aca1bacb398dec6c1ed5eeca33f355bd7b6203 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Mon, 16 Nov 2020 17:39:51 -0800
Subject: RISC-V: fix barrier() use in <vdso/processor.h>

riscv's <vdso/processor.h> uses barrier() so it should include
<asm/barrier.h>

Fixes this build error:
  CC [M]  drivers/net/ethernet/emulex/benet/be_main.o
In file included from ./include/vdso/processor.h:10,
                 from ./arch/riscv/include/asm/processor.h:11,
                 from ./include/linux/prefetch.h:15,
                 from drivers/net/ethernet/emulex/benet/be_main.c:14:
./arch/riscv/include/asm/vdso/processor.h: In function 'cpu_relax':
./arch/riscv/include/asm/vdso/processor.h:14:2: error: implicit declaration of function 'barrier' [-Werror=implicit-function-declaration]
   14 |  barrier();

This happens with a total of 5 networking drivers -- they all use
<linux/prefetch.h>.

rv64 allmodconfig now builds cleanly after this patch.

Fixes fallout from:
815f0ddb346c ("include/linux/compiler*.h: make compiler-*.h mutually exclusive")

Fixes: ad5d1122b82f ("riscv: use vDSO common flow to reduce the latency of the time-related functions")
Reported-by: Andreas Schwab <schwab@linux-m68k.org>
Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Acked-by: Arvind Sankar <nivedita@alum.mit.edu>
Signed-off-by: Palmer Dabbelt <palmerdabbelt@google.com>
Reviewed-by: Nick Desaulniers <ndesaulniers@google.com>
Signed-off-by: Palmer Dabbelt <palmerdabbelt@google.com>
---
 arch/riscv/include/asm/vdso/processor.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'arch')

diff --git a/arch/riscv/include/asm/vdso/processor.h b/arch/riscv/include/asm/vdso/processor.h
index 82a5693b1861..134388cbaaa1 100644
--- a/arch/riscv/include/asm/vdso/processor.h
+++ b/arch/riscv/include/asm/vdso/processor.h
@@ -4,6 +4,8 @@
 
 #ifndef __ASSEMBLY__
 
+#include <asm/barrier.h>
+
 static inline void cpu_relax(void)
 {
 #ifdef __riscv_muldiv
-- 
cgit v1.2.3-70-g09d2


From 33fc379df76b4991e5ae312f07bcd6820811971e Mon Sep 17 00:00:00 2001
From: Anand K Mistry <amistry@google.com>
Date: Tue, 10 Nov 2020 12:33:53 +1100
Subject: x86/speculation: Fix prctl() when
 spectre_v2_user={seccomp,prctl},ibpb

When spectre_v2_user={seccomp,prctl},ibpb is specified on the command
line, IBPB is force-enabled and STIPB is conditionally-enabled (or not
available).

However, since

  21998a351512 ("x86/speculation: Avoid force-disabling IBPB based on STIBP and enhanced IBRS.")

the spectre_v2_user_ibpb variable is set to SPECTRE_V2_USER_{PRCTL,SECCOMP}
instead of SPECTRE_V2_USER_STRICT, which is the actual behaviour.
Because the issuing of IBPB relies on the switch_mm_*_ibpb static
branches, the mitigations behave as expected.

Since

  1978b3a53a74 ("x86/speculation: Allow IBPB to be conditionally enabled on CPUs with always-on STIBP")

this discrepency caused the misreporting of IB speculation via prctl().

On CPUs with STIBP always-on and spectre_v2_user=seccomp,ibpb,
prctl(PR_GET_SPECULATION_CTRL) would return PR_SPEC_PRCTL |
PR_SPEC_ENABLE instead of PR_SPEC_DISABLE since both IBPB and STIPB are
always on. It also allowed prctl(PR_SET_SPECULATION_CTRL) to set the IB
speculation mode, even though the flag is ignored.

Similarly, for CPUs without SMT, prctl(PR_GET_SPECULATION_CTRL) should
also return PR_SPEC_DISABLE since IBPB is always on and STIBP is not
available.

 [ bp: Massage commit message. ]

Fixes: 21998a351512 ("x86/speculation: Avoid force-disabling IBPB based on STIBP and enhanced IBRS.")
Fixes: 1978b3a53a74 ("x86/speculation: Allow IBPB to be conditionally enabled on CPUs with always-on STIBP")
Signed-off-by: Anand K Mistry <amistry@google.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Cc: <stable@vger.kernel.org>
Link: https://lkml.kernel.org/r/20201110123349.1.Id0cbf996d2151f4c143c90f9028651a5b49a5908@changeid
---
 arch/x86/kernel/cpu/bugs.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 581fb7223ad0..d41b70fe4918 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -739,11 +739,13 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd)
 	if (boot_cpu_has(X86_FEATURE_IBPB)) {
 		setup_force_cpu_cap(X86_FEATURE_USE_IBPB);
 
+		spectre_v2_user_ibpb = mode;
 		switch (cmd) {
 		case SPECTRE_V2_USER_CMD_FORCE:
 		case SPECTRE_V2_USER_CMD_PRCTL_IBPB:
 		case SPECTRE_V2_USER_CMD_SECCOMP_IBPB:
 			static_branch_enable(&switch_mm_always_ibpb);
+			spectre_v2_user_ibpb = SPECTRE_V2_USER_STRICT;
 			break;
 		case SPECTRE_V2_USER_CMD_PRCTL:
 		case SPECTRE_V2_USER_CMD_AUTO:
@@ -757,8 +759,6 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd)
 		pr_info("mitigation: Enabling %s Indirect Branch Prediction Barrier\n",
 			static_key_enabled(&switch_mm_always_ibpb) ?
 			"always-on" : "conditional");
-
-		spectre_v2_user_ibpb = mode;
 	}
 
 	/*
-- 
cgit v1.2.3-70-g09d2


From 5844cc25fd121074de7895181a2fa1ce100a0fdd Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Thu, 26 Nov 2020 20:25:27 +1000
Subject: powerpc/64s: Fix hash ISA v3.0 TLBIEL instruction generation

A typo has the R field of the instruction assigned by lucky dip a la
register allocator.

Fixes: d4748276ae14c ("powerpc/64s: Improve local TLB flush for boot and MCE on POWER9")
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Reviewed-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20201126102530.691335-2-npiggin@gmail.com
---
 arch/powerpc/mm/book3s64/hash_native.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/powerpc/mm/book3s64/hash_native.c b/arch/powerpc/mm/book3s64/hash_native.c
index 0203cdf48c54..97fa42d7027e 100644
--- a/arch/powerpc/mm/book3s64/hash_native.c
+++ b/arch/powerpc/mm/book3s64/hash_native.c
@@ -68,7 +68,7 @@ static __always_inline void tlbiel_hash_set_isa300(unsigned int set, unsigned in
 	rs = ((unsigned long)pid << PPC_BITLSHIFT(31));
 
 	asm volatile(PPC_TLBIEL(%0, %1, %2, %3, %4)
-		     : : "r"(rb), "r"(rs), "i"(ric), "i"(prs), "r"(r)
+		     : : "r"(rb), "r"(rs), "i"(ric), "i"(prs), "i"(r)
 		     : "memory");
 }
 
-- 
cgit v1.2.3-70-g09d2


From c0b27c517acf8a2b359dd373a7e7e88b01a8308e Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Thu, 26 Nov 2020 20:25:28 +1000
Subject: powerpc/64s/pseries: Fix hash tlbiel_all_isa300 for guest kernels

tlbiel_all() can not be usable in !HVMODE when running hash presently,
remove HV privileged flushes when running in guest to make it usable.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Reviewed-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20201126102530.691335-3-npiggin@gmail.com
---
 arch/powerpc/mm/book3s64/hash_native.c | 21 ++++++++++++++-------
 1 file changed, 14 insertions(+), 7 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/mm/book3s64/hash_native.c b/arch/powerpc/mm/book3s64/hash_native.c
index 97fa42d7027e..52e170bd95ae 100644
--- a/arch/powerpc/mm/book3s64/hash_native.c
+++ b/arch/powerpc/mm/book3s64/hash_native.c
@@ -92,16 +92,15 @@ static void tlbiel_all_isa300(unsigned int num_sets, unsigned int is)
 	asm volatile("ptesync": : :"memory");
 
 	/*
-	 * Flush the first set of the TLB, and any caching of partition table
-	 * entries. Then flush the remaining sets of the TLB. Hash mode uses
-	 * partition scoped TLB translations.
+	 * Flush the partition table cache if this is HV mode.
 	 */
-	tlbiel_hash_set_isa300(0, is, 0, 2, 0);
-	for (set = 1; set < num_sets; set++)
-		tlbiel_hash_set_isa300(set, is, 0, 0, 0);
+	if (early_cpu_has_feature(CPU_FTR_HVMODE))
+		tlbiel_hash_set_isa300(0, is, 0, 2, 0);
 
 	/*
-	 * Now invalidate the process table cache.
+	 * Now invalidate the process table cache. UPRT=0 HPT modes (what
+	 * current hardware implements) do not use the process table, but
+	 * add the flushes anyway.
 	 *
 	 * From ISA v3.0B p. 1078:
 	 *     The following forms are invalid.
@@ -110,6 +109,14 @@ static void tlbiel_all_isa300(unsigned int num_sets, unsigned int is)
 	 */
 	tlbiel_hash_set_isa300(0, is, 0, 2, 1);
 
+	/*
+	 * Then flush the sets of the TLB proper. Hash mode uses
+	 * partition scoped TLB translations, which may be flushed
+	 * in !HV mode.
+	 */
+	for (set = 0; set < num_sets; set++)
+		tlbiel_hash_set_isa300(set, is, 0, 0, 0);
+
 	ppc_after_tlbiel_barrier();
 
 	asm volatile(PPC_ISA_3_0_INVALIDATE_ERAT "; isync" : : :"memory");
-- 
cgit v1.2.3-70-g09d2


From 01b0f0eae0812e80efeee4ee17687e5386335e08 Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Thu, 26 Nov 2020 20:25:30 +1000
Subject: powerpc/64s: Trim offlined CPUs from mm_cpumasks

When offlining a CPU, powerpc/64s does not flush TLBs, rather it just
leaves the CPU set in mm_cpumasks, so it continues to receive TLBIEs
to manage its TLBs.

However the exit_flush_lazy_tlbs() function expects that after
returning, all CPUs (except self) have flushed TLBs for that mm, in
which case TLBIEL can be used for this flush. This breaks for offline
CPUs because they don't get the IPI to flush their TLB. This can lead
to stale translations.

Fix this by clearing the CPU from mm_cpumasks, then flushing all TLBs
before going offline.

These offlined CPU bits stuck in the cpumask also prevents the cpumask
from being trimmed back to local mode, which means continual broadcast
IPIs or TLBIEs are needed for TLB flushing. This patch prevents that
situation too.

A cast of many were involved in working this out, but in particular
Milton, Aneesh, Paul made key discoveries.

Fixes: 0cef77c7798a7 ("powerpc/64s/radix: flush remote CPUs out of single-threaded mm_cpumask")
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Reviewed-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
Debugged-by: Milton Miller <miltonm@us.ibm.com>
Debugged-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
Debugged-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20201126102530.691335-5-npiggin@gmail.com
---
 arch/powerpc/include/asm/book3s/64/mmu.h     | 12 ++++++++++++
 arch/powerpc/mm/book3s64/mmu_context.c       | 20 ++++++++++++++++++++
 arch/powerpc/platforms/powermac/smp.c        |  2 ++
 arch/powerpc/platforms/powernv/smp.c         |  3 +++
 arch/powerpc/platforms/pseries/hotplug-cpu.c |  3 +++
 5 files changed, 40 insertions(+)

(limited to 'arch')

diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h
index e0b52940e43c..750918451dd2 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu.h
@@ -242,6 +242,18 @@ extern void radix_init_pseries(void);
 static inline void radix_init_pseries(void) { };
 #endif
 
+#ifdef CONFIG_HOTPLUG_CPU
+#define arch_clear_mm_cpumask_cpu(cpu, mm)				\
+	do {								\
+		if (cpumask_test_cpu(cpu, mm_cpumask(mm))) {		\
+			atomic_dec(&(mm)->context.active_cpus);		\
+			cpumask_clear_cpu(cpu, mm_cpumask(mm));		\
+		}							\
+	} while (0)
+
+void cleanup_cpu_mmu_context(void);
+#endif
+
 static inline int get_user_context(mm_context_t *ctx, unsigned long ea)
 {
 	int index = ea >> MAX_EA_BITS_PER_CONTEXT;
diff --git a/arch/powerpc/mm/book3s64/mmu_context.c b/arch/powerpc/mm/book3s64/mmu_context.c
index 1c54821de7bf..0c8557220ae2 100644
--- a/arch/powerpc/mm/book3s64/mmu_context.c
+++ b/arch/powerpc/mm/book3s64/mmu_context.c
@@ -17,6 +17,7 @@
 #include <linux/export.h>
 #include <linux/gfp.h>
 #include <linux/slab.h>
+#include <linux/cpu.h>
 
 #include <asm/mmu_context.h>
 #include <asm/pgalloc.h>
@@ -307,3 +308,22 @@ void radix__switch_mmu_context(struct mm_struct *prev, struct mm_struct *next)
 	isync();
 }
 #endif
+
+/**
+ * cleanup_cpu_mmu_context - Clean up MMU details for this CPU (newly offlined)
+ *
+ * This clears the CPU from mm_cpumask for all processes, and then flushes the
+ * local TLB to ensure TLB coherency in case the CPU is onlined again.
+ *
+ * KVM guest translations are not necessarily flushed here. If KVM started
+ * using mm_cpumask or the Linux APIs which do, this would have to be resolved.
+ */
+#ifdef CONFIG_HOTPLUG_CPU
+void cleanup_cpu_mmu_context(void)
+{
+	int cpu = smp_processor_id();
+
+	clear_tasks_mm_cpumask(cpu);
+	tlbiel_all();
+}
+#endif
diff --git a/arch/powerpc/platforms/powermac/smp.c b/arch/powerpc/platforms/powermac/smp.c
index 74ebe664b016..adae2a6712e1 100644
--- a/arch/powerpc/platforms/powermac/smp.c
+++ b/arch/powerpc/platforms/powermac/smp.c
@@ -911,6 +911,8 @@ static int smp_core99_cpu_disable(void)
 
 	mpic_cpu_set_priority(0xf);
 
+	cleanup_cpu_mmu_context();
+
 	return 0;
 }
 
diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c
index 54c4ba45c7ce..cbb67813cd5d 100644
--- a/arch/powerpc/platforms/powernv/smp.c
+++ b/arch/powerpc/platforms/powernv/smp.c
@@ -143,6 +143,9 @@ static int pnv_smp_cpu_disable(void)
 		xive_smp_disable_cpu();
 	else
 		xics_migrate_irqs_away();
+
+	cleanup_cpu_mmu_context();
+
 	return 0;
 }
 
diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c
index f2837e33bf5d..a02012f1b04a 100644
--- a/arch/powerpc/platforms/pseries/hotplug-cpu.c
+++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c
@@ -90,6 +90,9 @@ static int pseries_cpu_disable(void)
 		xive_smp_disable_cpu();
 	else
 		xics_migrate_irqs_away();
+
+	cleanup_cpu_mmu_context();
+
 	return 0;
 }
 
-- 
cgit v1.2.3-70-g09d2


From 10f78fd0dabbc3856ddd67b09a46abdedb045913 Mon Sep 17 00:00:00 2001
From: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Date: Fri, 27 Nov 2020 11:07:38 +0530
Subject: powerpc/numa: Fix a regression on memoryless node 0

Commit e75130f20b1f ("powerpc/numa: Offline memoryless cpuless node 0")
offlines node 0 and expects nodes to be subsequently onlined when CPUs
or nodes are detected.

Commit 6398eaa26816 ("powerpc/numa: Prefer node id queried from vphn")
skips onlining node 0 when CPUs are associated with node 0.

On systems with node 0 having CPUs but no memory, this causes node 0 be
marked offline. This causes issues at boot time when trying to set
memory node for online CPUs while building the zonelist.

0:mon> t
[link register   ] c000000000400354 __build_all_zonelists+0x164/0x280
[c00000000161bda0] c0000000016533c8 node_states+0x20/0xa0 (unreliable)
[c00000000161bdc0] c000000000400384 __build_all_zonelists+0x194/0x280
[c00000000161be30] c000000001041800 build_all_zonelists_init+0x4c/0x118
[c00000000161be80] c0000000004020d0 build_all_zonelists+0x190/0x1b0
[c00000000161bef0] c000000001003cf8 start_kernel+0x18c/0x6a8
[c00000000161bf90] c00000000000adb4 start_here_common+0x1c/0x3e8
0:mon> r
R00 = c000000000400354   R16 = 000000000b57a0e8
R01 = c00000000161bda0   R17 = 000000000b57a6b0
R02 = c00000000161ce00   R18 = 000000000b5afee8
R03 = 0000000000000000   R19 = 000000000b6448a0
R04 = 0000000000000000   R20 = fffffffffffffffd
R05 = 0000000000000000   R21 = 0000000001400000
R06 = 0000000000000000   R22 = 000000001ec00000
R07 = 0000000000000001   R23 = c000000001175580
R08 = 0000000000000000   R24 = c000000001651ed8
R09 = c0000000017e84d8   R25 = c000000001652480
R10 = 0000000000000000   R26 = c000000001175584
R11 = c000000c7fac0d10   R27 = c0000000019568d0
R12 = c000000000400180   R28 = 0000000000000000
R13 = c000000002200000   R29 = c00000000164dd78
R14 = 000000000b579f78   R30 = 0000000000000000
R15 = 000000000b57a2b8   R31 = c000000001175584
pc  = c000000000400194 local_memory_node+0x24/0x80
cfar= c000000000074334 mcount+0xc/0x10
lr  = c000000000400354 __build_all_zonelists+0x164/0x280
msr = 8000000002001033   cr  = 44002284
ctr = c000000000400180   xer = 0000000000000001   trap =  380
dar = 0000000000001388   dsisr = c00000000161bc90
0:mon>

Fix this by setting node to be online while onlining CPUs that belong to
node 0.

Fixes: e75130f20b1f ("powerpc/numa: Offline memoryless cpuless node 0")
Fixes: 6398eaa26816 ("powerpc/numa: Prefer node id queried from vphn")
Reported-by: Milan Mohanty <milmohan@in.ibm.com>
Signed-off-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20201127053738.10085-1-srikar@linux.vnet.ibm.com
---
 arch/powerpc/mm/numa.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index 63f61d8b55e5..f2bf98bdcea2 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -742,8 +742,7 @@ static int __init parse_numa_properties(void)
 			of_node_put(cpu);
 		}
 
-		if (likely(nid > 0))
-			node_set_online(nid);
+		node_set_online(nid);
 	}
 
 	get_n_mem_cells(&n_mem_addr_cells, &n_mem_size_cells);
-- 
cgit v1.2.3-70-g09d2


From 72c3bcdcda494cbd600712a32e67702cdee60c07 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 27 Nov 2020 08:53:52 +0100
Subject: KVM: x86: handle !lapic_in_kernel case in kvm_cpu_*_extint

Centralize handling of interrupts from the userspace APIC
in kvm_cpu_has_extint and kvm_cpu_get_extint, since
userspace APIC interrupts are handled more or less the
same as ExtINTs are with split irqchip.  This removes
duplicated code from kvm_cpu_has_injectable_intr and
kvm_cpu_has_interrupt, and makes the code more similar
between kvm_cpu_has_{extint,interrupt} on one side
and kvm_cpu_get_{extint,interrupt} on the other.

Cc: stable@vger.kernel.org
Reviewed-by: Filippo Sironi <sironi@amazon.de>
Reviewed-by: David Woodhouse <dwmw@amazon.co.uk>
Tested-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/irq.c   | 83 +++++++++++++++++++++-------------------------------
 arch/x86/kvm/lapic.c |  2 +-
 2 files changed, 34 insertions(+), 51 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c
index 99d118ffc67d..ee94671272e3 100644
--- a/arch/x86/kvm/irq.c
+++ b/arch/x86/kvm/irq.c
@@ -41,28 +41,9 @@ static int pending_userspace_extint(struct kvm_vcpu *v)
  * non-APIC source without intack.
  */
 static int kvm_cpu_has_extint(struct kvm_vcpu *v)
-{
-	u8 accept = kvm_apic_accept_pic_intr(v);
-
-	if (accept) {
-		if (irqchip_split(v->kvm))
-			return pending_userspace_extint(v);
-		else
-			return v->kvm->arch.vpic->output;
-	} else
-		return 0;
-}
-
-/*
- * check if there is injectable interrupt:
- * when virtual interrupt delivery enabled,
- * interrupt from apic will handled by hardware,
- * we don't need to check it here.
- */
-int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v)
 {
 	/*
-	 * FIXME: interrupt.injected represents an interrupt that it's
+	 * FIXME: interrupt.injected represents an interrupt whose
 	 * side-effects have already been applied (e.g. bit from IRR
 	 * already moved to ISR). Therefore, it is incorrect to rely
 	 * on interrupt.injected to know if there is a pending
@@ -75,6 +56,23 @@ int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v)
 	if (!lapic_in_kernel(v))
 		return v->arch.interrupt.injected;
 
+	if (!kvm_apic_accept_pic_intr(v))
+		return 0;
+
+	if (irqchip_split(v->kvm))
+		return pending_userspace_extint(v);
+	else
+		return v->kvm->arch.vpic->output;
+}
+
+/*
+ * check if there is injectable interrupt:
+ * when virtual interrupt delivery enabled,
+ * interrupt from apic will handled by hardware,
+ * we don't need to check it here.
+ */
+int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v)
+{
 	if (kvm_cpu_has_extint(v))
 		return 1;
 
@@ -91,20 +89,6 @@ EXPORT_SYMBOL_GPL(kvm_cpu_has_injectable_intr);
  */
 int kvm_cpu_has_interrupt(struct kvm_vcpu *v)
 {
-	/*
-	 * FIXME: interrupt.injected represents an interrupt that it's
-	 * side-effects have already been applied (e.g. bit from IRR
-	 * already moved to ISR). Therefore, it is incorrect to rely
-	 * on interrupt.injected to know if there is a pending
-	 * interrupt in the user-mode LAPIC.
-	 * This leads to nVMX/nSVM not be able to distinguish
-	 * if it should exit from L2 to L1 on EXTERNAL_INTERRUPT on
-	 * pending interrupt or should re-inject an injected
-	 * interrupt.
-	 */
-	if (!lapic_in_kernel(v))
-		return v->arch.interrupt.injected;
-
 	if (kvm_cpu_has_extint(v))
 		return 1;
 
@@ -118,16 +102,21 @@ EXPORT_SYMBOL_GPL(kvm_cpu_has_interrupt);
  */
 static int kvm_cpu_get_extint(struct kvm_vcpu *v)
 {
-	if (kvm_cpu_has_extint(v)) {
-		if (irqchip_split(v->kvm)) {
-			int vector = v->arch.pending_external_vector;
-
-			v->arch.pending_external_vector = -1;
-			return vector;
-		} else
-			return kvm_pic_read_irq(v->kvm); /* PIC */
-	} else
+	if (!kvm_cpu_has_extint(v)) {
+		WARN_ON(!lapic_in_kernel(v));
 		return -1;
+	}
+
+	if (!lapic_in_kernel(v))
+		return v->arch.interrupt.nr;
+
+	if (irqchip_split(v->kvm)) {
+		int vector = v->arch.pending_external_vector;
+
+		v->arch.pending_external_vector = -1;
+		return vector;
+	} else
+		return kvm_pic_read_irq(v->kvm); /* PIC */
 }
 
 /*
@@ -135,13 +124,7 @@ static int kvm_cpu_get_extint(struct kvm_vcpu *v)
  */
 int kvm_cpu_get_interrupt(struct kvm_vcpu *v)
 {
-	int vector;
-
-	if (!lapic_in_kernel(v))
-		return v->arch.interrupt.nr;
-
-	vector = kvm_cpu_get_extint(v);
-
+	int vector = kvm_cpu_get_extint(v);
 	if (vector != -1)
 		return vector;			/* PIC */
 
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 105e7859d1f2..86c33d53c90a 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -2465,7 +2465,7 @@ int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu)
 	struct kvm_lapic *apic = vcpu->arch.apic;
 	u32 ppr;
 
-	if (!kvm_apic_hw_enabled(apic))
+	if (!kvm_apic_present(vcpu))
 		return -1;
 
 	__apic_update_ppr(apic, &ppr);
-- 
cgit v1.2.3-70-g09d2


From 71cc849b7093bb83af966c0e60cb11b7f35cd746 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 27 Nov 2020 09:18:20 +0100
Subject: KVM: x86: Fix split-irqchip vs interrupt injection window request

kvm_cpu_accept_dm_intr and kvm_vcpu_ready_for_interrupt_injection are
a hodge-podge of conditions, hacked together to get something that
more or less works.  But what is actually needed is much simpler;
in both cases the fundamental question is, do we have a place to stash
an interrupt if userspace does KVM_INTERRUPT?

In userspace irqchip mode, that is !vcpu->arch.interrupt.injected.
Currently kvm_event_needs_reinjection(vcpu) covers it, but it is
unnecessarily restrictive.

In split irqchip mode it's a bit more complicated, we need to check
kvm_apic_accept_pic_intr(vcpu) (the IRQ window exit is basically an INTACK
cycle and thus requires ExtINTs not to be masked) as well as
!pending_userspace_extint(vcpu).  However, there is no need to
check kvm_event_needs_reinjection(vcpu), since split irqchip keeps
pending ExtINT state separate from event injection state, and checking
kvm_cpu_has_interrupt(vcpu) is wrong too since ExtINT has higher
priority than APIC interrupts.  In fact the latter fixes a bug:
when userspace requests an IRQ window vmexit, an interrupt in the
local APIC can cause kvm_cpu_has_interrupt() to be true and thus
kvm_vcpu_ready_for_interrupt_injection() to return false.  When this
happens, vcpu_run does not exit to userspace but the interrupt window
vmexits keep occurring.  The VM loops without any hope of making progress.

Once we try to fix these with something like

     return kvm_arch_interrupt_allowed(vcpu) &&
-        !kvm_cpu_has_interrupt(vcpu) &&
-        !kvm_event_needs_reinjection(vcpu) &&
-        kvm_cpu_accept_dm_intr(vcpu);
+        (!lapic_in_kernel(vcpu)
+         ? !vcpu->arch.interrupt.injected
+         : (kvm_apic_accept_pic_intr(vcpu)
+            && !pending_userspace_extint(v)));

we realize two things.  First, thanks to the previous patch the complex
conditional can reuse !kvm_cpu_has_extint(vcpu).  Second, the interrupt
window request in vcpu_enter_guest()

        bool req_int_win =
                dm_request_for_irq_injection(vcpu) &&
                kvm_cpu_accept_dm_intr(vcpu);

should be kept in sync with kvm_vcpu_ready_for_interrupt_injection():
it is unnecessary to ask the processor for an interrupt window
if we would not be able to return to userspace.  Therefore,
kvm_cpu_accept_dm_intr(vcpu) is basically !kvm_cpu_has_extint(vcpu)
ANDed with the existing check for masked ExtINT.  It all makes sense:

- we can accept an interrupt from userspace if there is a place
  to stash it (and, for irqchip split, ExtINTs are not masked).
  Interrupts from userspace _can_ be accepted even if right now
  EFLAGS.IF=0.

- in order to tell userspace we will inject its interrupt ("IRQ
  window open" i.e. kvm_vcpu_ready_for_interrupt_injection), both
  KVM and the vCPU need to be ready to accept the interrupt.

... and this is what the patch implements.

Reported-by: David Woodhouse <dwmw@amazon.co.uk>
Analyzed-by: David Woodhouse <dwmw@amazon.co.uk>
Cc: stable@vger.kernel.org
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Nikos Tsironis <ntsironis@arrikto.com>
Reviewed-by: David Woodhouse <dwmw@amazon.co.uk>
Tested-by: David Woodhouse <dwmw@amazon.co.uk>
---
 arch/x86/include/asm/kvm_host.h |  1 +
 arch/x86/kvm/irq.c              |  2 +-
 arch/x86/kvm/x86.c              | 18 ++++++++++--------
 3 files changed, 12 insertions(+), 9 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 324ddd7fd0aa..7e5f33a0d0e2 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1656,6 +1656,7 @@ int kvm_test_age_hva(struct kvm *kvm, unsigned long hva);
 int kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
 int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v);
 int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu);
+int kvm_cpu_has_extint(struct kvm_vcpu *v);
 int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu);
 int kvm_cpu_get_interrupt(struct kvm_vcpu *v);
 void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event);
diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c
index ee94671272e3..814698e5b152 100644
--- a/arch/x86/kvm/irq.c
+++ b/arch/x86/kvm/irq.c
@@ -40,7 +40,7 @@ static int pending_userspace_extint(struct kvm_vcpu *v)
  * check if there is pending interrupt from
  * non-APIC source without intack.
  */
-static int kvm_cpu_has_extint(struct kvm_vcpu *v)
+int kvm_cpu_has_extint(struct kvm_vcpu *v)
 {
 	/*
 	 * FIXME: interrupt.injected represents an interrupt whose
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 078a39d489fe..e545a8a613b1 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4051,21 +4051,23 @@ static int kvm_vcpu_ioctl_set_lapic(struct kvm_vcpu *vcpu,
 
 static int kvm_cpu_accept_dm_intr(struct kvm_vcpu *vcpu)
 {
+	/*
+	 * We can accept userspace's request for interrupt injection
+	 * as long as we have a place to store the interrupt number.
+	 * The actual injection will happen when the CPU is able to
+	 * deliver the interrupt.
+	 */
+	if (kvm_cpu_has_extint(vcpu))
+		return false;
+
+	/* Acknowledging ExtINT does not happen if LINT0 is masked.  */
 	return (!lapic_in_kernel(vcpu) ||
 		kvm_apic_accept_pic_intr(vcpu));
 }
 
-/*
- * if userspace requested an interrupt window, check that the
- * interrupt window is open.
- *
- * No need to exit to userspace if we already have an interrupt queued.
- */
 static int kvm_vcpu_ready_for_interrupt_injection(struct kvm_vcpu *vcpu)
 {
 	return kvm_arch_interrupt_allowed(vcpu) &&
-		!kvm_cpu_has_interrupt(vcpu) &&
-		!kvm_event_needs_reinjection(vcpu) &&
 		kvm_cpu_accept_dm_intr(vcpu);
 }
 
-- 
cgit v1.2.3-70-g09d2


From 9a2a0d3ca163fc645991804b8b032f7d59326bb5 Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Thu, 26 Nov 2020 12:02:06 +0100
Subject: kvm: x86/mmu: Fix get_mmio_spte() on CPUs supporting 5-level PT

Commit 95fb5b0258b7 ("kvm: x86/mmu: Support MMIO in the TDP MMU") caused
the following WARNING on an Intel Ice Lake CPU:

 get_mmio_spte: detect reserved bits on spte, addr 0xb80a0, dump hierarchy:
 ------ spte 0xb80a0 level 5.
 ------ spte 0xfcd210107 level 4.
 ------ spte 0x1004c40107 level 3.
 ------ spte 0x1004c41107 level 2.
 ------ spte 0x1db00000000b83b6 level 1.
 WARNING: CPU: 109 PID: 10254 at arch/x86/kvm/mmu/mmu.c:3569 kvm_mmu_page_fault.cold.150+0x54/0x22f [kvm]
...
 Call Trace:
  ? kvm_io_bus_get_first_dev+0x55/0x110 [kvm]
  vcpu_enter_guest+0xaa1/0x16a0 [kvm]
  ? vmx_get_cs_db_l_bits+0x17/0x30 [kvm_intel]
  ? skip_emulated_instruction+0xaa/0x150 [kvm_intel]
  kvm_arch_vcpu_ioctl_run+0xca/0x520 [kvm]

The guest triggering this crashes. Note, this happens with the traditional
MMU and EPT enabled, not with the newly introduced TDP MMU. Turns out,
there was a subtle change in the above mentioned commit. Previously,
walk_shadow_page_get_mmio_spte() was setting 'root' to 'iterator.level'
which is returned by shadow_walk_init() and this equals to
'vcpu->arch.mmu->shadow_root_level'. Now, get_mmio_spte() sets it to
'int root = vcpu->arch.mmu->root_level'.

The difference between 'root_level' and 'shadow_root_level' on CPUs
supporting 5-level page tables is that in some case we don't want to
use 5-level, in particular when 'cpuid_maxphyaddr(vcpu) <= 48'
kvm_mmu_get_tdp_level() returns '4'. In case upper layer is not used,
the corresponding SPTE will fail '__is_rsvd_bits_set()' check.

Revert to using 'shadow_root_level'.

Fixes: 95fb5b0258b7 ("kvm: x86/mmu: Support MMIO in the TDP MMU")
Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Message-Id: <20201126110206.2118959-1-vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/mmu/mmu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 5bb1939b65d8..7a6ae9e90bd7 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -3517,7 +3517,7 @@ static bool get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep)
 {
 	u64 sptes[PT64_ROOT_MAX_LEVEL];
 	struct rsvd_bits_validate *rsvd_check;
-	int root = vcpu->arch.mmu->root_level;
+	int root = vcpu->arch.mmu->shadow_root_level;
 	int leaf;
 	int level;
 	bool reserved = false;
-- 
cgit v1.2.3-70-g09d2


From 25bc65d8ddfc17cc1d7a45bd48e9bdc0e729ced3 Mon Sep 17 00:00:00 2001
From: Gabriele Paoloni <gabriele.paoloni@intel.com>
Date: Fri, 27 Nov 2020 16:18:15 +0000
Subject: x86/mce: Do not overwrite no_way_out if mce_end() fails

Currently, if mce_end() fails, no_way_out - the variable denoting
whether the machine can recover from this MCE - is determined by whether
the worst severity that was found across the MCA banks associated with
the current CPU, is of panic severity.

However, at this point no_way_out could have been already set by
mca_start() after looking at all severities of all CPUs that entered the
MCE handler. If mce_end() fails, check first if no_way_out is already
set and, if so, stick to it, otherwise use the local worst value.

 [ bp: Massage. ]

Signed-off-by: Gabriele Paoloni <gabriele.paoloni@intel.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Tony Luck <tony.luck@intel.com>
Cc: <stable@vger.kernel.org>
Link: https://lkml.kernel.org/r/20201127161819.3106432-2-gabriele.paoloni@intel.com
---
 arch/x86/kernel/cpu/mce/core.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index 4102b866e7c0..32b7099e3511 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -1384,8 +1384,10 @@ noinstr void do_machine_check(struct pt_regs *regs)
 	 * When there's any problem use only local no_way_out state.
 	 */
 	if (!lmce) {
-		if (mce_end(order) < 0)
-			no_way_out = worst >= MCE_PANIC_SEVERITY;
+		if (mce_end(order) < 0) {
+			if (!no_way_out)
+				no_way_out = worst >= MCE_PANIC_SEVERITY;
+		}
 	} else {
 		/*
 		 * If there was a fatal machine check we should have
-- 
cgit v1.2.3-70-g09d2


From 9ea69a55b3b9a71cded9726af591949c1138f235 Mon Sep 17 00:00:00 2001
From: Laurent Vivier <lvivier@redhat.com>
Date: Thu, 26 Nov 2020 09:28:52 +0100
Subject: powerpc/pseries: Pass MSI affinity to irq_create_mapping()

With virtio multiqueue, normally each queue IRQ is mapped to a CPU.

Commit 0d9f0a52c8b9f ("virtio_scsi: use virtio IRQ affinity") exposed
an existing shortcoming of the arch code by moving virtio_scsi to
the automatic IRQ affinity assignment.

The affinity is correctly computed in msi_desc but this is not applied
to the system IRQs.

It appears the affinity is correctly passed to rtas_setup_msi_irqs() but
lost at this point and never passed to irq_domain_alloc_descs()
(see commit 06ee6d571f0e ("genirq: Add affinity hint to irq allocation"))
because irq_create_mapping() doesn't take an affinity parameter.

Use the new irq_create_mapping_affinity() function, which allows to forward
the affinity setting from rtas_setup_msi_irqs() to irq_domain_alloc_descs().

With this change, the virtqueues are correctly dispatched between the CPUs
on pseries.

Fixes: e75eafb9b039 ("genirq/msi: Switch to new irq spreading infrastructure")
Signed-off-by: Laurent Vivier <lvivier@redhat.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Greg Kurz <groug@kaod.org>
Acked-by: Michael Ellerman <mpe@ellerman.id.au>
Cc: stable@vger.kernel.org
Link: https://lore.kernel.org/r/20201126082852.1178497-3-lvivier@redhat.com
---
 arch/powerpc/platforms/pseries/msi.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/powerpc/platforms/pseries/msi.c b/arch/powerpc/platforms/pseries/msi.c
index 133f6adcb39c..b3ac2455faad 100644
--- a/arch/powerpc/platforms/pseries/msi.c
+++ b/arch/powerpc/platforms/pseries/msi.c
@@ -458,7 +458,8 @@ again:
 			return hwirq;
 		}
 
-		virq = irq_create_mapping(NULL, hwirq);
+		virq = irq_create_mapping_affinity(NULL, hwirq,
+						   entry->affinity);
 
 		if (!virq) {
 			pr_debug("rtas_msi: Failed mapping hwirq %d\n", hwirq);
-- 
cgit v1.2.3-70-g09d2


From ca1314d73eed493c49bb1932c60a8605530db2e4 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Mon, 30 Nov 2020 11:59:40 +0000
Subject: arm64: syscall: exit userspace before unmasking exceptions

In el0_svc_common() we unmask exceptions before we call user_exit(), and
so there's a window where an IRQ or debug exception can be taken while
RCU is not watching. In do_debug_exception() we account for this in via
debug_exception_{enter,exit}(), but in the el1_irq asm we do not and we
call trace functions which rely on RCU before we have a guarantee that
RCU is watching.

Let's avoid this by having el0_svc_common() exit userspace before
unmasking exceptions, matching what we do for all other EL0 entry paths.
We can use user_exit_irqoff() to avoid the pointless save/restore of IRQ
flags while we're sure exceptions are masked in DAIF.

The workaround for Cortex-A76 erratum 1463225 may trigger a debug
exception before this point, but the debug code invoked in this case is
safe even when RCU is not watching.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: James Morse <james.morse@arm.com>
Cc: Will Deacon <will@kernel.org>
Link: https://lore.kernel.org/r/20201130115950.22492-2-mark.rutland@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/kernel/syscall.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/arm64/kernel/syscall.c b/arch/arm64/kernel/syscall.c
index e4c0dadf0d92..13fe79f8e2db 100644
--- a/arch/arm64/kernel/syscall.c
+++ b/arch/arm64/kernel/syscall.c
@@ -120,8 +120,8 @@ static void el0_svc_common(struct pt_regs *regs, int scno, int sc_nr,
 	 */
 
 	cortex_a76_erratum_1463225_svc_handler();
+	user_exit_irqoff();
 	local_daif_restore(DAIF_PROCCTX);
-	user_exit();
 
 	if (system_supports_mte() && (flags & _TIF_MTE_ASYNC_FAULT)) {
 		/*
-- 
cgit v1.2.3-70-g09d2


From 114e0a684753516ef4b71ccb55a8ebcfa8735edb Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Mon, 30 Nov 2020 11:59:41 +0000
Subject: arm64: mark idle code as noinstr

Core code disables RCU when calling arch_cpu_idle(), so it's not safe
for arch_cpu_idle() or its calees to be instrumented, as the
instrumentation callbacks may attempt to use RCU or other features which
are unsafe to use in this context.

Mark them noinstr to prevent issues.

The use of local_irq_enable() in arch_cpu_idle() is similarly
problematic, and the "sched/idle: Fix arch_cpu_idle() vs tracing" patch
queued in the tip tree addresses that case.

Reported-by: Marco Elver <elver@google.com>
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: James Morse <james.morse@arm.com>
Cc: Will Deacon <will@kernel.org>
Link: https://lore.kernel.org/r/20201130115950.22492-3-mark.rutland@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/kernel/process.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'arch')

diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index a47a40ec6ad9..0c65a91f8f53 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -72,13 +72,13 @@ EXPORT_SYMBOL_GPL(pm_power_off);
 
 void (*arm_pm_restart)(enum reboot_mode reboot_mode, const char *cmd);
 
-static void __cpu_do_idle(void)
+static void noinstr __cpu_do_idle(void)
 {
 	dsb(sy);
 	wfi();
 }
 
-static void __cpu_do_idle_irqprio(void)
+static void noinstr __cpu_do_idle_irqprio(void)
 {
 	unsigned long pmr;
 	unsigned long daif_bits;
@@ -108,7 +108,7 @@ static void __cpu_do_idle_irqprio(void)
  *	ensure that interrupts are not masked at the PMR (because the core will
  *	not wake up if we block the wake up signal in the interrupt controller).
  */
-void cpu_do_idle(void)
+void noinstr cpu_do_idle(void)
 {
 	if (system_uses_irq_prio_masking())
 		__cpu_do_idle_irqprio();
@@ -119,7 +119,7 @@ void cpu_do_idle(void)
 /*
  * This is our default idle handler.
  */
-void arch_cpu_idle(void)
+void noinstr arch_cpu_idle(void)
 {
 	/*
 	 * This should do all the clock switching and wait for interrupt
-- 
cgit v1.2.3-70-g09d2


From da192676483232a0a9478c89cdddd412e5167470 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Mon, 30 Nov 2020 11:59:42 +0000
Subject: arm64: entry: mark entry code as noinstr

Functions in entry-common.c are marked as notrace and NOKPROBE_SYMBOL(),
but they're still subject to other instrumentation which may rely on
lockdep/rcu/context-tracking being up-to-date, and may cause nested
exceptions (e.g. for WARN/BUG or KASAN's use of BRK) which will corrupt
exceptions registers which have not yet been read.

Prevent this by marking all functions in entry-common.c as noinstr to
prevent compiler instrumentation. This also blacklists the functions for
tracing and kprobes, so we don't need to handle that separately.
Functions elsewhere will be dealt with in subsequent patches.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: James Morse <james.morse@arm.com>
Cc: Will Deacon <will@kernel.org>
Link: https://lore.kernel.org/r/20201130115950.22492-4-mark.rutland@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/kernel/entry-common.c | 75 ++++++++++++++--------------------------
 1 file changed, 25 insertions(+), 50 deletions(-)

(limited to 'arch')

diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c
index 43d4c329775f..75e99161f79e 100644
--- a/arch/arm64/kernel/entry-common.c
+++ b/arch/arm64/kernel/entry-common.c
@@ -17,7 +17,7 @@
 #include <asm/mmu.h>
 #include <asm/sysreg.h>
 
-static void notrace el1_abort(struct pt_regs *regs, unsigned long esr)
+static void noinstr el1_abort(struct pt_regs *regs, unsigned long esr)
 {
 	unsigned long far = read_sysreg(far_el1);
 
@@ -25,32 +25,28 @@ static void notrace el1_abort(struct pt_regs *regs, unsigned long esr)
 	far = untagged_addr(far);
 	do_mem_abort(far, esr, regs);
 }
-NOKPROBE_SYMBOL(el1_abort);
 
-static void notrace el1_pc(struct pt_regs *regs, unsigned long esr)
+static void noinstr el1_pc(struct pt_regs *regs, unsigned long esr)
 {
 	unsigned long far = read_sysreg(far_el1);
 
 	local_daif_inherit(regs);
 	do_sp_pc_abort(far, esr, regs);
 }
-NOKPROBE_SYMBOL(el1_pc);
 
-static void notrace el1_undef(struct pt_regs *regs)
+static void noinstr el1_undef(struct pt_regs *regs)
 {
 	local_daif_inherit(regs);
 	do_undefinstr(regs);
 }
-NOKPROBE_SYMBOL(el1_undef);
 
-static void notrace el1_inv(struct pt_regs *regs, unsigned long esr)
+static void noinstr el1_inv(struct pt_regs *regs, unsigned long esr)
 {
 	local_daif_inherit(regs);
 	bad_mode(regs, 0, esr);
 }
-NOKPROBE_SYMBOL(el1_inv);
 
-static void notrace el1_dbg(struct pt_regs *regs, unsigned long esr)
+static void noinstr el1_dbg(struct pt_regs *regs, unsigned long esr)
 {
 	unsigned long far = read_sysreg(far_el1);
 
@@ -64,16 +60,14 @@ static void notrace el1_dbg(struct pt_regs *regs, unsigned long esr)
 
 	do_debug_exception(far, esr, regs);
 }
-NOKPROBE_SYMBOL(el1_dbg);
 
-static void notrace el1_fpac(struct pt_regs *regs, unsigned long esr)
+static void noinstr el1_fpac(struct pt_regs *regs, unsigned long esr)
 {
 	local_daif_inherit(regs);
 	do_ptrauth_fault(regs, esr);
 }
-NOKPROBE_SYMBOL(el1_fpac);
 
-asmlinkage void notrace el1_sync_handler(struct pt_regs *regs)
+asmlinkage void noinstr el1_sync_handler(struct pt_regs *regs)
 {
 	unsigned long esr = read_sysreg(esr_el1);
 
@@ -106,9 +100,8 @@ asmlinkage void notrace el1_sync_handler(struct pt_regs *regs)
 		el1_inv(regs, esr);
 	}
 }
-NOKPROBE_SYMBOL(el1_sync_handler);
 
-static void notrace el0_da(struct pt_regs *regs, unsigned long esr)
+static void noinstr el0_da(struct pt_regs *regs, unsigned long esr)
 {
 	unsigned long far = read_sysreg(far_el1);
 
@@ -117,9 +110,8 @@ static void notrace el0_da(struct pt_regs *regs, unsigned long esr)
 	far = untagged_addr(far);
 	do_mem_abort(far, esr, regs);
 }
-NOKPROBE_SYMBOL(el0_da);
 
-static void notrace el0_ia(struct pt_regs *regs, unsigned long esr)
+static void noinstr el0_ia(struct pt_regs *regs, unsigned long esr)
 {
 	unsigned long far = read_sysreg(far_el1);
 
@@ -135,41 +127,36 @@ static void notrace el0_ia(struct pt_regs *regs, unsigned long esr)
 	local_daif_restore(DAIF_PROCCTX);
 	do_mem_abort(far, esr, regs);
 }
-NOKPROBE_SYMBOL(el0_ia);
 
-static void notrace el0_fpsimd_acc(struct pt_regs *regs, unsigned long esr)
+static void noinstr el0_fpsimd_acc(struct pt_regs *regs, unsigned long esr)
 {
 	user_exit_irqoff();
 	local_daif_restore(DAIF_PROCCTX);
 	do_fpsimd_acc(esr, regs);
 }
-NOKPROBE_SYMBOL(el0_fpsimd_acc);
 
-static void notrace el0_sve_acc(struct pt_regs *regs, unsigned long esr)
+static void noinstr el0_sve_acc(struct pt_regs *regs, unsigned long esr)
 {
 	user_exit_irqoff();
 	local_daif_restore(DAIF_PROCCTX);
 	do_sve_acc(esr, regs);
 }
-NOKPROBE_SYMBOL(el0_sve_acc);
 
-static void notrace el0_fpsimd_exc(struct pt_regs *regs, unsigned long esr)
+static void noinstr el0_fpsimd_exc(struct pt_regs *regs, unsigned long esr)
 {
 	user_exit_irqoff();
 	local_daif_restore(DAIF_PROCCTX);
 	do_fpsimd_exc(esr, regs);
 }
-NOKPROBE_SYMBOL(el0_fpsimd_exc);
 
-static void notrace el0_sys(struct pt_regs *regs, unsigned long esr)
+static void noinstr el0_sys(struct pt_regs *regs, unsigned long esr)
 {
 	user_exit_irqoff();
 	local_daif_restore(DAIF_PROCCTX);
 	do_sysinstr(esr, regs);
 }
-NOKPROBE_SYMBOL(el0_sys);
 
-static void notrace el0_pc(struct pt_regs *regs, unsigned long esr)
+static void noinstr el0_pc(struct pt_regs *regs, unsigned long esr)
 {
 	unsigned long far = read_sysreg(far_el1);
 
@@ -180,41 +167,36 @@ static void notrace el0_pc(struct pt_regs *regs, unsigned long esr)
 	local_daif_restore(DAIF_PROCCTX);
 	do_sp_pc_abort(far, esr, regs);
 }
-NOKPROBE_SYMBOL(el0_pc);
 
-static void notrace el0_sp(struct pt_regs *regs, unsigned long esr)
+static void noinstr el0_sp(struct pt_regs *regs, unsigned long esr)
 {
 	user_exit_irqoff();
 	local_daif_restore(DAIF_PROCCTX);
 	do_sp_pc_abort(regs->sp, esr, regs);
 }
-NOKPROBE_SYMBOL(el0_sp);
 
-static void notrace el0_undef(struct pt_regs *regs)
+static void noinstr el0_undef(struct pt_regs *regs)
 {
 	user_exit_irqoff();
 	local_daif_restore(DAIF_PROCCTX);
 	do_undefinstr(regs);
 }
-NOKPROBE_SYMBOL(el0_undef);
 
-static void notrace el0_bti(struct pt_regs *regs)
+static void noinstr el0_bti(struct pt_regs *regs)
 {
 	user_exit_irqoff();
 	local_daif_restore(DAIF_PROCCTX);
 	do_bti(regs);
 }
-NOKPROBE_SYMBOL(el0_bti);
 
-static void notrace el0_inv(struct pt_regs *regs, unsigned long esr)
+static void noinstr el0_inv(struct pt_regs *regs, unsigned long esr)
 {
 	user_exit_irqoff();
 	local_daif_restore(DAIF_PROCCTX);
 	bad_el0_sync(regs, 0, esr);
 }
-NOKPROBE_SYMBOL(el0_inv);
 
-static void notrace el0_dbg(struct pt_regs *regs, unsigned long esr)
+static void noinstr el0_dbg(struct pt_regs *regs, unsigned long esr)
 {
 	/* Only watchpoints write FAR_EL1, otherwise its UNKNOWN */
 	unsigned long far = read_sysreg(far_el1);
@@ -226,26 +208,23 @@ static void notrace el0_dbg(struct pt_regs *regs, unsigned long esr)
 	do_debug_exception(far, esr, regs);
 	local_daif_restore(DAIF_PROCCTX_NOIRQ);
 }
-NOKPROBE_SYMBOL(el0_dbg);
 
-static void notrace el0_svc(struct pt_regs *regs)
+static void noinstr el0_svc(struct pt_regs *regs)
 {
 	if (system_uses_irq_prio_masking())
 		gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET);
 
 	do_el0_svc(regs);
 }
-NOKPROBE_SYMBOL(el0_svc);
 
-static void notrace el0_fpac(struct pt_regs *regs, unsigned long esr)
+static void noinstr el0_fpac(struct pt_regs *regs, unsigned long esr)
 {
 	user_exit_irqoff();
 	local_daif_restore(DAIF_PROCCTX);
 	do_ptrauth_fault(regs, esr);
 }
-NOKPROBE_SYMBOL(el0_fpac);
 
-asmlinkage void notrace el0_sync_handler(struct pt_regs *regs)
+asmlinkage void noinstr el0_sync_handler(struct pt_regs *regs)
 {
 	unsigned long esr = read_sysreg(esr_el1);
 
@@ -297,27 +276,24 @@ asmlinkage void notrace el0_sync_handler(struct pt_regs *regs)
 		el0_inv(regs, esr);
 	}
 }
-NOKPROBE_SYMBOL(el0_sync_handler);
 
 #ifdef CONFIG_COMPAT
-static void notrace el0_cp15(struct pt_regs *regs, unsigned long esr)
+static void noinstr el0_cp15(struct pt_regs *regs, unsigned long esr)
 {
 	user_exit_irqoff();
 	local_daif_restore(DAIF_PROCCTX);
 	do_cp15instr(esr, regs);
 }
-NOKPROBE_SYMBOL(el0_cp15);
 
-static void notrace el0_svc_compat(struct pt_regs *regs)
+static void noinstr el0_svc_compat(struct pt_regs *regs)
 {
 	if (system_uses_irq_prio_masking())
 		gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET);
 
 	do_el0_svc_compat(regs);
 }
-NOKPROBE_SYMBOL(el0_svc_compat);
 
-asmlinkage void notrace el0_sync_compat_handler(struct pt_regs *regs)
+asmlinkage void noinstr el0_sync_compat_handler(struct pt_regs *regs)
 {
 	unsigned long esr = read_sysreg(esr_el1);
 
@@ -360,5 +336,4 @@ asmlinkage void notrace el0_sync_compat_handler(struct pt_regs *regs)
 		el0_inv(regs, esr);
 	}
 }
-NOKPROBE_SYMBOL(el0_sync_compat_handler);
 #endif /* CONFIG_COMPAT */
-- 
cgit v1.2.3-70-g09d2


From 2f911d494f3f028bbe6346e383a354225682cf1b Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Mon, 30 Nov 2020 11:59:43 +0000
Subject: arm64: entry: move enter_from_user_mode to entry-common.c

In later patches we'll want to extend enter_from_user_mode() and add a
corresponding exit_to_user_mode(). As these will be common for all
entries/exits from userspace, it'd be better for these to live in
entry-common.c with the rest of the entry logic.

This patch moves enter_from_user_mode() into entry-common.c. As with
other functions in entry-common.c it is marked as noinstr (which
prevents all instrumentation, tracing, and kprobes) but there are no
other functional changes.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: James Morse <james.morse@arm.com>
Cc: Will Deacon <will@kernel.org>
Link: https://lore.kernel.org/r/20201130115950.22492-5-mark.rutland@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/kernel/entry-common.c | 6 ++++++
 arch/arm64/kernel/traps.c        | 7 -------
 2 files changed, 6 insertions(+), 7 deletions(-)

(limited to 'arch')

diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c
index 75e99161f79e..9a685e7686fe 100644
--- a/arch/arm64/kernel/entry-common.c
+++ b/arch/arm64/kernel/entry-common.c
@@ -101,6 +101,12 @@ asmlinkage void noinstr el1_sync_handler(struct pt_regs *regs)
 	}
 }
 
+asmlinkage void noinstr enter_from_user_mode(void)
+{
+	CT_WARN_ON(ct_state() != CONTEXT_USER);
+	user_exit_irqoff();
+}
+
 static void noinstr el0_da(struct pt_regs *regs, unsigned long esr)
 {
 	unsigned long far = read_sysreg(far_el1);
diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
index 8af4e0e85736..580c60afc39a 100644
--- a/arch/arm64/kernel/traps.c
+++ b/arch/arm64/kernel/traps.c
@@ -876,13 +876,6 @@ asmlinkage void do_serror(struct pt_regs *regs, unsigned int esr)
 	nmi_exit();
 }
 
-asmlinkage void enter_from_user_mode(void)
-{
-	CT_WARN_ON(ct_state() != CONTEXT_USER);
-	user_exit_irqoff();
-}
-NOKPROBE_SYMBOL(enter_from_user_mode);
-
 /* GENERIC_BUG traps */
 
 int is_valid_bugaddr(unsigned long addr)
-- 
cgit v1.2.3-70-g09d2


From 3cb5ed4d76c15fb97c10e5e9f5268d92c68222ca Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Mon, 30 Nov 2020 11:59:44 +0000
Subject: arm64: entry: prepare ret_to_user for function call

In a subsequent patch ret_to_user will need to make a C function call
(in some configurations) which may clobber x0-x18 at the start of the
finish_ret_to_user block, before enable_step_tsk consumes the flags
loaded into x1.

In preparation for this, let's load the flags into x19, which is
preserved across C function calls. This avoids a redundant reload of the
flags and ensures we operate on a consistent shapshot regardless.

There should be no functional change as a result of this patch. At this
point of the entry/exit paths we only need to preserve x28 (tsk) and the
sp, and x19 is free for this use.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: James Morse <james.morse@arm.com>
Cc: Will Deacon <will@kernel.org>
Link: https://lore.kernel.org/r/20201130115950.22492-6-mark.rutland@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/kernel/entry.S | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

(limited to 'arch')

diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index b295fb912b12..84aec600eeed 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -774,13 +774,13 @@ SYM_CODE_END(el0_error)
 SYM_CODE_START_LOCAL(ret_to_user)
 	disable_daif
 	gic_prio_kentry_setup tmp=x3
-	ldr	x1, [tsk, #TSK_TI_FLAGS]
-	and	x2, x1, #_TIF_WORK_MASK
+	ldr	x19, [tsk, #TSK_TI_FLAGS]
+	and	x2, x19, #_TIF_WORK_MASK
 	cbnz	x2, work_pending
 finish_ret_to_user:
 	/* Ignore asynchronous tag check faults in the uaccess routines */
 	clear_mte_async_tcf
-	enable_step_tsk x1, x2
+	enable_step_tsk x19, x2
 #ifdef CONFIG_GCC_PLUGIN_STACKLEAK
 	bl	stackleak_erase
 #endif
@@ -791,11 +791,12 @@ finish_ret_to_user:
  */
 work_pending:
 	mov	x0, sp				// 'regs'
+	mov	x1, x19
 	bl	do_notify_resume
 #ifdef CONFIG_TRACE_IRQFLAGS
 	bl	trace_hardirqs_on		// enabled while in userspace
 #endif
-	ldr	x1, [tsk, #TSK_TI_FLAGS]	// re-check for single-step
+	ldr	x19, [tsk, #TSK_TI_FLAGS]	// re-check for single-step
 	b	finish_ret_to_user
 SYM_CODE_END(ret_to_user)
 
-- 
cgit v1.2.3-70-g09d2


From 105fc3352077bba5faaf12cf39f7e3aad26fb70b Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Mon, 30 Nov 2020 11:59:45 +0000
Subject: arm64: entry: move el1 irq/nmi logic to C

In preparation for reworking the EL1 irq/nmi entry code, move the
existing logic to C. We no longer need the asm_nmi_enter() and
asm_nmi_exit() wrappers, so these are removed. The new C functions are
marked noinstr, which prevents compiler instrumentation and runtime
probing.

In subsequent patches we'll want the new C helpers to be called in all
cases, so we don't bother wrapping the calls with ifdeferry. Even when
the new C functions are stubs the trivial calls are unlikely to have a
measurable impact on the IRQ or NMI paths anyway.

Prototypes are added to <asm/exception.h> as otherwise (in some
configurations) GCC will complain about the lack of a forward
declaration. We already do this for existing function, e.g.
enter_from_user_mode().

The new helpers are marked as noinstr (which prevents all
instrumentation, tracing, and kprobes). Otherwise, there should be no
functional change as a result of this patch.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: James Morse <james.morse@arm.com>
Cc: Will Deacon <will@kernel.org>
Link: https://lore.kernel.org/r/20201130115950.22492-7-mark.rutland@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/exception.h |  2 ++
 arch/arm64/kernel/entry-common.c   | 16 ++++++++++++++++
 arch/arm64/kernel/entry.S          | 34 ++++------------------------------
 arch/arm64/kernel/irq.c            | 15 ---------------
 4 files changed, 22 insertions(+), 45 deletions(-)

(limited to 'arch')

diff --git a/arch/arm64/include/asm/exception.h b/arch/arm64/include/asm/exception.h
index 99b9383cd036..d69d53dd7be7 100644
--- a/arch/arm64/include/asm/exception.h
+++ b/arch/arm64/include/asm/exception.h
@@ -31,6 +31,8 @@ static inline u32 disr_to_esr(u64 disr)
 	return esr;
 }
 
+asmlinkage void noinstr enter_el1_irq_or_nmi(struct pt_regs *regs);
+asmlinkage void noinstr exit_el1_irq_or_nmi(struct pt_regs *regs);
 asmlinkage void enter_from_user_mode(void);
 void do_mem_abort(unsigned long addr, unsigned int esr, struct pt_regs *regs);
 void do_undefinstr(struct pt_regs *regs);
diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c
index 9a685e7686fe..920da254be1d 100644
--- a/arch/arm64/kernel/entry-common.c
+++ b/arch/arm64/kernel/entry-common.c
@@ -17,6 +17,22 @@
 #include <asm/mmu.h>
 #include <asm/sysreg.h>
 
+asmlinkage void noinstr enter_el1_irq_or_nmi(struct pt_regs *regs)
+{
+	if (IS_ENABLED(CONFIG_ARM64_PSEUDO_NMI) && !interrupts_enabled(regs))
+		nmi_enter();
+
+	trace_hardirqs_off();
+}
+
+asmlinkage void noinstr exit_el1_irq_or_nmi(struct pt_regs *regs)
+{
+	if (IS_ENABLED(CONFIG_ARM64_PSEUDO_NMI) && !interrupts_enabled(regs))
+		nmi_exit();
+	else
+		trace_hardirqs_on();
+}
+
 static void noinstr el1_abort(struct pt_regs *regs, unsigned long esr)
 {
 	unsigned long far = read_sysreg(far_el1);
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 84aec600eeed..53e30750fc28 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -637,16 +637,8 @@ SYM_CODE_START_LOCAL_NOALIGN(el1_irq)
 	gic_prio_irq_setup pmr=x20, tmp=x1
 	enable_da_f
 
-#ifdef CONFIG_ARM64_PSEUDO_NMI
-	test_irqs_unmasked	res=x0, pmr=x20
-	cbz	x0, 1f
-	bl	asm_nmi_enter
-1:
-#endif
-
-#ifdef CONFIG_TRACE_IRQFLAGS
-	bl	trace_hardirqs_off
-#endif
+	mov	x0, sp
+	bl	enter_el1_irq_or_nmi
 
 	irq_handler
 
@@ -665,26 +657,8 @@ alternative_else_nop_endif
 1:
 #endif
 
-#ifdef CONFIG_ARM64_PSEUDO_NMI
-	/*
-	 * When using IRQ priority masking, we can get spurious interrupts while
-	 * PMR is set to GIC_PRIO_IRQOFF. An NMI might also have occurred in a
-	 * section with interrupts disabled. Skip tracing in those cases.
-	 */
-	test_irqs_unmasked	res=x0, pmr=x20
-	cbz	x0, 1f
-	bl	asm_nmi_exit
-1:
-#endif
-
-#ifdef CONFIG_TRACE_IRQFLAGS
-#ifdef CONFIG_ARM64_PSEUDO_NMI
-	test_irqs_unmasked	res=x0, pmr=x20
-	cbnz	x0, 1f
-#endif
-	bl	trace_hardirqs_on
-1:
-#endif
+	mov	x0, sp
+	bl	exit_el1_irq_or_nmi
 
 	kernel_exit 1
 SYM_CODE_END(el1_irq)
diff --git a/arch/arm64/kernel/irq.c b/arch/arm64/kernel/irq.c
index 9cf2fb87584a..60456a62da11 100644
--- a/arch/arm64/kernel/irq.c
+++ b/arch/arm64/kernel/irq.c
@@ -67,18 +67,3 @@ void __init init_IRQ(void)
 		local_daif_restore(DAIF_PROCCTX_NOIRQ);
 	}
 }
-
-/*
- * Stubs to make nmi_enter/exit() code callable from ASM
- */
-asmlinkage void notrace asm_nmi_enter(void)
-{
-	nmi_enter();
-}
-NOKPROBE_SYMBOL(asm_nmi_enter);
-
-asmlinkage void notrace asm_nmi_exit(void)
-{
-	nmi_exit();
-}
-NOKPROBE_SYMBOL(asm_nmi_exit);
-- 
cgit v1.2.3-70-g09d2


From 23529049c68423820487304f244144e0d576e85a Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Mon, 30 Nov 2020 11:59:46 +0000
Subject: arm64: entry: fix non-NMI user<->kernel transitions

When built with PROVE_LOCKING, NO_HZ_FULL, and CONTEXT_TRACKING_FORCE
will WARN() at boot time that interrupts are enabled when we call
context_tracking_user_enter(), despite the DAIF flags indicating that
IRQs are masked.

The problem is that we're not tracking IRQ flag changes accurately, and
so lockdep believes interrupts are enabled when they are not (and
vice-versa). We can shuffle things so to make this more accurate. For
kernel->user transitions there are a number of constraints we need to
consider:

1) When we call __context_tracking_user_enter() HW IRQs must be disabled
   and lockdep must be up-to-date with this.

2) Userspace should be treated as having IRQs enabled from the PoV of
   both lockdep and tracing.

3) As context_tracking_user_enter() stops RCU from watching, we cannot
   use RCU after calling it.

4) IRQ flag tracing and lockdep have state that must be manipulated
   before RCU is disabled.

... with similar constraints applying for user->kernel transitions, with
the ordering reversed.

The generic entry code has enter_from_user_mode() and
exit_to_user_mode() helpers to handle this. We can't use those directly,
so we add arm64 copies for now (without the instrumentation markers
which aren't used on arm64). These replace the existing user_exit() and
user_exit_irqoff() calls spread throughout handlers, and the exception
unmasking is left as-is.

Note that:

* The accounting for debug exceptions from userspace now happens in
  el0_dbg() and ret_to_user(), so this is removed from
  debug_exception_enter() and debug_exception_exit(). As
  user_exit_irqoff() wakes RCU, the userspace-specific check is removed.

* The accounting for syscalls now happens in el0_svc(),
  el0_svc_compat(), and ret_to_user(), so this is removed from
  el0_svc_common(). This does not adversely affect the workaround for
  erratum 1463225, as this does not depend on any of the state tracking.

* In ret_to_user() we mask interrupts with local_daif_mask(), and so we
  need to inform lockdep and tracing. Here a trace_hardirqs_off() is
  sufficient and safe as we have not yet exited kernel context and RCU
  is usable.

* As PROVE_LOCKING selects TRACE_IRQFLAGS, the ifdeferry in entry.S only
  needs to check for the latter.

* EL0 SError handling will be dealt with in a subsequent patch, as this
  needs to be treated as an NMI.

Prior to this patch, booting an appropriately-configured kernel would
result in spats as below:

| DEBUG_LOCKS_WARN_ON(lockdep_hardirqs_enabled())
| WARNING: CPU: 2 PID: 1 at kernel/locking/lockdep.c:5280 check_flags.part.54+0x1dc/0x1f0
| Modules linked in:
| CPU: 2 PID: 1 Comm: init Not tainted 5.10.0-rc3 #3
| Hardware name: linux,dummy-virt (DT)
| pstate: 804003c5 (Nzcv DAIF +PAN -UAO -TCO BTYPE=--)
| pc : check_flags.part.54+0x1dc/0x1f0
| lr : check_flags.part.54+0x1dc/0x1f0
| sp : ffff80001003bd80
| x29: ffff80001003bd80 x28: ffff66ce801e0000
| x27: 00000000ffffffff x26: 00000000000003c0
| x25: 0000000000000000 x24: ffffc31842527258
| x23: ffffc31842491368 x22: ffffc3184282d000
| x21: 0000000000000000 x20: 0000000000000001
| x19: ffffc318432ce000 x18: 0080000000000000
| x17: 0000000000000000 x16: ffffc31840f18a78
| x15: 0000000000000001 x14: ffffc3184285c810
| x13: 0000000000000001 x12: 0000000000000000
| x11: ffffc318415857a0 x10: ffffc318406614c0
| x9 : ffffc318415857a0 x8 : ffffc31841f1d000
| x7 : 647261685f706564 x6 : ffffc3183ff7c66c
| x5 : ffff66ce801e0000 x4 : 0000000000000000
| x3 : ffffc3183fe00000 x2 : ffffc31841500000
| x1 : e956dc24146b3500 x0 : 0000000000000000
| Call trace:
|  check_flags.part.54+0x1dc/0x1f0
|  lock_is_held_type+0x10c/0x188
|  rcu_read_lock_sched_held+0x70/0x98
|  __context_tracking_enter+0x310/0x350
|  context_tracking_enter.part.3+0x5c/0xc8
|  context_tracking_user_enter+0x6c/0x80
|  finish_ret_to_user+0x2c/0x13cr

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: James Morse <james.morse@arm.com>
Cc: Will Deacon <will@kernel.org>
Link: https://lore.kernel.org/r/20201130115950.22492-8-mark.rutland@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/exception.h |  1 +
 arch/arm64/kernel/entry-common.c   | 40 +++++++++++++++++++++++++-------------
 arch/arm64/kernel/entry.S          | 35 +++++++++++++--------------------
 arch/arm64/kernel/syscall.c        |  1 -
 arch/arm64/mm/fault.c              | 22 ++++++++++-----------
 5 files changed, 51 insertions(+), 48 deletions(-)

(limited to 'arch')

diff --git a/arch/arm64/include/asm/exception.h b/arch/arm64/include/asm/exception.h
index d69d53dd7be7..d579b2e6db7a 100644
--- a/arch/arm64/include/asm/exception.h
+++ b/arch/arm64/include/asm/exception.h
@@ -34,6 +34,7 @@ static inline u32 disr_to_esr(u64 disr)
 asmlinkage void noinstr enter_el1_irq_or_nmi(struct pt_regs *regs);
 asmlinkage void noinstr exit_el1_irq_or_nmi(struct pt_regs *regs);
 asmlinkage void enter_from_user_mode(void);
+asmlinkage void exit_to_user_mode(void);
 void do_mem_abort(unsigned long addr, unsigned int esr, struct pt_regs *regs);
 void do_undefinstr(struct pt_regs *regs);
 void do_bti(struct pt_regs *regs);
diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c
index 920da254be1d..49d1c1dd9baf 100644
--- a/arch/arm64/kernel/entry-common.c
+++ b/arch/arm64/kernel/entry-common.c
@@ -119,15 +119,25 @@ asmlinkage void noinstr el1_sync_handler(struct pt_regs *regs)
 
 asmlinkage void noinstr enter_from_user_mode(void)
 {
+	lockdep_hardirqs_off(CALLER_ADDR0);
 	CT_WARN_ON(ct_state() != CONTEXT_USER);
 	user_exit_irqoff();
+	trace_hardirqs_off_finish();
+}
+
+asmlinkage void noinstr exit_to_user_mode(void)
+{
+	trace_hardirqs_on_prepare();
+	lockdep_hardirqs_on_prepare(CALLER_ADDR0);
+	user_enter_irqoff();
+	lockdep_hardirqs_on(CALLER_ADDR0);
 }
 
 static void noinstr el0_da(struct pt_regs *regs, unsigned long esr)
 {
 	unsigned long far = read_sysreg(far_el1);
 
-	user_exit_irqoff();
+	enter_from_user_mode();
 	local_daif_restore(DAIF_PROCCTX);
 	far = untagged_addr(far);
 	do_mem_abort(far, esr, regs);
@@ -145,35 +155,35 @@ static void noinstr el0_ia(struct pt_regs *regs, unsigned long esr)
 	if (!is_ttbr0_addr(far))
 		arm64_apply_bp_hardening();
 
-	user_exit_irqoff();
+	enter_from_user_mode();
 	local_daif_restore(DAIF_PROCCTX);
 	do_mem_abort(far, esr, regs);
 }
 
 static void noinstr el0_fpsimd_acc(struct pt_regs *regs, unsigned long esr)
 {
-	user_exit_irqoff();
+	enter_from_user_mode();
 	local_daif_restore(DAIF_PROCCTX);
 	do_fpsimd_acc(esr, regs);
 }
 
 static void noinstr el0_sve_acc(struct pt_regs *regs, unsigned long esr)
 {
-	user_exit_irqoff();
+	enter_from_user_mode();
 	local_daif_restore(DAIF_PROCCTX);
 	do_sve_acc(esr, regs);
 }
 
 static void noinstr el0_fpsimd_exc(struct pt_regs *regs, unsigned long esr)
 {
-	user_exit_irqoff();
+	enter_from_user_mode();
 	local_daif_restore(DAIF_PROCCTX);
 	do_fpsimd_exc(esr, regs);
 }
 
 static void noinstr el0_sys(struct pt_regs *regs, unsigned long esr)
 {
-	user_exit_irqoff();
+	enter_from_user_mode();
 	local_daif_restore(DAIF_PROCCTX);
 	do_sysinstr(esr, regs);
 }
@@ -185,35 +195,35 @@ static void noinstr el0_pc(struct pt_regs *regs, unsigned long esr)
 	if (!is_ttbr0_addr(instruction_pointer(regs)))
 		arm64_apply_bp_hardening();
 
-	user_exit_irqoff();
+	enter_from_user_mode();
 	local_daif_restore(DAIF_PROCCTX);
 	do_sp_pc_abort(far, esr, regs);
 }
 
 static void noinstr el0_sp(struct pt_regs *regs, unsigned long esr)
 {
-	user_exit_irqoff();
+	enter_from_user_mode();
 	local_daif_restore(DAIF_PROCCTX);
 	do_sp_pc_abort(regs->sp, esr, regs);
 }
 
 static void noinstr el0_undef(struct pt_regs *regs)
 {
-	user_exit_irqoff();
+	enter_from_user_mode();
 	local_daif_restore(DAIF_PROCCTX);
 	do_undefinstr(regs);
 }
 
 static void noinstr el0_bti(struct pt_regs *regs)
 {
-	user_exit_irqoff();
+	enter_from_user_mode();
 	local_daif_restore(DAIF_PROCCTX);
 	do_bti(regs);
 }
 
 static void noinstr el0_inv(struct pt_regs *regs, unsigned long esr)
 {
-	user_exit_irqoff();
+	enter_from_user_mode();
 	local_daif_restore(DAIF_PROCCTX);
 	bad_el0_sync(regs, 0, esr);
 }
@@ -226,7 +236,7 @@ static void noinstr el0_dbg(struct pt_regs *regs, unsigned long esr)
 	if (system_uses_irq_prio_masking())
 		gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET);
 
-	user_exit_irqoff();
+	enter_from_user_mode();
 	do_debug_exception(far, esr, regs);
 	local_daif_restore(DAIF_PROCCTX_NOIRQ);
 }
@@ -236,12 +246,13 @@ static void noinstr el0_svc(struct pt_regs *regs)
 	if (system_uses_irq_prio_masking())
 		gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET);
 
+	enter_from_user_mode();
 	do_el0_svc(regs);
 }
 
 static void noinstr el0_fpac(struct pt_regs *regs, unsigned long esr)
 {
-	user_exit_irqoff();
+	enter_from_user_mode();
 	local_daif_restore(DAIF_PROCCTX);
 	do_ptrauth_fault(regs, esr);
 }
@@ -302,7 +313,7 @@ asmlinkage void noinstr el0_sync_handler(struct pt_regs *regs)
 #ifdef CONFIG_COMPAT
 static void noinstr el0_cp15(struct pt_regs *regs, unsigned long esr)
 {
-	user_exit_irqoff();
+	enter_from_user_mode();
 	local_daif_restore(DAIF_PROCCTX);
 	do_cp15instr(esr, regs);
 }
@@ -312,6 +323,7 @@ static void noinstr el0_svc_compat(struct pt_regs *regs)
 	if (system_uses_irq_prio_masking())
 		gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET);
 
+	enter_from_user_mode();
 	do_el0_svc_compat(regs);
 }
 
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 53e30750fc28..d72c818b019c 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -30,18 +30,18 @@
 #include <asm/unistd.h>
 
 /*
- * Context tracking subsystem.  Used to instrument transitions
- * between user and kernel mode.
+ * Context tracking and irqflag tracing need to instrument transitions between
+ * user and kernel mode.
  */
-	.macro ct_user_exit_irqoff
-#ifdef CONFIG_CONTEXT_TRACKING
+	.macro user_exit_irqoff
+#if defined(CONFIG_CONTEXT_TRACKING) || defined(CONFIG_TRACE_IRQFLAGS)
 	bl	enter_from_user_mode
 #endif
 	.endm
 
-	.macro ct_user_enter
-#ifdef CONFIG_CONTEXT_TRACKING
-	bl	context_tracking_user_enter
+	.macro user_enter_irqoff
+#if defined(CONFIG_CONTEXT_TRACKING) || defined(CONFIG_TRACE_IRQFLAGS)
+	bl	exit_to_user_mode
 #endif
 	.endm
 
@@ -298,9 +298,6 @@ alternative_if ARM64_HAS_IRQ_PRIO_MASKING
 alternative_else_nop_endif
 
 	ldp	x21, x22, [sp, #S_PC]		// load ELR, SPSR
-	.if	\el == 0
-	ct_user_enter
-	.endif
 
 #ifdef CONFIG_ARM64_SW_TTBR0_PAN
 alternative_if_not ARM64_HAS_PAN
@@ -700,21 +697,14 @@ SYM_CODE_START_LOCAL_NOALIGN(el0_irq)
 	kernel_entry 0
 el0_irq_naked:
 	gic_prio_irq_setup pmr=x20, tmp=x0
-	ct_user_exit_irqoff
+	user_exit_irqoff
 	enable_da_f
 
-#ifdef CONFIG_TRACE_IRQFLAGS
-	bl	trace_hardirqs_off
-#endif
-
 	tbz	x22, #55, 1f
 	bl	do_el0_irq_bp_hardening
 1:
 	irq_handler
 
-#ifdef CONFIG_TRACE_IRQFLAGS
-	bl	trace_hardirqs_on
-#endif
 	b	ret_to_user
 SYM_CODE_END(el0_irq)
 
@@ -733,7 +723,7 @@ SYM_CODE_START_LOCAL(el0_error)
 el0_error_naked:
 	mrs	x25, esr_el1
 	gic_prio_kentry_setup tmp=x2
-	ct_user_exit_irqoff
+	user_exit_irqoff
 	enable_dbg
 	mov	x0, sp
 	mov	x1, x25
@@ -748,10 +738,14 @@ SYM_CODE_END(el0_error)
 SYM_CODE_START_LOCAL(ret_to_user)
 	disable_daif
 	gic_prio_kentry_setup tmp=x3
+#ifdef CONFIG_TRACE_IRQFLAGS
+	bl	trace_hardirqs_off
+#endif
 	ldr	x19, [tsk, #TSK_TI_FLAGS]
 	and	x2, x19, #_TIF_WORK_MASK
 	cbnz	x2, work_pending
 finish_ret_to_user:
+	user_enter_irqoff
 	/* Ignore asynchronous tag check faults in the uaccess routines */
 	clear_mte_async_tcf
 	enable_step_tsk x19, x2
@@ -767,9 +761,6 @@ work_pending:
 	mov	x0, sp				// 'regs'
 	mov	x1, x19
 	bl	do_notify_resume
-#ifdef CONFIG_TRACE_IRQFLAGS
-	bl	trace_hardirqs_on		// enabled while in userspace
-#endif
 	ldr	x19, [tsk, #TSK_TI_FLAGS]	// re-check for single-step
 	b	finish_ret_to_user
 SYM_CODE_END(ret_to_user)
diff --git a/arch/arm64/kernel/syscall.c b/arch/arm64/kernel/syscall.c
index 13fe79f8e2db..f8f758e4a306 100644
--- a/arch/arm64/kernel/syscall.c
+++ b/arch/arm64/kernel/syscall.c
@@ -120,7 +120,6 @@ static void el0_svc_common(struct pt_regs *regs, int scno, int sc_nr,
 	 */
 
 	cortex_a76_erratum_1463225_svc_handler();
-	user_exit_irqoff();
 	local_daif_restore(DAIF_PROCCTX);
 
 	if (system_supports_mte() && (flags & _TIF_MTE_ASYNC_FAULT)) {
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index 1ee94002801f..1f450b784d2c 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -789,16 +789,14 @@ void __init hook_debug_fault_code(int nr,
  */
 static void debug_exception_enter(struct pt_regs *regs)
 {
-	/*
-	 * Tell lockdep we disabled irqs in entry.S. Do nothing if they were
-	 * already disabled to preserve the last enabled/disabled addresses.
-	 */
-	if (interrupts_enabled(regs))
-		trace_hardirqs_off();
+	if (!user_mode(regs)) {
+		/*
+		 * Tell lockdep we disabled irqs in entry.S. Do nothing if they were
+		 * already disabled to preserve the last enabled/disabled addresses.
+		 */
+		if (interrupts_enabled(regs))
+			trace_hardirqs_off();
 
-	if (user_mode(regs)) {
-		RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU");
-	} else {
 		/*
 		 * We might have interrupted pretty much anything.  In
 		 * fact, if we're a debug exception, we can even interrupt
@@ -819,8 +817,10 @@ static void debug_exception_exit(struct pt_regs *regs)
 {
 	preempt_enable_no_resched();
 
-	if (!user_mode(regs))
-		rcu_nmi_exit();
+	if (user_mode(regs))
+		return;
+
+	rcu_nmi_exit();
 
 	if (interrupts_enabled(regs))
 		trace_hardirqs_on();
-- 
cgit v1.2.3-70-g09d2


From 1ec2f2c05b2ab845d068bff29bd32dbfc6a6ad4c Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Mon, 30 Nov 2020 11:59:47 +0000
Subject: arm64: ptrace: prepare for EL1 irq/rcu tracking

Exceptions from EL1 may be taken when RCU isn't watching (e.g. in idle
sequences), or when the lockdep hardirqs transiently out-of-sync with
the hardware state (e.g. in the middle of local_irq_enable()). To
correctly handle these cases, we'll need to save/restore this state
across some exceptions taken from EL1.

A series of subsequent patches will update EL1 exception handlers to
handle this. In preparation for this, and to avoid dependencies between
those patches, this patch adds two new fields to struct pt_regs so that
exception handlers can track this state.

Note that this is placed in pt_regs as some entry/exit sequences such as
el1_irq are invoked from assembly, which makes it very difficult to add
a separate structure as with the irqentry_state used by x86. We can
separate this once more of the exception logic is moved to C. While the
fields only need to be bool, they are both made u64 to keep pt_regs
16-byte aligned.

There should be no functional change as a result of this patch.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: James Morse <james.morse@arm.com>
Cc: Will Deacon <will@kernel.org>
Link: https://lore.kernel.org/r/20201130115950.22492-9-mark.rutland@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/ptrace.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'arch')

diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h
index 997cf8c8cd52..28c85b87b8cd 100644
--- a/arch/arm64/include/asm/ptrace.h
+++ b/arch/arm64/include/asm/ptrace.h
@@ -193,6 +193,10 @@ struct pt_regs {
 	/* Only valid when ARM64_HAS_IRQ_PRIO_MASKING is enabled. */
 	u64 pmr_save;
 	u64 stackframe[2];
+
+	/* Only valid for some EL1 exceptions. */
+	u64 lockdep_hardirqs;
+	u64 exit_rcu;
 };
 
 static inline bool in_syscall(struct pt_regs const *regs)
-- 
cgit v1.2.3-70-g09d2


From 7cd1ea1010acbede7eb87b6abb6198921fb36957 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Mon, 30 Nov 2020 11:59:48 +0000
Subject: arm64: entry: fix non-NMI kernel<->kernel transitions

There are periods in kernel mode when RCU is not watching and/or the
scheduler tick is disabled, but we can still take exceptions such as
interrupts. The arm64 exception handlers do not account for this, and
it's possible that RCU is not watching while an exception handler runs.

The x86/generic entry code handles this by ensuring that all (non-NMI)
kernel exception handlers call irqentry_enter() and irqentry_exit(),
which handle RCU, lockdep, and IRQ flag tracing. We can't yet move to
the generic entry code, and already hadnle the user<->kernel transitions
elsewhere, so we add new kernel<->kernel transition helpers alog the
lines of the generic entry code.

Since we now track interrupts becoming masked when an exception is
taken, local_daif_inherit() is modified to track interrupts becoming
re-enabled when the original context is inherited. To balance the
entry/exit paths, each handler masks all DAIF exceptions before
exit_to_kernel_mode().

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: James Morse <james.morse@arm.com>
Cc: Will Deacon <will@kernel.org>
Link: https://lore.kernel.org/r/20201130115950.22492-10-mark.rutland@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/daifflags.h |  3 ++
 arch/arm64/kernel/entry-common.c   | 67 ++++++++++++++++++++++++++++++++++++--
 2 files changed, 67 insertions(+), 3 deletions(-)

(limited to 'arch')

diff --git a/arch/arm64/include/asm/daifflags.h b/arch/arm64/include/asm/daifflags.h
index ec213b4a1650..1c26d7baa67f 100644
--- a/arch/arm64/include/asm/daifflags.h
+++ b/arch/arm64/include/asm/daifflags.h
@@ -128,6 +128,9 @@ static inline void local_daif_inherit(struct pt_regs *regs)
 {
 	unsigned long flags = regs->pstate & DAIF_MASK;
 
+	if (interrupts_enabled(regs))
+		trace_hardirqs_on();
+
 	/*
 	 * We can't use local_daif_restore(regs->pstate) here as
 	 * system_has_prio_mask_debugging() won't restore the I bit if it can
diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c
index 49d1c1dd9baf..86622d8dd0d8 100644
--- a/arch/arm64/kernel/entry-common.c
+++ b/arch/arm64/kernel/entry-common.c
@@ -17,12 +17,58 @@
 #include <asm/mmu.h>
 #include <asm/sysreg.h>
 
+/*
+ * This is intended to match the logic in irqentry_enter(), handling the kernel
+ * mode transitions only.
+ */
+static void noinstr enter_from_kernel_mode(struct pt_regs *regs)
+{
+	regs->exit_rcu = false;
+
+	if (!IS_ENABLED(CONFIG_TINY_RCU) && is_idle_task(current)) {
+		lockdep_hardirqs_off(CALLER_ADDR0);
+		rcu_irq_enter();
+		trace_hardirqs_off_finish();
+
+		regs->exit_rcu = true;
+		return;
+	}
+
+	lockdep_hardirqs_off(CALLER_ADDR0);
+	rcu_irq_enter_check_tick();
+	trace_hardirqs_off_finish();
+}
+
+/*
+ * This is intended to match the logic in irqentry_exit(), handling the kernel
+ * mode transitions only, and with preemption handled elsewhere.
+ */
+static void noinstr exit_to_kernel_mode(struct pt_regs *regs)
+{
+	lockdep_assert_irqs_disabled();
+
+	if (interrupts_enabled(regs)) {
+		if (regs->exit_rcu) {
+			trace_hardirqs_on_prepare();
+			lockdep_hardirqs_on_prepare(CALLER_ADDR0);
+			rcu_irq_exit();
+			lockdep_hardirqs_on(CALLER_ADDR0);
+			return;
+		}
+
+		trace_hardirqs_on();
+	} else {
+		if (regs->exit_rcu)
+			rcu_irq_exit();
+	}
+}
+
 asmlinkage void noinstr enter_el1_irq_or_nmi(struct pt_regs *regs)
 {
 	if (IS_ENABLED(CONFIG_ARM64_PSEUDO_NMI) && !interrupts_enabled(regs))
 		nmi_enter();
-
-	trace_hardirqs_off();
+	else
+		enter_from_kernel_mode(regs);
 }
 
 asmlinkage void noinstr exit_el1_irq_or_nmi(struct pt_regs *regs)
@@ -30,36 +76,48 @@ asmlinkage void noinstr exit_el1_irq_or_nmi(struct pt_regs *regs)
 	if (IS_ENABLED(CONFIG_ARM64_PSEUDO_NMI) && !interrupts_enabled(regs))
 		nmi_exit();
 	else
-		trace_hardirqs_on();
+		exit_to_kernel_mode(regs);
 }
 
 static void noinstr el1_abort(struct pt_regs *regs, unsigned long esr)
 {
 	unsigned long far = read_sysreg(far_el1);
 
+	enter_from_kernel_mode(regs);
 	local_daif_inherit(regs);
 	far = untagged_addr(far);
 	do_mem_abort(far, esr, regs);
+	local_daif_mask();
+	exit_to_kernel_mode(regs);
 }
 
 static void noinstr el1_pc(struct pt_regs *regs, unsigned long esr)
 {
 	unsigned long far = read_sysreg(far_el1);
 
+	enter_from_kernel_mode(regs);
 	local_daif_inherit(regs);
 	do_sp_pc_abort(far, esr, regs);
+	local_daif_mask();
+	exit_to_kernel_mode(regs);
 }
 
 static void noinstr el1_undef(struct pt_regs *regs)
 {
+	enter_from_kernel_mode(regs);
 	local_daif_inherit(regs);
 	do_undefinstr(regs);
+	local_daif_mask();
+	exit_to_kernel_mode(regs);
 }
 
 static void noinstr el1_inv(struct pt_regs *regs, unsigned long esr)
 {
+	enter_from_kernel_mode(regs);
 	local_daif_inherit(regs);
 	bad_mode(regs, 0, esr);
+	local_daif_mask();
+	exit_to_kernel_mode(regs);
 }
 
 static void noinstr el1_dbg(struct pt_regs *regs, unsigned long esr)
@@ -79,8 +137,11 @@ static void noinstr el1_dbg(struct pt_regs *regs, unsigned long esr)
 
 static void noinstr el1_fpac(struct pt_regs *regs, unsigned long esr)
 {
+	enter_from_kernel_mode(regs);
 	local_daif_inherit(regs);
 	do_ptrauth_fault(regs, esr);
+	local_daif_mask();
+	exit_to_kernel_mode(regs);
 }
 
 asmlinkage void noinstr el1_sync_handler(struct pt_regs *regs)
-- 
cgit v1.2.3-70-g09d2


From f0cd5ac1e4c53cb691b3ed3cda1031e1c42153e2 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Mon, 30 Nov 2020 11:59:49 +0000
Subject: arm64: entry: fix NMI {user, kernel}->kernel transitions

Exceptions which can be taken at (almost) any time are consdiered to be
NMIs. On arm64 that includes:

* SDEI events
* GICv3 Pseudo-NMIs
* Kernel stack overflows
* Unexpected/unhandled exceptions

... but currently debug exceptions (BRKs, breakpoints, watchpoints,
single-step) are not considered NMIs.

As these can be taken at any time, kernel features (lockdep, RCU,
ftrace) may not be in a consistent kernel state. For example, we may
take an NMI from the idle code or partway through an entry/exit path.

While nmi_enter() and nmi_exit() handle most of this state, notably they
don't save/restore the lockdep state across an NMI being taken and
handled. When interrupts are enabled and an NMI is taken, lockdep may
see interrupts become disabled within the NMI code, but not see
interrupts become enabled when returning from the NMI, leaving lockdep
believing interrupts are disabled when they are actually disabled.

The x86 code handles this in idtentry_{enter,exit}_nmi(), which will
shortly be moved to the generic entry code. As we can't use either yet,
we copy the x86 approach in arm64-specific helpers. All the NMI
entrypoints are marked as noinstr to prevent any instrumentation
handling code being invoked before the state has been corrected.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: James Morse <james.morse@arm.com>
Cc: Will Deacon <will@kernel.org>
Link: https://lore.kernel.org/r/20201130115950.22492-11-mark.rutland@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/exception.h |  2 ++
 arch/arm64/kernel/entry-common.c   | 34 ++++++++++++++++++++++++++++++++--
 arch/arm64/kernel/sdei.c           |  7 ++++---
 arch/arm64/kernel/traps.c          | 15 ++++++++++-----
 4 files changed, 48 insertions(+), 10 deletions(-)

(limited to 'arch')

diff --git a/arch/arm64/include/asm/exception.h b/arch/arm64/include/asm/exception.h
index d579b2e6db7a..0756191f44f6 100644
--- a/arch/arm64/include/asm/exception.h
+++ b/arch/arm64/include/asm/exception.h
@@ -35,6 +35,8 @@ asmlinkage void noinstr enter_el1_irq_or_nmi(struct pt_regs *regs);
 asmlinkage void noinstr exit_el1_irq_or_nmi(struct pt_regs *regs);
 asmlinkage void enter_from_user_mode(void);
 asmlinkage void exit_to_user_mode(void);
+void arm64_enter_nmi(struct pt_regs *regs);
+void arm64_exit_nmi(struct pt_regs *regs);
 void do_mem_abort(unsigned long addr, unsigned int esr, struct pt_regs *regs);
 void do_undefinstr(struct pt_regs *regs);
 void do_bti(struct pt_regs *regs);
diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c
index 86622d8dd0d8..6d70be0d3e8f 100644
--- a/arch/arm64/kernel/entry-common.c
+++ b/arch/arm64/kernel/entry-common.c
@@ -63,10 +63,40 @@ static void noinstr exit_to_kernel_mode(struct pt_regs *regs)
 	}
 }
 
+void noinstr arm64_enter_nmi(struct pt_regs *regs)
+{
+	regs->lockdep_hardirqs = lockdep_hardirqs_enabled();
+
+	__nmi_enter();
+	lockdep_hardirqs_off(CALLER_ADDR0);
+	lockdep_hardirq_enter();
+	rcu_nmi_enter();
+
+	trace_hardirqs_off_finish();
+	ftrace_nmi_enter();
+}
+
+void noinstr arm64_exit_nmi(struct pt_regs *regs)
+{
+	bool restore = regs->lockdep_hardirqs;
+
+	ftrace_nmi_exit();
+	if (restore) {
+		trace_hardirqs_on_prepare();
+		lockdep_hardirqs_on_prepare(CALLER_ADDR0);
+	}
+
+	rcu_nmi_exit();
+	lockdep_hardirq_exit();
+	if (restore)
+		lockdep_hardirqs_on(CALLER_ADDR0);
+	__nmi_exit();
+}
+
 asmlinkage void noinstr enter_el1_irq_or_nmi(struct pt_regs *regs)
 {
 	if (IS_ENABLED(CONFIG_ARM64_PSEUDO_NMI) && !interrupts_enabled(regs))
-		nmi_enter();
+		arm64_enter_nmi(regs);
 	else
 		enter_from_kernel_mode(regs);
 }
@@ -74,7 +104,7 @@ asmlinkage void noinstr enter_el1_irq_or_nmi(struct pt_regs *regs)
 asmlinkage void noinstr exit_el1_irq_or_nmi(struct pt_regs *regs)
 {
 	if (IS_ENABLED(CONFIG_ARM64_PSEUDO_NMI) && !interrupts_enabled(regs))
-		nmi_exit();
+		arm64_exit_nmi(regs);
 	else
 		exit_to_kernel_mode(regs);
 }
diff --git a/arch/arm64/kernel/sdei.c b/arch/arm64/kernel/sdei.c
index 7689f2031c0c..793c46d6a447 100644
--- a/arch/arm64/kernel/sdei.c
+++ b/arch/arm64/kernel/sdei.c
@@ -10,6 +10,7 @@
 #include <linux/uaccess.h>
 
 #include <asm/alternative.h>
+#include <asm/exception.h>
 #include <asm/kprobes.h>
 #include <asm/mmu.h>
 #include <asm/ptrace.h>
@@ -223,16 +224,16 @@ static __kprobes unsigned long _sdei_handler(struct pt_regs *regs,
 }
 
 
-asmlinkage __kprobes notrace unsigned long
+asmlinkage noinstr unsigned long
 __sdei_handler(struct pt_regs *regs, struct sdei_registered_event *arg)
 {
 	unsigned long ret;
 
-	nmi_enter();
+	arm64_enter_nmi(regs);
 
 	ret = _sdei_handler(regs, arg);
 
-	nmi_exit();
+	arm64_exit_nmi(regs);
 
 	return ret;
 }
diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
index 580c60afc39a..2059d8f43f55 100644
--- a/arch/arm64/kernel/traps.c
+++ b/arch/arm64/kernel/traps.c
@@ -34,6 +34,7 @@
 #include <asm/daifflags.h>
 #include <asm/debug-monitors.h>
 #include <asm/esr.h>
+#include <asm/exception.h>
 #include <asm/extable.h>
 #include <asm/insn.h>
 #include <asm/kprobes.h>
@@ -753,8 +754,10 @@ const char *esr_get_class_string(u32 esr)
  * bad_mode handles the impossible case in the exception vector. This is always
  * fatal.
  */
-asmlinkage void bad_mode(struct pt_regs *regs, int reason, unsigned int esr)
+asmlinkage void notrace bad_mode(struct pt_regs *regs, int reason, unsigned int esr)
 {
+	arm64_enter_nmi(regs);
+
 	console_verbose();
 
 	pr_crit("Bad mode in %s handler detected on CPU%d, code 0x%08x -- %s\n",
@@ -786,7 +789,7 @@ void bad_el0_sync(struct pt_regs *regs, int reason, unsigned int esr)
 DEFINE_PER_CPU(unsigned long [OVERFLOW_STACK_SIZE/sizeof(long)], overflow_stack)
 	__aligned(16);
 
-asmlinkage void handle_bad_stack(struct pt_regs *regs)
+asmlinkage void noinstr handle_bad_stack(struct pt_regs *regs)
 {
 	unsigned long tsk_stk = (unsigned long)current->stack;
 	unsigned long irq_stk = (unsigned long)this_cpu_read(irq_stack_ptr);
@@ -794,6 +797,8 @@ asmlinkage void handle_bad_stack(struct pt_regs *regs)
 	unsigned int esr = read_sysreg(esr_el1);
 	unsigned long far = read_sysreg(far_el1);
 
+	arm64_enter_nmi(regs);
+
 	console_verbose();
 	pr_emerg("Insufficient stack space to handle exception!");
 
@@ -865,15 +870,15 @@ bool arm64_is_fatal_ras_serror(struct pt_regs *regs, unsigned int esr)
 	}
 }
 
-asmlinkage void do_serror(struct pt_regs *regs, unsigned int esr)
+asmlinkage void noinstr do_serror(struct pt_regs *regs, unsigned int esr)
 {
-	nmi_enter();
+	arm64_enter_nmi(regs);
 
 	/* non-RAS errors are not containable */
 	if (!arm64_is_ras_serror(esr) || arm64_is_fatal_ras_serror(regs, esr))
 		arm64_serror_panic(regs, esr);
 
-	nmi_exit();
+	arm64_exit_nmi(regs);
 }
 
 /* GENERIC_BUG traps */
-- 
cgit v1.2.3-70-g09d2


From 2a9b3e6ac69a8bf177d8496a11e749e2dc72fa22 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Mon, 30 Nov 2020 11:59:50 +0000
Subject: arm64: entry: fix EL1 debug transitions

In debug_exception_enter() and debug_exception_exit() we trace hardirqs
on/off while RCU isn't guaranteed to be watching, and we don't save and
restore the hardirq state, and so may return with this having changed.

Handle this appropriately with new entry/exit helpers which do the bare
minimum to ensure this is appropriately maintained, without marking
debug exceptions as NMIs. These are placed in entry-common.c with the
other entry/exit helpers.

In future we'll want to reconsider whether some debug exceptions should
be NMIs, but this will require a significant refactoring, and for now
this should prevent issues with lockdep and RCU.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Catalin Marins <catalin.marinas@arm.com>
Cc: James Morse <james.morse@arm.com>
Cc: Will Deacon <will@kernel.org>
Link: https://lore.kernel.org/r/20201130115950.22492-12-mark.rutland@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/kernel/entry-common.c | 26 ++++++++++++++++++++++++++
 arch/arm64/mm/fault.c            | 25 -------------------------
 2 files changed, 26 insertions(+), 25 deletions(-)

(limited to 'arch')

diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c
index 6d70be0d3e8f..70e0a7591245 100644
--- a/arch/arm64/kernel/entry-common.c
+++ b/arch/arm64/kernel/entry-common.c
@@ -150,6 +150,30 @@ static void noinstr el1_inv(struct pt_regs *regs, unsigned long esr)
 	exit_to_kernel_mode(regs);
 }
 
+static void noinstr arm64_enter_el1_dbg(struct pt_regs *regs)
+{
+	regs->lockdep_hardirqs = lockdep_hardirqs_enabled();
+
+	lockdep_hardirqs_off(CALLER_ADDR0);
+	rcu_nmi_enter();
+
+	trace_hardirqs_off_finish();
+}
+
+static void noinstr arm64_exit_el1_dbg(struct pt_regs *regs)
+{
+	bool restore = regs->lockdep_hardirqs;
+
+	if (restore) {
+		trace_hardirqs_on_prepare();
+		lockdep_hardirqs_on_prepare(CALLER_ADDR0);
+	}
+
+	rcu_nmi_exit();
+	if (restore)
+		lockdep_hardirqs_on(CALLER_ADDR0);
+}
+
 static void noinstr el1_dbg(struct pt_regs *regs, unsigned long esr)
 {
 	unsigned long far = read_sysreg(far_el1);
@@ -162,7 +186,9 @@ static void noinstr el1_dbg(struct pt_regs *regs, unsigned long esr)
 	if (system_uses_irq_prio_masking())
 		gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET);
 
+	arm64_enter_el1_dbg(regs);
 	do_debug_exception(far, esr, regs);
+	arm64_exit_el1_dbg(regs);
 }
 
 static void noinstr el1_fpac(struct pt_regs *regs, unsigned long esr)
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index 1f450b784d2c..795d224f184f 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -789,23 +789,6 @@ void __init hook_debug_fault_code(int nr,
  */
 static void debug_exception_enter(struct pt_regs *regs)
 {
-	if (!user_mode(regs)) {
-		/*
-		 * Tell lockdep we disabled irqs in entry.S. Do nothing if they were
-		 * already disabled to preserve the last enabled/disabled addresses.
-		 */
-		if (interrupts_enabled(regs))
-			trace_hardirqs_off();
-
-		/*
-		 * We might have interrupted pretty much anything.  In
-		 * fact, if we're a debug exception, we can even interrupt
-		 * NMI processing. We don't want this code makes in_nmi()
-		 * to return true, but we need to notify RCU.
-		 */
-		rcu_nmi_enter();
-	}
-
 	preempt_disable();
 
 	/* This code is a bit fragile.  Test it. */
@@ -816,14 +799,6 @@ NOKPROBE_SYMBOL(debug_exception_enter);
 static void debug_exception_exit(struct pt_regs *regs)
 {
 	preempt_enable_no_resched();
-
-	if (user_mode(regs))
-		return;
-
-	rcu_nmi_exit();
-
-	if (interrupts_enabled(regs))
-		trace_hardirqs_on();
 }
 NOKPROBE_SYMBOL(debug_exception_exit);
 
-- 
cgit v1.2.3-70-g09d2


From 9e5344e0ffc33f4fee899f98b6939a0682b1d9c3 Mon Sep 17 00:00:00 2001
From: Vincenzo Frascino <vincenzo.frascino@arm.com>
Date: Mon, 30 Nov 2020 17:07:09 +0000
Subject: arm64: mte: Fix typo in macro definition

UL in the definition of SYS_TFSR_EL1_TF1 was misspelled causing
compilation issues when trying to implement in kernel MTE async
mode.

Fix the macro correcting the typo.

Note: MTE async mode will be introduced with a future series.

Fixes: c058b1c4a5ea ("arm64: mte: system register definitions")
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Vincenzo Frascino <vincenzo.frascino@arm.com>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Link: https://lore.kernel.org/r/20201130170709.22309-1-vincenzo.frascino@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/sysreg.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index 174817ba119c..c8ab9900293f 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -983,7 +983,7 @@
 #define SYS_TFSR_EL1_TF0_SHIFT	0
 #define SYS_TFSR_EL1_TF1_SHIFT	1
 #define SYS_TFSR_EL1_TF0	(UL(1) << SYS_TFSR_EL1_TF0_SHIFT)
-#define SYS_TFSR_EL1_TF1	(UK(2) << SYS_TFSR_EL1_TF1_SHIFT)
+#define SYS_TFSR_EL1_TF1	(UL(1) << SYS_TFSR_EL1_TF1_SHIFT)
 
 /* Safe value for MPIDR_EL1: Bit31:RES1, Bit30:U:0, Bit24:MT:0 */
 #define SYS_MPIDR_SAFE_VAL	(BIT(31))
-- 
cgit v1.2.3-70-g09d2


From 70e734fed740939704d1b3b76d6f2e6909698586 Mon Sep 17 00:00:00 2001
From: Robert Karszniewicz <r.karszniewicz@phytec.de>
Date: Fri, 20 Nov 2020 18:51:24 +0100
Subject: ARM: imx: Use correct SRC base address

Commit 4a4fb66119eb ("ARM: imx: Add missing of_node_put()") accidentally
forgot to rename a variable, which caused the wrong address to be used
and, in our case, the ULL getting falsely identified as ULZ.

Fixes: 4a4fb66119eb ("ARM: imx: Add missing of_node_put()")
Signed-off-by: Robert Karszniewicz <r.karszniewicz@phytec.de>
Reviewed-by: Fabio Estevam <festevam@gmail.com>
Signed-off-by: Shawn Guo <shawnguo@kernel.org>
---
 arch/arm/mach-imx/anatop.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/arm/mach-imx/anatop.c b/arch/arm/mach-imx/anatop.c
index d841bed8664d..7bb47eb3fc07 100644
--- a/arch/arm/mach-imx/anatop.c
+++ b/arch/arm/mach-imx/anatop.c
@@ -136,7 +136,7 @@ void __init imx_init_revision_from_anatop(void)
 
 			src_np = of_find_compatible_node(NULL, NULL,
 						     "fsl,imx6ul-src");
-			src_base = of_iomap(np, 0);
+			src_base = of_iomap(src_np, 0);
 			of_node_put(src_np);
 			WARN_ON(!src_base);
 			sbmr2 = readl_relaxed(src_base + SRC_SBMR2);
-- 
cgit v1.2.3-70-g09d2


From 58d6bca5efc73235b0f84c0d53321737177c651e Mon Sep 17 00:00:00 2001
From: Fabio Estevam <festevam@gmail.com>
Date: Mon, 30 Nov 2020 17:54:47 -0300
Subject: ARM: dts: imx6qdl-wandboard-revd1: Remove PAD_GPIO_6 from enetgrp

Since commit 8ad2d1dcce54 ("ARM: dts: imx6qdl-wandboard: Add OV5645 camera
support") the PAD_GPIO_6 is used for providing the camera sensor clock.

Remove it from the enetgrp to fix the following IOMXU conflict:

[    9.972414] imx6q-pinctrl 20e0000.pinctrl: pin MX6Q_PAD_GPIO_6 already requested by 2188000.ethernet; cannot claim for 1-003c
[    9.983857] imx6q-pinctrl 20e0000.pinctrl: pin-140 (1-003c) status -22
[    9.990514] imx6q-pinctrl 20e0000.pinctrl: could not request pin 140 (MX6Q_PAD_GPIO_6) from group ov5645grp  on device 20e0000.pinctrl

Fixes: 8ad2d1dcce54 ("ARM: dts: imx6qdl-wandboard: Add OV5645 camera support")
Signed-off-by: Fabio Estevam <festevam@gmail.com>
Signed-off-by: Shawn Guo <shawnguo@kernel.org>
---
 arch/arm/boot/dts/imx6qdl-wandboard-revd1.dtsi | 1 -
 1 file changed, 1 deletion(-)

(limited to 'arch')

diff --git a/arch/arm/boot/dts/imx6qdl-wandboard-revd1.dtsi b/arch/arm/boot/dts/imx6qdl-wandboard-revd1.dtsi
index 93909796885a..b9b698f72b26 100644
--- a/arch/arm/boot/dts/imx6qdl-wandboard-revd1.dtsi
+++ b/arch/arm/boot/dts/imx6qdl-wandboard-revd1.dtsi
@@ -166,7 +166,6 @@
 				MX6QDL_PAD_RGMII_RD2__RGMII_RD2		0x1b030
 				MX6QDL_PAD_RGMII_RD3__RGMII_RD3		0x1b030
 				MX6QDL_PAD_RGMII_RX_CTL__RGMII_RX_CTL	0x1b030
-				MX6QDL_PAD_GPIO_6__ENET_IRQ		0x000b1
 			>;
 		};
 
-- 
cgit v1.2.3-70-g09d2


From 19ba8fb810c60b46869acc9f455613de454e0fca Mon Sep 17 00:00:00 2001
From: Bernd Bauer <bernd.bauer@anton-paar.com>
Date: Thu, 26 Nov 2020 18:56:28 +0100
Subject: ARM: dts: imx6qdl-kontron-samx6i: fix I2C_PM scl pin

Use the correct pin for the i2c scl signal else we can't access the
SoM eeprom.

Fixes: 2a51f9dae13d ("ARM: dts: imx6qdl-kontron-samx6i: Add iMX6-based Kontron SMARC-sAMX6i module")
Signed-off-by: Bernd Bauer <bernd.bauer@anton-paar.com>
[m.felsch@pengutronix.de: Adapt commit message]
Signed-off-by: Marco Felsch <m.felsch@pengutronix.de>
Signed-off-by: Shawn Guo <shawnguo@kernel.org>
---
 arch/arm/boot/dts/imx6qdl-kontron-samx6i.dtsi | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/arm/boot/dts/imx6qdl-kontron-samx6i.dtsi b/arch/arm/boot/dts/imx6qdl-kontron-samx6i.dtsi
index 265f5f3dbff6..24f793ca2886 100644
--- a/arch/arm/boot/dts/imx6qdl-kontron-samx6i.dtsi
+++ b/arch/arm/boot/dts/imx6qdl-kontron-samx6i.dtsi
@@ -551,7 +551,7 @@
 
 	pinctrl_i2c3: i2c3grp {
 		fsl,pins = <
-			MX6QDL_PAD_GPIO_3__I2C3_SCL		0x4001b8b1
+			MX6QDL_PAD_GPIO_5__I2C3_SCL		0x4001b8b1
 			MX6QDL_PAD_GPIO_16__I2C3_SDA		0x4001b8b1
 		>;
 	};
-- 
cgit v1.2.3-70-g09d2


From c2b111e59a7be1534bbd62b3f8f933f714c5ba71 Mon Sep 17 00:00:00 2001
From: Heinrich Schuchardt <xypron.glpk@gmx.de>
Date: Sun, 29 Nov 2020 17:26:27 +0100
Subject: arm64: dts: allwinner: A64 Sopine: phy-mode rgmii-id

Since commit bbc4d71d6354 ("net: phy: realtek: fix rtl8211e rx/tx
delay config") iSCSI booting fails on the Pine A64 LTS.

This patch changes the phy-mode to use internal delays both for RX and TX
as has been done for other boards affected by the same commit.

Fixes: bbc4d71d6354 ("net: phy: realtek: fix rtl8211e rx/tx delay config")
Signed-off-by: Heinrich Schuchardt <xypron.glpk@gmx.de>
Signed-off-by: Maxime Ripard <maxime@cerno.tech>
Link: https://lore.kernel.org/r/20201129162627.1244808-1-xypron.glpk@gmx.de
---
 arch/arm64/boot/dts/allwinner/sun50i-a64-sopine-baseboard.dts | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-sopine-baseboard.dts b/arch/arm64/boot/dts/allwinner/sun50i-a64-sopine-baseboard.dts
index 9ebb9e07fae3..d4069749d721 100644
--- a/arch/arm64/boot/dts/allwinner/sun50i-a64-sopine-baseboard.dts
+++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-sopine-baseboard.dts
@@ -79,7 +79,7 @@
 &emac {
 	pinctrl-names = "default";
 	pinctrl-0 = <&rgmii_pins>;
-	phy-mode = "rgmii";
+	phy-mode = "rgmii-id";
 	phy-handle = <&ext_rgmii_phy>;
 	phy-supply = <&reg_dc1sw>;
 	status = "okay";
-- 
cgit v1.2.3-70-g09d2


From d0c6707ca4235b78d06bcd62f0e24fbeac3e6d10 Mon Sep 17 00:00:00 2001
From: Heinrich Schuchardt <xypron.glpk@gmx.de>
Date: Sun, 29 Nov 2020 20:45:12 +0100
Subject: arm64: dts: allwinner: H5: NanoPi Neo Plus2: phy-mode rgmii-id

Since commit bbc4d71d6354 ("net: phy: realtek: fix rtl8211e rx/tx
delay config") network is broken on the NanoPi Neo Plus2.

This patch changes the phy-mode to use internal delays both for RX and TX
as has been done for other boards affected by the same commit.

Fixes: bbc4d71d6354 ("net: phy: realtek: fix rtl8211e rx/tx delay config")
Signed-off-by: Heinrich Schuchardt <xypron.glpk@gmx.de>
Signed-off-by: Maxime Ripard <maxime@cerno.tech>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Link: https://lore.kernel.org/r/20201129194512.1475586-1-xypron.glpk@gmx.de
---
 arch/arm64/boot/dts/allwinner/sun50i-h5-nanopi-neo-plus2.dts | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h5-nanopi-neo-plus2.dts b/arch/arm64/boot/dts/allwinner/sun50i-h5-nanopi-neo-plus2.dts
index 4f9ba53ffaae..9d93fe153689 100644
--- a/arch/arm64/boot/dts/allwinner/sun50i-h5-nanopi-neo-plus2.dts
+++ b/arch/arm64/boot/dts/allwinner/sun50i-h5-nanopi-neo-plus2.dts
@@ -96,7 +96,7 @@
 	pinctrl-0 = <&emac_rgmii_pins>;
 	phy-supply = <&reg_gmac_3v3>;
 	phy-handle = <&ext_rgmii_phy>;
-	phy-mode = "rgmii";
+	phy-mode = "rgmii-id";
 	status = "okay";
 };
 
-- 
cgit v1.2.3-70-g09d2


From f54db39fbe40731c40aefdd3bc26e7d56d668c64 Mon Sep 17 00:00:00 2001
From: Greg Kurz <groug@kaod.org>
Date: Mon, 30 Nov 2020 13:19:27 +0100
Subject: KVM: PPC: Book3S HV: XIVE: Fix vCPU id sanity check
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Commit 062cfab7069f ("KVM: PPC: Book3S HV: XIVE: Make VP block size
configurable") updated kvmppc_xive_vcpu_id_valid() in a way that
allows userspace to trigger an assertion in skiboot and crash the host:

[  696.186248988,3] XIVE[ IC 08  ] eq_blk != vp_blk (0 vs. 1) for target 0x4300008c/0
[  696.186314757,0] Assert fail: hw/xive.c:2370:0
[  696.186342458,0] Aborting!
xive-kvCPU 0043 Backtrace:
 S: 0000000031e2b8f0 R: 0000000030013840   .backtrace+0x48
 S: 0000000031e2b990 R: 000000003001b2d0   ._abort+0x4c
 S: 0000000031e2ba10 R: 000000003001b34c   .assert_fail+0x34
 S: 0000000031e2ba90 R: 0000000030058984   .xive_eq_for_target.part.20+0xb0
 S: 0000000031e2bb40 R: 0000000030059fdc   .xive_setup_silent_gather+0x2c
 S: 0000000031e2bc20 R: 000000003005a334   .opal_xive_set_vp_info+0x124
 S: 0000000031e2bd20 R: 00000000300051a4   opal_entry+0x134
 --- OPAL call token: 0x8a caller R1: 0xc000001f28563850 ---

XIVE maintains the interrupt context state of non-dispatched vCPUs in
an internal VP structure. We allocate a bunch of those on startup to
accommodate all possible vCPUs. Each VP has an id, that we derive from
the vCPU id for efficiency:

static inline u32 kvmppc_xive_vp(struct kvmppc_xive *xive, u32 server)
{
	return xive->vp_base + kvmppc_pack_vcpu_id(xive->kvm, server);
}

The KVM XIVE device used to allocate KVM_MAX_VCPUS VPs. This was
limitting the number of concurrent VMs because the VP space is
limited on the HW. Since most of the time, VMs run with a lot less
vCPUs, commit 062cfab7069f ("KVM: PPC: Book3S HV: XIVE: Make VP
block size configurable") gave the possibility for userspace to
tune the size of the VP block through the KVM_DEV_XIVE_NR_SERVERS
attribute.

The check in kvmppc_pack_vcpu_id() was changed from

	cpu < KVM_MAX_VCPUS * xive->kvm->arch.emul_smt_mode

to

	cpu < xive->nr_servers * xive->kvm->arch.emul_smt_mode

The previous check was based on the fact that the VP block had
KVM_MAX_VCPUS entries and that kvmppc_pack_vcpu_id() guarantees
that packed vCPU ids are below KVM_MAX_VCPUS. We've changed the
size of the VP block, but kvmppc_pack_vcpu_id() has nothing to
do with it and it certainly doesn't ensure that the packed vCPU
ids are below xive->nr_servers. kvmppc_xive_vcpu_id_valid() might
thus return true when the VM was configured with a non-standard
VSMT mode, even if the packed vCPU id is higher than what we
expect. We end up using an unallocated VP id, which confuses
OPAL. The assert in OPAL is probably abusive and should be
converted to a regular error that the kernel can handle, but
we shouldn't really use broken VP ids in the first place.

Fix kvmppc_xive_vcpu_id_valid() so that it checks the packed
vCPU id is below xive->nr_servers, which is explicitly what we
want.

Fixes: 062cfab7069f ("KVM: PPC: Book3S HV: XIVE: Make VP block size configurable")
Cc: stable@vger.kernel.org # v5.5+
Signed-off-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Cédric Le Goater <clg@kaod.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/160673876747.695514.1809676603724514920.stgit@bahia.lan
---
 arch/powerpc/kvm/book3s_xive.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c
index 85215e79db42..a0ebc29f30b2 100644
--- a/arch/powerpc/kvm/book3s_xive.c
+++ b/arch/powerpc/kvm/book3s_xive.c
@@ -1214,12 +1214,9 @@ void kvmppc_xive_cleanup_vcpu(struct kvm_vcpu *vcpu)
 static bool kvmppc_xive_vcpu_id_valid(struct kvmppc_xive *xive, u32 cpu)
 {
 	/* We have a block of xive->nr_servers VPs. We just need to check
-	 * raw vCPU ids are below the expected limit for this guest's
-	 * core stride ; kvmppc_pack_vcpu_id() will pack them down to an
-	 * index that can be safely used to compute a VP id that belongs
-	 * to the VP block.
+	 * packed vCPU ids are below that.
 	 */
-	return cpu < xive->nr_servers * xive->kvm->arch.emul_smt_mode;
+	return kvmppc_pack_vcpu_id(xive->kvm, cpu) < xive->nr_servers;
 }
 
 int kvmppc_xive_compute_vp_id(struct kvmppc_xive *xive, u32 cpu, u32 *vp)
-- 
cgit v1.2.3-70-g09d2


From 59612b24f78a0b61fe078ec9dff2e48e9cec52c0 Mon Sep 17 00:00:00 2001
From: Nathan Chancellor <natechancellor@gmail.com>
Date: Thu, 19 Nov 2020 13:46:56 -0700
Subject: kbuild: Hoist '--orphan-handling' into Kconfig

Currently, '--orphan-handling=warn' is spread out across four different
architectures in their respective Makefiles, which makes it a little
unruly to deal with in case it needs to be disabled for a specific
linker version (in this case, ld.lld 10.0.1).

To make it easier to control this, hoist this warning into Kconfig and
the main Makefile so that disabling it is simpler, as the warning will
only be enabled in a couple places (main Makefile and a couple of
compressed boot folders that blow away LDFLAGS_vmlinx) and making it
conditional is easier due to Kconfig syntax. One small additional
benefit of this is saving a call to ld-option on incremental builds
because we will have already evaluated it for CONFIG_LD_ORPHAN_WARN.

To keep the list of supported architectures the same, introduce
CONFIG_ARCH_WANT_LD_ORPHAN_WARN, which an architecture can select to
gain this automatically after all of the sections are specified and size
asserted. A special thanks to Kees Cook for the help text on this
config.

Link: https://github.com/ClangBuiltLinux/linux/issues/1187
Acked-by: Kees Cook <keescook@chromium.org>
Acked-by: Michael Ellerman <mpe@ellerman.id.au> (powerpc)
Reviewed-by: Nick Desaulniers <ndesaulniers@google.com>
Tested-by: Nick Desaulniers <ndesaulniers@google.com>
Signed-off-by: Nathan Chancellor <natechancellor@gmail.com>
Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
---
 Makefile                          | 6 ++++++
 arch/Kconfig                      | 9 +++++++++
 arch/arm/Kconfig                  | 1 +
 arch/arm/Makefile                 | 4 ----
 arch/arm/boot/compressed/Makefile | 4 +++-
 arch/arm64/Kconfig                | 1 +
 arch/arm64/Makefile               | 4 ----
 arch/powerpc/Kconfig              | 1 +
 arch/powerpc/Makefile             | 1 -
 arch/x86/Kconfig                  | 1 +
 arch/x86/Makefile                 | 3 ---
 arch/x86/boot/compressed/Makefile | 4 +++-
 init/Kconfig                      | 5 +++++
 13 files changed, 30 insertions(+), 14 deletions(-)

(limited to 'arch')

diff --git a/Makefile b/Makefile
index ae1592c1f5d6..8327725e5d76 100644
--- a/Makefile
+++ b/Makefile
@@ -986,6 +986,12 @@ ifeq ($(CONFIG_RELR),y)
 LDFLAGS_vmlinux	+= --pack-dyn-relocs=relr
 endif
 
+# We never want expected sections to be placed heuristically by the
+# linker. All sections should be explicitly named in the linker script.
+ifdef CONFIG_LD_ORPHAN_WARN
+LDFLAGS_vmlinux += --orphan-handling=warn
+endif
+
 # Align the bit size of userspace programs with the kernel
 KBUILD_USERCFLAGS  += $(filter -m32 -m64 --target=%, $(KBUILD_CFLAGS))
 KBUILD_USERLDFLAGS += $(filter -m32 -m64 --target=%, $(KBUILD_CFLAGS))
diff --git a/arch/Kconfig b/arch/Kconfig
index 56b6ccc0e32d..ba4e966484ab 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -1028,6 +1028,15 @@ config HAVE_STATIC_CALL_INLINE
 	bool
 	depends on HAVE_STATIC_CALL
 
+config ARCH_WANT_LD_ORPHAN_WARN
+	bool
+	help
+	  An arch should select this symbol once all linker sections are explicitly
+	  included, size-asserted, or discarded in the linker scripts. This is
+	  important because we never want expected sections to be placed heuristically
+	  by the linker, since the locations of such sections can change between linker
+	  versions.
+
 source "kernel/gcov/Kconfig"
 
 source "scripts/gcc-plugins/Kconfig"
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index fe2f17eb2b50..002e0cf025f5 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -35,6 +35,7 @@ config ARM
 	select ARCH_USE_CMPXCHG_LOCKREF
 	select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU
 	select ARCH_WANT_IPC_PARSE_VERSION
+	select ARCH_WANT_LD_ORPHAN_WARN
 	select BINFMT_FLAT_ARGVP_ENVP_ON_STACK
 	select BUILDTIME_TABLE_SORT if MMU
 	select CLONE_BACKWARDS
diff --git a/arch/arm/Makefile b/arch/arm/Makefile
index 4d76eab2b22d..e15f76ca2887 100644
--- a/arch/arm/Makefile
+++ b/arch/arm/Makefile
@@ -16,10 +16,6 @@ LDFLAGS_vmlinux	+= --be8
 KBUILD_LDFLAGS_MODULE	+= --be8
 endif
 
-# We never want expected sections to be placed heuristically by the
-# linker. All sections should be explicitly named in the linker script.
-LDFLAGS_vmlinux += $(call ld-option, --orphan-handling=warn)
-
 GZFLAGS		:=-9
 #KBUILD_CFLAGS	+=-pipe
 
diff --git a/arch/arm/boot/compressed/Makefile b/arch/arm/boot/compressed/Makefile
index 47f001ca5499..e1567418a2b1 100644
--- a/arch/arm/boot/compressed/Makefile
+++ b/arch/arm/boot/compressed/Makefile
@@ -129,7 +129,9 @@ LDFLAGS_vmlinux += --no-undefined
 # Delete all temporary local symbols
 LDFLAGS_vmlinux += -X
 # Report orphan sections
-LDFLAGS_vmlinux += $(call ld-option, --orphan-handling=warn)
+ifdef CONFIG_LD_ORPHAN_WARN
+LDFLAGS_vmlinux += --orphan-handling=warn
+endif
 # Next argument is a linker script
 LDFLAGS_vmlinux += -T
 
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 1515f6f153a0..a6b5b7ef40ae 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -81,6 +81,7 @@ config ARM64
 	select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT
 	select ARCH_WANT_FRAME_POINTERS
 	select ARCH_WANT_HUGE_PMD_SHARE if ARM64_4K_PAGES || (ARM64_16K_PAGES && !ARM64_VA_BITS_36)
+	select ARCH_WANT_LD_ORPHAN_WARN
 	select ARCH_HAS_UBSAN_SANITIZE_ALL
 	select ARM_AMBA
 	select ARM_ARCH_TIMER
diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
index 5789c2d18d43..6a87d592bd00 100644
--- a/arch/arm64/Makefile
+++ b/arch/arm64/Makefile
@@ -28,10 +28,6 @@ LDFLAGS_vmlinux	+= --fix-cortex-a53-843419
   endif
 endif
 
-# We never want expected sections to be placed heuristically by the
-# linker. All sections should be explicitly named in the linker script.
-LDFLAGS_vmlinux += $(call ld-option, --orphan-handling=warn)
-
 ifeq ($(CONFIG_ARM64_USE_LSE_ATOMICS), y)
   ifneq ($(CONFIG_ARM64_LSE_ATOMICS), y)
 $(warning LSE atomics not supported by binutils)
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index e9f13fe08492..5181872f9452 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -152,6 +152,7 @@ config PPC
 	select ARCH_USE_QUEUED_SPINLOCKS	if PPC_QUEUED_SPINLOCKS
 	select ARCH_WANT_IPC_PARSE_VERSION
 	select ARCH_WANT_IRQS_OFF_ACTIVATE_MM
+	select ARCH_WANT_LD_ORPHAN_WARN
 	select ARCH_WEAK_RELEASE_ACQUIRE
 	select BINFMT_ELF
 	select BUILDTIME_TABLE_SORT
diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile
index a4d56f0a41d9..d9eb0da845e1 100644
--- a/arch/powerpc/Makefile
+++ b/arch/powerpc/Makefile
@@ -123,7 +123,6 @@ endif
 LDFLAGS_vmlinux-y := -Bstatic
 LDFLAGS_vmlinux-$(CONFIG_RELOCATABLE) := -pie
 LDFLAGS_vmlinux	:= $(LDFLAGS_vmlinux-y)
-LDFLAGS_vmlinux += $(call ld-option,--orphan-handling=warn)
 
 ifdef CONFIG_PPC64
 ifeq ($(call cc-option-yn,-mcmodel=medium),y)
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index f6946b81f74a..fbf26e0f7a6a 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -100,6 +100,7 @@ config X86
 	select ARCH_WANT_DEFAULT_BPF_JIT	if X86_64
 	select ARCH_WANTS_DYNAMIC_TASK_STRUCT
 	select ARCH_WANT_HUGE_PMD_SHARE
+	select ARCH_WANT_LD_ORPHAN_WARN
 	select ARCH_WANTS_THP_SWAP		if X86_64
 	select BUILDTIME_TABLE_SORT
 	select CLKEVT_I8253
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 154259f18b8b..1bf21746f4ce 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -209,9 +209,6 @@ ifdef CONFIG_X86_64
 LDFLAGS_vmlinux += -z max-page-size=0x200000
 endif
 
-# We never want expected sections to be placed heuristically by the
-# linker. All sections should be explicitly named in the linker script.
-LDFLAGS_vmlinux += $(call ld-option, --orphan-handling=warn)
 
 archscripts: scripts_basic
 	$(Q)$(MAKE) $(build)=arch/x86/tools relocs
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index ee249088cbfe..40b8fd375d52 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -61,7 +61,9 @@ KBUILD_LDFLAGS += $(call ld-option,--no-ld-generated-unwind-info)
 # Compressed kernel should be built as PIE since it may be loaded at any
 # address by the bootloader.
 LDFLAGS_vmlinux := -pie $(call ld-option, --no-dynamic-linker)
-LDFLAGS_vmlinux += $(call ld-option, --orphan-handling=warn)
+ifdef CONFIG_LD_ORPHAN_WARN
+LDFLAGS_vmlinux += --orphan-handling=warn
+endif
 LDFLAGS_vmlinux += -T
 
 hostprogs	:= mkpiggy
diff --git a/init/Kconfig b/init/Kconfig
index c9446911cf41..92c58b45abb8 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1348,6 +1348,11 @@ config LD_DEAD_CODE_DATA_ELIMINATION
 	  present. This option is not well tested yet, so use at your
 	  own risk.
 
+config LD_ORPHAN_WARN
+	def_bool y
+	depends on ARCH_WANT_LD_ORPHAN_WARN
+	depends on $(ld-option,--orphan-handling=warn)
+
 config SYSCTL
 	bool
 
-- 
cgit v1.2.3-70-g09d2


From fae3a13d2a3d49a89391889808428cf1e72afbd7 Mon Sep 17 00:00:00 2001
From: Babu Moger <babu.moger@amd.com>
Date: Mon, 30 Nov 2020 09:57:20 -0600
Subject: x86/resctrl: Fix AMD L3 QOS CDP enable/disable

When the AMD QoS feature CDP (code and data prioritization) is enabled
or disabled, the CDP bit in MSR 0000_0C81 is written on one of the CPUs
in an L3 domain (core complex). That is not correct - the CDP bit needs
to be updated on all the logical CPUs in the domain.

This was not spelled out clearly in the spec earlier. The specification
has been updated and the updated document, "AMD64 Technology Platform
Quality of Service Extensions Publication # 56375 Revision: 1.02 Issue
Date: October 2020" is available now. Refer the section: Code and Data
Prioritization.

Fix the issue by adding a new flag arch_has_per_cpu_cfg in rdt_cache
data structure.

The documentation can be obtained at:
https://developer.amd.com/wp-content/resources/56375.pdf
Link: https://bugzilla.kernel.org/show_bug.cgi?id=206537

 [ bp: Massage commit message. ]

Fixes: 4d05bf71f157 ("x86/resctrl: Introduce AMD QOS feature")
Signed-off-by: Babu Moger <babu.moger@amd.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Reinette Chatre <reinette.chatre@intel.com>
Link: https://lkml.kernel.org/r/160675180380.15628.3309402017215002347.stgit@bmoger-ubuntu
---
 arch/x86/kernel/cpu/resctrl/core.c     | 4 ++++
 arch/x86/kernel/cpu/resctrl/internal.h | 3 +++
 arch/x86/kernel/cpu/resctrl/rdtgroup.c | 9 +++++++--
 3 files changed, 14 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c
index e5f4ee8f4c3b..e8b5f1cf1ae8 100644
--- a/arch/x86/kernel/cpu/resctrl/core.c
+++ b/arch/x86/kernel/cpu/resctrl/core.c
@@ -570,6 +570,8 @@ static void domain_add_cpu(int cpu, struct rdt_resource *r)
 
 	if (d) {
 		cpumask_set_cpu(cpu, &d->cpu_mask);
+		if (r->cache.arch_has_per_cpu_cfg)
+			rdt_domain_reconfigure_cdp(r);
 		return;
 	}
 
@@ -923,6 +925,7 @@ static __init void rdt_init_res_defs_intel(void)
 		    r->rid == RDT_RESOURCE_L2CODE) {
 			r->cache.arch_has_sparse_bitmaps = false;
 			r->cache.arch_has_empty_bitmaps = false;
+			r->cache.arch_has_per_cpu_cfg = false;
 		} else if (r->rid == RDT_RESOURCE_MBA) {
 			r->msr_base = MSR_IA32_MBA_THRTL_BASE;
 			r->msr_update = mba_wrmsr_intel;
@@ -943,6 +946,7 @@ static __init void rdt_init_res_defs_amd(void)
 		    r->rid == RDT_RESOURCE_L2CODE) {
 			r->cache.arch_has_sparse_bitmaps = true;
 			r->cache.arch_has_empty_bitmaps = true;
+			r->cache.arch_has_per_cpu_cfg = true;
 		} else if (r->rid == RDT_RESOURCE_MBA) {
 			r->msr_base = MSR_IA32_MBA_BW_BASE;
 			r->msr_update = mba_wrmsr_amd;
diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
index 80fa997fae60..f65d3c0dbc41 100644
--- a/arch/x86/kernel/cpu/resctrl/internal.h
+++ b/arch/x86/kernel/cpu/resctrl/internal.h
@@ -360,6 +360,8 @@ struct msr_param {
  *			executing entities
  * @arch_has_sparse_bitmaps:	True if a bitmap like f00f is valid.
  * @arch_has_empty_bitmaps:	True if the '0' bitmap is valid.
+ * @arch_has_per_cpu_cfg:	True if QOS_CFG register for this cache
+ *				level has CPU scope.
  */
 struct rdt_cache {
 	unsigned int	cbm_len;
@@ -369,6 +371,7 @@ struct rdt_cache {
 	unsigned int	shareable_bits;
 	bool		arch_has_sparse_bitmaps;
 	bool		arch_has_empty_bitmaps;
+	bool		arch_has_per_cpu_cfg;
 };
 
 /**
diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
index 6f4ca4bea625..f3418428682b 100644
--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
+++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
@@ -1909,8 +1909,13 @@ static int set_cache_qos_cfg(int level, bool enable)
 
 	r_l = &rdt_resources_all[level];
 	list_for_each_entry(d, &r_l->domains, list) {
-		/* Pick one CPU from each domain instance to update MSR */
-		cpumask_set_cpu(cpumask_any(&d->cpu_mask), cpu_mask);
+		if (r_l->cache.arch_has_per_cpu_cfg)
+			/* Pick all the CPUs in the domain instance */
+			for_each_cpu(cpu, &d->cpu_mask)
+				cpumask_set_cpu(cpu, cpu_mask);
+		else
+			/* Pick one CPU from each domain instance to update MSR */
+			cpumask_set_cpu(cpumask_any(&d->cpu_mask), cpu_mask);
 	}
 	cpu = get_cpu();
 	/* Update QOS_CFG MSR on this cpu if it's in cpu_mask. */
-- 
cgit v1.2.3-70-g09d2


From 5c646b7e1d8bcb12317426287c516dfa4c5171c2 Mon Sep 17 00:00:00 2001
From: Yanan Wang <wangyanan55@huawei.com>
Date: Wed, 2 Dec 2020 04:10:32 +0800
Subject: KVM: arm64: Fix memory leak on stage2 update of a valid PTE

When installing a new leaf PTE onto an invalid ptep, we need to
get_page(ptep) to account for the new mapping.

However, simply updating a valid PTE shouldn't result in any
additional refcounting, as there is new mapping. This otherwise
results in a page being forever wasted.

Address this by fixing-up the refcount in stage2_map_walker_try_leaf()
if the PTE was already valid, balancing out the later get_page()
in stage2_map_walk_leaf().

Signed-off-by: Yanan Wang <wangyanan55@huawei.com>
[maz: update commit message, add comment in the code]
Signed-off-by: Marc Zyngier <maz@kernel.org>
Acked-by: Will Deacon <will@kernel.org>
Link: https://lore.kernel.org/r/20201201201034.116760-2-wangyanan55@huawei.com
---
 arch/arm64/kvm/hyp/pgtable.c | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'arch')

diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
index 0271b4a3b9fe..2beba1dc40ec 100644
--- a/arch/arm64/kvm/hyp/pgtable.c
+++ b/arch/arm64/kvm/hyp/pgtable.c
@@ -470,6 +470,15 @@ static bool stage2_map_walker_try_leaf(u64 addr, u64 end, u32 level,
 	if (!kvm_block_mapping_supported(addr, end, phys, level))
 		return false;
 
+	/*
+	 * If the PTE was already valid, drop the refcount on the table
+	 * early, as it will be bumped-up again in stage2_map_walk_leaf().
+	 * This ensures that the refcount stays constant across a valid to
+	 * valid PTE update.
+	 */
+	if (kvm_pte_valid(*ptep))
+		put_page(virt_to_page(ptep));
+
 	if (kvm_set_valid_leaf_pte(ptep, phys, data->attr, level))
 		goto out;
 
-- 
cgit v1.2.3-70-g09d2


From 3a0b870e3448302ca2ba703bea1b79b61c3f33c6 Mon Sep 17 00:00:00 2001
From: Yanan Wang <wangyanan55@huawei.com>
Date: Wed, 2 Dec 2020 04:10:33 +0800
Subject: KVM: arm64: Fix handling of merging tables into a block entry

When dirty logging is enabled, we collapse block entries into tables
as necessary. If dirty logging gets canceled, we can end-up merging
tables back into block entries.

When this happens, we must not only free the non-huge page-table
pages but also invalidate all the TLB entries that can potentially
cover the block. Otherwise, we end-up with multiple possible translations
for the same physical page, which can legitimately result in a TLB
conflict.

To address this, replease the bogus invalidation by IPA with a full
VM invalidation. Although this is pretty heavy handed, it happens
very infrequently and saves a bunch of invalidations by IPA.

Signed-off-by: Yanan Wang <wangyanan55@huawei.com>
[maz: fixup commit message]
Signed-off-by: Marc Zyngier <maz@kernel.org>
Acked-by: Will Deacon <will@kernel.org>
Link: https://lore.kernel.org/r/20201201201034.116760-3-wangyanan55@huawei.com
---
 arch/arm64/kvm/hyp/pgtable.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
index 2beba1dc40ec..bdf8e55ed308 100644
--- a/arch/arm64/kvm/hyp/pgtable.c
+++ b/arch/arm64/kvm/hyp/pgtable.c
@@ -502,7 +502,13 @@ static int stage2_map_walk_table_pre(u64 addr, u64 end, u32 level,
 		return 0;
 
 	kvm_set_invalid_pte(ptep);
-	kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, data->mmu, addr, 0);
+
+	/*
+	 * Invalidate the whole stage-2, as we may have numerous leaf
+	 * entries below us which would otherwise need invalidating
+	 * individually.
+	 */
+	kvm_call_hyp(__kvm_tlb_flush_vmid, data->mmu);
 	data->anchor = ptep;
 	return 0;
 }
-- 
cgit v1.2.3-70-g09d2


From 7d894834a305568a0168c55d4729216f5f8cb4e6 Mon Sep 17 00:00:00 2001
From: Yanan Wang <wangyanan55@huawei.com>
Date: Wed, 2 Dec 2020 04:10:34 +0800
Subject: KVM: arm64: Add usage of stage 2 fault lookup level in
 user_mem_abort()

If we get a FSC_PERM fault, just using (logging_active && writable) to
determine calling kvm_pgtable_stage2_map(). There will be two more cases
we should consider.

(1) After logging_active is configged back to false from true. When we
get a FSC_PERM fault with write_fault and adjustment of hugepage is needed,
we should merge tables back to a block entry. This case is ignored by still
calling kvm_pgtable_stage2_relax_perms(), which will lead to an endless
loop and guest panic due to soft lockup.

(2) We use (FSC_PERM && logging_active && writable) to determine
collapsing a block entry into a table by calling kvm_pgtable_stage2_map().
But sometimes we may only need to relax permissions when trying to write
to a page other than a block.
In this condition,using kvm_pgtable_stage2_relax_perms() will be fine.

The ISS filed bit[1:0] in ESR_EL2 regesiter indicates the stage2 lookup
level at which a D-abort or I-abort occurred. By comparing granule of
the fault lookup level with vma_pagesize, we can strictly distinguish
conditions of calling kvm_pgtable_stage2_relax_perms() or
kvm_pgtable_stage2_map(), and the above two cases will be well considered.

Suggested-by: Keqian Zhu <zhukeqian1@huawei.com>
Signed-off-by: Yanan Wang <wangyanan55@huawei.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Acked-by: Will Deacon <will@kernel.org>
Link: https://lore.kernel.org/r/20201201201034.116760-4-wangyanan55@huawei.com
---
 arch/arm64/include/asm/esr.h         |  1 +
 arch/arm64/include/asm/kvm_emulate.h |  5 +++++
 arch/arm64/kvm/mmu.c                 | 11 +++++++++--
 3 files changed, 15 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h
index 22c81f1edda2..85a3e49f92f4 100644
--- a/arch/arm64/include/asm/esr.h
+++ b/arch/arm64/include/asm/esr.h
@@ -104,6 +104,7 @@
 /* Shared ISS fault status code(IFSC/DFSC) for Data/Instruction aborts */
 #define ESR_ELx_FSC		(0x3F)
 #define ESR_ELx_FSC_TYPE	(0x3C)
+#define ESR_ELx_FSC_LEVEL	(0x03)
 #define ESR_ELx_FSC_EXTABT	(0x10)
 #define ESR_ELx_FSC_SERROR	(0x11)
 #define ESR_ELx_FSC_ACCESS	(0x08)
diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index 5ef2669ccd6c..00bc6f1234ba 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -350,6 +350,11 @@ static __always_inline u8 kvm_vcpu_trap_get_fault_type(const struct kvm_vcpu *vc
 	return kvm_vcpu_get_esr(vcpu) & ESR_ELx_FSC_TYPE;
 }
 
+static __always_inline u8 kvm_vcpu_trap_get_fault_level(const struct kvm_vcpu *vcpu)
+{
+	return kvm_vcpu_get_esr(vcpu) & ESR_ELx_FSC_LEVEL;
+}
+
 static __always_inline bool kvm_vcpu_abt_issea(const struct kvm_vcpu *vcpu)
 {
 	switch (kvm_vcpu_trap_get_fault(vcpu)) {
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index 1a01da9fdc99..75814a02d189 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -754,10 +754,12 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 	gfn_t gfn;
 	kvm_pfn_t pfn;
 	bool logging_active = memslot_is_logging(memslot);
-	unsigned long vma_pagesize;
+	unsigned long fault_level = kvm_vcpu_trap_get_fault_level(vcpu);
+	unsigned long vma_pagesize, fault_granule;
 	enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_R;
 	struct kvm_pgtable *pgt;
 
+	fault_granule = 1UL << ARM64_HW_PGTABLE_LEVEL_SHIFT(fault_level);
 	write_fault = kvm_is_write_fault(vcpu);
 	exec_fault = kvm_vcpu_trap_is_exec_fault(vcpu);
 	VM_BUG_ON(write_fault && exec_fault);
@@ -896,7 +898,12 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 	else if (cpus_have_const_cap(ARM64_HAS_CACHE_DIC))
 		prot |= KVM_PGTABLE_PROT_X;
 
-	if (fault_status == FSC_PERM && !(logging_active && writable)) {
+	/*
+	 * Under the premise of getting a FSC_PERM fault, we just need to relax
+	 * permissions only if vma_pagesize equals fault_granule. Otherwise,
+	 * kvm_pgtable_stage2_map() should be called to change block size.
+	 */
+	if (fault_status == FSC_PERM && vma_pagesize == fault_granule) {
 		ret = kvm_pgtable_stage2_relax_perms(pgt, fault_ipa, prot);
 	} else {
 		ret = kvm_pgtable_stage2_map(pgt, fault_ipa, vma_pagesize,
-- 
cgit v1.2.3-70-g09d2


From a1ee28117077c3bf24e5ab6324c835eaab629c45 Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Sat, 28 Nov 2020 17:07:21 +1000
Subject: powerpc/64s/powernv: Fix memory corruption when saving SLB entries on
 MCE

This can be hit by an HPT guest running on an HPT host and bring down
the host, so it's quite important to fix.

Fixes: 7290f3b3d3e6 ("powerpc/64s/powernv: machine check dump SLB contents")
Cc: stable@vger.kernel.org # v5.4+
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Acked-by: Mahesh Salgaonkar <mahesh@linux.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20201128070728.825934-2-npiggin@gmail.com
---
 arch/powerpc/platforms/powernv/setup.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c
index 46115231a3b2..4426a109ec2f 100644
--- a/arch/powerpc/platforms/powernv/setup.c
+++ b/arch/powerpc/platforms/powernv/setup.c
@@ -211,11 +211,16 @@ static void __init pnv_init(void)
 		add_preferred_console("hvc", 0, NULL);
 
 	if (!radix_enabled()) {
+		size_t size = sizeof(struct slb_entry) * mmu_slb_size;
 		int i;
 
 		/* Allocate per cpu area to save old slb contents during MCE */
-		for_each_possible_cpu(i)
-			paca_ptrs[i]->mce_faulty_slbs = memblock_alloc_node(mmu_slb_size, __alignof__(*paca_ptrs[i]->mce_faulty_slbs), cpu_to_node(i));
+		for_each_possible_cpu(i) {
+			paca_ptrs[i]->mce_faulty_slbs =
+					memblock_alloc_node(size,
+						__alignof__(struct slb_entry),
+						cpu_to_node(i));
+		}
 	}
 }
 
-- 
cgit v1.2.3-70-g09d2


From a2bd4097b3ec242f4de4924db463a9c94530e03a Mon Sep 17 00:00:00 2001
From: Alexander Gordeev <agordeev@linux.ibm.com>
Date: Thu, 26 Nov 2020 18:00:37 +0100
Subject: s390/pci: fix CPU address in MSI for directed IRQ

The directed MSIs are delivered to CPUs whose address is
written to the MSI message address. The current code assumes
that a CPU logical number (as it is seen by the kernel)
is also the CPU address.

The above assumption is not correct, as the CPU address
is rather the value returned by STAP instruction. That
value does not necessarily match the kernel logical CPU
number.

Fixes: e979ce7bced2 ("s390/pci: provide support for CPU directed interrupts")
Cc: <stable@vger.kernel.org> # v5.2+
Signed-off-by: Alexander Gordeev <agordeev@linux.ibm.com>
Reviewed-by: Halil Pasic <pasic@linux.ibm.com>
Reviewed-by: Niklas Schnelle <schnelle@linux.ibm.com>
Signed-off-by: Niklas Schnelle <schnelle@linux.ibm.com>
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
---
 arch/s390/pci/pci_irq.c | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

(limited to 'arch')

diff --git a/arch/s390/pci/pci_irq.c b/arch/s390/pci/pci_irq.c
index 743f257cf2cb..75217fb63d7b 100644
--- a/arch/s390/pci/pci_irq.c
+++ b/arch/s390/pci/pci_irq.c
@@ -103,9 +103,10 @@ static int zpci_set_irq_affinity(struct irq_data *data, const struct cpumask *de
 {
 	struct msi_desc *entry = irq_get_msi_desc(data->irq);
 	struct msi_msg msg = entry->msg;
+	int cpu_addr = smp_cpu_get_cpu_address(cpumask_first(dest));
 
 	msg.address_lo &= 0xff0000ff;
-	msg.address_lo |= (cpumask_first(dest) << 8);
+	msg.address_lo |= (cpu_addr << 8);
 	pci_write_msi_msg(data->irq, &msg);
 
 	return IRQ_SET_MASK_OK;
@@ -238,6 +239,7 @@ int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
 	unsigned long bit;
 	struct msi_desc *msi;
 	struct msi_msg msg;
+	int cpu_addr;
 	int rc, irq;
 
 	zdev->aisb = -1UL;
@@ -287,9 +289,15 @@ int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
 					 handle_percpu_irq);
 		msg.data = hwirq - bit;
 		if (irq_delivery == DIRECTED) {
+			if (msi->affinity)
+				cpu = cpumask_first(&msi->affinity->mask);
+			else
+				cpu = 0;
+			cpu_addr = smp_cpu_get_cpu_address(cpu);
+
 			msg.address_lo = zdev->msi_addr & 0xff0000ff;
-			msg.address_lo |= msi->affinity ?
-				(cpumask_first(&msi->affinity->mask) << 8) : 0;
+			msg.address_lo |= (cpu_addr << 8);
+
 			for_each_possible_cpu(cpu) {
 				airq_iv_set_data(zpci_ibv[cpu], hwirq, irq);
 			}
-- 
cgit v1.2.3-70-g09d2


From b1cae1f84a0f609a34ebcaa087fbecef32f69882 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <hca@linux.ibm.com>
Date: Wed, 2 Dec 2020 11:46:01 +0100
Subject: s390: fix irq state tracing

With commit 58c644ba512c ("sched/idle: Fix arch_cpu_idle() vs
tracing") common code calls arch_cpu_idle() with a lockdep state that
tells irqs are on.

This doesn't work very well for s390: psw_idle() will enable interrupts
to wait for an interrupt. As soon as an interrupt occurs the interrupt
handler will verify if the old context was psw_idle(). If that is the
case the interrupt enablement bits in the old program status word will
be cleared.

A subsequent test in both the external as well as the io interrupt
handler checks if in the old context interrupts were enabled. Due to
the above patching of the old program status word it is assumed the
old context had interrupts disabled, and therefore a call to
TRACE_IRQS_OFF (aka trace_hardirqs_off_caller) is skipped. Which in
turn makes lockdep incorrectly "think" that interrupts are enabled
within the interrupt handler.

Fix this by unconditionally calling TRACE_IRQS_OFF when entering
interrupt handlers. Also call unconditionally TRACE_IRQS_ON when
leaving interrupts handlers.

This leaves the special psw_idle() case, which now returns with
interrupts disabled, but has an "irqs on" lockdep state. So callers of
psw_idle() must adjust the state on their own, if required. This is
currently only __udelay_disabled().

Fixes: 58c644ba512c ("sched/idle: Fix arch_cpu_idle() vs tracing")
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
---
 arch/s390/kernel/entry.S | 15 ---------------
 arch/s390/lib/delay.c    |  5 ++---
 2 files changed, 2 insertions(+), 18 deletions(-)

(limited to 'arch')

diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index 26bb0603c5a1..92beb1444644 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -763,12 +763,7 @@ ENTRY(io_int_handler)
 	xc	__PT_FLAGS(8,%r11),__PT_FLAGS(%r11)
 	TSTMSK	__LC_CPU_FLAGS,_CIF_IGNORE_IRQ
 	jo	.Lio_restore
-#if IS_ENABLED(CONFIG_TRACE_IRQFLAGS)
-	tmhh	%r8,0x300
-	jz	1f
 	TRACE_IRQS_OFF
-1:
-#endif
 	xc	__SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
 .Lio_loop:
 	lgr	%r2,%r11		# pass pointer to pt_regs
@@ -791,12 +786,7 @@ ENTRY(io_int_handler)
 	TSTMSK	__LC_CPU_FLAGS,_CIF_WORK
 	jnz	.Lio_work
 .Lio_restore:
-#if IS_ENABLED(CONFIG_TRACE_IRQFLAGS)
-	tm	__PT_PSW(%r11),3
-	jno	0f
 	TRACE_IRQS_ON
-0:
-#endif
 	mvc	__LC_RETURN_PSW(16),__PT_PSW(%r11)
 	tm	__PT_PSW+1(%r11),0x01	# returning to user ?
 	jno	.Lio_exit_kernel
@@ -976,12 +966,7 @@ ENTRY(ext_int_handler)
 	xc	__PT_FLAGS(8,%r11),__PT_FLAGS(%r11)
 	TSTMSK	__LC_CPU_FLAGS,_CIF_IGNORE_IRQ
 	jo	.Lio_restore
-#if IS_ENABLED(CONFIG_TRACE_IRQFLAGS)
-	tmhh	%r8,0x300
-	jz	1f
 	TRACE_IRQS_OFF
-1:
-#endif
 	xc	__SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
 	lgr	%r2,%r11		# pass pointer to pt_regs
 	lghi	%r3,EXT_INTERRUPT
diff --git a/arch/s390/lib/delay.c b/arch/s390/lib/delay.c
index daca7bad66de..8c0c68e7770e 100644
--- a/arch/s390/lib/delay.c
+++ b/arch/s390/lib/delay.c
@@ -33,7 +33,7 @@ EXPORT_SYMBOL(__delay);
 
 static void __udelay_disabled(unsigned long long usecs)
 {
-	unsigned long cr0, cr0_new, psw_mask, flags;
+	unsigned long cr0, cr0_new, psw_mask;
 	struct s390_idle_data idle;
 	u64 end;
 
@@ -45,9 +45,8 @@ static void __udelay_disabled(unsigned long long usecs)
 	psw_mask = __extract_psw() | PSW_MASK_EXT | PSW_MASK_WAIT;
 	set_clock_comparator(end);
 	set_cpu_flag(CIF_IGNORE_IRQ);
-	local_irq_save(flags);
 	psw_idle(&idle, psw_mask);
-	local_irq_restore(flags);
+	trace_hardirqs_off();
 	clear_cpu_flag(CIF_IGNORE_IRQ);
 	set_clock_comparator(S390_lowcore.clock_comparator);
 	__ctl_load(cr0, 0, 0);
-- 
cgit v1.2.3-70-g09d2


From 5debf02131227d39988e44adf5090fb796fa8466 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Thu, 26 Nov 2020 20:09:21 +0900
Subject: perf/x86/intel: Fix a warning on x86_pmu_stop() with large PEBS

The commit 3966c3feca3f ("x86/perf/amd: Remove need to check "running"
bit in NMI handler") introduced this.  It seems x86_pmu_stop can be
called recursively (like when it losts some samples) like below:

  x86_pmu_stop
    intel_pmu_disable_event  (x86_pmu_disable)
      intel_pmu_pebs_disable
        intel_pmu_drain_pebs_nhm  (x86_pmu_drain_pebs_buffer)
          x86_pmu_stop

While commit 35d1ce6bec13 ("perf/x86/intel/ds: Fix x86_pmu_stop
warning for large PEBS") fixed it for the normal cases, there's
another path to call x86_pmu_stop() recursively when a PEBS error was
detected (like two or more counters overflowed at the same time).

Like in the Kan's previous fix, we can skip the interrupt accounting
for large PEBS, so check the iregs which is set for PMI only.

Fixes: 3966c3feca3f ("x86/perf/amd: Remove need to check "running" bit in NMI handler")
Reported-by: John Sperbeck <jsperbeck@google.com>
Suggested-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20201126110922.317681-1-namhyung@kernel.org
---
 arch/x86/events/intel/ds.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index b47cc4226934..89dba588636e 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -1940,7 +1940,7 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, struct perf_sample_d
 		if (error[bit]) {
 			perf_log_lost_samples(event, error[bit]);
 
-			if (perf_event_account_interrupt(event))
+			if (iregs && perf_event_account_interrupt(event))
 				x86_pmu_stop(event, 0);
 		}
 
-- 
cgit v1.2.3-70-g09d2


From fc17db8aa4c53cbd2d5469bb0521ea0f0a6dbb27 Mon Sep 17 00:00:00 2001
From: Stephane Eranian <eranian@google.com>
Date: Thu, 26 Nov 2020 20:09:22 +0900
Subject: perf/x86/intel: Check PEBS status correctly

The kernel cannot disambiguate when 2+ PEBS counters overflow at the
same time. This is what the comment for this code suggests.  However,
I see the comparison is done with the unfiltered p->status which is a
copy of IA32_PERF_GLOBAL_STATUS at the time of the sample. This
register contains more than the PEBS counter overflow bits. It also
includes many other bits which could also be set.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Signed-off-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20201126110922.317681-2-namhyung@kernel.org
---
 arch/x86/events/intel/ds.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index 89dba588636e..485c5066f8b8 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -1916,7 +1916,7 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, struct perf_sample_d
 		 * that caused the PEBS record. It's called collision.
 		 * If collision happened, the record will be dropped.
 		 */
-		if (p->status != (1ULL << bit)) {
+		if (pebs_status != (1ULL << bit)) {
 			for_each_set_bit(i, (unsigned long *)&pebs_status, size)
 				error[i]++;
 			continue;
-- 
cgit v1.2.3-70-g09d2


From 8dcc0e19dfbd73ad6b3172924d6da8f7f3f8b3b0 Mon Sep 17 00:00:00 2001
From: Mike Travis <mike.travis@hpe.com>
Date: Thu, 3 Dec 2020 09:22:52 -0600
Subject: x86/platform/uv: Fix UV4 hub revision adjustment

Currently, UV4 is incorrectly identified as UV4A and UV4A as UV5. Hub
chip starts with revision 1, fix it.

 [ bp: Massage commit message. ]

Fixes: 647128f1536e ("x86/platform/uv: Update UV MMRs for UV5")
Signed-off-by: Mike Travis <mike.travis@hpe.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Steve Wahl <steve.wahl@hpe.com>
Acked-by: Dimitri Sivanich <dimitri.sivanich@hpe.com>
Link: https://lkml.kernel.org/r/20201203152252.371199-1-mike.travis@hpe.com
---
 arch/x86/kernel/apic/x2apic_uv_x.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index 1b98f8c12b96..235f5cde06fc 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -161,7 +161,7 @@ static int __init early_set_hub_type(void)
 	/* UV4/4A only have a revision difference */
 	case UV4_HUB_PART_NUMBER:
 		uv_min_hub_revision_id = node_id.s.revision
-					 + UV4_HUB_REVISION_BASE;
+					 + UV4_HUB_REVISION_BASE - 1;
 		uv_hub_type_set(UV4);
 		if (uv_min_hub_revision_id == UV4A_HUB_REVISION_BASE)
 			uv_hub_type_set(UV4|UV4A);
-- 
cgit v1.2.3-70-g09d2


From a2b2d4bf5076832339762556b816eec58ca38f77 Mon Sep 17 00:00:00 2001
From: Jacob Xu <jacobhxu@google.com>
Date: Thu, 3 Dec 2020 12:59:39 -0800
Subject: kvm: svm: de-allocate svm_cpu_data for all cpus in svm_cpu_uninit()

The cpu arg for svm_cpu_uninit() was previously ignored resulting in the
per cpu structure svm_cpu_data not being de-allocated for all cpus.

Signed-off-by: Jacob Xu <jacobhxu@google.com>
Message-Id: <20201203205939.1783969-1-jacobhxu@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/svm/svm.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 79b3a564f1c9..da7eb4aaf44f 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -530,12 +530,12 @@ static int svm_hardware_enable(void)
 
 static void svm_cpu_uninit(int cpu)
 {
-	struct svm_cpu_data *sd = per_cpu(svm_data, raw_smp_processor_id());
+	struct svm_cpu_data *sd = per_cpu(svm_data, cpu);
 
 	if (!sd)
 		return;
 
-	per_cpu(svm_data, raw_smp_processor_id()) = NULL;
+	per_cpu(svm_data, cpu) = NULL;
 	kfree(sd->sev_vmcbs);
 	__free_page(sd->save_area);
 	kfree(sd);
-- 
cgit v1.2.3-70-g09d2


From 339f5a7fb2d6350fdb11f067da5240fd97e4f284 Mon Sep 17 00:00:00 2001
From: Rick Edgecombe <rick.p.edgecombe@intel.com>
Date: Thu, 3 Dec 2020 15:11:20 -0800
Subject: kvm: x86/mmu: Use cpuid to determine max gfn

In the TDP MMU, use shadow_phys_bits to dermine the maximum possible GFN
mapped in the guest for zapping operations. boot_cpu_data.x86_phys_bits
may be reduced in the case of HW features that steal HPA bits for other
purposes. However, this doesn't necessarily reduce GPA space that can be
accessed via TDP. So zap based on a maximum gfn calculated with MAXPHYADDR
retrieved from CPUID. This is already stored in shadow_phys_bits, so use
it instead of x86_phys_bits.

Fixes: faaf05b00aec ("kvm: x86/mmu: Support zapping SPTEs in the TDP MMU")
Signed-off-by: Rick Edgecombe <rick.p.edgecombe@intel.com>
Message-Id: <20201203231120.27307-1-rick.p.edgecombe@intel.com>
Reviewed-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/mmu/tdp_mmu.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index ff28a5c6abd6..84c8f06bec26 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -66,7 +66,7 @@ static bool zap_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
 
 void kvm_tdp_mmu_free_root(struct kvm *kvm, struct kvm_mmu_page *root)
 {
-	gfn_t max_gfn = 1ULL << (boot_cpu_data.x86_phys_bits - PAGE_SHIFT);
+	gfn_t max_gfn = 1ULL << (shadow_phys_bits - PAGE_SHIFT);
 
 	lockdep_assert_held(&kvm->mmu_lock);
 
@@ -456,7 +456,7 @@ bool kvm_tdp_mmu_zap_gfn_range(struct kvm *kvm, gfn_t start, gfn_t end)
 
 void kvm_tdp_mmu_zap_all(struct kvm *kvm)
 {
-	gfn_t max_gfn = 1ULL << (boot_cpu_data.x86_phys_bits - PAGE_SHIFT);
+	gfn_t max_gfn = 1ULL << (shadow_phys_bits - PAGE_SHIFT);
 	bool flush;
 
 	flush = kvm_tdp_mmu_zap_gfn_range(kvm, 0, max_gfn);
-- 
cgit v1.2.3-70-g09d2


From 45c5775460f32ed8cdb7c16986ae1a2c254346b3 Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Mon, 30 Nov 2020 09:30:33 +0100
Subject: usb: ohci-omap: Fix descriptor conversion

There were a bunch of issues with the patch converting the
OMAP1 OSK board to use descriptors for controlling the USB
host:

- The chip label was incorrect
- The GPIO offset was off-by-one
- The code should use sleeping accessors

This patch tries to fix all issues at the same time.

Cc: Aaro Koskinen <aaro.koskinen@iki.fi>
Reported-by: Aaro Koskinen <aaro.koskinen@iki.fi>
Fixes: 15d157e87443 ("usb: ohci-omap: Convert to use GPIO descriptors")
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Link: https://lore.kernel.org/r/20201130083033.29435-1-linus.walleij@linaro.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm/mach-omap1/board-osk.c | 2 +-
 drivers/usb/host/ohci-omap.c    | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'arch')

diff --git a/arch/arm/mach-omap1/board-osk.c b/arch/arm/mach-omap1/board-osk.c
index 144b9caa935c..a720259099ed 100644
--- a/arch/arm/mach-omap1/board-osk.c
+++ b/arch/arm/mach-omap1/board-osk.c
@@ -288,7 +288,7 @@ static struct gpiod_lookup_table osk_usb_gpio_table = {
 	.dev_id = "ohci",
 	.table = {
 		/* Power GPIO on the I2C-attached TPS65010 */
-		GPIO_LOOKUP("i2c-tps65010", 1, "power", GPIO_ACTIVE_HIGH),
+		GPIO_LOOKUP("tps65010", 0, "power", GPIO_ACTIVE_HIGH),
 		GPIO_LOOKUP(OMAP_GPIO_LABEL, 9, "overcurrent",
 			    GPIO_ACTIVE_HIGH),
 	},
diff --git a/drivers/usb/host/ohci-omap.c b/drivers/usb/host/ohci-omap.c
index 9ccdf2c216b5..6374501ba139 100644
--- a/drivers/usb/host/ohci-omap.c
+++ b/drivers/usb/host/ohci-omap.c
@@ -91,14 +91,14 @@ static int omap_ohci_transceiver_power(struct ohci_omap_priv *priv, int on)
 				| ((1 << 5/*usb1*/) | (1 << 3/*usb2*/)),
 			       INNOVATOR_FPGA_CAM_USB_CONTROL);
 		else if (priv->power)
-			gpiod_set_value(priv->power, 0);
+			gpiod_set_value_cansleep(priv->power, 0);
 	} else {
 		if (machine_is_omap_innovator() && cpu_is_omap1510())
 			__raw_writeb(__raw_readb(INNOVATOR_FPGA_CAM_USB_CONTROL)
 				& ~((1 << 5/*usb1*/) | (1 << 3/*usb2*/)),
 			       INNOVATOR_FPGA_CAM_USB_CONTROL);
 		else if (priv->power)
-			gpiod_set_value(priv->power, 1);
+			gpiod_set_value_cansleep(priv->power, 1);
 	}
 
 	return 0;
-- 
cgit v1.2.3-70-g09d2


From 4e9a5ae8df5b3365183150f6df49e49dece80d8c Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@kernel.org>
Date: Thu, 3 Dec 2020 13:50:37 +0900
Subject: x86/uprobes: Do not use prefixes.nbytes when looping over
 prefixes.bytes

Since insn.prefixes.nbytes can be bigger than the size of
insn.prefixes.bytes[] when a prefix is repeated, the proper check must
be

  insn.prefixes.bytes[i] != 0 and i < 4

instead of using insn.prefixes.nbytes.

Introduce a for_each_insn_prefix() macro for this purpose. Debugged by
Kees Cook <keescook@chromium.org>.

 [ bp: Massage commit message, sync with the respective header in tools/
   and drop "we". ]

Fixes: 2b1444983508 ("uprobes, mm, x86: Add the ability to install and remove uprobes breakpoints")
Reported-by: syzbot+9b64b619f10f19d19a7c@syzkaller.appspotmail.com
Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Cc: stable@vger.kernel.org
Link: https://lkml.kernel.org/r/160697103739.3146288.7437620795200799020.stgit@devnote2
---
 arch/x86/include/asm/insn.h       | 15 +++++++++++++++
 arch/x86/kernel/uprobes.c         | 10 ++++++----
 tools/arch/x86/include/asm/insn.h | 15 +++++++++++++++
 3 files changed, 36 insertions(+), 4 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/include/asm/insn.h b/arch/x86/include/asm/insn.h
index 5c1ae3eff9d4..a8c3d284fa46 100644
--- a/arch/x86/include/asm/insn.h
+++ b/arch/x86/include/asm/insn.h
@@ -201,6 +201,21 @@ static inline int insn_offset_immediate(struct insn *insn)
 	return insn_offset_displacement(insn) + insn->displacement.nbytes;
 }
 
+/**
+ * for_each_insn_prefix() -- Iterate prefixes in the instruction
+ * @insn: Pointer to struct insn.
+ * @idx:  Index storage.
+ * @prefix: Prefix byte.
+ *
+ * Iterate prefix bytes of given @insn. Each prefix byte is stored in @prefix
+ * and the index is stored in @idx (note that this @idx is just for a cursor,
+ * do not change it.)
+ * Since prefixes.nbytes can be bigger than 4 if some prefixes
+ * are repeated, it cannot be used for looping over the prefixes.
+ */
+#define for_each_insn_prefix(insn, idx, prefix)	\
+	for (idx = 0; idx < ARRAY_SIZE(insn->prefixes.bytes) && (prefix = insn->prefixes.bytes[idx]) != 0; idx++)
+
 #define POP_SS_OPCODE 0x1f
 #define MOV_SREG_OPCODE 0x8e
 
diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
index 3fdaa042823d..138bdb1fd136 100644
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -255,12 +255,13 @@ static volatile u32 good_2byte_insns[256 / 32] = {
 
 static bool is_prefix_bad(struct insn *insn)
 {
+	insn_byte_t p;
 	int i;
 
-	for (i = 0; i < insn->prefixes.nbytes; i++) {
+	for_each_insn_prefix(insn, i, p) {
 		insn_attr_t attr;
 
-		attr = inat_get_opcode_attribute(insn->prefixes.bytes[i]);
+		attr = inat_get_opcode_attribute(p);
 		switch (attr) {
 		case INAT_MAKE_PREFIX(INAT_PFX_ES):
 		case INAT_MAKE_PREFIX(INAT_PFX_CS):
@@ -715,6 +716,7 @@ static const struct uprobe_xol_ops push_xol_ops = {
 static int branch_setup_xol_ops(struct arch_uprobe *auprobe, struct insn *insn)
 {
 	u8 opc1 = OPCODE1(insn);
+	insn_byte_t p;
 	int i;
 
 	switch (opc1) {
@@ -746,8 +748,8 @@ static int branch_setup_xol_ops(struct arch_uprobe *auprobe, struct insn *insn)
 	 * Intel and AMD behavior differ in 64-bit mode: Intel ignores 66 prefix.
 	 * No one uses these insns, reject any branch insns with such prefix.
 	 */
-	for (i = 0; i < insn->prefixes.nbytes; i++) {
-		if (insn->prefixes.bytes[i] == 0x66)
+	for_each_insn_prefix(insn, i, p) {
+		if (p == 0x66)
 			return -ENOTSUPP;
 	}
 
diff --git a/tools/arch/x86/include/asm/insn.h b/tools/arch/x86/include/asm/insn.h
index 568854b14d0a..52c6262e6bfd 100644
--- a/tools/arch/x86/include/asm/insn.h
+++ b/tools/arch/x86/include/asm/insn.h
@@ -201,6 +201,21 @@ static inline int insn_offset_immediate(struct insn *insn)
 	return insn_offset_displacement(insn) + insn->displacement.nbytes;
 }
 
+/**
+ * for_each_insn_prefix() -- Iterate prefixes in the instruction
+ * @insn: Pointer to struct insn.
+ * @idx:  Index storage.
+ * @prefix: Prefix byte.
+ *
+ * Iterate prefix bytes of given @insn. Each prefix byte is stored in @prefix
+ * and the index is stored in @idx (note that this @idx is just for a cursor,
+ * do not change it.)
+ * Since prefixes.nbytes can be bigger than 4 if some prefixes
+ * are repeated, it cannot be used for looping over the prefixes.
+ */
+#define for_each_insn_prefix(insn, idx, prefix)	\
+	for (idx = 0; idx < ARRAY_SIZE(insn->prefixes.bytes) && (prefix = insn->prefixes.bytes[idx]) != 0; idx++)
+
 #define POP_SS_OPCODE 0x1f
 #define MOV_SREG_OPCODE 0x8e
 
-- 
cgit v1.2.3-70-g09d2


From 12cb908a11b2544b5f53e9af856e6b6a90ed5533 Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@kernel.org>
Date: Thu, 3 Dec 2020 13:50:50 +0900
Subject: x86/insn-eval: Use new for_each_insn_prefix() macro to loop over
 prefixes bytes

Since insn.prefixes.nbytes can be bigger than the size of
insn.prefixes.bytes[] when a prefix is repeated, the proper check must
be

  insn.prefixes.bytes[i] != 0 and i < 4

instead of using insn.prefixes.nbytes. Use the new
for_each_insn_prefix() macro which does it correctly.

Debugged by Kees Cook <keescook@chromium.org>.

 [ bp: Massage commit message. ]

Fixes: 32d0b95300db ("x86/insn-eval: Add utility functions to get segment selector")
Reported-by: syzbot+9b64b619f10f19d19a7c@syzkaller.appspotmail.com
Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Borislav Petkov <bp@suse.de>
Cc: stable@vger.kernel.org
Link: https://lkml.kernel.org/r/160697104969.3146288.16329307586428270032.stgit@devnote2
---
 arch/x86/lib/insn-eval.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/lib/insn-eval.c b/arch/x86/lib/insn-eval.c
index 58f7fb95c7f4..4229950a5d78 100644
--- a/arch/x86/lib/insn-eval.c
+++ b/arch/x86/lib/insn-eval.c
@@ -63,13 +63,12 @@ static bool is_string_insn(struct insn *insn)
  */
 bool insn_has_rep_prefix(struct insn *insn)
 {
+	insn_byte_t p;
 	int i;
 
 	insn_get_prefixes(insn);
 
-	for (i = 0; i < insn->prefixes.nbytes; i++) {
-		insn_byte_t p = insn->prefixes.bytes[i];
-
+	for_each_insn_prefix(insn, i, p) {
 		if (p == 0xf2 || p == 0xf3)
 			return true;
 	}
@@ -95,14 +94,15 @@ static int get_seg_reg_override_idx(struct insn *insn)
 {
 	int idx = INAT_SEG_REG_DEFAULT;
 	int num_overrides = 0, i;
+	insn_byte_t p;
 
 	insn_get_prefixes(insn);
 
 	/* Look for any segment override prefixes. */
-	for (i = 0; i < insn->prefixes.nbytes; i++) {
+	for_each_insn_prefix(insn, i, p) {
 		insn_attr_t attr;
 
-		attr = inat_get_opcode_attribute(insn->prefixes.bytes[i]);
+		attr = inat_get_opcode_attribute(p);
 		switch (attr) {
 		case INAT_MAKE_PREFIX(INAT_PFX_CS):
 			idx = INAT_SEG_REG_CS;
-- 
cgit v1.2.3-70-g09d2


From 84da009f06e60cf59d5e861f8e2101d2d3885517 Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@kernel.org>
Date: Thu, 3 Dec 2020 13:51:01 +0900
Subject: x86/sev-es: Use new for_each_insn_prefix() macro to loop over
 prefixes bytes

Since insn.prefixes.nbytes can be bigger than the size of
insn.prefixes.bytes[] when a prefix is repeated, the proper
check must be:

  insn.prefixes.bytes[i] != 0 and i < 4

instead of using insn.prefixes.nbytes. Use the new
for_each_insn_prefix() macro which does it correctly.

Debugged by Kees Cook <keescook@chromium.org>.

 [ bp: Massage commit message. ]

Fixes: 25189d08e516 ("x86/sev-es: Add support for handling IOIO exceptions")
Reported-by: syzbot+9b64b619f10f19d19a7c@syzkaller.appspotmail.com
Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Borislav Petkov <bp@suse.de>
Link: https://lkml.kernel.org/r/160697106089.3146288.2052422845039649176.stgit@devnote2
---
 arch/x86/boot/compressed/sev-es.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/boot/compressed/sev-es.c b/arch/x86/boot/compressed/sev-es.c
index 954cb2702e23..27826c265aab 100644
--- a/arch/x86/boot/compressed/sev-es.c
+++ b/arch/x86/boot/compressed/sev-es.c
@@ -32,13 +32,12 @@ struct ghcb *boot_ghcb;
  */
 static bool insn_has_rep_prefix(struct insn *insn)
 {
+	insn_byte_t p;
 	int i;
 
 	insn_get_prefixes(insn);
 
-	for (i = 0; i < insn->prefixes.nbytes; i++) {
-		insn_byte_t p = insn->prefixes.bytes[i];
-
+	for_each_insn_prefix(insn, i, p) {
 		if (p == 0xf2 || p == 0xf3)
 			return true;
 	}
-- 
cgit v1.2.3-70-g09d2


From e91d8d78237de8d7120c320b3645b7100848f24d Mon Sep 17 00:00:00 2001
From: Minchan Kim <minchan@kernel.org>
Date: Sat, 5 Dec 2020 22:14:51 -0800
Subject: mm/zsmalloc.c: drop ZSMALLOC_PGTABLE_MAPPING

While I was doing zram testing, I found sometimes decompression failed
since the compression buffer was corrupted.  With investigation, I found
below commit calls cond_resched unconditionally so it could make a
problem in atomic context if the task is reschedule.

  BUG: sleeping function called from invalid context at mm/vmalloc.c:108
  in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 946, name: memhog
  3 locks held by memhog/946:
   #0: ffff9d01d4b193e8 (&mm->mmap_lock#2){++++}-{4:4}, at: __mm_populate+0x103/0x160
   #1: ffffffffa3d53de0 (fs_reclaim){+.+.}-{0:0}, at: __alloc_pages_slowpath.constprop.0+0xa98/0x1160
   #2: ffff9d01d56b8110 (&zspage->lock){.+.+}-{3:3}, at: zs_map_object+0x8e/0x1f0
  CPU: 0 PID: 946 Comm: memhog Not tainted 5.9.3-00011-gc5bfc0287345-dirty #316
  Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.13.0-1 04/01/2014
  Call Trace:
    unmap_kernel_range_noflush+0x2eb/0x350
    unmap_kernel_range+0x14/0x30
    zs_unmap_object+0xd5/0xe0
    zram_bvec_rw.isra.0+0x38c/0x8e0
    zram_rw_page+0x90/0x101
    bdev_write_page+0x92/0xe0
    __swap_writepage+0x94/0x4a0
    pageout+0xe3/0x3a0
    shrink_page_list+0xb94/0xd60
    shrink_inactive_list+0x158/0x460

We can fix this by removing the ZSMALLOC_PGTABLE_MAPPING feature (which
contains the offending calling code) from zsmalloc.

Even though this option showed some amount improvement(e.g., 30%) in
some arm32 platforms, it has been headache to maintain since it have
abused APIs[1](e.g., unmap_kernel_range in atomic context).

Since we are approaching to deprecate 32bit machines and already made
the config option available for only builtin build since v5.8, lastly it
has been not default option in zsmalloc, it's time to drop the option
for better maintenance.

[1] http://lore.kernel.org/linux-mm/20201105170249.387069-1-minchan@kernel.org

Fixes: e47110e90584 ("mm/vunmap: add cond_resched() in vunmap_pmd_range")
Signed-off-by: Minchan Kim <minchan@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Reviewed-by: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Cc: Tony Lindgren <tony@atomide.com>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Harish Sriram <harish@linux.ibm.com>
Cc: Uladzislau Rezki <urezki@gmail.com>
Cc: <stable@vger.kernel.org>
Link: https://lkml.kernel.org/r/20201117202916.GA3856507@google.com
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/arm/configs/omap2plus_defconfig |  1 -
 include/linux/zsmalloc.h             |  1 -
 mm/Kconfig                           | 13 ---------
 mm/zsmalloc.c                        | 54 ------------------------------------
 4 files changed, 69 deletions(-)

(limited to 'arch')

diff --git a/arch/arm/configs/omap2plus_defconfig b/arch/arm/configs/omap2plus_defconfig
index 34793aabdb65..58df9fd79a76 100644
--- a/arch/arm/configs/omap2plus_defconfig
+++ b/arch/arm/configs/omap2plus_defconfig
@@ -81,7 +81,6 @@ CONFIG_PARTITION_ADVANCED=y
 CONFIG_BINFMT_MISC=y
 CONFIG_CMA=y
 CONFIG_ZSMALLOC=m
-CONFIG_ZSMALLOC_PGTABLE_MAPPING=y
 CONFIG_NET=y
 CONFIG_PACKET=y
 CONFIG_UNIX=y
diff --git a/include/linux/zsmalloc.h b/include/linux/zsmalloc.h
index 0fdbf653b173..4807ca4d52e0 100644
--- a/include/linux/zsmalloc.h
+++ b/include/linux/zsmalloc.h
@@ -20,7 +20,6 @@
  * zsmalloc mapping modes
  *
  * NOTE: These only make a difference when a mapped object spans pages.
- * They also have no effect when ZSMALLOC_PGTABLE_MAPPING is selected.
  */
 enum zs_mapmode {
 	ZS_MM_RW, /* normal read-write mapping */
diff --git a/mm/Kconfig b/mm/Kconfig
index d42423f884a7..390165ffbb0f 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -707,19 +707,6 @@ config ZSMALLOC
 	  returned by an alloc().  This handle must be mapped in order to
 	  access the allocated space.
 
-config ZSMALLOC_PGTABLE_MAPPING
-	bool "Use page table mapping to access object in zsmalloc"
-	depends on ZSMALLOC=y
-	help
-	  By default, zsmalloc uses a copy-based object mapping method to
-	  access allocations that span two pages. However, if a particular
-	  architecture (ex, ARM) performs VM mapping faster than copying,
-	  then you should select this. This causes zsmalloc to use page table
-	  mapping rather than copying for object mapping.
-
-	  You can check speed with zsmalloc benchmark:
-	  https://github.com/spartacus06/zsmapbench
-
 config ZSMALLOC_STAT
 	bool "Export zsmalloc statistics"
 	depends on ZSMALLOC
diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
index 918c7b019b3d..cdfaaadea8ff 100644
--- a/mm/zsmalloc.c
+++ b/mm/zsmalloc.c
@@ -293,11 +293,7 @@ struct zspage {
 };
 
 struct mapping_area {
-#ifdef CONFIG_ZSMALLOC_PGTABLE_MAPPING
-	struct vm_struct *vm; /* vm area for mapping object that span pages */
-#else
 	char *vm_buf; /* copy buffer for objects that span pages */
-#endif
 	char *vm_addr; /* address of kmap_atomic()'ed pages */
 	enum zs_mapmode vm_mm; /* mapping mode */
 };
@@ -1113,54 +1109,6 @@ static struct zspage *find_get_zspage(struct size_class *class)
 	return zspage;
 }
 
-#ifdef CONFIG_ZSMALLOC_PGTABLE_MAPPING
-static inline int __zs_cpu_up(struct mapping_area *area)
-{
-	/*
-	 * Make sure we don't leak memory if a cpu UP notification
-	 * and zs_init() race and both call zs_cpu_up() on the same cpu
-	 */
-	if (area->vm)
-		return 0;
-	area->vm = get_vm_area(PAGE_SIZE * 2, 0);
-	if (!area->vm)
-		return -ENOMEM;
-
-	/*
-	 * Populate ptes in advance to avoid pte allocation with GFP_KERNEL
-	 * in non-preemtible context of zs_map_object.
-	 */
-	return apply_to_page_range(&init_mm, (unsigned long)area->vm->addr,
-			PAGE_SIZE * 2, NULL, NULL);
-}
-
-static inline void __zs_cpu_down(struct mapping_area *area)
-{
-	if (area->vm)
-		free_vm_area(area->vm);
-	area->vm = NULL;
-}
-
-static inline void *__zs_map_object(struct mapping_area *area,
-				struct page *pages[2], int off, int size)
-{
-	unsigned long addr = (unsigned long)area->vm->addr;
-
-	BUG_ON(map_kernel_range(addr, PAGE_SIZE * 2, PAGE_KERNEL, pages) < 0);
-	area->vm_addr = area->vm->addr;
-	return area->vm_addr + off;
-}
-
-static inline void __zs_unmap_object(struct mapping_area *area,
-				struct page *pages[2], int off, int size)
-{
-	unsigned long addr = (unsigned long)area->vm_addr;
-
-	unmap_kernel_range(addr, PAGE_SIZE * 2);
-}
-
-#else /* CONFIG_ZSMALLOC_PGTABLE_MAPPING */
-
 static inline int __zs_cpu_up(struct mapping_area *area)
 {
 	/*
@@ -1241,8 +1189,6 @@ out:
 	pagefault_enable();
 }
 
-#endif /* CONFIG_ZSMALLOC_PGTABLE_MAPPING */
-
 static int zs_cpu_prepare(unsigned int cpu)
 {
 	struct mapping_area *area;
-- 
cgit v1.2.3-70-g09d2


From 9280f726097b436c8c907825131cd346d7eb0c0f Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 4 Dec 2020 00:18:40 +0100
Subject: ARM: keystone: remove SECTION_SIZE_BITS/MAX_PHYSMEM_BITS

These definitions are evidently left over from the days when
sparsemem settings were platform specific. This was no longer
the case when the platform got merged.

There was no warning in the past, but now the asm/sparsemem.h
header ends up being included indirectly, causing this warning:

In file included from /git/arm-soc/arch/arm/mach-keystone/keystone.c:24:
arch/arm/mach-keystone/memory.h:10:9: warning: 'SECTION_SIZE_BITS' macro redefined [-Wmacro-redefined]
 #define SECTION_SIZE_BITS       34
        ^
arch/arm/include/asm/sparsemem.h:23:9: note: previous definition is here
 #define SECTION_SIZE_BITS       28
        ^

Clearly the definitions never had any effect here, so remove them.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Link: https://lore.kernel.org/r/20201203231847.1484900-1-arnd@kernel.org'
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 arch/arm/mach-keystone/memory.h | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'arch')

diff --git a/arch/arm/mach-keystone/memory.h b/arch/arm/mach-keystone/memory.h
index 9147565d0581..1b9ed1271e05 100644
--- a/arch/arm/mach-keystone/memory.h
+++ b/arch/arm/mach-keystone/memory.h
@@ -6,9 +6,6 @@
 #ifndef __MEMORY_H
 #define __MEMORY_H
 
-#define MAX_PHYSMEM_BITS	36
-#define SECTION_SIZE_BITS	34
-
 #define KEYSTONE_LOW_PHYS_START		0x80000000ULL
 #define KEYSTONE_LOW_PHYS_SIZE		0x80000000ULL /* 2G */
 #define KEYSTONE_LOW_PHYS_END		(KEYSTONE_LOW_PHYS_START + \
-- 
cgit v1.2.3-70-g09d2


From 5eedf9fe8db23313df104576845cec5f481b9b60 Mon Sep 17 00:00:00 2001
From: Christophe Leroy <christophe.leroy@csgroup.eu>
Date: Mon, 7 Dec 2020 16:58:01 +0000
Subject: powerpc/mm: Fix KUAP warning by providing
 copy_from_kernel_nofault_allowed()

Since commit c33165253492 ("powerpc: use non-set_fs based maccess
routines"), userspace access is not granted anymore when using
copy_from_kernel_nofault()

However, kthread_probe_data() uses copy_from_kernel_nofault()
to check validity of pointers. When the pointer is NULL,
it points to userspace, leading to a KUAP fault and triggering
the following big hammer warning many times when you request
a sysrq "show task":

[ 1117.202054] ------------[ cut here ]------------
[ 1117.202102] Bug: fault blocked by AP register !
[ 1117.202261] WARNING: CPU: 0 PID: 377 at arch/powerpc/include/asm/nohash/32/kup-8xx.h:66 do_page_fault+0x4a8/0x5ec
[ 1117.202310] Modules linked in:
[ 1117.202428] CPU: 0 PID: 377 Comm: sh Tainted: G        W         5.10.0-rc5-01340-g83f53be2de31-dirty #4175
[ 1117.202499] NIP:  c0012048 LR: c0012048 CTR: 00000000
[ 1117.202573] REGS: cacdbb88 TRAP: 0700   Tainted: G        W          (5.10.0-rc5-01340-g83f53be2de31-dirty)
[ 1117.202625] MSR:  00021032 <ME,IR,DR,RI>  CR: 24082222  XER: 20000000
[ 1117.202899]
[ 1117.202899] GPR00: c0012048 cacdbc40 c2929290 00000023 c092e554 00000001 c09865e8 c092e640
[ 1117.202899] GPR08: 00001032 00000000 00000000 00014efc 28082224 100d166a 100a0920 00000000
[ 1117.202899] GPR16: 100cac0c 100b0000 1080c3fc 1080d685 100d0000 100d0000 00000000 100a0900
[ 1117.202899] GPR24: 100d0000 c07892ec 00000000 c0921510 c21f4440 0000005c c0000000 cacdbc80
[ 1117.204362] NIP [c0012048] do_page_fault+0x4a8/0x5ec
[ 1117.204461] LR [c0012048] do_page_fault+0x4a8/0x5ec
[ 1117.204509] Call Trace:
[ 1117.204609] [cacdbc40] [c0012048] do_page_fault+0x4a8/0x5ec (unreliable)
[ 1117.204771] [cacdbc70] [c00112f0] handle_page_fault+0x8/0x34
[ 1117.204911] --- interrupt: 301 at copy_from_kernel_nofault+0x70/0x1c0
[ 1117.204979] NIP:  c010dbec LR: c010dbac CTR: 00000001
[ 1117.205053] REGS: cacdbc80 TRAP: 0301   Tainted: G        W          (5.10.0-rc5-01340-g83f53be2de31-dirty)
[ 1117.205104] MSR:  00009032 <EE,ME,IR,DR,RI>  CR: 28082224  XER: 00000000
[ 1117.205416] DAR: 0000005c DSISR: c0000000
[ 1117.205416] GPR00: c0045948 cacdbd38 c2929290 00000001 00000017 00000017 00000027 0000000f
[ 1117.205416] GPR08: c09926ec 00000000 00000000 3ffff000 24082224
[ 1117.206106] NIP [c010dbec] copy_from_kernel_nofault+0x70/0x1c0
[ 1117.206202] LR [c010dbac] copy_from_kernel_nofault+0x30/0x1c0
[ 1117.206258] --- interrupt: 301
[ 1117.206372] [cacdbd38] [c004bbb0] kthread_probe_data+0x44/0x70 (unreliable)
[ 1117.206561] [cacdbd58] [c0045948] print_worker_info+0xe0/0x194
[ 1117.206717] [cacdbdb8] [c00548ac] sched_show_task+0x134/0x168
[ 1117.206851] [cacdbdd8] [c005a268] show_state_filter+0x70/0x100
[ 1117.206989] [cacdbe08] [c039baa0] sysrq_handle_showstate+0x14/0x24
[ 1117.207122] [cacdbe18] [c039bf18] __handle_sysrq+0xac/0x1d0
[ 1117.207257] [cacdbe48] [c039c0c0] write_sysrq_trigger+0x4c/0x74
[ 1117.207407] [cacdbe68] [c01fba48] proc_reg_write+0xb4/0x114
[ 1117.207550] [cacdbe88] [c0179968] vfs_write+0x12c/0x478
[ 1117.207686] [cacdbf08] [c0179e60] ksys_write+0x78/0x128
[ 1117.207826] [cacdbf38] [c00110d0] ret_from_syscall+0x0/0x34
[ 1117.207938] --- interrupt: c01 at 0xfd4e784
[ 1117.208008] NIP:  0fd4e784 LR: 0fe0f244 CTR: 10048d38
[ 1117.208083] REGS: cacdbf48 TRAP: 0c01   Tainted: G        W          (5.10.0-rc5-01340-g83f53be2de31-dirty)
[ 1117.208134] MSR:  0000d032 <EE,PR,ME,IR,DR,RI>  CR: 44002222  XER: 00000000
[ 1117.208470]
[ 1117.208470] GPR00: 00000004 7fc34090 77bfb4e0 00000001 1080fa40 00000002 7400000f fefefeff
[ 1117.208470] GPR08: 7f7f7f7f 10048d38 1080c414 7fc343c0 00000000
[ 1117.209104] NIP [0fd4e784] 0xfd4e784
[ 1117.209180] LR [0fe0f244] 0xfe0f244
[ 1117.209236] --- interrupt: c01
[ 1117.209274] Instruction dump:
[ 1117.209353] 714a4000 418200f0 73ca0001 40820084 73ca0032 408200f8 73c90040 4082ff60
[ 1117.209727] 0fe00000 3c60c082 386399f4 48013b65 <0fe00000> 80010034 3860000b 7c0803a6
[ 1117.210102] ---[ end trace 1927c0323393af3e ]---

To avoid that, copy_from_kernel_nofault_allowed() is used to check
whether the address is a valid kernel address. But the default
version of it returns true for any address.

Provide a powerpc version of copy_from_kernel_nofault_allowed()
that returns false when the address is below TASK_USER_MAX,
so that copy_from_kernel_nofault() will return -ERANGE.

Fixes: c33165253492 ("powerpc: use non-set_fs based maccess routines")
Reported-by: Qian Cai <qcai@redhat.com>
Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/18bcb456d32a3e74f5ae241fd6f1580c092d07f5.1607360230.git.christophe.leroy@csgroup.eu
---
 arch/powerpc/mm/Makefile  | 2 +-
 arch/powerpc/mm/maccess.c | 9 +++++++++
 2 files changed, 10 insertions(+), 1 deletion(-)
 create mode 100644 arch/powerpc/mm/maccess.c

(limited to 'arch')

diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile
index 5e147986400d..55b4a8bd408a 100644
--- a/arch/powerpc/mm/Makefile
+++ b/arch/powerpc/mm/Makefile
@@ -5,7 +5,7 @@
 
 ccflags-$(CONFIG_PPC64)	:= $(NO_MINIMAL_TOC)
 
-obj-y				:= fault.o mem.o pgtable.o mmap.o \
+obj-y				:= fault.o mem.o pgtable.o mmap.o maccess.o \
 				   init_$(BITS).o pgtable_$(BITS).o \
 				   pgtable-frag.o ioremap.o ioremap_$(BITS).o \
 				   init-common.o mmu_context.o drmem.o
diff --git a/arch/powerpc/mm/maccess.c b/arch/powerpc/mm/maccess.c
new file mode 100644
index 000000000000..fa9a7a718fc6
--- /dev/null
+++ b/arch/powerpc/mm/maccess.c
@@ -0,0 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <linux/uaccess.h>
+#include <linux/kernel.h>
+
+bool copy_from_kernel_nofault_allowed(const void *unsafe_src, size_t size)
+{
+	return is_kernel_addr((unsigned long)unsafe_src);
+}
-- 
cgit v1.2.3-70-g09d2


From 6220e48d9640538ff700f2e7d24c2f9277555fd6 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 8 Dec 2020 16:37:47 -0500
Subject: [regression fix] really dumb fuckup in sparc64 __csum_partial_copy()
 changes

~0U is -1, not 1

Reported-by: Anatoly Pugachev <matorola@gmail.com>
Tested-by: Anatoly Pugachev <matorola@gmail.com>
Fixes: fdf8bee96f9a "sparc64: propagate the calling convention changes down to __csum_partial_copy_...()"
X-brown-paperbag: yes
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 arch/sparc/lib/csum_copy.S | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/sparc/lib/csum_copy.S b/arch/sparc/lib/csum_copy.S
index 0c0268e77155..d839956407a7 100644
--- a/arch/sparc/lib/csum_copy.S
+++ b/arch/sparc/lib/csum_copy.S
@@ -71,7 +71,7 @@
 FUNC_NAME:		/* %o0=src, %o1=dst, %o2=len */
 	LOAD(prefetch, %o0 + 0x000, #n_reads)
 	xor		%o0, %o1, %g1
-	mov		1, %o3
+	mov		-1, %o3
 	clr		%o4
 	andcc		%g1, 0x3, %g0
 	bne,pn		%icc, 95f
-- 
cgit v1.2.3-70-g09d2


From a493d1ca1a03b532871f1da27f8dbda2b28b04c4 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@kernel.org>
Date: Thu, 3 Dec 2020 21:07:03 -0800
Subject: x86/membarrier: Get rid of a dubious optimization

sync_core_before_usermode() had an incorrect optimization.  If the kernel
returns from an interrupt, it can get to usermode without IRET. It just has
to schedule to a different task in the same mm and do SYSRET.  Fortunately,
there were no callers of sync_core_before_usermode() that could have had
in_irq() or in_nmi() equal to true, because it's only ever called from the
scheduler.

While at it, clarify a related comment.

Fixes: 70216e18e519 ("membarrier: Provide core serializing command, *_SYNC_CORE")
Signed-off-by: Andy Lutomirski <luto@kernel.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: stable@vger.kernel.org
Link: https://lore.kernel.org/r/5afc7632be1422f91eaf7611aaaa1b5b8580a086.1607058304.git.luto@kernel.org
---
 arch/x86/include/asm/sync_core.h |  9 +++++----
 arch/x86/mm/tlb.c                | 10 ++++++++--
 2 files changed, 13 insertions(+), 6 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/include/asm/sync_core.h b/arch/x86/include/asm/sync_core.h
index 0fd4a9dfb29c..ab7382f92aff 100644
--- a/arch/x86/include/asm/sync_core.h
+++ b/arch/x86/include/asm/sync_core.h
@@ -98,12 +98,13 @@ static inline void sync_core_before_usermode(void)
 	/* With PTI, we unconditionally serialize before running user code. */
 	if (static_cpu_has(X86_FEATURE_PTI))
 		return;
+
 	/*
-	 * Return from interrupt and NMI is done through iret, which is core
-	 * serializing.
+	 * Even if we're in an interrupt, we might reschedule before returning,
+	 * in which case we could switch to a different thread in the same mm
+	 * and return using SYSRET or SYSEXIT.  Instead of trying to keep
+	 * track of our need to sync the core, just sync right away.
 	 */
-	if (in_irq() || in_nmi())
-		return;
 	sync_core();
 }
 
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 11666ba19b62..569ac1d57f55 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -474,8 +474,14 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
 	/*
 	 * The membarrier system call requires a full memory barrier and
 	 * core serialization before returning to user-space, after
-	 * storing to rq->curr. Writing to CR3 provides that full
-	 * memory barrier and core serializing instruction.
+	 * storing to rq->curr, when changing mm.  This is because
+	 * membarrier() sends IPIs to all CPUs that are in the target mm
+	 * to make them issue memory barriers.  However, if another CPU
+	 * switches to/from the target mm concurrently with
+	 * membarrier(), it can cause that CPU not to receive an IPI
+	 * when it really should issue a memory barrier.  Writing to CR3
+	 * provides that full memory barrier and core serializing
+	 * instruction.
 	 */
 	if (real_prev == next) {
 		VM_WARN_ON(this_cpu_read(cpu_tlbstate.ctxs[prev_asid].ctx_id) !=
-- 
cgit v1.2.3-70-g09d2


From 387270cb0b4035491c4812effd8b5af0e385a66c Mon Sep 17 00:00:00 2001
From: Zhen Lei <thunder.leizhen@huawei.com>
Date: Mon, 7 Dec 2020 16:47:52 +0800
Subject: ARM: dts: mmp2-olpc-xo-1-75: clear the warnings when make dtbs

The check_spi_bus_bridge() in scripts/dtc/checks.c requires that the node
have "spi-slave" property must with "#address-cells = <0>" and
"#size-cells = <0>". But currently both "#address-cells" and "#size-cells"
properties are deleted, the corresponding default values are 2 and 1. As a
result, the check fails and below warnings is displayed.

arch/arm/boot/dts/mmp2.dtsi:472.23-480.6: Warning (spi_bus_bridge): \
/soc/apb@d4000000/spi@d4037000: incorrect #address-cells for SPI bus
  also defined at arch/arm/boot/dts/mmp2-olpc-xo-1-75.dts:225.7-237.3
arch/arm/boot/dts/mmp2.dtsi:472.23-480.6: Warning (spi_bus_bridge): \
/soc/apb@d4000000/spi@d4037000: incorrect #size-cells for SPI bus
  also defined at arch/arm/boot/dts/mmp2-olpc-xo-1-75.dts:225.7-237.3
arch/arm/boot/dts/mmp2-olpc-xo-1-75.dtb: Warning (spi_bus_reg): \
Failed prerequisite 'spi_bus_bridge'

Because the value of "#size-cells" is already defined as zero in the node
"ssp3: spi@d4037000" in arch/arm/boot/dts/mmp2.dtsi. So we only need to
explicitly add "#address-cells = <0>" and keep "#size-cells" no change.

Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
Link: https://lore.kernel.org/r/20201207084752.1665-2-thunder.leizhen@huawei.com'
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 arch/arm/boot/dts/mmp2-olpc-xo-1-75.dts | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/arm/boot/dts/mmp2-olpc-xo-1-75.dts b/arch/arm/boot/dts/mmp2-olpc-xo-1-75.dts
index adde62d6fce7..342304f5653a 100644
--- a/arch/arm/boot/dts/mmp2-olpc-xo-1-75.dts
+++ b/arch/arm/boot/dts/mmp2-olpc-xo-1-75.dts
@@ -223,8 +223,7 @@
 };
 
 &ssp3 {
-	/delete-property/ #address-cells;
-	/delete-property/ #size-cells;
+	#address-cells = <0>;
 	spi-slave;
 	status = "okay";
 	ready-gpios = <&gpio 125 GPIO_ACTIVE_HIGH>;
-- 
cgit v1.2.3-70-g09d2


From 29ac40cbed2bc06fa218ca25d7f5e280d3d08a25 Mon Sep 17 00:00:00 2001
From: Arvind Sankar <nivedita@alum.mit.edu>
Date: Wed, 11 Nov 2020 11:09:45 -0500
Subject: x86/mm/mem_encrypt: Fix definition of PMD_FLAGS_DEC_WP

The PAT bit is in different locations for 4k and 2M/1G page table
entries.

Add a definition for _PAGE_LARGE_CACHE_MASK to represent the three
caching bits (PWT, PCD, PAT), similar to _PAGE_CACHE_MASK for 4k pages,
and use it in the definition of PMD_FLAGS_DEC_WP to get the correct PAT
index for write-protected pages.

Fixes: 6ebcb060713f ("x86/mm: Add support to encrypt the kernel in-place")
Signed-off-by: Arvind Sankar <nivedita@alum.mit.edu>
Signed-off-by: Borislav Petkov <bp@suse.de>
Tested-by: Tom Lendacky <thomas.lendacky@amd.com>
Cc: stable@vger.kernel.org
Link: https://lkml.kernel.org/r/20201111160946.147341-1-nivedita@alum.mit.edu
---
 arch/x86/include/asm/pgtable_types.h | 1 +
 arch/x86/mm/mem_encrypt_identity.c   | 4 ++--
 2 files changed, 3 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index 816b31c68550..394757ee030a 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -155,6 +155,7 @@ enum page_cache_mode {
 #define _PAGE_ENC		(_AT(pteval_t, sme_me_mask))
 
 #define _PAGE_CACHE_MASK	(_PAGE_PWT | _PAGE_PCD | _PAGE_PAT)
+#define _PAGE_LARGE_CACHE_MASK	(_PAGE_PWT | _PAGE_PCD | _PAGE_PAT_LARGE)
 
 #define _PAGE_NOCACHE		(cachemode2protval(_PAGE_CACHE_MODE_UC))
 #define _PAGE_CACHE_WP		(cachemode2protval(_PAGE_CACHE_MODE_WP))
diff --git a/arch/x86/mm/mem_encrypt_identity.c b/arch/x86/mm/mem_encrypt_identity.c
index 733b983f3a89..6c5eb6f3f14f 100644
--- a/arch/x86/mm/mem_encrypt_identity.c
+++ b/arch/x86/mm/mem_encrypt_identity.c
@@ -45,8 +45,8 @@
 #define PMD_FLAGS_LARGE		(__PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL)
 
 #define PMD_FLAGS_DEC		PMD_FLAGS_LARGE
-#define PMD_FLAGS_DEC_WP	((PMD_FLAGS_DEC & ~_PAGE_CACHE_MASK) | \
-				 (_PAGE_PAT | _PAGE_PWT))
+#define PMD_FLAGS_DEC_WP	((PMD_FLAGS_DEC & ~_PAGE_LARGE_CACHE_MASK) | \
+				 (_PAGE_PAT_LARGE | _PAGE_PWT))
 
 #define PMD_FLAGS_ENC		(PMD_FLAGS_LARGE | _PAGE_ENC)
 
-- 
cgit v1.2.3-70-g09d2


From 06c5fe9b12dde1b62821f302f177c972bb1c81f9 Mon Sep 17 00:00:00 2001
From: Xiaochen Shen <xiaochen.shen@intel.com>
Date: Fri, 4 Dec 2020 14:27:59 +0800
Subject: x86/resctrl: Fix incorrect local bandwidth when mba_sc is enabled

The MBA software controller (mba_sc) is a feedback loop which
periodically reads MBM counters and tries to restrict the bandwidth
below a user-specified value. It tags along the MBM counter overflow
handler to do the updates with 1s interval in mbm_update() and
update_mba_bw().

The purpose of mbm_update() is to periodically read the MBM counters to
make sure that the hardware counter doesn't wrap around more than once
between user samplings. mbm_update() calls __mon_event_count() for local
bandwidth updating when mba_sc is not enabled, but calls mbm_bw_count()
instead when mba_sc is enabled. __mon_event_count() will not be called
for local bandwidth updating in MBM counter overflow handler, but it is
still called when reading MBM local bandwidth counter file
'mbm_local_bytes', the call path is as below:

  rdtgroup_mondata_show()
    mon_event_read()
      mon_event_count()
        __mon_event_count()

In __mon_event_count(), m->chunks is updated by delta chunks which is
calculated from previous MSR value (m->prev_msr) and current MSR value.
When mba_sc is enabled, m->chunks is also updated in mbm_update() by
mistake by the delta chunks which is calculated from m->prev_bw_msr
instead of m->prev_msr. But m->chunks is not used in update_mba_bw() in
the mba_sc feedback loop.

When reading MBM local bandwidth counter file, m->chunks was changed
unexpectedly by mbm_bw_count(). As a result, the incorrect local
bandwidth counter which calculated from incorrect m->chunks is shown to
the user.

Fix this by removing incorrect m->chunks updating in mbm_bw_count() in
MBM counter overflow handler, and always calling __mon_event_count() in
mbm_update() to make sure that the hardware local bandwidth counter
doesn't wrap around.

Test steps:
  # Run workload with aggressive memory bandwidth (e.g., 10 GB/s)
  git clone https://github.com/intel/intel-cmt-cat && cd intel-cmt-cat
  && make
  ./tools/membw/membw -c 0 -b 10000 --read

  # Enable MBA software controller
  mount -t resctrl resctrl -o mba_MBps /sys/fs/resctrl

  # Create control group c1
  mkdir /sys/fs/resctrl/c1

  # Set MB throttle to 6 GB/s
  echo "MB:0=6000;1=6000" > /sys/fs/resctrl/c1/schemata

  # Write PID of the workload to tasks file
  echo `pidof membw` > /sys/fs/resctrl/c1/tasks

  # Read local bytes counters twice with 1s interval, the calculated
  # local bandwidth is not as expected (approaching to 6 GB/s):
  local_1=`cat /sys/fs/resctrl/c1/mon_data/mon_L3_00/mbm_local_bytes`
  sleep 1
  local_2=`cat /sys/fs/resctrl/c1/mon_data/mon_L3_00/mbm_local_bytes`
  echo "local b/w (bytes/s):" `expr $local_2 - $local_1`

Before fix:
  local b/w (bytes/s): 11076796416

After fix:
  local b/w (bytes/s): 5465014272

Fixes: ba0f26d8529c (x86/intel_rdt/mba_sc: Prepare for feedback loop)
Signed-off-by: Xiaochen Shen <xiaochen.shen@intel.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Tony Luck <tony.luck@intel.com>
Cc: <stable@vger.kernel.org>
Link: https://lkml.kernel.org/r/1607063279-19437-1-git-send-email-xiaochen.shen@intel.com
---
 arch/x86/kernel/cpu/resctrl/monitor.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c
index 54dffe574e67..a98519a3a2e6 100644
--- a/arch/x86/kernel/cpu/resctrl/monitor.c
+++ b/arch/x86/kernel/cpu/resctrl/monitor.c
@@ -279,7 +279,6 @@ static void mbm_bw_count(u32 rmid, struct rmid_read *rr)
 		return;
 
 	chunks = mbm_overflow_count(m->prev_bw_msr, tval, rr->r->mbm_width);
-	m->chunks += chunks;
 	cur_bw = (chunks * r->mon_scale) >> 20;
 
 	if (m->delta_comp)
@@ -450,15 +449,14 @@ static void mbm_update(struct rdt_resource *r, struct rdt_domain *d, int rmid)
 	}
 	if (is_mbm_local_enabled()) {
 		rr.evtid = QOS_L3_MBM_LOCAL_EVENT_ID;
+		__mon_event_count(rmid, &rr);
 
 		/*
 		 * Call the MBA software controller only for the
 		 * control groups and when user has enabled
 		 * the software controller explicitly.
 		 */
-		if (!is_mba_sc(NULL))
-			__mon_event_count(rmid, &rr);
-		else
+		if (is_mba_sc(NULL))
 			mbm_bw_count(rmid, &rr);
 	}
 }
-- 
cgit v1.2.3-70-g09d2


From 190113b4c6531c8e09b31d5235f9b5175cbb0f72 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 10 Dec 2020 21:18:22 +0100
Subject: x86/apic/vector: Fix ordering in vector assignment

Prarit reported that depending on the affinity setting the

 ' irq $N: Affinity broken due to vector space exhaustion.'

message is showing up in dmesg, but the vector space on the CPUs in the
affinity mask is definitely not exhausted.

Shung-Hsi provided traces and analysis which pinpoints the problem:

The ordering of trying to assign an interrupt vector in
assign_irq_vector_any_locked() is simply wrong if the interrupt data has a
valid node assigned. It does:

 1) Try the intersection of affinity mask and node mask
 2) Try the node mask
 3) Try the full affinity mask
 4) Try the full online mask

Obviously #2 and #3 are in the wrong order as the requested affinity
mask has to take precedence.

In the observed cases #1 failed because the affinity mask did not contain
CPUs from node 0. That made it allocate a vector from node 0, thereby
breaking affinity and emitting the misleading message.

Revert the order of #2 and #3 so the full affinity mask without the node
intersection is tried before actually affinity is broken.

If no node is assigned then only the full affinity mask and if that fails
the full online mask is tried.

Fixes: d6ffc6ac83b1 ("x86/vector: Respect affinity mask in irq descriptor")
Reported-by: Prarit Bhargava <prarit@redhat.com>
Reported-by: Shung-Hsi Yu <shung-hsi.yu@suse.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Shung-Hsi Yu <shung-hsi.yu@suse.com>
Cc: stable@vger.kernel.org
Link: https://lore.kernel.org/r/87ft4djtyp.fsf@nanos.tec.linutronix.de
---
 arch/x86/kernel/apic/vector.c | 24 ++++++++++++++----------
 1 file changed, 14 insertions(+), 10 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index 1eac53632786..758bbf25ef74 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -273,20 +273,24 @@ static int assign_irq_vector_any_locked(struct irq_data *irqd)
 	const struct cpumask *affmsk = irq_data_get_affinity_mask(irqd);
 	int node = irq_data_get_node(irqd);
 
-	if (node == NUMA_NO_NODE)
-		goto all;
-	/* Try the intersection of @affmsk and node mask */
-	cpumask_and(vector_searchmask, cpumask_of_node(node), affmsk);
-	if (!assign_vector_locked(irqd, vector_searchmask))
-		return 0;
-	/* Try the node mask */
-	if (!assign_vector_locked(irqd, cpumask_of_node(node)))
-		return 0;
-all:
+	if (node != NUMA_NO_NODE) {
+		/* Try the intersection of @affmsk and node mask */
+		cpumask_and(vector_searchmask, cpumask_of_node(node), affmsk);
+		if (!assign_vector_locked(irqd, vector_searchmask))
+			return 0;
+	}
+
 	/* Try the full affinity mask */
 	cpumask_and(vector_searchmask, affmsk, cpu_online_mask);
 	if (!assign_vector_locked(irqd, vector_searchmask))
 		return 0;
+
+	if (node != NUMA_NO_NODE) {
+		/* Try the node mask */
+		if (!assign_vector_locked(irqd, cpumask_of_node(node)))
+			return 0;
+	}
+
 	/* Try the full online mask */
 	return assign_vector_locked(irqd, cpu_online_mask);
 }
-- 
cgit v1.2.3-70-g09d2


From ccbbfd1cbf365b38d014351d1482fedd26282041 Mon Sep 17 00:00:00 2001
From: Palmer Dabbelt <palmerdabbelt@google.com>
Date: Wed, 25 Nov 2020 11:57:03 -0800
Subject: RISC-V: Define get_cycles64() regardless of M-mode

The timer driver uses get_cycles64() unconditionally to obtain the current
time.  A recent refactoring lost the common definition for some configs, which
is now the only one we need.

Fixes: d5be89a8d118 ("RISC-V: Resurrect the MMIO timer implementation for M-mode systems")
Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Palmer Dabbelt <palmerdabbelt@google.com>
---
 arch/riscv/include/asm/timex.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/riscv/include/asm/timex.h b/arch/riscv/include/asm/timex.h
index ab104905d4db..81de51e6aa32 100644
--- a/arch/riscv/include/asm/timex.h
+++ b/arch/riscv/include/asm/timex.h
@@ -60,6 +60,8 @@ static inline u32 get_cycles_hi(void)
 }
 #define get_cycles_hi get_cycles_hi
 
+#endif /* !CONFIG_RISCV_M_MODE */
+
 #ifdef CONFIG_64BIT
 static inline u64 get_cycles64(void)
 {
@@ -79,8 +81,6 @@ static inline u64 get_cycles64(void)
 }
 #endif /* CONFIG_64BIT */
 
-#endif /* !CONFIG_RISCV_M_MODE */
-
 #define ARCH_HAS_READ_CURRENT_TIMER
 static inline int read_current_timer(unsigned long *timer_val)
 {
-- 
cgit v1.2.3-70-g09d2


From 34c0f6f2695a2db81e09a3ab7bdb2853f45d4d3d Mon Sep 17 00:00:00 2001
From: "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com>
Date: Sat, 5 Dec 2020 01:48:08 +0100
Subject: KVM: mmu: Fix SPTE encoding of MMIO generation upper half

Commit cae7ed3c2cb0 ("KVM: x86: Refactor the MMIO SPTE generation handling")
cleaned up the computation of MMIO generation SPTE masks, however it
introduced a bug how the upper part was encoded:
SPTE bits 52-61 were supposed to contain bits 10-19 of the current
generation number, however a missing shift encoded bits 1-10 there instead
(mostly duplicating the lower part of the encoded generation number that
then consisted of bits 1-9).

In the meantime, the upper part was shrunk by one bit and moved by
subsequent commits to become an upper half of the encoded generation number
(bits 9-17 of bits 0-17 encoded in a SPTE).

In addition to the above, commit 56871d444bc4 ("KVM: x86: fix overlap between SPTE_MMIO_MASK and generation")
has changed the SPTE bit range assigned to encode the generation number and
the total number of bits encoded but did not update them in the comment
attached to their defines, nor in the KVM MMU doc.
Let's do it here, too, since it is too trivial thing to warrant a separate
commit.

Fixes: cae7ed3c2cb0 ("KVM: x86: Refactor the MMIO SPTE generation handling")
Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com>
Message-Id: <156700708db2a5296c5ed7a8b9ac71f1e9765c85.1607129096.git.maciej.szmigiero@oracle.com>
Cc: stable@vger.kernel.org
[Reorganize macros so that everything is computed from the bit ranges. - Paolo]
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 Documentation/virt/kvm/mmu.rst |  2 +-
 arch/x86/kvm/mmu/spte.c        |  4 ++--
 arch/x86/kvm/mmu/spte.h        | 25 ++++++++++++++++++-------
 3 files changed, 21 insertions(+), 10 deletions(-)

(limited to 'arch')

diff --git a/Documentation/virt/kvm/mmu.rst b/Documentation/virt/kvm/mmu.rst
index 1c030dbac7c4..5bfe28b0728e 100644
--- a/Documentation/virt/kvm/mmu.rst
+++ b/Documentation/virt/kvm/mmu.rst
@@ -455,7 +455,7 @@ If the generation number of the spte does not equal the global generation
 number, it will ignore the cached MMIO information and handle the page
 fault through the slow path.
 
-Since only 19 bits are used to store generation-number on mmio spte, all
+Since only 18 bits are used to store generation-number on mmio spte, all
 pages are zapped when there is an overflow.
 
 Unfortunately, a single memory access might access kvm_memslots(kvm) multiple
diff --git a/arch/x86/kvm/mmu/spte.c b/arch/x86/kvm/mmu/spte.c
index fcac2cac78fe..c51ad544f25b 100644
--- a/arch/x86/kvm/mmu/spte.c
+++ b/arch/x86/kvm/mmu/spte.c
@@ -40,8 +40,8 @@ static u64 generation_mmio_spte_mask(u64 gen)
 	WARN_ON(gen & ~MMIO_SPTE_GEN_MASK);
 	BUILD_BUG_ON((MMIO_SPTE_GEN_HIGH_MASK | MMIO_SPTE_GEN_LOW_MASK) & SPTE_SPECIAL_MASK);
 
-	mask = (gen << MMIO_SPTE_GEN_LOW_START) & MMIO_SPTE_GEN_LOW_MASK;
-	mask |= (gen << MMIO_SPTE_GEN_HIGH_START) & MMIO_SPTE_GEN_HIGH_MASK;
+	mask = (gen << MMIO_SPTE_GEN_LOW_SHIFT) & MMIO_SPTE_GEN_LOW_MASK;
+	mask |= (gen << MMIO_SPTE_GEN_HIGH_SHIFT) & MMIO_SPTE_GEN_HIGH_MASK;
 	return mask;
 }
 
diff --git a/arch/x86/kvm/mmu/spte.h b/arch/x86/kvm/mmu/spte.h
index 5c75a451c000..2b3a30bd38b0 100644
--- a/arch/x86/kvm/mmu/spte.h
+++ b/arch/x86/kvm/mmu/spte.h
@@ -56,11 +56,11 @@
 #define SPTE_MMU_WRITEABLE	(1ULL << (PT_FIRST_AVAIL_BITS_SHIFT + 1))
 
 /*
- * Due to limited space in PTEs, the MMIO generation is a 19 bit subset of
+ * Due to limited space in PTEs, the MMIO generation is a 18 bit subset of
  * the memslots generation and is derived as follows:
  *
  * Bits 0-8 of the MMIO generation are propagated to spte bits 3-11
- * Bits 9-18 of the MMIO generation are propagated to spte bits 52-61
+ * Bits 9-17 of the MMIO generation are propagated to spte bits 54-62
  *
  * The KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS flag is intentionally not included in
  * the MMIO generation number, as doing so would require stealing a bit from
@@ -69,18 +69,29 @@
  * requires a full MMU zap).  The flag is instead explicitly queried when
  * checking for MMIO spte cache hits.
  */
-#define MMIO_SPTE_GEN_MASK		GENMASK_ULL(17, 0)
 
 #define MMIO_SPTE_GEN_LOW_START		3
 #define MMIO_SPTE_GEN_LOW_END		11
-#define MMIO_SPTE_GEN_LOW_MASK		GENMASK_ULL(MMIO_SPTE_GEN_LOW_END, \
-						    MMIO_SPTE_GEN_LOW_START)
 
 #define MMIO_SPTE_GEN_HIGH_START	PT64_SECOND_AVAIL_BITS_SHIFT
 #define MMIO_SPTE_GEN_HIGH_END		62
+
+#define MMIO_SPTE_GEN_LOW_MASK		GENMASK_ULL(MMIO_SPTE_GEN_LOW_END, \
+						    MMIO_SPTE_GEN_LOW_START)
 #define MMIO_SPTE_GEN_HIGH_MASK		GENMASK_ULL(MMIO_SPTE_GEN_HIGH_END, \
 						    MMIO_SPTE_GEN_HIGH_START)
 
+#define MMIO_SPTE_GEN_LOW_BITS		(MMIO_SPTE_GEN_LOW_END - MMIO_SPTE_GEN_LOW_START + 1)
+#define MMIO_SPTE_GEN_HIGH_BITS		(MMIO_SPTE_GEN_HIGH_END - MMIO_SPTE_GEN_HIGH_START + 1)
+
+/* remember to adjust the comment above as well if you change these */
+static_assert(MMIO_SPTE_GEN_LOW_BITS == 9 && MMIO_SPTE_GEN_HIGH_BITS == 9);
+
+#define MMIO_SPTE_GEN_LOW_SHIFT		(MMIO_SPTE_GEN_LOW_START - 0)
+#define MMIO_SPTE_GEN_HIGH_SHIFT	(MMIO_SPTE_GEN_HIGH_START - MMIO_SPTE_GEN_LOW_BITS)
+
+#define MMIO_SPTE_GEN_MASK		GENMASK_ULL(MMIO_SPTE_GEN_LOW_BITS + MMIO_SPTE_GEN_HIGH_BITS - 1, 0)
+
 extern u64 __read_mostly shadow_nx_mask;
 extern u64 __read_mostly shadow_x_mask; /* mutual exclusive with nx_mask */
 extern u64 __read_mostly shadow_user_mask;
@@ -228,8 +239,8 @@ static inline u64 get_mmio_spte_generation(u64 spte)
 {
 	u64 gen;
 
-	gen = (spte & MMIO_SPTE_GEN_LOW_MASK) >> MMIO_SPTE_GEN_LOW_START;
-	gen |= (spte & MMIO_SPTE_GEN_HIGH_MASK) >> MMIO_SPTE_GEN_HIGH_START;
+	gen = (spte & MMIO_SPTE_GEN_LOW_MASK) >> MMIO_SPTE_GEN_LOW_SHIFT;
+	gen |= (spte & MMIO_SPTE_GEN_HIGH_MASK) >> MMIO_SPTE_GEN_HIGH_SHIFT;
 	return gen;
 }
 
-- 
cgit v1.2.3-70-g09d2


From 0d07c0ec4381f630c801539c79ad8dcc627f6e4a Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@kernel.org>
Date: Fri, 11 Dec 2020 16:04:17 +0900
Subject: x86/kprobes: Fix optprobe to detect INT3 padding correctly

Commit

  7705dc855797 ("x86/vmlinux: Use INT3 instead of NOP for linker fill bytes")

changed the padding bytes between functions from NOP to INT3. However,
when optprobe decodes a target function it finds INT3 and gives up the
jump optimization.

Instead of giving up any INT3 detection, check whether the rest of the
bytes to the end of the function are INT3. If all of them are INT3,
those come from the linker. In that case, continue the optprobe jump
optimization.

 [ bp: Massage commit message. ]

Fixes: 7705dc855797 ("x86/vmlinux: Use INT3 instead of NOP for linker fill bytes")
Reported-by: Adam Zabrocki <pi3@pi3.com.pl>
Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Reviewed-by: Kees Cook <keescook@chromium.org>
Cc: stable@vger.kernel.org
Link: https://lkml.kernel.org/r/160767025681.3880685.16021570341428835411.stgit@devnote2
---
 arch/x86/kernel/kprobes/opt.c | 22 ++++++++++++++++++++--
 1 file changed, 20 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c
index 041f0b50bc27..08eb23074f92 100644
--- a/arch/x86/kernel/kprobes/opt.c
+++ b/arch/x86/kernel/kprobes/opt.c
@@ -272,6 +272,19 @@ static int insn_is_indirect_jump(struct insn *insn)
 	return ret;
 }
 
+static bool is_padding_int3(unsigned long addr, unsigned long eaddr)
+{
+	unsigned char ops;
+
+	for (; addr < eaddr; addr++) {
+		if (get_kernel_nofault(ops, (void *)addr) < 0 ||
+		    ops != INT3_INSN_OPCODE)
+			return false;
+	}
+
+	return true;
+}
+
 /* Decode whole function to ensure any instructions don't jump into target */
 static int can_optimize(unsigned long paddr)
 {
@@ -310,9 +323,14 @@ static int can_optimize(unsigned long paddr)
 			return 0;
 		kernel_insn_init(&insn, (void *)recovered_insn, MAX_INSN_SIZE);
 		insn_get_length(&insn);
-		/* Another subsystem puts a breakpoint */
+		/*
+		 * In the case of detecting unknown breakpoint, this could be
+		 * a padding INT3 between functions. Let's check that all the
+		 * rest of the bytes are also INT3.
+		 */
 		if (insn.opcode.bytes[0] == INT3_INSN_OPCODE)
-			return 0;
+			return is_padding_int3(addr, paddr - offset + size) ? 1 : 0;
+
 		/* Recover address */
 		insn.kaddr = (void *)addr;
 		insn.next_byte = (void *)(addr + insn.length);
-- 
cgit v1.2.3-70-g09d2


From 2aa078932ff6c66bf10cc5b3144440dbfa7d813d Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Thu, 17 Dec 2020 16:31:36 -0800
Subject: KVM: x86/mmu: Use -1 to flag an undefined spte in get_mmio_spte()

Return -1 from the get_walk() helpers if the shadow walk doesn't fill at
least one spte, which can theoretically happen if the walk hits a
not-present PDPTR.  Returning the root level in such a case will cause
get_mmio_spte() to return garbage (uninitialized stack data).  In
practice, such a scenario should be impossible as KVM shouldn't get a
reserved-bit page fault with a not-present PDPTR.

Note, using mmu->root_level in get_walk() is wrong for other reasons,
too, but that's now a moot point.

Fixes: 95fb5b0258b7 ("kvm: x86/mmu: Support MMIO in the TDP MMU")
Cc: Ben Gardon <bgardon@google.com>
Cc: stable@vger.kernel.org
Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20201218003139.2167891-2-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/mmu/mmu.c     | 7 ++++++-
 arch/x86/kvm/mmu/tdp_mmu.c | 2 +-
 2 files changed, 7 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 7a6ae9e90bd7..a48cd12c01d7 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -3488,7 +3488,7 @@ static bool mmio_info_in_cache(struct kvm_vcpu *vcpu, u64 addr, bool direct)
 static int get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes)
 {
 	struct kvm_shadow_walk_iterator iterator;
-	int leaf = vcpu->arch.mmu->root_level;
+	int leaf = -1;
 	u64 spte;
 
 
@@ -3532,6 +3532,11 @@ static bool get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep)
 	else
 		leaf = get_walk(vcpu, addr, sptes);
 
+	if (unlikely(leaf < 0)) {
+		*sptep = 0ull;
+		return reserved;
+	}
+
 	rsvd_check = &vcpu->arch.mmu->shadow_zero_check;
 
 	for (level = root; level >= leaf; level--) {
diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index 84c8f06bec26..50cec7a15ddb 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -1152,8 +1152,8 @@ int kvm_tdp_mmu_get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes)
 {
 	struct tdp_iter iter;
 	struct kvm_mmu *mmu = vcpu->arch.mmu;
-	int leaf = vcpu->arch.mmu->shadow_root_level;
 	gfn_t gfn = addr >> PAGE_SHIFT;
+	int leaf = -1;
 
 	tdp_mmu_for_each_pte(iter, mmu, gfn, gfn + 1) {
 		leaf = iter.level;
-- 
cgit v1.2.3-70-g09d2


From 39b4d43e6003cee51cd119596d3c33d0449eb44c Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Thu, 17 Dec 2020 16:31:37 -0800
Subject: KVM: x86/mmu: Get root level from walkers when retrieving MMIO SPTE

Get the so called "root" level from the low level shadow page table
walkers instead of manually attempting to calculate it higher up the
stack, e.g. in get_mmio_spte().  When KVM is using PAE shadow paging,
the starting level of the walk, from the callers perspective, is not
the CR3 root but rather the PDPTR "root".  Checking for reserved bits
from the CR3 root causes get_mmio_spte() to consume uninitialized stack
data due to indexing into sptes[] for a level that was not filled by
get_walk().  This can result in false positives and/or negatives
depending on what garbage happens to be on the stack.

Opportunistically nuke a few extra newlines.

Fixes: 95fb5b0258b7 ("kvm: x86/mmu: Support MMIO in the TDP MMU")
Reported-by: Richard Herbert <rherbert@sympatico.ca>
Cc: Ben Gardon <bgardon@google.com>
Cc: stable@vger.kernel.org
Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20201218003139.2167891-3-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/mmu/mmu.c     | 15 ++++++---------
 arch/x86/kvm/mmu/tdp_mmu.c |  5 ++++-
 arch/x86/kvm/mmu/tdp_mmu.h |  4 +++-
 3 files changed, 13 insertions(+), 11 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index a48cd12c01d7..52f36c879086 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -3485,16 +3485,16 @@ static bool mmio_info_in_cache(struct kvm_vcpu *vcpu, u64 addr, bool direct)
  * Return the level of the lowest level SPTE added to sptes.
  * That SPTE may be non-present.
  */
-static int get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes)
+static int get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes, int *root_level)
 {
 	struct kvm_shadow_walk_iterator iterator;
 	int leaf = -1;
 	u64 spte;
 
-
 	walk_shadow_page_lockless_begin(vcpu);
 
-	for (shadow_walk_init(&iterator, vcpu, addr);
+	for (shadow_walk_init(&iterator, vcpu, addr),
+	     *root_level = iterator.level;
 	     shadow_walk_okay(&iterator);
 	     __shadow_walk_next(&iterator, spte)) {
 		leaf = iterator.level;
@@ -3504,7 +3504,6 @@ static int get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes)
 
 		if (!is_shadow_present_pte(spte))
 			break;
-
 	}
 
 	walk_shadow_page_lockless_end(vcpu);
@@ -3517,9 +3516,7 @@ static bool get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep)
 {
 	u64 sptes[PT64_ROOT_MAX_LEVEL];
 	struct rsvd_bits_validate *rsvd_check;
-	int root = vcpu->arch.mmu->shadow_root_level;
-	int leaf;
-	int level;
+	int root, leaf, level;
 	bool reserved = false;
 
 	if (!VALID_PAGE(vcpu->arch.mmu->root_hpa)) {
@@ -3528,9 +3525,9 @@ static bool get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep)
 	}
 
 	if (is_tdp_mmu_root(vcpu->kvm, vcpu->arch.mmu->root_hpa))
-		leaf = kvm_tdp_mmu_get_walk(vcpu, addr, sptes);
+		leaf = kvm_tdp_mmu_get_walk(vcpu, addr, sptes, &root);
 	else
-		leaf = get_walk(vcpu, addr, sptes);
+		leaf = get_walk(vcpu, addr, sptes, &root);
 
 	if (unlikely(leaf < 0)) {
 		*sptep = 0ull;
diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index 50cec7a15ddb..a4f9447f8327 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -1148,13 +1148,16 @@ bool kvm_tdp_mmu_write_protect_gfn(struct kvm *kvm,
  * Return the level of the lowest level SPTE added to sptes.
  * That SPTE may be non-present.
  */
-int kvm_tdp_mmu_get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes)
+int kvm_tdp_mmu_get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes,
+			 int *root_level)
 {
 	struct tdp_iter iter;
 	struct kvm_mmu *mmu = vcpu->arch.mmu;
 	gfn_t gfn = addr >> PAGE_SHIFT;
 	int leaf = -1;
 
+	*root_level = vcpu->arch.mmu->shadow_root_level;
+
 	tdp_mmu_for_each_pte(iter, mmu, gfn, gfn + 1) {
 		leaf = iter.level;
 		sptes[leaf - 1] = iter.old_spte;
diff --git a/arch/x86/kvm/mmu/tdp_mmu.h b/arch/x86/kvm/mmu/tdp_mmu.h
index 556e065503f6..cbbdbadd1526 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.h
+++ b/arch/x86/kvm/mmu/tdp_mmu.h
@@ -44,5 +44,7 @@ void kvm_tdp_mmu_zap_collapsible_sptes(struct kvm *kvm,
 bool kvm_tdp_mmu_write_protect_gfn(struct kvm *kvm,
 				   struct kvm_memory_slot *slot, gfn_t gfn);
 
-int kvm_tdp_mmu_get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes);
+int kvm_tdp_mmu_get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes,
+			 int *root_level);
+
 #endif /* __KVM_X86_MMU_TDP_MMU_H */
-- 
cgit v1.2.3-70-g09d2


From dde81f9477d018a96fba991c5928c6ab8cc109f8 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Thu, 17 Dec 2020 16:31:38 -0800
Subject: KVM: x86/mmu: Use raw level to index into MMIO walks' sptes array

Bump the size of the sptes array by one and use the raw level of the
SPTE to index into the sptes array.  Using the SPTE level directly
improves readability by eliminating the need to reason out why the level
is being adjusted when indexing the array.  The array is on the stack
and is not explicitly initialized; bumping its size is nothing more than
a superficial adjustment to the stack frame.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20201218003139.2167891-4-seanjc@google.com>
Reviewed-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/mmu/mmu.c     | 15 +++++++--------
 arch/x86/kvm/mmu/tdp_mmu.c |  2 +-
 2 files changed, 8 insertions(+), 9 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 52f36c879086..4798a4472066 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -3500,7 +3500,7 @@ static int get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes, int *root_level
 		leaf = iterator.level;
 		spte = mmu_spte_get_lockless(iterator.sptep);
 
-		sptes[leaf - 1] = spte;
+		sptes[leaf] = spte;
 
 		if (!is_shadow_present_pte(spte))
 			break;
@@ -3514,7 +3514,7 @@ static int get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes, int *root_level
 /* return true if reserved bit is detected on spte. */
 static bool get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep)
 {
-	u64 sptes[PT64_ROOT_MAX_LEVEL];
+	u64 sptes[PT64_ROOT_MAX_LEVEL + 1];
 	struct rsvd_bits_validate *rsvd_check;
 	int root, leaf, level;
 	bool reserved = false;
@@ -3537,16 +3537,15 @@ static bool get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep)
 	rsvd_check = &vcpu->arch.mmu->shadow_zero_check;
 
 	for (level = root; level >= leaf; level--) {
-		if (!is_shadow_present_pte(sptes[level - 1]))
+		if (!is_shadow_present_pte(sptes[level]))
 			break;
 		/*
 		 * Use a bitwise-OR instead of a logical-OR to aggregate the
 		 * reserved bit and EPT's invalid memtype/XWR checks to avoid
 		 * adding a Jcc in the loop.
 		 */
-		reserved |= __is_bad_mt_xwr(rsvd_check, sptes[level - 1]) |
-			    __is_rsvd_bits_set(rsvd_check, sptes[level - 1],
-					       level);
+		reserved |= __is_bad_mt_xwr(rsvd_check, sptes[level]) |
+			    __is_rsvd_bits_set(rsvd_check, sptes[level], level);
 	}
 
 	if (reserved) {
@@ -3554,10 +3553,10 @@ static bool get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep)
 		       __func__, addr);
 		for (level = root; level >= leaf; level--)
 			pr_err("------ spte 0x%llx level %d.\n",
-			       sptes[level - 1], level);
+			       sptes[level], level);
 	}
 
-	*sptep = sptes[leaf - 1];
+	*sptep = sptes[leaf];
 
 	return reserved;
 }
diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index a4f9447f8327..efef571806ad 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -1160,7 +1160,7 @@ int kvm_tdp_mmu_get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes,
 
 	tdp_mmu_for_each_pte(iter, mmu, gfn, gfn + 1) {
 		leaf = iter.level;
-		sptes[leaf - 1] = iter.old_spte;
+		sptes[leaf] = iter.old_spte;
 	}
 
 	return leaf;
-- 
cgit v1.2.3-70-g09d2


From 9aa418792f5f11ef5d6f72265e1f8ae07efd5784 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Thu, 17 Dec 2020 16:31:39 -0800
Subject: KVM: x86/mmu: Optimize not-present/MMIO SPTE check in get_mmio_spte()

Check only the terminal leaf for a "!PRESENT || MMIO" SPTE when looking
for reserved bits on valid, non-MMIO SPTEs.  The get_walk() helpers
terminate their walks if a not-present or MMIO SPTE is encountered, i.e.
the non-terminal SPTEs have already been verified to be regular SPTEs.
This eliminates an extra check-and-branch in a relatively hot loop.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20201218003139.2167891-5-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/mmu/mmu.c | 20 +++++++++++++-------
 1 file changed, 13 insertions(+), 7 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 4798a4472066..769855f5f0a1 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -3511,7 +3511,7 @@ static int get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes, int *root_level
 	return leaf;
 }
 
-/* return true if reserved bit is detected on spte. */
+/* return true if reserved bit(s) are detected on a valid, non-MMIO SPTE. */
 static bool get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep)
 {
 	u64 sptes[PT64_ROOT_MAX_LEVEL + 1];
@@ -3534,11 +3534,20 @@ static bool get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep)
 		return reserved;
 	}
 
+	*sptep = sptes[leaf];
+
+	/*
+	 * Skip reserved bits checks on the terminal leaf if it's not a valid
+	 * SPTE.  Note, this also (intentionally) skips MMIO SPTEs, which, by
+	 * design, always have reserved bits set.  The purpose of the checks is
+	 * to detect reserved bits on non-MMIO SPTEs. i.e. buggy SPTEs.
+	 */
+	if (!is_shadow_present_pte(sptes[leaf]))
+		leaf++;
+
 	rsvd_check = &vcpu->arch.mmu->shadow_zero_check;
 
-	for (level = root; level >= leaf; level--) {
-		if (!is_shadow_present_pte(sptes[level]))
-			break;
+	for (level = root; level >= leaf; level--)
 		/*
 		 * Use a bitwise-OR instead of a logical-OR to aggregate the
 		 * reserved bit and EPT's invalid memtype/XWR checks to avoid
@@ -3546,7 +3555,6 @@ static bool get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep)
 		 */
 		reserved |= __is_bad_mt_xwr(rsvd_check, sptes[level]) |
 			    __is_rsvd_bits_set(rsvd_check, sptes[level], level);
-	}
 
 	if (reserved) {
 		pr_err("%s: detect reserved bits on spte, addr 0x%llx, dump hierarchy:\n",
@@ -3556,8 +3564,6 @@ static bool get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep)
 			       sptes[level], level);
 	}
 
-	*sptep = sptes[leaf];
-
 	return reserved;
 }
 
-- 
cgit v1.2.3-70-g09d2