Merge tag 'kvmarm-fixes-6.9-1' of git://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm into HEAD

KVM/arm64 fixes for 6.9, part #1 - Ensure perf events programmed to count during guest execution are actually enabled before entering the guest in the nVHE configuration. - Restore out-of-range handler for stage-2 translation faults. - Several fixes to stage-2 TLB invalidations to avoid stale translations, possibly including partial walk caches. - Fix early handling of architectural VHE-only systems to ensure E2H is appropriately set. - Correct a format specifier warning in the arch_timer selftest. - Make the KVM banner message correctly handle all of the possible configurations.
author: Paolo Bonzini <pbonzini@redhat.com> 2024-04-02 12:26:15 -0400
committer: Paolo Bonzini <pbonzini@redhat.com> 2024-04-02 12:26:15 -0400
commit: 52b761b48f8e23399fafe3834a173c990357b8de (patch)
tree: 06f7fe191b277dde269ecb11ff3e089da6f2aff9 /arch
parent: 0d1756482e66f326eb65fe08eed24ce2efabb168 (diff)
parent: d96c66ab9fb3ad8b243669cf6b41e68d0f7f9ecd (diff)
230 files changed, 7146 insertions, 1955 deletions
diff --git a/arch/Kconfig b/arch/Kconfig
index fd18b7db2c77..9f066785bb71 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -799,7 +799,7 @@ config CFI_CLANG
 	depends on ARCH_SUPPORTS_CFI_CLANG
 	depends on $(cc-option,-fsanitize=kcfi)
 	help
-	  This option enables Clang’s forward-edge Control Flow Integrity
+	  This option enables Clang's forward-edge Control Flow Integrity
 	  (CFI) checking, where the compiler injects a runtime check to each
 	  indirect function call to ensure the target is a valid function with
 	  the correct static type. This restricts possible call targets and
@@ -1597,4 +1597,16 @@ config FUNCTION_ALIGNMENT
 	default 4 if FUNCTION_ALIGNMENT_4B
 	default 0
 
+config CC_HAS_MIN_FUNCTION_ALIGNMENT
+	# Detect availability of the GCC option -fmin-function-alignment which
+	# guarantees minimal alignment for all functions, unlike
+	# -falign-functions which the compiler ignores for cold functions.
+	def_bool $(cc-option, -fmin-function-alignment=8)
+
+config CC_HAS_SANE_FUNCTION_ALIGNMENT
+	# Set if the guaranteed alignment with -fmin-function-alignment is
+	# available or extra care is required in the kernel. Clang provides
+	# strict alignment always, even with -falign-functions.
+	def_bool CC_HAS_MIN_FUNCTION_ALIGNMENT || CC_IS_CLANG
+
 endmenu
diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig
index 4f490250d323..3afd042150f8 100644
--- a/arch/alpha/Kconfig
+++ b/arch/alpha/Kconfig
@@ -339,6 +339,7 @@ config ALPHA_EV4
 	bool
 	depends on ALPHA_JENSEN || (ALPHA_SABLE && !ALPHA_GAMMA) || ALPHA_LYNX || ALPHA_NORITAKE && !ALPHA_PRIMO || ALPHA_MIKASA && !ALPHA_PRIMO || ALPHA_CABRIOLET || ALPHA_AVANTI_CH || ALPHA_EB64P_CH || ALPHA_XL || ALPHA_NONAME || ALPHA_EB66 || ALPHA_EB66P || ALPHA_P2K
 	default y if !ALPHA_LYNX
+	default y if !ALPHA_EV5
 
 config ALPHA_LCA
 	bool
@@ -366,10 +367,6 @@ config ALPHA_EV5
 	bool "EV5 CPU(s) (model 5/xxx)?" if ALPHA_LYNX
 	default y if ALPHA_RX164 || ALPHA_RAWHIDE || ALPHA_MIATA || ALPHA_LX164 || ALPHA_SX164 || ALPHA_RUFFIAN || ALPHA_SABLE && ALPHA_GAMMA || ALPHA_NORITAKE && ALPHA_PRIMO || ALPHA_MIKASA && ALPHA_PRIMO || ALPHA_PC164 || ALPHA_TAKARA || ALPHA_EB164 || ALPHA_ALCOR
 
-config ALPHA_EV4
-	bool
-	default y if ALPHA_LYNX && !ALPHA_EV5
-
 config ALPHA_CIA
 	bool
 	depends on ALPHA_MIATA || ALPHA_LX164 || ALPHA_SX164 || ALPHA_RUFFIAN || ALPHA_NORITAKE && ALPHA_PRIMO || ALPHA_MIKASA && ALPHA_PRIMO || ALPHA_PC164 || ALPHA_TAKARA || ALPHA_EB164 || ALPHA_ALCOR
@@ -394,16 +391,12 @@ config ALPHA_PRIMO
 	  Say Y if you have an AS 1000 5/xxx or an AS 1000A 5/xxx.
 
 config ALPHA_GAMMA
-	bool "EV5 CPU(s) (model 5/xxx)?"
-	depends on ALPHA_SABLE
+	bool "EV5 CPU(s) (model 5/xxx)?" if ALPHA_SABLE
+	depends on ALPHA_SABLE || ALPHA_LYNX
+	default ALPHA_LYNX
 	help
 	  Say Y if you have an AS 2000 5/xxx or an AS 2100 5/xxx.
 
-config ALPHA_GAMMA
-	bool
-	depends on ALPHA_LYNX
-	default y
-
 config ALPHA_T2
 	bool
 	depends on ALPHA_SABLE || ALPHA_LYNX
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 702d97a9c304..b14aed3a17ab 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -505,8 +505,8 @@ source "arch/arm/mm/Kconfig"
 
 config IWMMXT
 	bool "Enable iWMMXt support"
-	depends on CPU_XSCALE || CPU_XSC3 || CPU_MOHAWK || CPU_PJ4 || CPU_PJ4B
-	default y if PXA27x || PXA3xx || ARCH_MMP || CPU_PJ4 || CPU_PJ4B
+	depends on CPU_XSCALE || CPU_XSC3 || CPU_MOHAWK
+	default y if PXA27x || PXA3xx || ARCH_MMP
 	help
 	  Enable support for iWMMXt context switching at run time if
 	  running on a CPU that supports it.
diff --git a/arch/arm/Kconfig.debug b/arch/arm/Kconfig.debug
index f1fc278081d0..7f47b4f335c3 100644
--- a/arch/arm/Kconfig.debug
+++ b/arch/arm/Kconfig.debug
@@ -90,9 +90,6 @@ config BACKTRACE_VERBOSE
 	  In most cases, say N here, unless you are intending to debug the
 	  kernel and have access to the kernel binary image.
 
-config FRAME_POINTER
-	bool
-
 config DEBUG_USER
 	bool "Verbose user fault messages"
 	help
diff --git a/arch/arm/boot/dts/ti/omap/am33xx-clocks.dtsi b/arch/arm/boot/dts/ti/omap/am33xx-clocks.dtsi
index d34483ae1778..99b62c6b4ce8 100644
--- a/arch/arm/boot/dts/ti/omap/am33xx-clocks.dtsi
+++ b/arch/arm/boot/dts/ti/omap/am33xx-clocks.dtsi
@@ -108,30 +108,31 @@
 		compatible = "ti,clksel";
 		reg = <0x664>;
 		#clock-cells = <2>;
-		#address-cells = <0>;
+		#address-cells = <1>;
+		#size-cells = <0>;
 
-		ehrpwm0_tbclk: clock-ehrpwm0-tbclk {
+		ehrpwm0_tbclk: clock-ehrpwm0-tbclk@0 {
+			reg = <0>;
 			#clock-cells = <0>;
 			compatible = "ti,gate-clock";
 			clock-output-names = "ehrpwm0_tbclk";
 			clocks = <&l4ls_gclk>;
-			ti,bit-shift = <0>;
 		};
 
-		ehrpwm1_tbclk: clock-ehrpwm1-tbclk {
+		ehrpwm1_tbclk: clock-ehrpwm1-tbclk@1 {
+			reg = <1>;
 			#clock-cells = <0>;
 			compatible = "ti,gate-clock";
 			clock-output-names = "ehrpwm1_tbclk";
 			clocks = <&l4ls_gclk>;
-			ti,bit-shift = <1>;
 		};
 
-		ehrpwm2_tbclk: clock-ehrpwm2-tbclk {
+		ehrpwm2_tbclk: clock-ehrpwm2-tbclk@2 {
+			reg = <2>;
 			#clock-cells = <0>;
 			compatible = "ti,gate-clock";
 			clock-output-names = "ehrpwm2_tbclk";
 			clocks = <&l4ls_gclk>;
-			ti,bit-shift = <2>;
 		};
 	};
 };
@@ -566,17 +567,19 @@
 		compatible = "ti,clksel";
 		reg = <0x52c>;
 		#clock-cells = <2>;
-		#address-cells = <0>;
+		#address-cells = <1>;
+		#size-cells = <0>;
 
-		gfx_fclk_clksel_ck: clock-gfx-fclk-clksel {
+		gfx_fclk_clksel_ck: clock-gfx-fclk-clksel@1 {
+			reg = <1>;
 			#clock-cells = <0>;
 			compatible = "ti,mux-clock";
 			clock-output-names = "gfx_fclk_clksel_ck";
 			clocks = <&dpll_core_m4_ck>, <&dpll_per_m2_ck>;
-			ti,bit-shift = <1>;
 		};
 
-		gfx_fck_div_ck: clock-gfx-fck-div {
+		gfx_fck_div_ck: clock-gfx-fck-div@0 {
+			reg = <0>;
 			#clock-cells = <0>;
 			compatible = "ti,divider-clock";
 			clock-output-names = "gfx_fck_div_ck";
@@ -589,30 +592,32 @@
 		compatible = "ti,clksel";
 		reg = <0x700>;
 		#clock-cells = <2>;
-		#address-cells = <0>;
+		#address-cells = <1>;
+		#size-cells = <0>;
 
-		sysclkout_pre_ck: clock-sysclkout-pre {
+		sysclkout_pre_ck: clock-sysclkout-pre@0 {
+			reg = <0>;
 			#clock-cells = <0>;
 			compatible = "ti,mux-clock";
 			clock-output-names = "sysclkout_pre_ck";
 			clocks = <&clk_32768_ck>, <&l3_gclk>, <&dpll_ddr_m2_ck>, <&dpll_per_m2_ck>, <&lcd_gclk>;
 		};
 
-		clkout2_div_ck: clock-clkout2-div {
+		clkout2_div_ck: clock-clkout2-div@3 {
+			reg = <3>;
 			#clock-cells = <0>;
 			compatible = "ti,divider-clock";
 			clock-output-names = "clkout2_div_ck";
 			clocks = <&sysclkout_pre_ck>;
-			ti,bit-shift = <3>;
 			ti,max-div = <8>;
 		};
 
-		clkout2_ck: clock-clkout2 {
+		clkout2_ck: clock-clkout2@7 {
+			reg = <7>;
 			#clock-cells = <0>;
 			compatible = "ti,gate-clock";
 			clock-output-names = "clkout2_ck";
 			clocks = <&clkout2_div_ck>;
-			ti,bit-shift = <7>;
 		};
 	};
 };
diff --git a/arch/arm/boot/dts/ti/omap/am35xx-clocks.dtsi b/arch/arm/boot/dts/ti/omap/am35xx-clocks.dtsi
index 0ee7afaa0e8e..b521139e6f51 100644
--- a/arch/arm/boot/dts/ti/omap/am35xx-clocks.dtsi
+++ b/arch/arm/boot/dts/ti/omap/am35xx-clocks.dtsi
@@ -66,22 +66,23 @@
 		compatible = "ti,clksel";
 		reg = <0xa10>;
 		#clock-cells = <2>;
-		#address-cells = <0>;
+		#address-cells = <1>;
+		#size-cells = <0>;
 
-		ipss_ick: clock-ipss-ick {
+		ipss_ick: clock-ipss-ick@4 {
+			reg = <4>;
 			#clock-cells = <0>;
 			compatible = "ti,am35xx-interface-clock";
 			clock-output-names = "ipss_ick";
 			clocks = <&core_l3_ick>;
-			ti,bit-shift = <4>;
 		};
 
-		uart4_ick_am35xx: clock-uart4-ick-am35xx {
+		uart4_ick_am35xx: clock-uart4-ick-am35xx@23 {
+			reg = <23>;
 			#clock-cells = <0>;
 			compatible = "ti,omap3-interface-clock";
 			clock-output-names = "uart4_ick_am35xx";
 			clocks = <&core_l4_ick>;
-			ti,bit-shift = <23>;
 		};
 	};
 
@@ -101,14 +102,15 @@
 		compatible = "ti,clksel";
 		reg = <0xa00>;
 		#clock-cells = <2>;
-		#address-cells = <0>;
+		#address-cells = <1>;
+		#size-cells = <0>;
 
-		uart4_fck_am35xx: clock-uart4-fck-am35xx {
+		uart4_fck_am35xx: clock-uart4-fck-am35xx@23 {
+			reg = <23>;
 			#clock-cells = <0>;
 			compatible = "ti,wait-gate-clock";
 			clock-output-names = "uart4_fck_am35xx";
 			clocks = <&core_48m_fck>;
-			ti,bit-shift = <23>;
 		};
 	};
 };
diff --git a/arch/arm/boot/dts/ti/omap/omap3430es1-clocks.dtsi b/arch/arm/boot/dts/ti/omap/omap3430es1-clocks.dtsi
index 24adfac26be0..6e754d265f18 100644
--- a/arch/arm/boot/dts/ti/omap/omap3430es1-clocks.dtsi
+++ b/arch/arm/boot/dts/ti/omap/omap3430es1-clocks.dtsi
@@ -50,30 +50,31 @@
 		compatible = "ti,clksel";
 		reg = <0xa00>;
 		#clock-cells = <2>;
-		#address-cells = <0>;
+		#address-cells = <1>;
+		#size-cells = <0>;
 
-		d2d_26m_fck: clock-d2d-26m-fck {
+		d2d_26m_fck: clock-d2d-26m-fck@3 {
+			reg = <3>;
 			#clock-cells = <0>;
 			compatible = "ti,wait-gate-clock";
 			clock-output-names = "d2d_26m_fck";
 			clocks = <&sys_ck>;
-			ti,bit-shift = <3>;
 		};
 
-		fshostusb_fck: clock-fshostusb-fck {
+		fshostusb_fck: clock-fshostusb-fck@5 {
+			reg = <5>;
 			#clock-cells = <0>;
 			compatible = "ti,wait-gate-clock";
 			clock-output-names = "fshostusb_fck";
 			clocks = <&core_48m_fck>;
-			ti,bit-shift = <5>;
 		};
 
-		ssi_ssr_gate_fck_3430es1: clock-ssi-ssr-gate-fck-3430es1 {
+		ssi_ssr_gate_fck_3430es1: clock-ssi-ssr-gate-fck-3430es1@0 {
+			reg = <0>;
 			#clock-cells = <0>;
 			compatible = "ti,composite-no-wait-gate-clock";
 			clock-output-names = "ssi_ssr_gate_fck_3430es1";
 			clocks = <&corex2_fck>;
-			ti,bit-shift = <0>;
 		};
 	};
 
@@ -81,23 +82,24 @@
 		compatible = "ti,clksel";
 		reg = <0xa40>;
 		#clock-cells = <2>;
-		#address-cells = <0>;
+		#address-cells = <1>;
+		#size-cells = <0>;
 
-		ssi_ssr_div_fck_3430es1: clock-ssi-ssr-div-fck-3430es1 {
+		ssi_ssr_div_fck_3430es1: clock-ssi-ssr-div-fck-3430es1@8 {
+			reg = <8>;
 			#clock-cells = <0>;
 			compatible = "ti,composite-divider-clock";
 			clock-output-names = "ssi_ssr_div_fck_3430es1";
 			clocks = <&corex2_fck>;
-			ti,bit-shift = <8>;
 			ti,dividers = <0>, <1>, <2>, <3>, <4>, <0>, <6>, <0>, <8>;
 		};
 
-		usb_l4_div_ick: clock-usb-l4-div-ick {
+		usb_l4_div_ick: clock-usb-l4-div-ick@4 {
+			reg = <4>;
 			#clock-cells = <0>;
 			compatible = "ti,composite-divider-clock";
 			clock-output-names = "usb_l4_div_ick";
 			clocks = <&l4_ick>;
-			ti,bit-shift = <4>;
 			ti,max-div = <1>;
 			ti,index-starts-at-one;
 		};
@@ -121,38 +123,39 @@
 		compatible = "ti,clksel";
 		reg = <0xa10>;
 		#clock-cells = <2>;
-		#address-cells = <0>;
+		#address-cells = <1>;
+		#size-cells = <0>;
 
-		hsotgusb_ick_3430es1: clock-hsotgusb-ick-3430es1 {
+		hsotgusb_ick_3430es1: clock-hsotgusb-ick-3430es1@4 {
+			reg = <4>;
 			#clock-cells = <0>;
 			compatible = "ti,omap3-no-wait-interface-clock";
 			clock-output-names = "hsotgusb_ick_3430es1";
 			clocks = <&core_l3_ick>;
-			ti,bit-shift = <4>;
 		};
 
-		fac_ick: clock-fac-ick {
+		fac_ick: clock-fac-ick@8 {
+			reg = <8>;
 			#clock-cells = <0>;
 			compatible = "ti,omap3-interface-clock";
 			clock-output-names = "fac_ick";
 			clocks = <&core_l4_ick>;
-			ti,bit-shift = <8>;
 		};
 
-		ssi_ick: clock-ssi-ick-3430es1 {
+		ssi_ick: clock-ssi-ick-3430es1@0 {
+			reg = <0>;
 			#clock-cells = <0>;
 			compatible = "ti,omap3-no-wait-interface-clock";
 			clock-output-names = "ssi_ick_3430es1";
 			clocks = <&ssi_l4_ick>;
-			ti,bit-shift = <0>;
 		};
 
-		usb_l4_gate_ick: clock-usb-l4-gate-ick {
+		usb_l4_gate_ick: clock-usb-l4-gate-ick@5 {
+			reg = <5>;
 			#clock-cells = <0>;
 			compatible = "ti,composite-interface-clock";
 			clock-output-names = "usb_l4_gate_ick";
 			clocks = <&l4_ick>;
-			ti,bit-shift = <5>;
 		};
 	};
 
@@ -174,14 +177,15 @@
 		compatible = "ti,clksel";
 		reg = <0xe00>;
 		#clock-cells = <2>;
-		#address-cells = <0>;
+		#address-cells = <1>;
+		#size-cells = <0>;
 
-		dss1_alwon_fck: clock-dss1-alwon-fck-3430es1 {
+		dss1_alwon_fck: clock-dss1-alwon-fck-3430es1@0 {
+			reg = <0>;
 			#clock-cells = <0>;
 			compatible = "ti,gate-clock";
 			clock-output-names = "dss1_alwon_fck_3430es1";
 			clocks = <&dpll4_m4x2_ck>;
-			ti,bit-shift = <0>;
 			ti,set-rate-parent;
 		};
 	};
diff --git a/arch/arm/boot/dts/ti/omap/omap34xx-omap36xx-clocks.dtsi b/arch/arm/boot/dts/ti/omap/omap34xx-omap36xx-clocks.dtsi
index 8374532f20e2..ca6372711baf 100644
--- a/arch/arm/boot/dts/ti/omap/omap34xx-omap36xx-clocks.dtsi
+++ b/arch/arm/boot/dts/ti/omap/omap34xx-omap36xx-clocks.dtsi
@@ -17,46 +17,47 @@
 		compatible = "ti,clksel";
 		reg = <0xa14>;
 		#clock-cells = <2>;
-		#address-cells = <0>;
+		#address-cells = <1>;
+		#size-cells = <0>;
 
-		aes1_ick: clock-aes1-ick {
+		aes1_ick: clock-aes1-ick@3 {
+			reg = <3>;
 			#clock-cells = <0>;
 			compatible = "ti,omap3-interface-clock";
 			clock-output-names = "aes1_ick";
 			clocks = <&security_l4_ick2>;
-			ti,bit-shift = <3>;
 		};
 
-		rng_ick: clock-rng-ick {
+		rng_ick: clock-rng-ick@2 {
+			reg = <2>;
 			#clock-cells = <0>;
 			compatible = "ti,omap3-interface-clock";
 			clock-output-names = "rng_ick";
 			clocks = <&security_l4_ick2>;
-			ti,bit-shift = <2>;
 		};
 
-		sha11_ick: clock-sha11-ick {
+		sha11_ick: clock-sha11-ick@1 {
+			reg = <1>;
 			#clock-cells = <0>;
 			compatible = "ti,omap3-interface-clock";
 			clock-output-names = "sha11_ick";
 			clocks = <&security_l4_ick2>;
-			ti,bit-shift = <1>;
 		};
 
-		des1_ick: clock-des1-ick {
+		des1_ick: clock-des1-ick@0 {
+			reg = <0>;
 			#clock-cells = <0>;
 			compatible = "ti,omap3-interface-clock";
 			clock-output-names = "des1_ick";
 			clocks = <&security_l4_ick2>;
-			ti,bit-shift = <0>;
 		};
 
-		pka_ick: clock-pka-ick {
+		pka_ick: clock-pka-ick@4 {
+			reg = <4>;
 			#clock-cells = <0>;
 			compatible = "ti,omap3-interface-clock";
 			clock-output-names = "pka_ick";
 			clocks = <&security_l3_ick>;
-			ti,bit-shift = <4>;
 		};
 	};
 
@@ -65,23 +66,24 @@
 		compatible = "ti,clksel";
 		reg = <0xf00>;
 		#clock-cells = <2>;
-		#address-cells = <0>;
+		#address-cells = <1>;
+		#size-cells = <0>;
 
-		cam_mclk: clock-cam-mclk {
+		cam_mclk: clock-cam-mclk@0 {
+			reg = <0>;
 			#clock-cells = <0>;
 			compatible = "ti,gate-clock";
 			clock-output-names = "cam_mclk";
 			clocks = <&dpll4_m5x2_ck>;
-			ti,bit-shift = <0>;
 			ti,set-rate-parent;
 		};
 
-		csi2_96m_fck: clock-csi2-96m-fck {
+		csi2_96m_fck: clock-csi2-96m-fck@1 {
+			reg = <1>;
 			#clock-cells = <0>;
 			compatible = "ti,gate-clock";
 			clock-output-names = "csi2_96m_fck";
 			clocks = <&core_96m_fck>;
-			ti,bit-shift = <1>;
 		};
 	};
 
@@ -105,46 +107,47 @@
 		compatible = "ti,clksel";
 		reg = <0xa10>;
 		#clock-cells = <2>;
-		#address-cells = <0>;
+		#address-cells = <1>;
+		#size-cells = <0>;
 
-		icr_ick: clock-icr-ick {
+		icr_ick: clock-icr-ick@29 {
+			reg = <29>;
 			#clock-cells = <0>;
 			compatible = "ti,omap3-interface-clock";
 			clock-output-names = "icr_ick";
 			clocks = <&core_l4_ick>;
-			ti,bit-shift = <29>;
 		};
 
-		des2_ick: clock-des2-ick {
+		des2_ick: clock-des2-ick@26 {
+			reg = <26>;
 			#clock-cells = <0>;
 			compatible = "ti,omap3-interface-clock";
 			clock-output-names = "des2_ick";
 			clocks = <&core_l4_ick>;
-			ti,bit-shift = <26>;
 		};
 
-		mspro_ick: clock-mspro-ick {
+		mspro_ick: clock-mspro-ick@23 {
+			reg = <23>;
 			#clock-cells = <0>;
 			compatible = "ti,omap3-interface-clock";
 			clock-output-names = "mspro_ick";
 			clocks = <&core_l4_ick>;
-			ti,bit-shift = <23>;
 		};
 
-		mailboxes_ick: clock-mailboxes-ick {
+		mailboxes_ick: clock-mailboxes-ick@7 {
+			reg = <7>;
 			#clock-cells = <0>;
 			compatible = "ti,omap3-interface-clock";
 			clock-output-names = "mailboxes_ick";
 			clocks = <&core_l4_ick>;
-			ti,bit-shift = <7>;
 		};
 
-		sad2d_ick: clock-sad2d-ick {
+		sad2d_ick: clock-sad2d-ick@3 {
+			reg = <3>;
 			#clock-cells = <0>;
 			compatible = "ti,omap3-interface-clock";
 			clock-output-names = "sad2d_ick";
 			clocks = <&l3_ick>;
-			ti,bit-shift = <3>;
 		};
 	};
 
@@ -160,22 +163,23 @@
 		compatible = "ti,clksel";
 		reg = <0xc00>;
 		#clock-cells = <2>;
-		#address-cells = <0>;
+		#address-cells = <1>;
+		#size-cells = <0>;
 
-		sr1_fck: clock-sr1-fck {
+		sr1_fck: clock-sr1-fck@6 {
+			reg = <6>;
 			#clock-cells = <0>;
 			compatible = "ti,wait-gate-clock";
 			clock-output-names = "sr1_fck";
 			clocks = <&sys_ck>;
-			ti,bit-shift = <6>;
 		};
 
-		sr2_fck: clock-sr2-fck {
+		sr2_fck: clock-sr2-fck@7 {
+			reg = <7>;
 			#clock-cells = <0>;
 			compatible = "ti,wait-gate-clock";
 			clock-output-names = "sr2_fck";
 			clocks = <&sys_ck>;
-			ti,bit-shift = <7>;
 		};
 	};
 
@@ -228,22 +232,23 @@
 		compatible = "ti,clksel";
 		reg = <0xa00>;
 		#clock-cells = <2>;
-		#address-cells = <0>;
+		#address-cells = <1>;
+		#size-cells = <0>;
 
-		modem_fck: clock-modem-fck {
+		modem_fck: clock-modem-fck@31 {
+			reg = <31>;
 			#clock-cells = <0>;
 			compatible = "ti,omap3-interface-clock";
 			clock-output-names = "modem_fck";
 			clocks = <&sys_ck>;
-			ti,bit-shift = <31>;
 		};
 
-		mspro_fck: clock-mspro-fck {
+		mspro_fck: clock-mspro-fck@23 {
+			reg = <23>;
 			#clock-cells = <0>;
 			compatible = "ti,wait-gate-clock";
 			clock-output-names = "mspro_fck";
 			clocks = <&core_96m_fck>;
-			ti,bit-shift = <23>;
 		};
 	};
 
@@ -252,14 +257,15 @@
 		compatible = "ti,clksel";
 		reg = <0xa18>;
 		#clock-cells = <2>;
-		#address-cells = <0>;
+		#address-cells = <1>;
+		#ssize-cells = <0>;
 
-		mad2d_ick: clock-mad2d-ick {
+		mad2d_ick: clock-mad2d-ick@3 {
+			reg = <3>;
 			#clock-cells = <0>;
 			compatible = "ti,omap3-interface-clock";
 			clock-output-names = "mad2d_ick";
 			clocks = <&l3_ick>;
-			ti,bit-shift = <3>;
 		};
 	};
 
diff --git a/arch/arm/boot/dts/ti/omap/omap36xx-am35xx-omap3430es2plus-clocks.dtsi b/arch/arm/boot/dts/ti/omap/omap36xx-am35xx-omap3430es2plus-clocks.dtsi
index dcc5cfcd1fe6..656cf80f878a 100644
--- a/arch/arm/boot/dts/ti/omap/omap36xx-am35xx-omap3430es2plus-clocks.dtsi
+++ b/arch/arm/boot/dts/ti/omap/omap36xx-am35xx-omap3430es2plus-clocks.dtsi
@@ -138,14 +138,15 @@
 		compatible = "ti,clksel";
 		reg = <0xa18>;
 		#clock-cells = <2>;
-		#address-cells = <0>;
+		#address-cells = <1>;
+		#size-cells = <0>;
 
-		usbtll_ick: clock-usbtll-ick {
+		usbtll_ick: clock-usbtll-ick@2 {
+			reg = <2>;
 			#clock-cells = <0>;
 			compatible = "ti,omap3-interface-clock";
 			clock-output-names = "usbtll_ick";
 			clocks = <&core_l4_ick>;
-			ti,bit-shift = <2>;
 		};
 	};
 
@@ -153,14 +154,15 @@
 		compatible = "ti,clksel";
 		reg = <0xa10>;
 		#clock-cells = <2>;
-		#address-cells = <0>;
+		#address-cells = <1>;
+		#size-cells = <0>;
 
-		mmchs3_ick: clock-mmchs3-ick {
+		mmchs3_ick: clock-mmchs3-ick@30 {
+			reg = <30>;
 			#clock-cells = <0>;
 			compatible = "ti,omap3-interface-clock";
 			clock-output-names = "mmchs3_ick";
 			clocks = <&core_l4_ick>;
-			ti,bit-shift = <30>;
 		};
 	};
 
@@ -168,14 +170,15 @@
 		compatible = "ti,clksel";
 		reg = <0xa00>;
 		#clock-cells = <2>;
-		#address-cells = <0>;
+		#address-cells = <1>;
+		#size-cells = <0>;
 
-		mmchs3_fck: clock-mmchs3-fck {
+		mmchs3_fck: clock-mmchs3-fck@30 {
+			reg = <30>;
 			#clock-cells = <0>;
 			compatible = "ti,wait-gate-clock";
 			clock-output-names = "mmchs3_fck";
 			clocks = <&core_96m_fck>;
-			ti,bit-shift = <30>;
 		};
 	};
 
@@ -183,14 +186,15 @@
 		compatible = "ti,clksel";
 		reg = <0xe00>;
 		#clock-cells = <2>;
-		#address-cells = <0>;
+		#address-cells = <1>;
+		#size-cells = <0>;
 
-		dss1_alwon_fck: clock-dss1-alwon-fck-3430es2 {
+		dss1_alwon_fck: clock-dss1-alwon-fck-3430es2@0 {
+			reg = <0>;
 			#clock-cells = <0>;
 			compatible = "ti,dss-gate-clock";
 			clock-output-names = "dss1_alwon_fck_3430es2";
 			clocks = <&dpll4_m4x2_ck>;
-			ti,bit-shift = <0>;
 			ti,set-rate-parent;
 		};
 	};
diff --git a/arch/arm/boot/dts/ti/omap/omap36xx-clocks.dtsi b/arch/arm/boot/dts/ti/omap/omap36xx-clocks.dtsi
index c5fdb2bd765d..1e90f2b1ef8b 100644
--- a/arch/arm/boot/dts/ti/omap/omap36xx-clocks.dtsi
+++ b/arch/arm/boot/dts/ti/omap/omap36xx-clocks.dtsi
@@ -62,14 +62,15 @@
 		compatible = "ti,clksel";
 		reg = <0x1000>;
 		#clock-cells = <2>;
-		#address-cells = <0>;
+		#address-cells = <1>;
+		#size-cells = <0>;
 
-		uart4_fck: clock-uart4-fck {
+		uart4_fck: clock-uart4-fck@18 {
+			reg = <18>;
 			#clock-cells = <0>;
 			compatible = "ti,wait-gate-clock";
 			clock-output-names = "uart4_fck";
 			clocks = <&per_48m_fck>;
-			ti,bit-shift = <18>;
 		};
 	};
 };
diff --git a/arch/arm/boot/dts/ti/omap/omap36xx-omap3430es2plus-clocks.dtsi b/arch/arm/boot/dts/ti/omap/omap36xx-omap3430es2plus-clocks.dtsi
index c94eb86d3da7..798acb839db4 100644
--- a/arch/arm/boot/dts/ti/omap/omap36xx-omap3430es2plus-clocks.dtsi
+++ b/arch/arm/boot/dts/ti/omap/omap36xx-omap3430es2plus-clocks.dtsi
@@ -9,14 +9,15 @@
 		compatible = "ti,clksel";
 		reg = <0xa00>;
 		#clock-cells = <2>;
-		#address-cells = <0>;
+		#address-cells = <1>;
+		#size-cells = <0>;
 
-		ssi_ssr_gate_fck_3430es2: clock-ssi-ssr-gate-fck-3430es2 {
+		ssi_ssr_gate_fck_3430es2: clock-ssi-ssr-gate-fck-3430es2@0 {
+			reg = <0>;
 			#clock-cells = <0>;
 			compatible = "ti,composite-no-wait-gate-clock";
 			clock-output-names = "ssi_ssr_gate_fck_3430es2";
 			clocks = <&corex2_fck>;
-			ti,bit-shift = <0>;
 		};
 	};
 
@@ -24,14 +25,15 @@
 		compatible = "ti,clksel";
 		reg = <0xa40>;
 		#clock-cells = <2>;
-		#address-cells = <0>;
+		#address-cells = <1>;
+		#size-cells = <0>;
 
-		ssi_ssr_div_fck_3430es2: clock-ssi-ssr-div-fck-3430es2 {
+		ssi_ssr_div_fck_3430es2: clock-ssi-ssr-div-fck-3430es2@8 {
+			reg = <8>;
 			#clock-cells = <0>;
 			compatible = "ti,composite-divider-clock";
 			clock-output-names = "ssi_ssr_div_fck_3430es2";
 			clocks = <&corex2_fck>;
-			ti,bit-shift = <8>;
 			ti,dividers = <0>, <1>, <2>, <3>, <4>, <0>, <6>, <0>, <8>;
 		};
 	};
@@ -54,22 +56,23 @@
 		compatible = "ti,clksel";
 		reg = <0xa10>;
 		#clock-cells = <2>;
-		#address-cells = <0>;
+		#address-cells = <1>;
+		#size-cells = <0>;
 
-		hsotgusb_ick_3430es2: clock-hsotgusb-ick-3430es2 {
+		hsotgusb_ick_3430es2: clock-hsotgusb-ick-3430es2@4 {
+			reg = <4>;
 			#clock-cells = <0>;
 			compatible = "ti,omap3-hsotgusb-interface-clock";
 			clock-output-names = "hsotgusb_ick_3430es2";
 			clocks = <&core_l3_ick>;
-			ti,bit-shift = <4>;
 		};
 
-		ssi_ick: clock-ssi-ick-3430es2 {
+		ssi_ick: clock-ssi-ick-3430es2@0 {
+			reg = <0>;
 			#clock-cells = <0>;
 			compatible = "ti,omap3-ssi-interface-clock";
 			clock-output-names = "ssi_ick_3430es2";
 			clocks = <&ssi_l4_ick>;
-			ti,bit-shift = <0>;
 		};
 	};
 
@@ -85,14 +88,15 @@
 		compatible = "ti,clksel";
 		reg = <0xc00>;
 		#clock-cells = <2>;
-		#address-cells = <0>;
+		#address-cells = <1>;
+		#size-cells = <0>;
 
-		usim_gate_fck: clock-usim-gate-fck {
+		usim_gate_fck: clock-usim-gate-fck@9 {
+			reg = <9>;
 			#clock-cells = <0>;
 			compatible = "ti,composite-gate-clock";
 			clock-output-names = "usim_gate_fck";
 			clocks = <&omap_96m_fck>;
-			ti,bit-shift = <9>;
 		};
 	};
 
@@ -172,14 +176,15 @@
 		compatible = "ti,clksel";
 		reg = <0xc40>;
 		#clock-cells = <2>;
-		#address-cells = <0>;
+		#address-cells = <1>;
+		#size-cells = <0>;
 
-		usim_mux_fck: clock-usim-mux-fck {
+		usim_mux_fck: clock-usim-mux-fck@3 {
+			reg = <3>;
 			#clock-cells = <0>;
 			compatible = "ti,composite-mux-clock";
 			clock-output-names = "usim_mux_fck";
 			clocks = <&sys_ck>, <&sys_d2_ck>, <&omap_96m_d2_fck>, <&omap_96m_d4_fck>, <&omap_96m_d8_fck>, <&omap_96m_d10_fck>, <&dpll5_m2_d4_ck>, <&dpll5_m2_d8_ck>, <&dpll5_m2_d16_ck>, <&dpll5_m2_d20_ck>;
-			ti,bit-shift = <3>;
 			ti,index-starts-at-one;
 		};
 	};
@@ -194,14 +199,15 @@
 		compatible = "ti,clksel";
 		reg = <0xc10>;
 		#clock-cells = <2>;
-		#address-cells = <0>;
+		#address-cells = <1>;
+		#size-cells = <0>;
 
-		usim_ick: clock-usim-ick {
+		usim_ick: clock-usim-ick@9 {
+			reg = <9>;
 			#clock-cells = <0>;
 			compatible = "ti,omap3-interface-clock";
 			clock-output-names = "usim_ick";
 			clocks = <&wkup_l4_ick>;
-			ti,bit-shift = <9>;
 		};
 	};
 };
diff --git a/arch/arm/boot/dts/ti/omap/omap3xxx-clocks.dtsi b/arch/arm/boot/dts/ti/omap/omap3xxx-clocks.dtsi
index 2e13ca11ceea..901ee79a66f1 100644
--- a/arch/arm/boot/dts/ti/omap/omap3xxx-clocks.dtsi
+++ b/arch/arm/boot/dts/ti/omap/omap3xxx-clocks.dtsi
@@ -83,29 +83,31 @@
 		compatible = "ti,clksel";
 		reg = <0x68>;
 		#clock-cells = <2>;
-		#address-cells = <0>;
+		#address-cells = <1>;
+		#size-cells = <0>;
 
-		mcbsp5_mux_fck: clock-mcbsp5-mux-fck {
+		mcbsp5_mux_fck: clock-mcbsp5-mux-fck@4 {
+			reg = <4>;
 			#clock-cells = <0>;
 			compatible = "ti,composite-mux-clock";
 			clock-output-names = "mcbsp5_mux_fck";
 			clocks = <&core_96m_fck>, <&mcbsp_clks>;
-			ti,bit-shift = <4>;
 		};
 
-		mcbsp3_mux_fck: clock-mcbsp3-mux-fck {
+		mcbsp3_mux_fck: clock-mcbsp3-mux-fck@0 {
+			reg = <0>;
 			#clock-cells = <0>;
 			compatible = "ti,composite-mux-clock";
 			clock-output-names = "mcbsp3_mux_fck";
 			clocks = <&per_96m_fck>, <&mcbsp_clks>;
 		};
 
-		mcbsp4_mux_fck: clock-mcbsp4-mux-fck {
+		mcbsp4_mux_fck: clock-mcbsp4-mux-fck@2 {
+			reg = <2>;
 			#clock-cells = <0>;
 			compatible = "ti,composite-mux-clock";
 			clock-output-names = "mcbsp4_mux_fck";
 			clocks = <&per_96m_fck>, <&mcbsp_clks>;
-			ti,bit-shift = <2>;
 		};
 	};
 
@@ -120,22 +122,23 @@
 		compatible = "ti,clksel";
 		reg = <0x4>;
 		#clock-cells = <2>;
-		#address-cells = <0>;
+		#address-cells = <1>;
+		#size-cells = <0>;
 
-		mcbsp1_mux_fck: clock-mcbsp1-mux-fck {
+		mcbsp1_mux_fck: clock-mcbsp1-mux-fck@2 {
+			reg = <2>;
 			#clock-cells = <0>;
 			compatible = "ti,composite-mux-clock";
 			clock-output-names = "mcbsp1_mux_fck";
 			clocks = <&core_96m_fck>, <&mcbsp_clks>;
-			ti,bit-shift = <2>;
 		};
 
-		mcbsp2_mux_fck: clock-mcbsp2-mux-fck {
+		mcbsp2_mux_fck: clock-mcbsp2-mux-fck@6 {
+			reg = <6>;
 			#clock-cells = <0>;
 			compatible = "ti,composite-mux-clock";
 			clock-output-names = "mcbsp2_mux_fck";
 			clocks = <&per_96m_fck>, <&mcbsp_clks>;
-			ti,bit-shift = <6>;
 		};
 	};
 
@@ -259,79 +262,81 @@
 		compatible = "ti,clksel";
 		reg = <0x1140>;
 		#clock-cells = <2>;
-		#address-cells = <0>;
+		#address-cells = <1>;
+		#size-cells = <0>;
 
-		dpll3_m3_ck: clock-dpll3-m3 {
+		dpll3_m3_ck: clock-dpll3-m3@16 {
+			reg = <16>;
 			#clock-cells = <0>;
 			compatible = "ti,divider-clock";
 			clock-output-names = "dpll3_m3_ck";
 			clocks = <&dpll3_ck>;
-			ti,bit-shift = <16>;
 			ti,max-div = <31>;
 			ti,index-starts-at-one;
 		};
 
-		dpll4_m6_ck: clock-dpll4-m6 {
+		dpll4_m6_ck: clock-dpll4-m6@24 {
+			reg = <24>;
 			#clock-cells = <0>;
 			compatible = "ti,divider-clock";
 			clock-output-names = "dpll4_m6_ck";
 			clocks = <&dpll4_ck>;
-			ti,bit-shift = <24>;
 			ti,max-div = <63>;
 			ti,index-starts-at-one;
 		};
 
-		emu_src_mux_ck: clock-emu-src-mux {
+		emu_src_mux_ck: clock-emu-src-mux@0 {
+			reg = <0>;
 			#clock-cells = <0>;
 			compatible = "ti,mux-clock";
 			clock-output-names = "emu_src_mux_ck";
 			clocks = <&sys_ck>, <&emu_core_alwon_ck>, <&emu_per_alwon_ck>, <&emu_mpu_alwon_ck>;
 		};
 
-		pclk_fck: clock-pclk-fck {
+		pclk_fck: clock-pclk-fck@8 {
+			reg = <8>;
 			#clock-cells = <0>;
 			compatible = "ti,divider-clock";
 			clock-output-names = "pclk_fck";
 			clocks = <&emu_src_ck>;
-			ti,bit-shift = <8>;
 			ti,max-div = <7>;
 			ti,index-starts-at-one;
 		};
 
-		pclkx2_fck: clock-pclkx2-fck {
+		pclkx2_fck: clock-pclkx2-fck@6 {
+			reg = <6>;
 			#clock-cells = <0>;
 			compatible = "ti,divider-clock";
 			clock-output-names = "pclkx2_fck";
 			clocks = <&emu_src_ck>;
-			ti,bit-shift = <6>;
 			ti,max-div = <3>;
 			ti,index-starts-at-one;
 		};
 
-		atclk_fck: clock-atclk-fck {
+		atclk_fck: clock-atclk-fck@4 {
+			reg = <4>;
 			#clock-cells = <0>;
 			compatible = "ti,divider-clock";
 			clock-output-names = "atclk_fck";
 			clocks = <&emu_src_ck>;
-			ti,bit-shift = <4>;
 			ti,max-div = <3>;
 			ti,index-starts-at-one;
 		};
 
-		traceclk_src_fck: clock-traceclk-src-fck {
+		traceclk_src_fck: clock-traceclk-src-fck@2 {
+			reg = <2>;
 			#clock-cells = <0>;
 			compatible = "ti,mux-clock";
 			clock-output-names = "traceclk_src_fck";
 			clocks = <&sys_ck>, <&emu_core_alwon_ck>, <&emu_per_alwon_ck>, <&emu_mpu_alwon_ck>;
-			ti,bit-shift = <2>;
 		};
 
-		traceclk_fck: clock-traceclk-fck {
+		traceclk_fck: clock-traceclk-fck@11 {
+			reg = <11>;
 			#clock-cells = <0>;
 			compatible = "ti,divider-clock";
 			clock-output-names = "traceclk_fck";
 			clocks = <&traceclk_src_fck>;
-			ti,bit-shift = <11>;
 			ti,max-div = <7>;
 			ti,index-starts-at-one;
 		};
@@ -429,40 +434,41 @@
 		compatible = "ti,clksel";
 		reg = <0xd40>;
 		#clock-cells = <2>;
-		#address-cells = <0>;
+		#address-cells = <1>;
+		#size-cells = <0>;
 
-		dpll3_m2_ck: clock-dpll3-m2 {
+		dpll3_m2_ck: clock-dpll3-m2@27 {
+			reg = <27>;
 			#clock-cells = <0>;
 			compatible = "ti,divider-clock";
 			clock-output-names = "dpll3_m2_ck";
 			clocks = <&dpll3_ck>;
-			ti,bit-shift = <27>;
 			ti,max-div = <31>;
 			ti,index-starts-at-one;
 		};
 
-		omap_96m_fck: clock-omap-96m-fck {
+		omap_96m_fck: clock-omap-96m-fck@6 {
+			reg = <6>;
 			#clock-cells = <0>;
 			compatible = "ti,mux-clock";
 			clock-output-names = "omap_96m_fck";
 			clocks = <&cm_96m_fck>, <&sys_ck>;
-			ti,bit-shift = <6>;
 		};
 
-		omap_54m_fck: clock-omap-54m-fck {
+		omap_54m_fck: clock-omap-54m-fck@5 {
+			reg = <5>;
 			#clock-cells = <0>;
 			compatible = "ti,mux-clock";
 			clock-output-names = "omap_54m_fck";
 			clocks = <&dpll4_m3x2_ck>, <&sys_altclk>;
-			ti,bit-shift = <5>;
 		};
 
-		omap_48m_fck: clock-omap-48m-fck {
+		omap_48m_fck: clock-omap-48m-fck@3 {
+			reg = <3>;
 			#clock-cells = <0>;
 			compatible = "ti,mux-clock";
 			clock-output-names = "omap_48m_fck";
 			clocks = <&cm_96m_d2_fck>, <&sys_altclk>;
-			ti,bit-shift = <3>;
 		};
 	};
 
@@ -471,19 +477,21 @@
 		compatible = "ti,clksel";
 		reg = <0xe40>;
 		#clock-cells = <2>;
-		#address-cells = <0>;
+		#address-cells = <1>;
+		#size-cells = <0>;
 
-		dpll4_m3_ck: clock-dpll4-m3 {
+		dpll4_m3_ck: clock-dpll4-m3@8 {
+			reg = <8>;
 			#clock-cells = <0>;
 			compatible = "ti,divider-clock";
 			clock-output-names = "dpll4_m3_ck";
 			clocks = <&dpll4_ck>;
-			ti,bit-shift = <8>;
 			ti,max-div = <32>;
 			ti,index-starts-at-one;
 		};
 
-		dpll4_m4_ck: clock-dpll4-m4 {
+		dpll4_m4_ck: clock-dpll4-m4@0 {
+			reg = <0>;
 			#clock-cells = <0>;
 			compatible = "ti,divider-clock";
 			clock-output-names = "dpll4_m4_ck";
@@ -603,29 +611,31 @@
 		compatible = "ti,clksel";
 		reg = <0xd70>;
 		#clock-cells = <2>;
-		#address-cells = <0>;
+		#address-cells = <1>;
+		#size-cells = <0>;
 
-		clkout2_src_gate_ck: clock-clkout2-src-gate {
+		clkout2_src_gate_ck: clock-clkout2-src-gate@7 {
+			reg = <7>;
 			#clock-cells = <0>;
 			compatible = "ti,composite-no-wait-gate-clock";
 			clock-output-names = "clkout2_src_gate_ck";
 			clocks = <&core_ck>;
-			ti,bit-shift = <7>;
 		};
 
-		clkout2_src_mux_ck: clock-clkout2-src-mux {
+		clkout2_src_mux_ck: clock-clkout2-src-mux@0 {
+			reg = <0>;
 			#clock-cells = <0>;
 			compatible = "ti,composite-mux-clock";
 			clock-output-names = "clkout2_src_mux_ck";
 			clocks = <&core_ck>, <&sys_ck>, <&cm_96m_fck>, <&omap_54m_fck>;
 		};
 
-		sys_clkout2: clock-sys-clkout2 {
+		sys_clkout2: clock-sys-clkout2@3 {
+			reg = <3>;
 			#clock-cells = <0>;
 			compatible = "ti,divider-clock";
 			clock-output-names = "sys_clkout2";
 			clocks = <&clkout2_src_ck>;
-			ti,bit-shift = <3>;
 			ti,max-div = <64>;
 			ti,index-power-of-two;
 		};
@@ -666,9 +676,11 @@
 		compatible = "ti,clksel";
 		reg = <0xa40>;
 		#clock-cells = <2>;
-		#address-cells = <0>;
+		#address-cells = <1>;
+		#size-cells = <0>;
 
-		l3_ick: clock-l3-ick {
+		l3_ick: clock-l3-ick@0 {
+			reg = <0>;
 			#clock-cells = <0>;
 			compatible = "ti,divider-clock";
 			clock-output-names = "l3_ick";
@@ -677,30 +689,30 @@
 			ti,index-starts-at-one;
 		};
 
-		l4_ick: clock-l4-ick {
+		l4_ick: clock-l4-ick@2 {
+			reg = <2>;
 			#clock-cells = <0>;
 			compatible = "ti,divider-clock";
 			clock-output-names = "l4_ick";
 			clocks = <&l3_ick>;
-			ti,bit-shift = <2>;
 			ti,max-div = <3>;
 			ti,index-starts-at-one;
 		};
 
-		gpt10_mux_fck: clock-gpt10-mux-fck {
+		gpt10_mux_fck: clock-gpt10-mux-fck@6 {
+			reg = <6>;
 			#clock-cells = <0>;
 			compatible = "ti,composite-mux-clock";
 			clock-output-names = "gpt10_mux_fck";
 			clocks = <&omap_32k_fck>, <&sys_ck>;
-			ti,bit-shift = <6>;
 		};
 
-		gpt11_mux_fck: clock-gpt11-mux-fck {
+		gpt11_mux_fck: clock-gpt11-mux-fck@7 {
+			reg = <7>;
 			#clock-cells = <0>;
 			compatible = "ti,composite-mux-clock";
 			clock-output-names = "gpt11_mux_fck";
 			clocks = <&omap_32k_fck>, <&sys_ck>;
-			ti,bit-shift = <7>;
 		};
 	};
 
@@ -709,19 +721,21 @@
 		compatible = "ti,clksel";
 		reg = <0xc40>;
 		#clock-cells = <2>;
-		#address-cells = <0>;
+		#address-cells = <1>;
+		#size-cells = <0>;
 
-		rm_ick: clock-rm-ick {
+		rm_ick: clock-rm-ick@1 {
+			reg = <1>;
 			#clock-cells = <0>;
 			compatible = "ti,divider-clock";
 			clock-output-names = "rm_ick";
 			clocks = <&l4_ick>;
-			ti,bit-shift = <1>;
 			ti,max-div = <3>;
 			ti,index-starts-at-one;
 		};
 
-		gpt1_mux_fck: clock-gpt1-mux-fck {
+		gpt1_mux_fck: clock-gpt1-mux-fck@0 {
+			reg = <0>;
 			#clock-cells = <0>;
 			compatible = "ti,composite-mux-clock";
 			clock-output-names = "gpt1_mux_fck";
@@ -734,134 +748,135 @@
 		compatible = "ti,clksel";
 		reg = <0xa00>;
 		#clock-cells = <2>;
-		#address-cells = <0>;
+		#address-cells = <1>;
+		#size-cells = <0>;
 
-		gpt10_gate_fck: clock-gpt10-gate-fck {
+		gpt10_gate_fck: clock-gpt10-gate-fck@11 {
+			reg = <11>;
 			#clock-cells = <0>;
 			compatible = "ti,composite-gate-clock";
 			clock-output-names = "gpt10_gate_fck";
 			clocks = <&sys_ck>;
-			ti,bit-shift = <11>;
 		};
 
-		gpt11_gate_fck: clock-gpt11-gate-fck {
+		gpt11_gate_fck: clock-gpt11-gate-fck@12 {
+			reg = <12>;
 			#clock-cells = <0>;
 			compatible = "ti,composite-gate-clock";
 			clock-output-names = "gpt11_gate_fck";
 			clocks = <&sys_ck>;
-			ti,bit-shift = <12>;
 		};
 
-		mmchs2_fck: clock-mmchs2-fck {
+		mmchs2_fck: clock-mmchs2-fck@25 {
+			reg = <25>;
 			#clock-cells = <0>;
 			compatible = "ti,wait-gate-clock";
 			clock-output-names = "mmchs2_fck";
 			clocks = <&core_96m_fck>;
-			ti,bit-shift = <25>;
 		};
 
-		mmchs1_fck: clock-mmchs1-fck {
+		mmchs1_fck: clock-mmchs1-fck@24 {
+			reg = <24>;
 			#clock-cells = <0>;
 			compatible = "ti,wait-gate-clock";
 			clock-output-names = "mmchs1_fck";
 			clocks = <&core_96m_fck>;
-			ti,bit-shift = <24>;
 		};
 
-		i2c3_fck: clock-i2c3-fck {
+		i2c3_fck: clock-i2c3-fck@17 {
+			reg = <17>;
 			#clock-cells = <0>;
 			compatible = "ti,wait-gate-clock";
 			clock-output-names = "i2c3_fck";
 			clocks = <&core_96m_fck>;
-			ti,bit-shift = <17>;
 		};
 
-		i2c2_fck: clock-i2c2-fck {
+		i2c2_fck: clock-i2c2-fck@16 {
+			reg = <16>;
 			#clock-cells = <0>;
 			compatible = "ti,wait-gate-clock";
 			clock-output-names = "i2c2_fck";
 			clocks = <&core_96m_fck>;
-			ti,bit-shift = <16>;
 		};
 
-		i2c1_fck: clock-i2c1-fck {
+		i2c1_fck: clock-i2c1-fck@15 {
+			reg = <15>;
 			#clock-cells = <0>;
 			compatible = "ti,wait-gate-clock";
 			clock-output-names = "i2c1_fck";
 			clocks = <&core_96m_fck>;
-			ti,bit-shift = <15>;
 		};
 
-		mcbsp5_gate_fck: clock-mcbsp5-gate-fck {
+		mcbsp5_gate_fck: clock-mcbsp5-gate-fck@10 {
+			reg = <10>;
 			#clock-cells = <0>;
 			compatible = "ti,composite-gate-clock";
 			clock-output-names = "mcbsp5_gate_fck";
 			clocks = <&mcbsp_clks>;
-			ti,bit-shift = <10>;
 		};
 
-		mcbsp1_gate_fck: clock-mcbsp1-gate-fck {
+		mcbsp1_gate_fck: clock-mcbsp1-gate-fck@9 {
+			reg = <9>;
 			#clock-cells = <0>;
 			compatible = "ti,composite-gate-clock";
 			clock-output-names = "mcbsp1_gate_fck";
 			clocks = <&mcbsp_clks>;
-			ti,bit-shift = <9>;
 		};
 
-		mcspi4_fck: clock-mcspi4-fck {
+		mcspi4_fck: clock-mcspi4-fck@21 {
+			reg = <21>;
 			#clock-cells = <0>;
 			compatible = "ti,wait-gate-clock";
 			clock-output-names = "mcspi4_fck";
 			clocks = <&core_48m_fck>;
-			ti,bit-shift = <21>;
 		};
 
-		mcspi3_fck: clock-mcspi3-fck {
+		mcspi3_fck: clock-mcspi3-fck@20 {
+			reg = <20>;
 			#clock-cells = <0>;
 			compatible = "ti,wait-gate-clock";
 			clock-output-names = "mcspi3_fck";
 			clocks = <&core_48m_fck>;
-			ti,bit-shift = <20>;
 		};
 
-		mcspi2_fck: clock-mcspi2-fck {
+		mcspi2_fck: clock-mcspi2-fck@19 {
+			reg = <19>;
 			#clock-cells = <0>;
 			compatible = "ti,wait-gate-clock";
 			clock-output-names = "mcspi2_fck";
 			clocks = <&core_48m_fck>;
-			ti,bit-shift = <19>;
 		};
 
-		mcspi1_fck: clock-mcspi1-fck {
+		mcspi1_fck: clock-mcspi1-fck@18 {
+			reg = <18>;
 			#clock-cells = <0>;
 			compatible = "ti,wait-gate-clock";
 			clock-output-names = "mcspi1_fck";
 			clocks = <&core_48m_fck>;
-			ti,bit-shift = <18>;
 		};
 
-		uart2_fck: clock-uart2-fck {
+		uart2_fck: clock-uart2-fck@14 {
+			reg = <14>;
 			#clock-cells = <0>;
 			compatible = "ti,wait-gate-clock";
 			clock-output-names = "uart2_fck";
 			clocks = <&core_48m_fck>;
-			ti,bit-shift = <14>;
 		};
 
-		uart1_fck: clock-uart1-fck {
+		uart1_fck: clock-uart1-fck@13 {
+			reg = <13>;
 			#clock-cells = <0>;
 			compatible = "ti,wait-gate-clock";
 			clock-output-names = "uart1_fck";
 			clocks = <&core_48m_fck>;
-			ti,bit-shift = <13>;
 		};
 
-		hdq_fck: clock-hdq-fck {
+		hdq_fck: clock-hdq-fck@22 {
+			reg = <22>;
 			#clock-cells = <0>;
 			compatible = "ti,wait-gate-clock";
 			clock-output-names = "hdq_fck";
 			clocks = <&core_12m_fck>;
-			ti,bit-shift = <22>;
 		};
 	};
 
@@ -914,166 +929,167 @@
 		compatible = "ti,clksel";
 		reg = <0xa10>;
 		#clock-cells = <2>;
-		#address-cells = <0>;
+		#address-cells = <1>;
+		#size-cells = <0>;
 
-		sdrc_ick: clock-sdrc-ick {
+		sdrc_ick: clock-sdrc-ick@1 {
+			reg = <1>;
 			#clock-cells = <0>;
 			compatible = "ti,wait-gate-clock";
 			clock-output-names = "sdrc_ick";
 			clocks = <&core_l3_ick>;
-			ti,bit-shift = <1>;
 		};
 
-		mmchs2_ick: clock-mmchs2-ick {
+		mmchs2_ick: clock-mmchs2-ick@25 {
+			reg = <25>;
 			#clock-cells = <0>;
 			compatible = "ti,omap3-interface-clock";
 			clock-output-names = "mmchs2_ick";
 			clocks = <&core_l4_ick>;
-			ti,bit-shift = <25>;
 		};
 
-		mmchs1_ick: clock-mmchs1-ick {
+		mmchs1_ick: clock-mmchs1-ick@24 {
+			reg = <24>;
 			#clock-cells = <0>;
 			compatible = "ti,omap3-interface-clock";
 			clock-output-names = "mmchs1_ick";
 			clocks = <&core_l4_ick>;
-			ti,bit-shift = <24>;
 		};
 
-		hdq_ick: clock-hdq-ick {
+		hdq_ick: clock-hdq-ick@22 {
+			reg = <22>;
 			#clock-cells = <0>;
 			compatible = "ti,omap3-interface-clock";
 			clock-output-names = "hdq_ick";
 			clocks = <&core_l4_ick>;
-			ti,bit-shift = <22>;
 		};
 
-		mcspi4_ick: clock-mcspi4-ick {
+		mcspi4_ick: clock-mcspi4-ick@21 {
+			reg = <21>;
 			#clock-cells = <0>;
 			compatible = "ti,omap3-interface-clock";
 			clock-output-names = "mcspi4_ick";
 			clocks = <&core_l4_ick>;
-			ti,bit-shift = <21>;
 		};
 
-		mcspi3_ick: clock-mcspi3-ick {
+		mcspi3_ick: clock-mcspi3-ick@20 {
+			reg = <20>;
 			#clock-cells = <0>;
 			compatible = "ti,omap3-interface-clock";
 			clock-output-names = "mcspi3_ick";
 			clocks = <&core_l4_ick>;
-			ti,bit-shift = <20>;
 		};
 
-		mcspi2_ick: clock-mcspi2-ick {
+		mcspi2_ick: clock-mcspi2-ick@19 {
+			reg = <19>;
 			#clock-cells = <0>;
 			compatible = "ti,omap3-interface-clock";
 			clock-output-names = "mcspi2_ick";
 			clocks = <&core_l4_ick>;
-			ti,bit-shift = <19>;
 		};
 
-		mcspi1_ick: clock-mcspi1-ick {
+		mcspi1_ick: clock-mcspi1-ick@18 {
+			reg = <18>;
 			#clock-cells = <0>;
 			compatible = "ti,omap3-interface-clock";
 			clock-output-names = "mcspi1_ick";
 			clocks = <&core_l4_ick>;
-			ti,bit-shift = <18>;
 		};
 
-		i2c3_ick: clock-i2c3-ick {
+		i2c3_ick: clock-i2c3-ick@17 {
+			reg = <17>;
 			#clock-cells = <0>;
 			compatible = "ti,omap3-interface-clock";
 			clock-output-names = "i2c3_ick";
 			clocks = <&core_l4_ick>;
-			ti,bit-shift = <17>;
 		};
 
-		i2c2_ick: clock-i2c2-ick {
+		i2c2_ick: clock-i2c2-ick@16 {
+			reg = <16>;
 			#clock-cells = <0>;
 			compatible = "ti,omap3-interface-clock";
 			clock-output-names = "i2c2_ick";
 			clocks = <&core_l4_ick>;
-			ti,bit-shift = <16>;
 		};
 
-		i2c1_ick: clock-i2c1-ick {
+		i2c1_ick: clock-i2c1-ick@15 {
+			reg = <15>;
 			#clock-cells = <0>;
 			compatible = "ti,omap3-interface-clock";
 			clock-output-names = "i2c1_ick";
 			clocks = <&core_l4_ick>;
-			ti,bit-shift = <15>;
 		};
 
-		uart2_ick: clock-uart2-ick {
+		uart2_ick: clock-uart2-ick@14 {
+			reg = <14>;
 			#clock-cells = <0>;
 			compatible = "ti,omap3-interface-clock";
 			clock-output-names = "uart2_ick";
 			clocks = <&core_l4_ick>;
-			ti,bit-shift = <14>;
 		};
 
-		uart1_ick: clock-uart1-ick {
+		uart1_ick: clock-uart1-ick@13 {
+			reg = <13>;
 			#clock-cells = <0>;
 			compatible = "ti,omap3-interface-clock";
 			clock-output-names = "uart1_ick";
 			clocks = <&core_l4_ick>;
-			ti,bit-shift = <13>;
 		};
 
-		gpt11_ick: clock-gpt11-ick {
+		gpt11_ick: clock-gpt11-ick@12 {
+			reg = <12>;
 			#clock-cells = <0>;
 			compatible = "ti,omap3-interface-clock";
 			clock-output-names = "gpt11_ick";
 			clocks = <&core_l4_ick>;
-			ti,bit-shift = <12>;
 		};
 
-		gpt10_ick: clock-gpt10-ick {
+		gpt10_ick: clock-gpt10-ick@11 {
+			reg = <11>;
 			#clock-cells = <0>;
 			compatible = "ti,omap3-interface-clock";
 			clock-output-names = "gpt10_ick";
 			clocks = <&core_l4_ick>;
-			ti,bit-shift = <11>;
 		};
 
-		mcbsp5_ick: clock-mcbsp5-ick {
+		mcbsp5_ick: clock-mcbsp5-ick@10 {
+			reg = <10>;
 			#clock-cells = <0>;
 			compatible = "ti,omap3-interface-clock";
 			clock-output-names = "mcbsp5_ick";
 			clocks = <&core_l4_ick>;
-			ti,bit-shift = <10>;
 		};
 
-		mcbsp1_ick: clock-mcbsp1-ick {
+		mcbsp1_ick: clock-mcbsp1-ick@9 {
+			reg = <9>;
 			#clock-cells = <0>;
 			compatible = "ti,omap3-interface-clock";
 			clock-output-names = "mcbsp1_ick";
 			clocks = <&core_l4_ick>;
-			ti,bit-shift = <9>;
 		};
 
-		omapctrl_ick: clock-omapctrl-ick {
+		omapctrl_ick: clock-omapctrl-ick@6 {
+			reg = <6>;
 			#clock-cells = <0>;
 			compatible = "ti,omap3-interface-clock";
 			clock-output-names = "omapctrl_ick";
 			clocks = <&core_l4_ick>;
-			ti,bit-shift = <6>;
 		};
 
-		aes2_ick: clock-aes2-ick {
+		aes2_ick: clock-aes2-ick@28 {
+			reg = <28>;
 			#clock-cells = <0>;
 			compatible = "ti,omap3-interface-clock";
 			clock-output-names = "aes2_ick";
 			clocks = <&core_l4_ick>;
-			ti,bit-shift = <28>;
 		};
 
-		sha12_ick: clock-sha12-ick {
+		sha12_ick: clock-sha12-ick@27 {
+			reg = <27>;
 			#clock-cells = <0>;
 			compatible = "ti,omap3-interface-clock";
 			clock-output-names = "sha12_ick";
 			clocks = <&core_l4_ick>;
-			ti,bit-shift = <27>;
 		};
 	};
 
@@ -1136,30 +1152,31 @@
 		compatible = "ti,clksel";
 		reg = <0xc00>;
 		#clock-cells = <2>;
-		#address-cells = <0>;
+		#address-cells = <1>;
+		#size-cells = <0>;
 
-		gpt1_gate_fck: clock-gpt1-gate-fck {
+		gpt1_gate_fck: clock-gpt1-gate-fck@0 {
+			reg = <0>;
 			#clock-cells = <0>;
 			compatible = "ti,composite-gate-clock";
 			clock-output-names = "gpt1_gate_fck";
 			clocks = <&sys_ck>;
-			ti,bit-shift = <0>;
 		};
 
-		gpio1_dbck: clock-gpio1-dbck {
+		gpio1_dbck: clock-gpio1-dbck@3 {
+			reg = <3>;
 			#clock-cells = <0>;
 			compatible = "ti,gate-clock";
 			clock-output-names = "gpio1_dbck";
 			clocks = <&wkup_32k_fck>;
-			ti,bit-shift = <3>;
 		};
 
-		wdt2_fck: clock-wdt2-fck {
+		wdt2_fck: clock-wdt2-fck@5 {
+			reg = <5>;
 			#clock-cells = <0>;
 			compatible = "ti,wait-gate-clock";
 			clock-output-names = "wdt2_fck";
 			clocks = <&wkup_32k_fck>;
-			ti,bit-shift = <5>;
 		};
 	};
 
@@ -1182,54 +1199,55 @@
 		compatible = "ti,clksel";
 		reg = <0xc10>;
 		#clock-cells = <2>;
-		#address-cells = <0>;
+		#address-cells = <1>;
+		#size-cells = <0>;
 
-		wdt2_ick: clock-wdt2-ick {
+		wdt2_ick: clock-wdt2-ick@5 {
+			reg = <5>;
 			#clock-cells = <0>;
 			compatible = "ti,omap3-interface-clock";
 			clock-output-names = "wdt2_ick";
 			clocks = <&wkup_l4_ick>;
-			ti,bit-shift = <5>;
 		};
 
-		wdt1_ick: clock-wdt1-ick {
+		wdt1_ick: clock-wdt1-ick@4 {
+			reg = <4>;
 			#clock-cells = <0>;
 			compatible = "ti,omap3-interface-clock";
 			clock-output-names = "wdt1_ick";
 			clocks = <&wkup_l4_ick>;
-			ti,bit-shift = <4>;
 		};
 
-		gpio1_ick: clock-gpio1-ick {
+		gpio1_ick: clock-gpio1-ick@3 {
+			reg = <3>;
 			#clock-cells = <0>;
 			compatible = "ti,omap3-interface-clock";
 			clock-output-names = "gpio1_ick";
 			clocks = <&wkup_l4_ick>;
-			ti,bit-shift = <3>;
 		};
 
-		omap_32ksync_ick: clock-omap-32ksync-ick {
+		omap_32ksync_ick: clock-omap-32ksync-ick@2 {
+			reg = <2>;
 			#clock-cells = <0>;
 			compatible = "ti,omap3-interface-clock";
 			clock-output-names = "omap_32ksync_ick";
 			clocks = <&wkup_l4_ick>;
-			ti,bit-shift = <2>;
 		};
 
-		gpt12_ick: clock-gpt12-ick {
+		gpt12_ick: clock-gpt12-ick@1 {
+			reg = <1>;
 			#clock-cells = <0>;
 			compatible = "ti,omap3-interface-clock";
 			clock-output-names = "gpt12_ick";
 			clocks = <&wkup_l4_ick>;
-			ti,bit-shift = <1>;
 		};
 
-		gpt1_ick: clock-gpt1-ick {
+		gpt1_ick: clock-gpt1-ick@0 {
+			reg = <0>;
 			#clock-cells = <0>;
 			compatible = "ti,omap3-interface-clock";
 			clock-output-names = "gpt1_ick";
 			clocks = <&wkup_l4_ick>;
-			ti,bit-shift = <0>;
 		};
 	};
 
@@ -1254,150 +1272,151 @@
 		compatible = "ti,clksel";
 		reg = <0x1000>;
 		#clock-cells = <2>;
-		#address-cells = <0>;
+		#address-cells = <1>;
+		#size-cells = <0>;
 
-		uart3_fck: clock-uart3-fck {
+		uart3_fck: clock-uart3-fck@11 {
+			reg = <11>;
 			#clock-cells = <0>;
 			compatible = "ti,wait-gate-clock";
 			clock-output-names = "uart3_fck";
 			clocks = <&per_48m_fck>;
-			ti,bit-shift = <11>;
 		};
 
-		gpt2_gate_fck: clock-gpt2-gate-fck {
+		gpt2_gate_fck: clock-gpt2-gate-fck@3 {
+			reg = <3>;
 			#clock-cells = <0>;
 			compatible = "ti,composite-gate-clock";
 			clock-output-names = "gpt2_gate_fck";
 			clocks = <&sys_ck>;
-			ti,bit-shift = <3>;
 		};
 
-		gpt3_gate_fck: clock-gpt3-gate-fck {
+		gpt3_gate_fck: clock-gpt3-gate-fck@4 {
+			reg = <4>;
 			#clock-cells = <0>;
 			compatible = "ti,composite-gate-clock";
 			clock-output-names = "gpt3_gate_fck";
 			clocks = <&sys_ck>;
-			ti,bit-shift = <4>;
 		};
 
-		gpt4_gate_fck: clock-gpt4-gate-fck {
+		gpt4_gate_fck: clock-gpt4-gate-fck@5 {
+			reg = <5>;
 			#clock-cells = <0>;
 			compatible = "ti,composite-gate-clock";
 			clock-output-names = "gpt4_gate_fck";
 			clocks = <&sys_ck>;
-			ti,bit-shift = <5>;
 		};
 
-		gpt5_gate_fck: clock-gpt5-gate-fck {
+		gpt5_gate_fck: clock-gpt5-gate-fck@6 {
+			reg = <6>;
 			#clock-cells = <0>;
 			compatible = "ti,composite-gate-clock";
 			clock-output-names = "gpt5_gate_fck";
 			clocks = <&sys_ck>;
-			ti,bit-shift = <6>;
 		};
 
-		gpt6_gate_fck: clock-gpt6-gate-fck {
+		gpt6_gate_fck: clock-gpt6-gate-fck@7 {
+			reg = <7>;
 			#clock-cells = <0>;
 			compatible = "ti,composite-gate-clock";
 			clock-output-names = "gpt6_gate_fck";
 			clocks = <&sys_ck>;
-			ti,bit-shift = <7>;
 		};
 
-		gpt7_gate_fck: clock-gpt7-gate-fck {
+		gpt7_gate_fck: clock-gpt7-gate-fck@8 {
+			reg = <8>;
 			#clock-cells = <0>;
 			compatible = "ti,composite-gate-clock";
 			clock-output-names = "gpt7_gate_fck";
 			clocks = <&sys_ck>;
-			ti,bit-shift = <8>;
 		};
 
-		gpt8_gate_fck: clock-gpt8-gate-fck {
+		gpt8_gate_fck: clock-gpt8-gate-fck@9 {
+			reg = <9>;
 			#clock-cells = <0>;
 			compatible = "ti,composite-gate-clock";
 			clock-output-names = "gpt8_gate_fck";
 			clocks = <&sys_ck>;
-			ti,bit-shift = <9>;
 		};
 
-		gpt9_gate_fck: clock-gpt9-gate-fck {
+		gpt9_gate_fck: clock-gpt9-gate-fck@10 {
+			reg = <10>;
 			#clock-cells = <0>;
 			compatible = "ti,composite-gate-clock";
 			clock-output-names = "gpt9_gate_fck";
 			clocks = <&sys_ck>;
-			ti,bit-shift = <10>;
 		};
 
-		gpio6_dbck: clock-gpio6-dbck {
+		gpio6_dbck: clock-gpio6-dbck@17 {
+			reg = <17>;
 			#clock-cells = <0>;
 			compatible = "ti,gate-clock";
 			clock-output-names = "gpio6_dbck";
 			clocks = <&per_32k_alwon_fck>;
-			ti,bit-shift = <17>;
 		};
 
-		gpio5_dbck: clock-gpio5-dbck {
+		gpio5_dbck: clock-gpio5-dbck@16 {
+			reg = <16>;
 			#clock-cells = <0>;
 			compatible = "ti,gate-clock";
 			clock-output-names = "gpio5_dbck";
 			clocks = <&per_32k_alwon_fck>;
-			ti,bit-shift = <16>;
 		};
 
-		gpio4_dbck: clock-gpio4-dbck {
+		gpio4_dbck: clock-gpio4-dbck@15 {
+			reg = <15>;
 			#clock-cells = <0>;
 			compatible = "ti,gate-clock";
 			clock-output-names = "gpio4_dbck";
 			clocks = <&per_32k_alwon_fck>;
-			ti,bit-shift = <15>;
 		};
 
-		gpio3_dbck: clock-gpio3-dbck {
+		gpio3_dbck: clock-gpio3-dbck@14 {
+			reg = <14>;
 			#clock-cells = <0>;
 			compatible = "ti,gate-clock";
 			clock-output-names = "gpio3_dbck";
 			clocks = <&per_32k_alwon_fck>;
-			ti,bit-shift = <14>;
 		};
 
-		gpio2_dbck: clock-gpio2-dbck {
+		gpio2_dbck: clock-gpio2-dbck@13 {
+			reg = <13>;
 			#clock-cells = <0>;
 			compatible = "ti,gate-clock";
 			clock-output-names = "gpio2_dbck";
 			clocks = <&per_32k_alwon_fck>;
-			ti,bit-shift = <13>;
 		};
 
-		wdt3_fck: clock-wdt3-fck {
+		wdt3_fck: clock-wdt3-fck@12 {
+			reg = <12>;
 			#clock-cells = <0>;
 			compatible = "ti,wait-gate-clock";
 			clock-output-names = "wdt3_fck";
 			clocks = <&per_32k_alwon_fck>;
-			ti,bit-shift = <12>;
 		};
 
-		mcbsp2_gate_fck: clock-mcbsp2-gate-fck {
+		mcbsp2_gate_fck: clock-mcbsp2-gate-fck@0 {
+			reg = <0>;
 			#clock-cells = <0>;
 			compatible = "ti,composite-gate-clock";
 			clock-output-names = "mcbsp2_gate_fck";
 			clocks = <&mcbsp_clks>;
-			ti,bit-shift = <0>;
 		};
 
-		mcbsp3_gate_fck: clock-mcbsp3-gate-fck {
+		mcbsp3_gate_fck: clock-mcbsp3-gate-fck@1 {
+			reg = <1>;
 			#clock-cells = <0>;
 			compatible = "ti,composite-gate-clock";
 			clock-output-names = "mcbsp3_gate_fck";
 			clocks = <&mcbsp_clks>;
-			ti,bit-shift = <1>;
 		};
 
-		mcbsp4_gate_fck: clock-mcbsp4-gate-fck {
+		mcbsp4_gate_fck: clock-mcbsp4-gate-fck@2 {
+			reg = <2>;
 			#clock-cells = <0>;
 			compatible = "ti,composite-gate-clock";
 			clock-output-names = "mcbsp4_gate_fck";
 			clocks = <&mcbsp_clks>;
-			ti,bit-shift = <2>;
 		};
 	};
 
@@ -1406,69 +1425,71 @@
 		compatible = "ti,clksel";
 		reg = <0x1040>;
 		#clock-cells = <2>;
-		#address-cells = <0>;
+		#address-cells = <1>;
+		#size-cells = <0>;
 
-		gpt2_mux_fck: clock-gpt2-mux-fck {
+		gpt2_mux_fck: clock-gpt2-mux-fck@0 {
+			reg = <0>;
 			#clock-cells = <0>;
 			compatible = "ti,composite-mux-clock";
 			clock-output-names = "gpt2_mux_fck";
 			clocks = <&omap_32k_fck>, <&sys_ck>;
 		};
 
-		gpt3_mux_fck: clock-gpt3-mux-fck {
+		gpt3_mux_fck: clock-gpt3-mux-fck@1 {
+			reg = <1>;
 			#clock-cells = <0>;
 			compatible = "ti,composite-mux-clock";
 			clock-output-names = "gpt3_mux_fck";
 			clocks = <&omap_32k_fck>, <&sys_ck>;
-			ti,bit-shift = <1>;
 		};
 
-		gpt4_mux_fck: clock-gpt4-mux-fck {
+		gpt4_mux_fck: clock-gpt4-mux-fck@2 {
+			reg = <2>;
 			#clock-cells = <0>;
 			compatible = "ti,composite-mux-clock";
 			clock-output-names = "gpt4_mux_fck";
 			clocks = <&omap_32k_fck>, <&sys_ck>;
-			ti,bit-shift = <2>;
 		};
 
-		gpt5_mux_fck: clock-gpt5-mux-fck {
+		gpt5_mux_fck: clock-gpt5-mux-fck@3 {
+			reg = <3>;
 			#clock-cells = <0>;
 			compatible = "ti,composite-mux-clock";
 			clock-output-names = "gpt5_mux_fck";
 			clocks = <&omap_32k_fck>, <&sys_ck>;
-			ti,bit-shift = <3>;
 		};
 
-		gpt6_mux_fck: clock-gpt6-mux-fck {
+		gpt6_mux_fck: clock-gpt6-mux-fck@4 {
+			reg = <4>;
 			#clock-cells = <0>;
 			compatible = "ti,composite-mux-clock";
 			clock-output-names = "gpt6_mux_fck";
 			clocks = <&omap_32k_fck>, <&sys_ck>;
-			ti,bit-shift = <4>;
 		};
 
-		gpt7_mux_fck: clock-gpt7-mux-fck {
+		gpt7_mux_fck: clock-gpt7-mux-fck@5 {
+			reg = <5>;
 			#clock-cells = <0>;
 			compatible = "ti,composite-mux-clock";
 			clock-output-names = "gpt7_mux_fck";
 			clocks = <&omap_32k_fck>, <&sys_ck>;
-			ti,bit-shift = <5>;
 		};
 
-		gpt8_mux_fck: clock-gpt8-mux-fck {
+		gpt8_mux_fck: clock-gpt8-mux-fck@6 {
+			reg = <6>;
 			#clock-cells = <0>;
 			compatible = "ti,composite-mux-clock";
 			clock-output-names = "gpt8_mux_fck";
 			clocks = <&omap_32k_fck>, <&sys_ck>;
-			ti,bit-shift = <6>;
 		};
 
-		gpt9_mux_fck: clock-gpt9-mux-fck {
+		gpt9_mux_fck: clock-gpt9-mux-fck@7 {
+			reg = <7>;
 			#clock-cells = <0>;
 			compatible = "ti,composite-mux-clock";
 			clock-output-names = "gpt9_mux_fck";
 			clocks = <&omap_32k_fck>, <&sys_ck>;
-			ti,bit-shift = <7>;
 		};
 	};
 
@@ -1541,158 +1562,159 @@
 		compatible = "ti,clksel";
 		reg = <0x1010>;
 		#clock-cells = <2>;
-		#address-cells = <0>;
+		#address-cells = <1>;
+		#size-cells = <0>;
 
-		gpio6_ick: clock-gpio6-ick {
+		gpio6_ick: clock-gpio6-ick@17 {
+			reg = <17>;
 			#clock-cells = <0>;
 			compatible = "ti,omap3-interface-clock";
 			clock-output-names = "gpio6_ick";
 			clocks = <&per_l4_ick>;
-			ti,bit-shift = <17>;
 		};
 
-		gpio5_ick: clock-gpio5-ick {
+		gpio5_ick: clock-gpio5-ick@16 {
+			reg = <16>;
 			#clock-cells = <0>;
 			compatible = "ti,omap3-interface-clock";
 			clock-output-names = "gpio5_ick";
 			clocks = <&per_l4_ick>;
-			ti,bit-shift = <16>;
 		};
 
-		gpio4_ick: clock-gpio4-ick {
+		gpio4_ick: clock-gpio4-ick@15 {
+			reg = <15>;
 			#clock-cells = <0>;
 			compatible = "ti,omap3-interface-clock";
 			clock-output-names = "gpio4_ick";
 			clocks = <&per_l4_ick>;
-			ti,bit-shift = <15>;
 		};
 
-		gpio3_ick: clock-gpio3-ick {
+		gpio3_ick: clock-gpio3-ick@14 {
+			reg = <14>;
 			#clock-cells = <0>;
 			compatible = "ti,omap3-interface-clock";
 			clock-output-names = "gpio3_ick";
 			clocks = <&per_l4_ick>;
-			ti,bit-shift = <14>;
 		};
 
-		gpio2_ick: clock-gpio2-ick {
+		gpio2_ick: clock-gpio2-ick@13 {
+			reg = <13>;
 			#clock-cells = <0>;
 			compatible = "ti,omap3-interface-clock";
 			clock-output-names = "gpio2_ick";
 			clocks = <&per_l4_ick>;
-			ti,bit-shift = <13>;
 		};
 
-		wdt3_ick: clock-wdt3-ick {
+		wdt3_ick: clock-wdt3-ick@12 {
+			reg = <12>;
 			#clock-cells = <0>;
 			compatible = "ti,omap3-interface-clock";
 			clock-output-names = "wdt3_ick";
 			clocks = <&per_l4_ick>;
-			ti,bit-shift = <12>;
 		};
 
-		uart3_ick: clock-uart3-ick {
+		uart3_ick: clock-uart3-ick@11 {
+			reg = <11>;
 			#clock-cells = <0>;
 			compatible = "ti,omap3-interface-clock";
 			clock-output-names = "uart3_ick";
 			clocks = <&per_l4_ick>;
-			ti,bit-shift = <11>;
 		};
 
-		uart4_ick: clock-uart4-ick {
+		uart4_ick: clock-uart4-ick@18 {
+			reg = <18>;
 			#clock-cells = <0>;
 			compatible = "ti,omap3-interface-clock";
 			clock-output-names = "uart4_ick";
 			clocks = <&per_l4_ick>;
-			ti,bit-shift = <18>;
 		};
 
-		gpt9_ick: clock-gpt9-ick {
+		gpt9_ick: clock-gpt9-ick@10 {
+			reg = <10>;
 			#clock-cells = <0>;
 			compatible = "ti,omap3-interface-clock";
 			clock-output-names = "gpt9_ick";
 			clocks = <&per_l4_ick>;
-			ti,bit-shift = <10>;
 		};
 
-		gpt8_ick: clock-gpt8-ick {
+		gpt8_ick: clock-gpt8-ick@9 {
+			reg = <9>;
 			#clock-cells = <0>;
 			compatible = "ti,omap3-interface-clock";
 			clock-output-names = "gpt8_ick";
 			clocks = <&per_l4_ick>;
-			ti,bit-shift = <9>;
 		};
 
-		gpt7_ick: clock-gpt7-ick {
+		gpt7_ick: clock-gpt7-ick@8 {
+			reg = <8>;
 			#clock-cells = <0>;
 			compatible = "ti,omap3-interface-clock";
 			clock-output-names = "gpt7_ick";
 			clocks = <&per_l4_ick>;
-			ti,bit-shift = <8>;
 		};
 
-		gpt6_ick: clock-gpt6-ick {
+		gpt6_ick: clock-gpt6-ick@7 {
+			reg = <7>;
 			#clock-cells = <0>;
 			compatible = "ti,omap3-interface-clock";
 			clock-output-names = "gpt6_ick";
 			clocks = <&per_l4_ick>;
-			ti,bit-shift = <7>;
 		};
 
-		gpt5_ick: clock-gpt5-ick {
+		gpt5_ick: clock-gpt5-ick@6 {
+			reg = <6>;
 			#clock-cells = <0>;
 			compatible = "ti,omap3-interface-clock";
 			clock-output-names = "gpt5_ick";
 			clocks = <&per_l4_ick>;
-			ti,bit-shift = <6>;
 		};
 
-		gpt4_ick: clock-gpt4-ick {
+		gpt4_ick: clock-gpt4-ick@5 {
+			reg = <5>;
 			#clock-cells = <0>;
 			compatible = "ti,omap3-interface-clock";
 			clock-output-names = "gpt4_ick";
 			clocks = <&per_l4_ick>;
-			ti,bit-shift = <5>;
 		};
 
-		gpt3_ick: clock-gpt3-ick {
+		gpt3_ick: clock-gpt3-ick@4 {
+			reg = <4>;
 			#clock-cells = <0>;
 			compatible = "ti,omap3-interface-clock";
 			clock-output-names = "gpt3_ick";
 			clocks = <&per_l4_ick>;
-			ti,bit-shift = <4>;
 		};
 
-		gpt2_ick: clock-gpt2-ick {
+		gpt2_ick: clock-gpt2-ick@3 {
+			reg = <3>;
 			#clock-cells = <0>;
 			compatible = "ti,omap3-interface-clock";
 			clock-output-names = "gpt2_ick";
 			clocks = <&per_l4_ick>;
-			ti,bit-shift = <3>;
 		};
 
-		mcbsp2_ick: clock-mcbsp2-ick {
+		mcbsp2_ick: clock-mcbsp2-ick@0 {
+			reg = <0>;
 			#clock-cells = <0>;
 			compatible = "ti,omap3-interface-clock";
 			clock-output-names = "mcbsp2_ick";
 			clocks = <&per_l4_ick>;
-			ti,bit-shift = <0>;
 		};
 
-		mcbsp3_ick: clock-mcbsp3-ick {
+		mcbsp3_ick: clock-mcbsp3-ick@1 {
+			reg = <1>;
 			#clock-cells = <0>;
 			compatible = "ti,omap3-interface-clock";
 			clock-output-names = "mcbsp3_ick";
 			clocks = <&per_l4_ick>;
-			ti,bit-shift = <1>;
 		};
 
-		mcbsp4_ick: clock-mcbsp4-ick {
+		mcbsp4_ick: clock-mcbsp4-ick@2 {
+			reg = <2>;
 			#clock-cells = <0>;
 			compatible = "ti,omap3-interface-clock";
 			clock-output-names = "mcbsp4_ick";
 			clocks = <&per_l4_ick>;
-			ti,bit-shift = <2>;
 		};
 	};
 
diff --git a/arch/arm/include/asm/ptrace.h b/arch/arm/include/asm/ptrace.h
index 7f44e88d1f25..14a38cc67e0b 100644
--- a/arch/arm/include/asm/ptrace.h
+++ b/arch/arm/include/asm/ptrace.h
@@ -10,6 +10,7 @@
 #include <uapi/asm/ptrace.h>
 
 #ifndef __ASSEMBLY__
+#include <linux/bitfield.h>
 #include <linux/types.h>
 
 struct pt_regs {
@@ -35,8 +36,8 @@ struct svc_pt_regs {
 
 #ifndef CONFIG_CPU_V7M
 #define isa_mode(regs) \
-	((((regs)->ARM_cpsr & PSR_J_BIT) >> (__ffs(PSR_J_BIT) - 1)) | \
-	 (((regs)->ARM_cpsr & PSR_T_BIT) >> (__ffs(PSR_T_BIT))))
+	(FIELD_GET(PSR_J_BIT, (regs)->ARM_cpsr) << 1 | \
+	 FIELD_GET(PSR_T_BIT, (regs)->ARM_cpsr))
 #else
 #define isa_mode(regs) 1 /* Thumb */
 #endif
diff --git a/arch/arm/include/debug/brcmstb.S b/arch/arm/include/debug/brcmstb.S
index f6175e6e28cd..3f7d68740ed4 100644
--- a/arch/arm/include/debug/brcmstb.S
+++ b/arch/arm/include/debug/brcmstb.S
@@ -27,6 +27,7 @@
 #define UARTA_72165		UARTA_7278
 #define UARTA_7364		REG_PHYS_ADDR(0x40b000)
 #define UARTA_7366		UARTA_7364
+#define UARTA_74165		UARTA_7278
 #define UARTA_74371		REG_PHYS_ADDR(0x406b00)
 #define UARTA_7439		REG_PHYS_ADDR(0x40a900)
 #define UARTA_7445		REG_PHYS_ADDR(0x40ab00)
@@ -88,9 +89,10 @@ ARM_BE8(	rev	\rv, \rv )
 30:		checkuart(\rp, \rv, 0x72780000, 7278)
 31:		checkuart(\rp, \rv, 0x73640000, 7364)
 32:		checkuart(\rp, \rv, 0x73660000, 7366)
-33:		checkuart(\rp, \rv, 0x07437100, 74371)
-34:		checkuart(\rp, \rv, 0x74390000, 7439)
-35:		checkuart(\rp, \rv, 0x74450000, 7445)
+33:		checkuart(\rp, \rv, 0x07416500, 74165)
+34:		checkuart(\rp, \rv, 0x07437100, 74371)
+35:		checkuart(\rp, \rv, 0x74390000, 7439)
+36:		checkuart(\rp, \rv, 0x74450000, 7445)
 
 		/* No valid UART found */
 90:		mov	\rp, #0
diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile
index 6a9de826ffd3..89a77e3f51d2 100644
--- a/arch/arm/kernel/Makefile
+++ b/arch/arm/kernel/Makefile
@@ -76,8 +76,6 @@ obj-$(CONFIG_HAVE_HW_BREAKPOINT)	+= hw_breakpoint.o
 obj-$(CONFIG_CPU_XSCALE)	+= xscale-cp0.o
 obj-$(CONFIG_CPU_XSC3)		+= xscale-cp0.o
 obj-$(CONFIG_CPU_MOHAWK)	+= xscale-cp0.o
-obj-$(CONFIG_CPU_PJ4)		+= pj4-cp0.o
-obj-$(CONFIG_CPU_PJ4B)		+= pj4-cp0.o
 obj-$(CONFIG_IWMMXT)		+= iwmmxt.o
 obj-$(CONFIG_PERF_EVENTS)	+= perf_regs.o perf_callchain.o
 obj-$(CONFIG_HW_PERF_EVENTS)	+= perf_event_xscale.o perf_event_v6.o \
diff --git a/arch/arm/kernel/iwmmxt.S b/arch/arm/kernel/iwmmxt.S
index a0218c4867b9..4a335d3c5969 100644
--- a/arch/arm/kernel/iwmmxt.S
+++ b/arch/arm/kernel/iwmmxt.S
@@ -18,18 +18,6 @@
 #include <asm/assembler.h>
 #include "iwmmxt.h"
 
-#if defined(CONFIG_CPU_PJ4) || defined(CONFIG_CPU_PJ4B)
-#define PJ4(code...)		code
-#define XSC(code...)
-#elif defined(CONFIG_CPU_MOHAWK) || \
-	defined(CONFIG_CPU_XSC3) || \
-	defined(CONFIG_CPU_XSCALE)
-#define PJ4(code...)
-#define XSC(code...)		code
-#else
-#error "Unsupported iWMMXt architecture"
-#endif
-
 #define MMX_WR0		 	(0x00)
 #define MMX_WR1		 	(0x08)
 #define MMX_WR2		 	(0x10)
@@ -81,17 +69,13 @@ ENDPROC(iwmmxt_undef_handler)
 ENTRY(iwmmxt_task_enable)
 	inc_preempt_count r10, r3
 
-	XSC(mrc	p15, 0, r2, c15, c1, 0)
-	PJ4(mrc p15, 0, r2, c1, c0, 2)
+	mrc	p15, 0, r2, c15, c1, 0
 	@ CP0 and CP1 accessible?
-	XSC(tst	r2, #0x3)
-	PJ4(tst	r2, #0xf)
+	tst	r2, #0x3
 	bne	4f				@ if so no business here
 	@ enable access to CP0 and CP1
-	XSC(orr	r2, r2, #0x3)
-	XSC(mcr	p15, 0, r2, c15, c1, 0)
-	PJ4(orr	r2, r2, #0xf)
-	PJ4(mcr	p15, 0, r2, c1, c0, 2)
+	orr	r2, r2, #0x3
+	mcr	p15, 0, r2, c15, c1, 0
 
 	ldr	r3, =concan_owner
 	ldr	r2, [r0, #S_PC]			@ current task pc value
@@ -218,12 +202,9 @@ ENTRY(iwmmxt_task_disable)
 	bne	1f				@ no: quit
 
 	@ enable access to CP0 and CP1
-	XSC(mrc	p15, 0, r4, c15, c1, 0)
-	XSC(orr	r4, r4, #0x3)
-	XSC(mcr	p15, 0, r4, c15, c1, 0)
-	PJ4(mrc p15, 0, r4, c1, c0, 2)
-	PJ4(orr	r4, r4, #0xf)
-	PJ4(mcr	p15, 0, r4, c1, c0, 2)
+	mrc	p15, 0, r4, c15, c1, 0
+	orr	r4, r4, #0x3
+	mcr	p15, 0, r4, c15, c1, 0
 
 	mov	r0, #0				@ nothing to load
 	str	r0, [r3]			@ no more current owner
@@ -232,10 +213,8 @@ ENTRY(iwmmxt_task_disable)
 	bl	concan_save
 
 	@ disable access to CP0 and CP1
-	XSC(bic	r4, r4, #0x3)
-	XSC(mcr	p15, 0, r4, c15, c1, 0)
-	PJ4(bic	r4, r4, #0xf)
-	PJ4(mcr	p15, 0, r4, c1, c0, 2)
+	bic	r4, r4, #0x3
+	mcr	p15, 0, r4, c15, c1, 0
 
 	mrc	p15, 0, r2, c2, c0, 0
 	mov	r2, r2				@ cpwait
@@ -330,11 +309,9 @@ ENDPROC(iwmmxt_task_restore)
  */
 ENTRY(iwmmxt_task_switch)
 
-	XSC(mrc	p15, 0, r1, c15, c1, 0)
-	PJ4(mrc	p15, 0, r1, c1, c0, 2)
+	mrc	p15, 0, r1, c15, c1, 0
 	@ CP0 and CP1 accessible?
-	XSC(tst	r1, #0x3)
-	PJ4(tst	r1, #0xf)
+	tst	r1, #0x3
 	bne	1f				@ yes: block them for next task
 
 	ldr	r2, =concan_owner
@@ -344,10 +321,8 @@ ENTRY(iwmmxt_task_switch)
 	retne	lr				@ no: leave Concan disabled
 
 1:	@ flip Concan access
-	XSC(eor	r1, r1, #0x3)
-	XSC(mcr	p15, 0, r1, c15, c1, 0)
-	PJ4(eor r1, r1, #0xf)
-	PJ4(mcr	p15, 0, r1, c1, c0, 2)
+	eor	r1, r1, #0x3
+	mcr	p15, 0, r1, c15, c1, 0
 
 	mrc	p15, 0, r1, c2, c0, 0
 	sub	pc, lr, r1, lsr #32		@ cpwait and return
diff --git a/arch/arm/kernel/pj4-cp0.c b/arch/arm/kernel/pj4-cp0.c
deleted file mode 100644
index 4bca8098c4ff..000000000000
--- a/arch/arm/kernel/pj4-cp0.c
+++ /dev/null
@@ -1,135 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * linux/arch/arm/kernel/pj4-cp0.c
- *
- * PJ4 iWMMXt coprocessor context switching and handling
- *
- * Copyright (c) 2010 Marvell International Inc.
- */
-
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/signal.h>
-#include <linux/sched.h>
-#include <linux/init.h>
-#include <linux/io.h>
-#include <asm/thread_notify.h>
-#include <asm/cputype.h>
-
-static int iwmmxt_do(struct notifier_block *self, unsigned long cmd, void *t)
-{
-	struct thread_info *thread = t;
-
-	switch (cmd) {
-	case THREAD_NOTIFY_FLUSH:
-		/*
-		 * flush_thread() zeroes thread->fpstate, so no need
-		 * to do anything here.
-		 *
-		 * FALLTHROUGH: Ensure we don't try to overwrite our newly
-		 * initialised state information on the first fault.
-		 */
-
-	case THREAD_NOTIFY_EXIT:
-		iwmmxt_task_release(thread);
-		break;
-
-	case THREAD_NOTIFY_SWITCH:
-		iwmmxt_task_switch(thread);
-		break;
-	}
-
-	return NOTIFY_DONE;
-}
-
-static struct notifier_block __maybe_unused iwmmxt_notifier_block = {
-	.notifier_call	= iwmmxt_do,
-};
-
-
-static u32 __init pj4_cp_access_read(void)
-{
-	u32 value;
-
-	__asm__ __volatile__ (
-		"mrc	p15, 0, %0, c1, c0, 2\n\t"
-		: "=r" (value));
-	return value;
-}
-
-static void __init pj4_cp_access_write(u32 value)
-{
-	u32 temp;
-
-	__asm__ __volatile__ (
-		"mcr	p15, 0, %1, c1, c0, 2\n\t"
-#ifdef CONFIG_THUMB2_KERNEL
-		"isb\n\t"
-#else
-		"mrc	p15, 0, %0, c1, c0, 2\n\t"
-		"mov	%0, %0\n\t"
-		"sub	pc, pc, #4\n\t"
-#endif
-		: "=r" (temp) : "r" (value));
-}
-
-static int __init pj4_get_iwmmxt_version(void)
-{
-	u32 cp_access, wcid;
-
-	cp_access = pj4_cp_access_read();
-	pj4_cp_access_write(cp_access | 0xf);
-
-	/* check if coprocessor 0 and 1 are available */
-	if ((pj4_cp_access_read() & 0xf) != 0xf) {
-		pj4_cp_access_write(cp_access);
-		return -ENODEV;
-	}
-
-	/* read iWMMXt coprocessor id register p1, c0 */
-	__asm__ __volatile__ ("mrc    p1, 0, %0, c0, c0, 0\n" : "=r" (wcid));
-
-	pj4_cp_access_write(cp_access);
-
-	/* iWMMXt v1 */
-	if ((wcid & 0xffffff00) == 0x56051000)
-		return 1;
-	/* iWMMXt v2 */
-	if ((wcid & 0xffffff00) == 0x56052000)
-		return 2;
-
-	return -EINVAL;
-}
-
-/*
- * Disable CP0/CP1 on boot, and let call_fpe() and the iWMMXt lazy
- * switch code handle iWMMXt context switching.
- */
-static int __init pj4_cp0_init(void)
-{
-	u32 __maybe_unused cp_access;
-	int vers;
-
-	if (!cpu_is_pj4())
-		return 0;
-
-	vers = pj4_get_iwmmxt_version();
-	if (vers < 0)
-		return 0;
-
-#ifndef CONFIG_IWMMXT
-	pr_info("PJ4 iWMMXt coprocessor detected, but kernel support is missing.\n");
-#else
-	cp_access = pj4_cp_access_read() & ~0xf;
-	pj4_cp_access_write(cp_access);
-
-	pr_info("PJ4 iWMMXt v%d coprocessor enabled.\n", vers);
-	elf_hwcap |= HWCAP_IWMMXT;
-	thread_register_notifier(&iwmmxt_notifier_block);
-	register_iwmmxt_undef_handler();
-#endif
-
-	return 0;
-}
-
-late_initcall(pj4_cp0_init);
diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c
index 3bad79db5d6e..72c82a4d63ac 100644
--- a/arch/arm/kernel/traps.c
+++ b/arch/arm/kernel/traps.c
@@ -220,7 +220,7 @@ void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk,
 	unsigned int fp, mode;
 	int ok = 1;
 
-	printk("%sBacktrace: ", loglvl);
+	printk("%sCall trace: ", loglvl);
 
 	if (!tsk)
 		tsk = current;
diff --git a/arch/arm/kernel/unwind.c b/arch/arm/kernel/unwind.c
index 9d2192156087..f60547dadc93 100644
--- a/arch/arm/kernel/unwind.c
+++ b/arch/arm/kernel/unwind.c
@@ -524,6 +524,8 @@ void unwind_backtrace(struct pt_regs *regs, struct task_struct *tsk,
 {
 	struct stackframe frame;
 
+	printk("%sCall trace: ", loglvl);
+
 	pr_debug("%s(regs = %p tsk = %p)\n", __func__, regs, tsk);
 
 	if (!tsk)
diff --git a/arch/arm/mach-bcm/Kconfig b/arch/arm/mach-bcm/Kconfig
index 8789d93a7c04..7318d8789e24 100644
--- a/arch/arm/mach-bcm/Kconfig
+++ b/arch/arm/mach-bcm/Kconfig
@@ -93,7 +93,6 @@ config ARCH_BCM_MOBILE
 	select ARM_ERRATA_775420
 	select ARM_GIC
 	select GPIO_BCM_KONA
-	select TICK_ONESHOT
 	select HAVE_ARM_ARCH_TIMER
 	select PINCTRL
 	select ARCH_BCM_MOBILE_SMP if SMP
diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c
index 07565b593ed6..439dc6a26bb9 100644
--- a/arch/arm/mm/fault.c
+++ b/arch/arm/mm/fault.c
@@ -25,6 +25,13 @@
 
 #include "fault.h"
 
+bool copy_from_kernel_nofault_allowed(const void *unsafe_src, size_t size)
+{
+	unsigned long addr = (unsigned long)unsafe_src;
+
+	return addr >= TASK_SIZE && ULONG_MAX - addr >= size;
+}
+
 #ifdef CONFIG_MMU
 
 /*
@@ -588,6 +595,7 @@ do_PrefetchAbort(unsigned long addr, unsigned int ifsr, struct pt_regs *regs)
 	if (!inf->fn(addr, ifsr | FSR_LNX_PF, regs))
 		return;
 
+	pr_alert("8<--- cut here ---\n");
 	pr_alert("Unhandled prefetch abort: %s (0x%03x) at 0x%08lx\n",
 		inf->name, ifsr, addr);
 
diff --git a/arch/arm/mm/flush.c b/arch/arm/mm/flush.c
index d19d140a10c7..0749cf8a6637 100644
--- a/arch/arm/mm/flush.c
+++ b/arch/arm/mm/flush.c
@@ -296,6 +296,9 @@ void __sync_icache_dcache(pte_t pteval)
 		return;
 
 	folio = page_folio(pfn_to_page(pfn));
+	if (folio_test_reserved(folio))
+		return;
+
 	if (cache_is_vipt_aliasing())
 		mapping = folio_flush_mapping(folio);
 	else
diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
index 4c3d78691279..e8c6f4be0ce1 100644
--- a/arch/arm/mm/init.c
+++ b/arch/arm/mm/init.c
@@ -418,7 +418,7 @@ static void set_section_perms(struct section_perm *perms, int n, bool set,
 
 }
 
-/**
+/*
  * update_sections_early intended to be called only through stop_machine
  * framework and executed by only one CPU while all other CPUs will spin and
  * wait, so no locking is required in this function.
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 77e05d4959f2..7b11c98b3e84 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -120,6 +120,7 @@ config ARM64
 	select CLONE_BACKWARDS
 	select COMMON_CLK
 	select CPU_PM if (SUSPEND || CPU_IDLE)
+	select CPUMASK_OFFSTACK if NR_CPUS > 256
 	select CRC32
 	select DCACHE_WORD_ACCESS
 	select DYNAMIC_FTRACE if FUNCTION_TRACER
@@ -1425,7 +1426,7 @@ config SCHED_SMT
 config NR_CPUS
 	int "Maximum number of CPUs (2-4096)"
 	range 2 4096
-	default "256"
+	default "512"
 
 config HOTPLUG_CPU
 	bool "Support for hot-pluggable CPUs"
diff --git a/arch/arm64/boot/dts/broadcom/bcmbca/bcm4908-asus-gt-ac5300.dts b/arch/arm64/boot/dts/broadcom/bcmbca/bcm4908-asus-gt-ac5300.dts
index 52f928dbfa3c..2a0d4ee3bd79 100644
--- a/arch/arm64/boot/dts/broadcom/bcmbca/bcm4908-asus-gt-ac5300.dts
+++ b/arch/arm64/boot/dts/broadcom/bcmbca/bcm4908-asus-gt-ac5300.dts
@@ -185,16 +185,17 @@
 		#size-cells = <1>;
 
 		partition@0 {
-			compatible = "nvmem-cells";
 			label = "cferom";
 			reg = <0x0 0x100000>;
 
-			#address-cells = <1>;
-			#size-cells = <1>;
-			ranges = <0 0x0 0x100000>;
+			nvmem-layout {
+				compatible = "fixed-layout";
+				#address-cells = <1>;
+				#size-cells = <1>;
 
-			base_mac_addr: mac@106a0 {
-				reg = <0x106a0 0x6>;
+				base_mac_addr: mac@106a0 {
+					reg = <0x106a0 0x6>;
+				};
 			};
 		};
 
diff --git a/arch/arm64/boot/dts/broadcom/bcmbca/bcm4908.dtsi b/arch/arm64/boot/dts/broadcom/bcmbca/bcm4908.dtsi
index 336016e334d9..e01cf4f54077 100644
--- a/arch/arm64/boot/dts/broadcom/bcmbca/bcm4908.dtsi
+++ b/arch/arm64/boot/dts/broadcom/bcmbca/bcm4908.dtsi
@@ -227,9 +227,6 @@
 				brcm,num-gphy = <5>;
 				brcm,num-rgmii-ports = <2>;
 
-				#address-cells = <1>;
-				#size-cells = <0>;
-
 				ports: ports {
 					#address-cells = <1>;
 					#size-cells = <0>;
diff --git a/arch/arm64/boot/dts/qcom/pm6150.dtsi b/arch/arm64/boot/dts/qcom/pm6150.dtsi
index ddbaf7280b03..11158c2bd524 100644
--- a/arch/arm64/boot/dts/qcom/pm6150.dtsi
+++ b/arch/arm64/boot/dts/qcom/pm6150.dtsi
@@ -63,6 +63,52 @@
 			};
 		};
 
+		pm6150_vbus: usb-vbus-regulator@1100 {
+			compatible = "qcom,pm6150-vbus-reg,
+				      qcom,pm8150b-vbus-reg";
+			reg = <0x1100>;
+			status = "disabled";
+		};
+
+		pm6150_typec: typec@1500 {
+			compatible = "qcom,pm6150-typec,
+				      qcom,pm8150b-typec";
+			reg = <0x1500>, <0x1700>;
+			interrupts = <0x0 0x15 0x00 IRQ_TYPE_EDGE_RISING>,
+				     <0x0 0x15 0x01 IRQ_TYPE_EDGE_BOTH>,
+				     <0x0 0x15 0x02 IRQ_TYPE_EDGE_RISING>,
+				     <0x0 0x15 0x03 IRQ_TYPE_EDGE_BOTH>,
+				     <0x0 0x15 0x04 IRQ_TYPE_EDGE_RISING>,
+				     <0x0 0x15 0x05 IRQ_TYPE_EDGE_RISING>,
+				     <0x0 0x15 0x06 IRQ_TYPE_EDGE_BOTH>,
+				     <0x0 0x15 0x07 IRQ_TYPE_EDGE_RISING>,
+				     <0x0 0x17 0x00 IRQ_TYPE_EDGE_RISING>,
+				     <0x0 0x17 0x01 IRQ_TYPE_EDGE_RISING>,
+				     <0x0 0x17 0x02 IRQ_TYPE_EDGE_RISING>,
+				     <0x0 0x17 0x03 IRQ_TYPE_EDGE_RISING>,
+				     <0x0 0x17 0x04 IRQ_TYPE_EDGE_RISING>,
+				     <0x0 0x17 0x05 IRQ_TYPE_EDGE_RISING>,
+				     <0x0 0x17 0x06 IRQ_TYPE_EDGE_RISING>,
+				     <0x0 0x17 0x07 IRQ_TYPE_EDGE_RISING>;
+			interrupt-names = "or-rid-detect-change",
+					  "vpd-detect",
+					  "cc-state-change",
+					  "vconn-oc",
+					  "vbus-change",
+					  "attach-detach",
+					  "legacy-cable-detect",
+					  "try-snk-src-detect",
+					  "sig-tx",
+					  "sig-rx",
+					  "msg-tx",
+					  "msg-rx",
+					  "msg-tx-failed",
+					  "msg-tx-discarded",
+					  "msg-rx-discarded",
+					  "fr-swap";
+			status = "disabled";
+		};
+
 		pm6150_temp: temp-alarm@2400 {
 			compatible = "qcom,spmi-temp-alarm";
 			reg = <0x2400>;
diff --git a/arch/arm64/hyperv/hv_core.c b/arch/arm64/hyperv/hv_core.c
index b54c34793701..f1ebc025e1df 100644
--- a/arch/arm64/hyperv/hv_core.c
+++ b/arch/arm64/hyperv/hv_core.c
@@ -160,22 +160,22 @@ void hyperv_report_panic(struct pt_regs *regs, long err, bool in_die)
 		return;
 	panic_reported = true;
 
-	guest_id = hv_get_vpreg(HV_REGISTER_GUEST_OSID);
+	guest_id = hv_get_vpreg(HV_REGISTER_GUEST_OS_ID);
 
 	/*
 	 * Hyper-V provides the ability to store only 5 values.
 	 * Pick the passed in error value, the guest_id, the PC,
 	 * and the SP.
 	 */
-	hv_set_vpreg(HV_REGISTER_CRASH_P0, err);
-	hv_set_vpreg(HV_REGISTER_CRASH_P1, guest_id);
-	hv_set_vpreg(HV_REGISTER_CRASH_P2, regs->pc);
-	hv_set_vpreg(HV_REGISTER_CRASH_P3, regs->sp);
-	hv_set_vpreg(HV_REGISTER_CRASH_P4, 0);
+	hv_set_vpreg(HV_REGISTER_GUEST_CRASH_P0, err);
+	hv_set_vpreg(HV_REGISTER_GUEST_CRASH_P1, guest_id);
+	hv_set_vpreg(HV_REGISTER_GUEST_CRASH_P2, regs->pc);
+	hv_set_vpreg(HV_REGISTER_GUEST_CRASH_P3, regs->sp);
+	hv_set_vpreg(HV_REGISTER_GUEST_CRASH_P4, 0);
 
 	/*
 	 * Let Hyper-V know there is crash data available
 	 */
-	hv_set_vpreg(HV_REGISTER_CRASH_CTL, HV_CRASH_CTL_CRASH_NOTIFY);
+	hv_set_vpreg(HV_REGISTER_GUEST_CRASH_CTL, HV_CRASH_CTL_CRASH_NOTIFY);
 }
 EXPORT_SYMBOL_GPL(hyperv_report_panic);
diff --git a/arch/arm64/hyperv/mshyperv.c b/arch/arm64/hyperv/mshyperv.c
index f1b8a04ee9f2..b1a4de4eee29 100644
--- a/arch/arm64/hyperv/mshyperv.c
+++ b/arch/arm64/hyperv/mshyperv.c
@@ -19,10 +19,17 @@
 
 static bool hyperv_initialized;
 
+int hv_get_hypervisor_version(union hv_hypervisor_version_info *info)
+{
+	hv_get_vpreg_128(HV_REGISTER_HYPERVISOR_VERSION,
+			 (struct hv_get_vp_registers_output *)info);
+
+	return 0;
+}
+
 static int __init hyperv_init(void)
 {
 	struct hv_get_vp_registers_output	result;
-	u32	a, b, c, d;
 	u64	guest_id;
 	int	ret;
 
@@ -39,7 +46,7 @@ static int __init hyperv_init(void)
 
 	/* Setup the guest ID */
 	guest_id = hv_generate_guest_id(LINUX_VERSION_CODE);
-	hv_set_vpreg(HV_REGISTER_GUEST_OSID, guest_id);
+	hv_set_vpreg(HV_REGISTER_GUEST_OS_ID, guest_id);
 
 	/* Get the features and hints from Hyper-V */
 	hv_get_vpreg_128(HV_REGISTER_FEATURES, &result);
@@ -54,15 +61,6 @@ static int __init hyperv_init(void)
 		ms_hyperv.features, ms_hyperv.priv_high, ms_hyperv.hints,
 		ms_hyperv.misc_features);
 
-	/* Get information about the Hyper-V host version */
-	hv_get_vpreg_128(HV_REGISTER_HYPERVISOR_VERSION, &result);
-	a = result.as32.a;
-	b = result.as32.b;
-	c = result.as32.c;
-	d = result.as32.d;
-	pr_info("Hyper-V: Host Build %d.%d.%d.%d-%d-%d\n",
-		b >> 16, b & 0xFFFF, a,	d & 0xFFFFFF, c, d >> 24);
-
 	ret = hv_common_init();
 	if (ret)
 		return ret;
@@ -74,6 +72,8 @@ static int __init hyperv_init(void)
 		return ret;
 	}
 
+	ms_hyperv_late_init();
+
 	hyperv_initialized = true;
 	return 0;
 }
diff --git a/arch/arm64/include/asm/hyperv-tlfs.h b/arch/arm64/include/asm/hyperv-tlfs.h
index bc6c7ac934a1..bc30aadedfe9 100644
--- a/arch/arm64/include/asm/hyperv-tlfs.h
+++ b/arch/arm64/include/asm/hyperv-tlfs.h
@@ -22,14 +22,6 @@
  */
 
 /*
- * These Hyper-V registers provide information equivalent to the CPUID
- * instruction on x86/x64.
- */
-#define HV_REGISTER_HYPERVISOR_VERSION		0x00000100 /*CPUID 0x40000002 */
-#define HV_REGISTER_FEATURES			0x00000200 /*CPUID 0x40000003 */
-#define HV_REGISTER_ENLIGHTENMENTS		0x00000201 /*CPUID 0x40000004 */
-
-/*
  * Group C Features. See the asm-generic version of hyperv-tlfs.h
  * for a description of Feature Groups.
  */
@@ -41,28 +33,29 @@
 #define HV_STIMER_DIRECT_MODE_AVAILABLE		BIT(13)
 
 /*
- * Synthetic register definitions equivalent to MSRs on x86/x64
+ * To support arch-generic code calling hv_set/get_register:
+ * - On x86, HV_MSR_ indicates an MSR accessed via rdmsrl/wrmsrl
+ * - On ARM, HV_MSR_ indicates a VP register accessed via hypercall
  */
-#define HV_REGISTER_CRASH_P0		0x00000210
-#define HV_REGISTER_CRASH_P1		0x00000211
-#define HV_REGISTER_CRASH_P2		0x00000212
-#define HV_REGISTER_CRASH_P3		0x00000213
-#define HV_REGISTER_CRASH_P4		0x00000214
-#define HV_REGISTER_CRASH_CTL		0x00000215
+#define HV_MSR_CRASH_P0		(HV_REGISTER_GUEST_CRASH_P0)
+#define HV_MSR_CRASH_P1		(HV_REGISTER_GUEST_CRASH_P1)
+#define HV_MSR_CRASH_P2		(HV_REGISTER_GUEST_CRASH_P2)
+#define HV_MSR_CRASH_P3		(HV_REGISTER_GUEST_CRASH_P3)
+#define HV_MSR_CRASH_P4		(HV_REGISTER_GUEST_CRASH_P4)
+#define HV_MSR_CRASH_CTL	(HV_REGISTER_GUEST_CRASH_CTL)
 
-#define HV_REGISTER_GUEST_OSID		0x00090002
-#define HV_REGISTER_VP_INDEX		0x00090003
-#define HV_REGISTER_TIME_REF_COUNT	0x00090004
-#define HV_REGISTER_REFERENCE_TSC	0x00090017
+#define HV_MSR_VP_INDEX		(HV_REGISTER_VP_INDEX)
+#define HV_MSR_TIME_REF_COUNT	(HV_REGISTER_TIME_REF_COUNT)
+#define HV_MSR_REFERENCE_TSC	(HV_REGISTER_REFERENCE_TSC)
 
-#define HV_REGISTER_SINT0		0x000A0000
-#define HV_REGISTER_SCONTROL		0x000A0010
-#define HV_REGISTER_SIEFP		0x000A0012
-#define HV_REGISTER_SIMP		0x000A0013
-#define HV_REGISTER_EOM			0x000A0014
+#define HV_MSR_SINT0		(HV_REGISTER_SINT0)
+#define HV_MSR_SCONTROL		(HV_REGISTER_SCONTROL)
+#define HV_MSR_SIEFP		(HV_REGISTER_SIEFP)
+#define HV_MSR_SIMP		(HV_REGISTER_SIMP)
+#define HV_MSR_EOM		(HV_REGISTER_EOM)
 
-#define HV_REGISTER_STIMER0_CONFIG	0x000B0000
-#define HV_REGISTER_STIMER0_COUNT	0x000B0001
+#define HV_MSR_STIMER0_CONFIG	(HV_REGISTER_STIMER0_CONFIG)
+#define HV_MSR_STIMER0_COUNT	(HV_REGISTER_STIMER0_COUNT)
 
 union hv_msi_entry {
 	u64 as_uint64[2];
diff --git a/arch/arm64/include/asm/mshyperv.h b/arch/arm64/include/asm/mshyperv.h
index 20070a847304..a975e1a689dd 100644
--- a/arch/arm64/include/asm/mshyperv.h
+++ b/arch/arm64/include/asm/mshyperv.h
@@ -31,12 +31,12 @@ void hv_set_vpreg(u32 reg, u64 value);
 u64 hv_get_vpreg(u32 reg);
 void hv_get_vpreg_128(u32 reg, struct hv_get_vp_registers_output *result);
 
-static inline void hv_set_register(unsigned int reg, u64 value)
+static inline void hv_set_msr(unsigned int reg, u64 value)
 {
 	hv_set_vpreg(reg, value);
 }
 
-static inline u64 hv_get_register(unsigned int reg)
+static inline u64 hv_get_msr(unsigned int reg)
 {
 	return hv_get_vpreg(reg);
 }
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index ce08b744aaab..06234c3a15f3 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -291,6 +291,21 @@ SYM_INNER_LABEL(init_el2, SYM_L_LOCAL)
 	blr	x2
 0:
 	mov_q	x0, HCR_HOST_NVHE_FLAGS
+
+	/*
+	 * Compliant CPUs advertise their VHE-onlyness with
+	 * ID_AA64MMFR4_EL1.E2H0 < 0. HCR_EL2.E2H can be
+	 * RES1 in that case. Publish the E2H bit early so that
+	 * it can be picked up by the init_el2_state macro.
+	 *
+	 * Fruity CPUs seem to have HCR_EL2.E2H set to RAO/WI, but
+	 * don't advertise it (they predate this relaxation).
+	 */
+	mrs_s	x1, SYS_ID_AA64MMFR4_EL1
+	tbz	x1, #(ID_AA64MMFR4_EL1_E2H0_SHIFT + ID_AA64MMFR4_EL1_E2H0_WIDTH - 1), 1f
+
+	orr	x0, x0, #HCR_E2H
+1:
 	msr	hcr_el2, x0
 	isb
 
@@ -303,22 +318,10 @@ SYM_INNER_LABEL(init_el2, SYM_L_LOCAL)
 
 	mov_q	x1, INIT_SCTLR_EL1_MMU_OFF
 
-	/*
-	 * Compliant CPUs advertise their VHE-onlyness with
-	 * ID_AA64MMFR4_EL1.E2H0 < 0. HCR_EL2.E2H can be
-	 * RES1 in that case.
-	 *
-	 * Fruity CPUs seem to have HCR_EL2.E2H set to RES1, but
-	 * don't advertise it (they predate this relaxation).
-	 */
-	mrs_s	x0, SYS_ID_AA64MMFR4_EL1
-	ubfx	x0, x0, #ID_AA64MMFR4_EL1_E2H0_SHIFT, #ID_AA64MMFR4_EL1_E2H0_WIDTH
-	tbnz	x0, #(ID_AA64MMFR4_EL1_E2H0_SHIFT + ID_AA64MMFR4_EL1_E2H0_WIDTH - 1), 1f
-
 	mrs	x0, hcr_el2
 	and	x0, x0, #HCR_E2H
 	cbz	x0, 2f
-1:
+
 	/* Set a sane SCTLR_EL1, the VHE way */
 	pre_disable_mmu_workaround
 	msr_s	SYS_SCTLR_EL12, x1
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index 3dee5490eea9..c4a0a35e02c7 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -2597,14 +2597,11 @@ static __init int kvm_arm_init(void)
 	if (err)
 		goto out_hyp;
 
-	if (is_protected_kvm_enabled()) {
-		kvm_info("Protected nVHE mode initialized successfully\n");
-	} else if (in_hyp_mode) {
-		kvm_info("VHE mode initialized successfully\n");
-	} else {
-		char mode = cpus_have_final_cap(ARM64_KVM_HVHE) ? 'h' : 'n';
-		kvm_info("Hyp mode (%cVHE) initialized successfully\n", mode);
-	}
+	kvm_info("%s%sVHE mode initialized successfully\n",
+		 in_hyp_mode ? "" : (is_protected_kvm_enabled() ?
+				     "Protected " : "Hyp "),
+		 in_hyp_mode ? "" : (cpus_have_final_cap(ARM64_KVM_HVHE) ?
+				     "h" : "n"));
 
 	/*
 	 * FIXME: Do something reasonable if kvm_init() fails after pKVM
diff --git a/arch/arm64/kvm/hyp/nvhe/tlb.c b/arch/arm64/kvm/hyp/nvhe/tlb.c
index a60fb13e2192..2fc68da4036d 100644
--- a/arch/arm64/kvm/hyp/nvhe/tlb.c
+++ b/arch/arm64/kvm/hyp/nvhe/tlb.c
@@ -154,7 +154,8 @@ void __kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu,
 	/* Switch to requested VMID */
 	__tlb_switch_to_guest(mmu, &cxt, false);
 
-	__flush_s2_tlb_range_op(ipas2e1is, start, pages, stride, 0);
+	__flush_s2_tlb_range_op(ipas2e1is, start, pages, stride,
+				TLBI_TTL_UNKNOWN);
 
 	dsb(ish);
 	__tlbi(vmalle1is);
diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
index 3fae5830f8d2..5a59ef88b646 100644
--- a/arch/arm64/kvm/hyp/pgtable.c
+++ b/arch/arm64/kvm/hyp/pgtable.c
@@ -528,7 +528,7 @@ static int hyp_unmap_walker(const struct kvm_pgtable_visit_ctx *ctx,
 
 		kvm_clear_pte(ctx->ptep);
 		dsb(ishst);
-		__tlbi_level(vae2is, __TLBI_VADDR(ctx->addr, 0), ctx->level);
+		__tlbi_level(vae2is, __TLBI_VADDR(ctx->addr, 0), TLBI_TTL_UNKNOWN);
 	} else {
 		if (ctx->end - ctx->addr < granule)
 			return -EINVAL;
@@ -843,12 +843,15 @@ static bool stage2_try_break_pte(const struct kvm_pgtable_visit_ctx *ctx,
 		 * Perform the appropriate TLB invalidation based on the
 		 * evicted pte value (if any).
 		 */
-		if (kvm_pte_table(ctx->old, ctx->level))
-			kvm_tlb_flush_vmid_range(mmu, ctx->addr,
-						kvm_granule_size(ctx->level));
-		else if (kvm_pte_valid(ctx->old))
+		if (kvm_pte_table(ctx->old, ctx->level)) {
+			u64 size = kvm_granule_size(ctx->level);
+			u64 addr = ALIGN_DOWN(ctx->addr, size);
+
+			kvm_tlb_flush_vmid_range(mmu, addr, size);
+		} else if (kvm_pte_valid(ctx->old)) {
 			kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu,
 				     ctx->addr, ctx->level);
+		}
 	}
 
 	if (stage2_pte_is_counted(ctx->old))
@@ -896,9 +899,13 @@ static void stage2_unmap_put_pte(const struct kvm_pgtable_visit_ctx *ctx,
 	if (kvm_pte_valid(ctx->old)) {
 		kvm_clear_pte(ctx->ptep);
 
-		if (!stage2_unmap_defer_tlb_flush(pgt))
-			kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu,
-					ctx->addr, ctx->level);
+		if (kvm_pte_table(ctx->old, ctx->level)) {
+			kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu, ctx->addr,
+				     TLBI_TTL_UNKNOWN);
+		} else if (!stage2_unmap_defer_tlb_flush(pgt)) {
+			kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu, ctx->addr,
+				     ctx->level);
+		}
 	}
 
 	mm_ops->put_page(ctx->ptep);
diff --git a/arch/arm64/kvm/hyp/vhe/tlb.c b/arch/arm64/kvm/hyp/vhe/tlb.c
index b32e2940df7d..1a60b95381e8 100644
--- a/arch/arm64/kvm/hyp/vhe/tlb.c
+++ b/arch/arm64/kvm/hyp/vhe/tlb.c
@@ -171,7 +171,8 @@ void __kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu,
 	/* Switch to requested VMID */
 	__tlb_switch_to_guest(mmu, &cxt);
 
-	__flush_s2_tlb_range_op(ipas2e1is, start, pages, stride, 0);
+	__flush_s2_tlb_range_op(ipas2e1is, start, pages, stride,
+				TLBI_TTL_UNKNOWN);
 
 	dsb(ish);
 	__tlbi(vmalle1is);
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index 18680771cdb0..dc04bc767865 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -1637,7 +1637,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
 	fault_ipa = kvm_vcpu_get_fault_ipa(vcpu);
 	is_iabt = kvm_vcpu_trap_is_iabt(vcpu);
 
-	if (esr_fsc_is_permission_fault(esr)) {
+	if (esr_fsc_is_translation_fault(esr)) {
 		/* Beyond sanitised PARange (which is the IPA limit) */
 		if (fault_ipa >= BIT_ULL(get_kvm_ipa_limit())) {
 			kvm_inject_size_fault(vcpu);
diff --git a/arch/hexagon/Kconfig b/arch/hexagon/Kconfig
index 1414052e7d6b..e233b5efa276 100644
--- a/arch/hexagon/Kconfig
+++ b/arch/hexagon/Kconfig
@@ -7,11 +7,13 @@ config HEXAGON
 	select ARCH_32BIT_OFF_T
 	select ARCH_HAS_SYNC_DMA_FOR_DEVICE
 	select ARCH_NO_PREEMPT
+	select ARCH_WANT_FRAME_POINTERS
 	select DMA_GLOBAL_POOL
 	select HAVE_PAGE_SIZE_4KB
 	select HAVE_PAGE_SIZE_16KB
 	select HAVE_PAGE_SIZE_64KB
 	select HAVE_PAGE_SIZE_256KB
+	select FRAME_POINTER
 	# Other pending projects/to-do items.
 	# select HAVE_REGS_AND_STACK_ACCESS_API
 	# select HAVE_HW_BREAKPOINT if PERF_EVENTS
@@ -23,6 +25,7 @@ config HEXAGON
 	select HAVE_PERF_EVENTS
 	# GENERIC_ALLOCATOR is used by dma_alloc_coherent()
 	select GENERIC_ALLOCATOR
+	select GENERIC_IRQ_PROBE
 	select GENERIC_IRQ_SHOW
 	select HAVE_ARCH_KGDB
 	select HAVE_ARCH_TRACEHOOK
@@ -47,9 +50,6 @@ config HEXAGON_PHYS_OFFSET
 	help
 	  Platforms that don't load the kernel at zero set this.
 
-config FRAME_POINTER
-	def_bool y
-
 config LOCKDEP_SUPPORT
 	def_bool y
 
@@ -62,12 +62,6 @@ config MMU
 config GENERIC_CSUM
 	def_bool y
 
-#
-# Use the generic interrupt handling code in kernel/irq/:
-#
-config GENERIC_IRQ_PROBE
-	def_bool y
-
 config GENERIC_HWEIGHT
 	def_bool y
 
diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
index c139d0d72802..a5f300ec6f28 100644
--- a/arch/loongarch/Kconfig
+++ b/arch/loongarch/Kconfig
@@ -15,6 +15,7 @@ config LOONGARCH
 	select ARCH_ENABLE_THP_MIGRATION if TRANSPARENT_HUGEPAGE
 	select ARCH_HAS_ACPI_TABLE_UPGRADE	if ACPI
 	select ARCH_HAS_CPU_FINALIZE_INIT
+	select ARCH_HAS_CURRENT_STACK_POINTER
 	select ARCH_HAS_FORTIFY_SOURCE
 	select ARCH_HAS_KCOV
 	select ARCH_HAS_NMI_SAFE_THIS_CPU_OPS
@@ -104,6 +105,7 @@ config LOONGARCH
 	select HAVE_ARCH_SECCOMP_FILTER
 	select HAVE_ARCH_TRACEHOOK
 	select HAVE_ARCH_TRANSPARENT_HUGEPAGE
+	select HAVE_ARCH_USERFAULTFD_MINOR if USERFAULTFD
 	select HAVE_ASM_MODVERSIONS
 	select HAVE_CONTEXT_TRACKING_USER
 	select HAVE_C_RECORDMCOUNT
@@ -133,20 +135,24 @@ config LOONGARCH
 	select HAVE_KPROBES
 	select HAVE_KPROBES_ON_FTRACE
 	select HAVE_KRETPROBES
+	select HAVE_LIVEPATCH
 	select HAVE_MOD_ARCH_SPECIFIC
 	select HAVE_NMI
+	select HAVE_OBJTOOL if AS_HAS_EXPLICIT_RELOCS
 	select HAVE_PCI
 	select HAVE_PERF_EVENTS
 	select HAVE_PERF_REGS
 	select HAVE_PERF_USER_STACK_DUMP
 	select HAVE_PREEMPT_DYNAMIC_KEY
 	select HAVE_REGS_AND_STACK_ACCESS_API
+	select HAVE_RELIABLE_STACKTRACE if UNWINDER_ORC
 	select HAVE_RETHOOK
 	select HAVE_RSEQ
 	select HAVE_RUST
 	select HAVE_SAMPLE_FTRACE_DIRECT
 	select HAVE_SAMPLE_FTRACE_DIRECT_MULTI
 	select HAVE_SETUP_PER_CPU_AREA if NUMA
+	select HAVE_STACK_VALIDATION if HAVE_OBJTOOL
 	select HAVE_STACKPROTECTOR
 	select HAVE_SYSCALL_TRACEPOINTS
 	select HAVE_TIF_NOHZ
@@ -624,6 +630,8 @@ config RANDOMIZE_BASE_MAX_OFFSET
 
 	  This is limited by the size of the lower address memory, 256MB.
 
+source "kernel/livepatch/Kconfig"
+
 endmenu
 
 config ARCH_SELECT_MEMORY_MODEL
diff --git a/arch/loongarch/Kconfig.debug b/arch/loongarch/Kconfig.debug
index 8d36aab53008..98d60630c3d4 100644
--- a/arch/loongarch/Kconfig.debug
+++ b/arch/loongarch/Kconfig.debug
@@ -26,4 +26,15 @@ config UNWINDER_PROLOGUE
 	  Some of the addresses it reports may be incorrect (but better than the
 	  Guess unwinder).
 
+config UNWINDER_ORC
+	bool "ORC unwinder"
+	select OBJTOOL
+	help
+	  This option enables the ORC (Oops Rewind Capability) unwinder for
+	  unwinding kernel stack traces.  It uses a custom data format which is
+	  a simplified version of the DWARF Call Frame Information standard.
+
+	  Enabling this option will increase the kernel's runtime memory usage
+	  by roughly 2-4MB, depending on your kernel config.
+
 endchoice
diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile
index fa4fb09909ae..df6caf79537a 100644
--- a/arch/loongarch/Makefile
+++ b/arch/loongarch/Makefile
@@ -26,6 +26,18 @@ endif
 32bit-emul		= elf32loongarch
 64bit-emul		= elf64loongarch
 
+ifdef CONFIG_UNWINDER_ORC
+orc_hash_h := arch/$(SRCARCH)/include/generated/asm/orc_hash.h
+orc_hash_sh := $(srctree)/scripts/orc_hash.sh
+targets += $(orc_hash_h)
+quiet_cmd_orc_hash = GEN     $@
+      cmd_orc_hash = mkdir -p $(dir $@); \
+		     $(CONFIG_SHELL) $(orc_hash_sh) < $< > $@
+$(orc_hash_h): $(srctree)/arch/loongarch/include/asm/orc_types.h $(orc_hash_sh) FORCE
+	$(call if_changed,orc_hash)
+archprepare: $(orc_hash_h)
+endif
+
 ifdef CONFIG_DYNAMIC_FTRACE
 KBUILD_CPPFLAGS += -DCC_USING_PATCHABLE_FUNCTION_ENTRY
 CC_FLAGS_FTRACE := -fpatchable-function-entry=2
@@ -72,8 +84,6 @@ KBUILD_CFLAGS_KERNEL		+= $(call cc-option,-mdirect-extern-access)
 KBUILD_CFLAGS_KERNEL		+= $(call cc-option,-fdirect-access-external-data)
 KBUILD_AFLAGS_MODULE		+= $(call cc-option,-fno-direct-access-external-data)
 KBUILD_CFLAGS_MODULE		+= $(call cc-option,-fno-direct-access-external-data)
-KBUILD_AFLAGS_MODULE		+= $(call cc-option,-mno-relax) $(call cc-option,-Wa$(comma)-mno-relax)
-KBUILD_CFLAGS_MODULE		+= $(call cc-option,-mno-relax) $(call cc-option,-Wa$(comma)-mno-relax)
 else
 cflags-y			+= $(call cc-option,-mno-explicit-relocs)
 KBUILD_AFLAGS_KERNEL		+= -Wa,-mla-global-with-pcrel
@@ -82,6 +92,15 @@ KBUILD_AFLAGS_MODULE		+= -Wa,-mla-global-with-abs
 KBUILD_CFLAGS_MODULE		+= -fplt -Wa,-mla-global-with-abs,-mla-local-with-abs
 endif
 
+KBUILD_AFLAGS			+= $(call cc-option,-mno-relax) $(call cc-option,-Wa$(comma)-mno-relax)
+KBUILD_CFLAGS			+= $(call cc-option,-mno-relax) $(call cc-option,-Wa$(comma)-mno-relax)
+KBUILD_AFLAGS			+= $(call cc-option,-mthin-add-sub) $(call cc-option,-Wa$(comma)-mthin-add-sub)
+KBUILD_CFLAGS			+= $(call cc-option,-mthin-add-sub) $(call cc-option,-Wa$(comma)-mthin-add-sub)
+
+ifdef CONFIG_OBJTOOL
+KBUILD_CFLAGS			+= -fno-jump-tables
+endif
+
 KBUILD_RUSTFLAGS			+= --target=$(objtree)/scripts/target.json
 KBUILD_RUSTFLAGS_MODULE		+= -Crelocation-model=pic
 
diff --git a/arch/loongarch/crypto/crc32-loongarch.c b/arch/loongarch/crypto/crc32-loongarch.c
index a49e507af38c..3eebea3a7b47 100644
--- a/arch/loongarch/crypto/crc32-loongarch.c
+++ b/arch/loongarch/crypto/crc32-loongarch.c
@@ -44,7 +44,6 @@ static u32 crc32_loongarch_hw(u32 crc_, const u8 *p, unsigned int len)
 
 		CRC32(crc, value, w);
 		p += sizeof(u32);
-		len -= sizeof(u32);
 	}
 
 	if (len & sizeof(u16)) {
@@ -80,7 +79,6 @@ static u32 crc32c_loongarch_hw(u32 crc_, const u8 *p, unsigned int len)
 
 		CRC32C(crc, value, w);
 		p += sizeof(u32);
-		len -= sizeof(u32);
 	}
 
 	if (len & sizeof(u16)) {
diff --git a/arch/loongarch/include/asm/Kbuild b/arch/loongarch/include/asm/Kbuild
index 93783fa24f6e..2dbec7853ae8 100644
--- a/arch/loongarch/include/asm/Kbuild
+++ b/arch/loongarch/include/asm/Kbuild
@@ -1,9 +1,12 @@
 # SPDX-License-Identifier: GPL-2.0
+generated-y += orc_hash.h
+
 generic-y += dma-contiguous.h
 generic-y += mcs_spinlock.h
 generic-y += parport.h
 generic-y += early_ioremap.h
 generic-y += qrwlock.h
+generic-y += qspinlock.h
 generic-y += rwsem.h
 generic-y += segment.h
 generic-y += user.h
diff --git a/arch/loongarch/include/asm/bug.h b/arch/loongarch/include/asm/bug.h
index d4ca3ba25418..08388876ade4 100644
--- a/arch/loongarch/include/asm/bug.h
+++ b/arch/loongarch/include/asm/bug.h
@@ -44,6 +44,7 @@
 do {								\
 	instrumentation_begin();				\
 	__BUG_FLAGS(BUGFLAG_WARNING|(flags));			\
+	annotate_reachable();					\
 	instrumentation_end();					\
 } while (0)
 
diff --git a/arch/loongarch/include/asm/cacheflush.h b/arch/loongarch/include/asm/cacheflush.h
index 80bd74106985..f8754d08a31a 100644
--- a/arch/loongarch/include/asm/cacheflush.h
+++ b/arch/loongarch/include/asm/cacheflush.h
@@ -37,8 +37,6 @@ void local_flush_icache_range(unsigned long start, unsigned long end);
 #define flush_icache_range	local_flush_icache_range
 #define flush_icache_user_range	local_flush_icache_range
 
-#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 0
-
 #define flush_cache_all()				do { } while (0)
 #define flush_cache_mm(mm)				do { } while (0)
 #define flush_cache_dup_mm(mm)				do { } while (0)
@@ -47,7 +45,6 @@ void local_flush_icache_range(unsigned long start, unsigned long end);
 #define flush_cache_vmap(start, end)			do { } while (0)
 #define flush_cache_vunmap(start, end)			do { } while (0)
 #define flush_icache_user_page(vma, page, addr, len)	do { } while (0)
-#define flush_dcache_page(page)				do { } while (0)
 #define flush_dcache_mmap_lock(mapping)			do { } while (0)
 #define flush_dcache_mmap_unlock(mapping)		do { } while (0)
 
diff --git a/arch/loongarch/include/asm/exception.h b/arch/loongarch/include/asm/exception.h
index af74a3fdcad1..c6d20736fd92 100644
--- a/arch/loongarch/include/asm/exception.h
+++ b/arch/loongarch/include/asm/exception.h
@@ -6,6 +6,8 @@
 #include <asm/ptrace.h>
 #include <linux/kprobes.h>
 
+extern void *exception_table[];
+
 void show_registers(struct pt_regs *regs);
 
 asmlinkage void cache_parity_error(void);
diff --git a/arch/loongarch/include/asm/io.h b/arch/loongarch/include/asm/io.h
index c486c2341b66..4a8adcca329b 100644
--- a/arch/loongarch/include/asm/io.h
+++ b/arch/loongarch/include/asm/io.h
@@ -71,6 +71,8 @@ extern void __memcpy_fromio(void *to, const volatile void __iomem *from, size_t
 #define memcpy_fromio(a, c, l) __memcpy_fromio((a), (c), (l))
 #define memcpy_toio(c, a, l)   __memcpy_toio((c), (a), (l))
 
+#define __io_aw() mmiowb()
+
 #include <asm-generic/io.h>
 
 #define ARCH_HAS_VALID_PHYS_ADDR_RANGE
diff --git a/arch/loongarch/include/asm/module.h b/arch/loongarch/include/asm/module.h
index 2ecd82bb64e1..f33f3fd32ecc 100644
--- a/arch/loongarch/include/asm/module.h
+++ b/arch/loongarch/include/asm/module.h
@@ -6,6 +6,7 @@
 #define _ASM_MODULE_H
 
 #include <asm/inst.h>
+#include <asm/orc_types.h>
 #include <asm-generic/module.h>
 
 #define RELA_STACK_DEPTH 16
@@ -21,6 +22,12 @@ struct mod_arch_specific {
 	struct mod_section plt;
 	struct mod_section plt_idx;
 
+#ifdef CONFIG_UNWINDER_ORC
+	unsigned int num_orcs;
+	int *orc_unwind_ip;
+	struct orc_entry *orc_unwind;
+#endif
+
 	/* For CONFIG_DYNAMIC_FTRACE */
 	struct plt_entry *ftrace_trampolines;
 };
diff --git a/arch/loongarch/include/asm/orc_header.h b/arch/loongarch/include/asm/orc_header.h
new file mode 100644
index 000000000000..f9d509c3fd70
--- /dev/null
+++ b/arch/loongarch/include/asm/orc_header.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+#ifndef _ORC_HEADER_H
+#define _ORC_HEADER_H
+
+#include <linux/types.h>
+#include <linux/compiler.h>
+#include <asm/orc_hash.h>
+
+/*
+ * The header is currently a 20-byte hash of the ORC entry definition; see
+ * scripts/orc_hash.sh.
+ */
+#define ORC_HEADER					\
+	__used __section(".orc_header") __aligned(4)	\
+	static const u8 orc_header[] = { ORC_HASH }
+
+#endif /* _ORC_HEADER_H */
diff --git a/arch/loongarch/include/asm/orc_lookup.h b/arch/loongarch/include/asm/orc_lookup.h
new file mode 100644
index 000000000000..b02e6357def4
--- /dev/null
+++ b/arch/loongarch/include/asm/orc_lookup.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _ORC_LOOKUP_H
+#define _ORC_LOOKUP_H
+
+/*
+ * This is a lookup table for speeding up access to the .orc_unwind table.
+ * Given an input address offset, the corresponding lookup table entry
+ * specifies a subset of the .orc_unwind table to search.
+ *
+ * Each block represents the end of the previous range and the start of the
+ * next range.  An extra block is added to give the last range an end.
+ *
+ * The block size should be a power of 2 to avoid a costly 'div' instruction.
+ *
+ * A block size of 256 was chosen because it roughly doubles unwinder
+ * performance while only adding ~5% to the ORC data footprint.
+ */
+#define LOOKUP_BLOCK_ORDER	8
+#define LOOKUP_BLOCK_SIZE	(1 << LOOKUP_BLOCK_ORDER)
+
+#ifndef LINKER_SCRIPT
+
+extern unsigned int orc_lookup[];
+extern unsigned int orc_lookup_end[];
+
+#define LOOKUP_START_IP		(unsigned long)_stext
+#define LOOKUP_STOP_IP		(unsigned long)_etext
+
+#endif /* LINKER_SCRIPT */
+
+#endif /* _ORC_LOOKUP_H */
diff --git a/arch/loongarch/include/asm/orc_types.h b/arch/loongarch/include/asm/orc_types.h
new file mode 100644
index 000000000000..caf1f71a1057
--- /dev/null
+++ b/arch/loongarch/include/asm/orc_types.h
@@ -0,0 +1,58 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _ORC_TYPES_H
+#define _ORC_TYPES_H
+
+#include <linux/types.h>
+
+/*
+ * The ORC_REG_* registers are base registers which are used to find other
+ * registers on the stack.
+ *
+ * ORC_REG_PREV_SP, also known as DWARF Call Frame Address (CFA), is the
+ * address of the previous frame: the caller's SP before it called the current
+ * function.
+ *
+ * ORC_REG_UNDEFINED means the corresponding register's value didn't change in
+ * the current frame.
+ *
+ * The most commonly used base registers are SP and FP -- which the previous SP
+ * is usually based on -- and PREV_SP and UNDEFINED -- which the previous FP is
+ * usually based on.
+ *
+ * The rest of the base registers are needed for special cases like entry code
+ * and GCC realigned stacks.
+ */
+#define ORC_REG_UNDEFINED		0
+#define ORC_REG_PREV_SP			1
+#define ORC_REG_SP			2
+#define ORC_REG_FP			3
+#define ORC_REG_MAX			4
+
+#define ORC_TYPE_UNDEFINED		0
+#define ORC_TYPE_END_OF_STACK		1
+#define ORC_TYPE_CALL			2
+#define ORC_TYPE_REGS			3
+#define ORC_TYPE_REGS_PARTIAL		4
+
+#ifndef __ASSEMBLY__
+/*
+ * This struct is more or less a vastly simplified version of the DWARF Call
+ * Frame Information standard.  It contains only the necessary parts of DWARF
+ * CFI, simplified for ease of access by the in-kernel unwinder.  It tells the
+ * unwinder how to find the previous SP and FP (and sometimes entry regs) on
+ * the stack for a given code address.  Each instance of the struct corresponds
+ * to one or more code locations.
+ */
+struct orc_entry {
+	s16		sp_offset;
+	s16		fp_offset;
+	s16		ra_offset;
+	unsigned int	sp_reg:4;
+	unsigned int	fp_reg:4;
+	unsigned int	ra_reg:4;
+	unsigned int	type:3;
+	unsigned int	signal:1;
+};
+#endif /* __ASSEMBLY__ */
+
+#endif /* _ORC_TYPES_H */
diff --git a/arch/loongarch/include/asm/page.h b/arch/loongarch/include/asm/page.h
index afb6fa16b826..44027060c54a 100644
--- a/arch/loongarch/include/asm/page.h
+++ b/arch/loongarch/include/asm/page.h
@@ -75,6 +75,9 @@ typedef struct { unsigned long pgprot; } pgprot_t;
 #define pfn_to_kaddr(pfn)	__va((pfn) << PAGE_SHIFT)
 #define sym_to_pfn(x)		__phys_to_pfn(__pa_symbol(x))
 
+struct page *dmw_virt_to_page(unsigned long kaddr);
+struct page *tlb_virt_to_page(unsigned long kaddr);
+
 #define virt_to_pfn(kaddr)	PFN_DOWN(PHYSADDR(kaddr))
 
 #define virt_to_page(kaddr)								\
diff --git a/arch/loongarch/include/asm/percpu.h b/arch/loongarch/include/asm/percpu.h
index 9b36ac003f89..8f290e5546cf 100644
--- a/arch/loongarch/include/asm/percpu.h
+++ b/arch/loongarch/include/asm/percpu.h
@@ -29,7 +29,12 @@ static inline void set_my_cpu_offset(unsigned long off)
 	__my_cpu_offset = off;
 	csr_write64(off, PERCPU_BASE_KS);
 }
-#define __my_cpu_offset __my_cpu_offset
+
+#define __my_cpu_offset					\
+({							\
+	__asm__ __volatile__("":"+r"(__my_cpu_offset));	\
+	__my_cpu_offset;				\
+})
 
 #define PERCPU_OP(op, asm_op, c_op)					\
 static __always_inline unsigned long __percpu_##op(void *ptr,		\
diff --git a/arch/loongarch/include/asm/pgtable.h b/arch/loongarch/include/asm/pgtable.h
index 8b5df1bbf9e9..af3acdf3481a 100644
--- a/arch/loongarch/include/asm/pgtable.h
+++ b/arch/loongarch/include/asm/pgtable.h
@@ -363,9 +363,6 @@ static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *pt
 extern pgd_t swapper_pg_dir[];
 extern pgd_t invalid_pg_dir[];
 
-struct page *dmw_virt_to_page(unsigned long kaddr);
-struct page *tlb_virt_to_page(unsigned long kaddr);
-
 /*
  * The following only work if pte_present() is true.
  * Undefined behaviour if not..
diff --git a/arch/loongarch/include/asm/qspinlock.h b/arch/loongarch/include/asm/qspinlock.h
deleted file mode 100644
index 34f43f8ad591..000000000000
--- a/arch/loongarch/include/asm/qspinlock.h
+++ /dev/null
@@ -1,18 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_QSPINLOCK_H
-#define _ASM_QSPINLOCK_H
-
-#include <asm-generic/qspinlock_types.h>
-
-#define queued_spin_unlock queued_spin_unlock
-
-static inline void queued_spin_unlock(struct qspinlock *lock)
-{
-	compiletime_assert_atomic_type(lock->locked);
-	c_sync();
-	WRITE_ONCE(lock->locked, 0);
-}
-
-#include <asm-generic/qspinlock.h>
-
-#endif /* _ASM_QSPINLOCK_H */
diff --git a/arch/loongarch/include/asm/stackframe.h b/arch/loongarch/include/asm/stackframe.h
index 4fb1e6408b98..45b507a7b06f 100644
--- a/arch/loongarch/include/asm/stackframe.h
+++ b/arch/loongarch/include/asm/stackframe.h
@@ -13,6 +13,7 @@
 #include <asm/asm-offsets.h>
 #include <asm/loongarch.h>
 #include <asm/thread_info.h>
+#include <asm/unwind_hints.h>
 
 /* Make the addition of cfi info a little easier. */
 	.macro cfi_rel_offset reg offset=0 docfi=0
@@ -162,6 +163,7 @@
 	li.w	t0, CSR_CRMD_WE
 	csrxchg	t0, t0, LOONGARCH_CSR_CRMD
 #endif
+	UNWIND_HINT_REGS
 	.endm
 
 	.macro	SAVE_ALL docfi=0
@@ -219,6 +221,7 @@
 
 	.macro	RESTORE_SP_AND_RET docfi=0
 	cfi_ld	sp, PT_R3, \docfi
+	UNWIND_HINT_FUNC
 	ertn
 	.endm
 
diff --git a/arch/loongarch/include/asm/thread_info.h b/arch/loongarch/include/asm/thread_info.h
index 8cb653d49a54..8bf0e6f51546 100644
--- a/arch/loongarch/include/asm/thread_info.h
+++ b/arch/loongarch/include/asm/thread_info.h
@@ -86,6 +86,7 @@ register unsigned long current_stack_pointer __asm__("$sp");
 #define TIF_LASX_CTX_LIVE	18	/* LASX context must be preserved */
 #define TIF_USEDLBT		19	/* LBT was used by this task this quantum (SMP) */
 #define TIF_LBT_CTX_LIVE	20	/* LBT context must be preserved */
+#define TIF_PATCH_PENDING	21	/* pending live patching update */
 
 #define _TIF_SIGPENDING		(1<<TIF_SIGPENDING)
 #define _TIF_NEED_RESCHED	(1<<TIF_NEED_RESCHED)
@@ -105,6 +106,7 @@ register unsigned long current_stack_pointer __asm__("$sp");
 #define _TIF_LASX_CTX_LIVE	(1<<TIF_LASX_CTX_LIVE)
 #define _TIF_USEDLBT		(1<<TIF_USEDLBT)
 #define _TIF_LBT_CTX_LIVE	(1<<TIF_LBT_CTX_LIVE)
+#define _TIF_PATCH_PENDING	(1<<TIF_PATCH_PENDING)
 
 #endif /* __KERNEL__ */
 #endif /* _ASM_THREAD_INFO_H */
diff --git a/arch/loongarch/include/asm/unwind.h b/arch/loongarch/include/asm/unwind.h
index b9dce87afd2e..40a6763c5aec 100644
--- a/arch/loongarch/include/asm/unwind.h
+++ b/arch/loongarch/include/asm/unwind.h
@@ -16,6 +16,7 @@
 enum unwinder_type {
 	UNWINDER_GUESS,
 	UNWINDER_PROLOGUE,
+	UNWINDER_ORC,
 };
 
 struct unwind_state {
@@ -24,7 +25,7 @@ struct unwind_state {
 	struct task_struct *task;
 	bool first, error, reset;
 	int graph_idx;
-	unsigned long sp, pc, ra;
+	unsigned long sp, fp, pc, ra;
 };
 
 bool default_next_frame(struct unwind_state *state);
@@ -61,14 +62,17 @@ static __always_inline void __unwind_start(struct unwind_state *state,
 		state->sp = regs->regs[3];
 		state->pc = regs->csr_era;
 		state->ra = regs->regs[1];
+		state->fp = regs->regs[22];
 	} else if (task && task != current) {
 		state->sp = thread_saved_fp(task);
 		state->pc = thread_saved_ra(task);
 		state->ra = 0;
+		state->fp = 0;
 	} else {
 		state->sp = (unsigned long)__builtin_frame_address(0);
 		state->pc = (unsigned long)__builtin_return_address(0);
 		state->ra = 0;
+		state->fp = 0;
 	}
 	state->task = task;
 	get_stack_info(state->sp, state->task, &state->stack_info);
@@ -77,6 +81,18 @@ static __always_inline void __unwind_start(struct unwind_state *state,
 
 static __always_inline unsigned long __unwind_get_return_address(struct unwind_state *state)
 {
-	return unwind_done(state) ? 0 : state->pc;
+	if (unwind_done(state))
+		return 0;
+
+	return __kernel_text_address(state->pc) ? state->pc : 0;
 }
+
+#ifdef CONFIG_UNWINDER_ORC
+void unwind_init(void);
+void unwind_module_init(struct module *mod, void *orc_ip, size_t orc_ip_size, void *orc, size_t orc_size);
+#else
+static inline void unwind_init(void) {}
+static inline void unwind_module_init(struct module *mod, void *orc_ip, size_t orc_ip_size, void *orc, size_t orc_size) {}
+#endif
+
 #endif /* _ASM_UNWIND_H */
diff --git a/arch/loongarch/include/asm/unwind_hints.h b/arch/loongarch/include/asm/unwind_hints.h
new file mode 100644
index 000000000000..a01086ad9dde
--- /dev/null
+++ b/arch/loongarch/include/asm/unwind_hints.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_LOONGARCH_UNWIND_HINTS_H
+#define _ASM_LOONGARCH_UNWIND_HINTS_H
+
+#include <linux/objtool.h>
+#include <asm/orc_types.h>
+
+#ifdef __ASSEMBLY__
+
+.macro UNWIND_HINT_UNDEFINED
+	UNWIND_HINT type=UNWIND_HINT_TYPE_UNDEFINED
+.endm
+
+.macro UNWIND_HINT_END_OF_STACK
+	UNWIND_HINT type=UNWIND_HINT_TYPE_END_OF_STACK
+.endm
+
+.macro UNWIND_HINT_REGS
+	UNWIND_HINT sp_reg=ORC_REG_SP type=UNWIND_HINT_TYPE_REGS
+.endm
+
+.macro UNWIND_HINT_FUNC
+	UNWIND_HINT sp_reg=ORC_REG_SP type=UNWIND_HINT_TYPE_CALL
+.endm
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* _ASM_LOONGARCH_UNWIND_HINTS_H */
diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile
index 3c808c680370..3a7620b66bc6 100644
--- a/arch/loongarch/kernel/Makefile
+++ b/arch/loongarch/kernel/Makefile
@@ -3,6 +3,8 @@
 # Makefile for the Linux/LoongArch kernel.
 #
 
+OBJECT_FILES_NON_STANDARD_head.o := y
+
 extra-y		:= vmlinux.lds
 
 obj-y		+= head.o cpu-probe.o cacheinfo.o env.o setup.o entry.o genex.o \
@@ -21,6 +23,7 @@ obj-$(CONFIG_ARCH_STRICT_ALIGN)	+= unaligned.o
 
 CFLAGS_module.o		+= $(call cc-option,-Wno-override-init,)
 CFLAGS_syscall.o	+= $(call cc-option,-Wno-override-init,)
+CFLAGS_traps.o		+= $(call cc-option,-Wno-override-init,)
 CFLAGS_perf_event.o	+= $(call cc-option,-Wno-override-init,)
 
 ifdef CONFIG_FUNCTION_TRACER
@@ -62,6 +65,7 @@ obj-$(CONFIG_CRASH_DUMP)	+= crash_dump.o
 
 obj-$(CONFIG_UNWINDER_GUESS)	+= unwind_guess.o
 obj-$(CONFIG_UNWINDER_PROLOGUE) += unwind_prologue.o
+obj-$(CONFIG_UNWINDER_ORC)	+= unwind_orc.o
 
 obj-$(CONFIG_PERF_EVENTS)	+= perf_event.o perf_regs.o
 obj-$(CONFIG_HAVE_HW_BREAKPOINT)	+= hw_breakpoint.o
diff --git a/arch/loongarch/kernel/entry.S b/arch/loongarch/kernel/entry.S
index 1ec8e4c4cc2b..48e7e34e355e 100644
--- a/arch/loongarch/kernel/entry.S
+++ b/arch/loongarch/kernel/entry.S
@@ -14,11 +14,13 @@
 #include <asm/regdef.h>
 #include <asm/stackframe.h>
 #include <asm/thread_info.h>
+#include <asm/unwind_hints.h>
 
 	.text
 	.cfi_sections	.debug_frame
 	.align	5
 SYM_CODE_START(handle_syscall)
+	UNWIND_HINT_UNDEFINED
 	csrrd		t0, PERCPU_BASE_KS
 	la.pcrel	t1, kernelsp
 	add.d		t1, t1, t0
@@ -57,6 +59,7 @@ SYM_CODE_START(handle_syscall)
 	cfi_st		fp, PT_R22
 
 	SAVE_STATIC
+	UNWIND_HINT_REGS
 
 #ifdef CONFIG_KGDB
 	li.w		t1, CSR_CRMD_WE
@@ -75,6 +78,7 @@ SYM_CODE_END(handle_syscall)
 _ASM_NOKPROBE(handle_syscall)
 
 SYM_CODE_START(ret_from_fork)
+	UNWIND_HINT_REGS
 	bl		schedule_tail		# a0 = struct task_struct *prev
 	move		a0, sp
 	bl 		syscall_exit_to_user_mode
@@ -84,6 +88,7 @@ SYM_CODE_START(ret_from_fork)
 SYM_CODE_END(ret_from_fork)
 
 SYM_CODE_START(ret_from_kernel_thread)
+	UNWIND_HINT_REGS
 	bl		schedule_tail		# a0 = struct task_struct *prev
 	move		a0, s1
 	jirl		ra, s0, 0
diff --git a/arch/loongarch/kernel/fpu.S b/arch/loongarch/kernel/fpu.S
index 4382e36ae3d4..69a85f2479fb 100644
--- a/arch/loongarch/kernel/fpu.S
+++ b/arch/loongarch/kernel/fpu.S
@@ -15,6 +15,7 @@
 #include <asm/fpregdef.h>
 #include <asm/loongarch.h>
 #include <asm/regdef.h>
+#include <asm/unwind_hints.h>
 
 #define FPU_REG_WIDTH		8
 #define LSX_REG_WIDTH		16
@@ -526,3 +527,9 @@ SYM_FUNC_END(_restore_lasx_context)
 .L_fpu_fault:
 	li.w	a0, -EFAULT				# failure
 	jr	ra
+
+#ifdef CONFIG_CPU_HAS_LBT
+STACK_FRAME_NON_STANDARD _restore_fp
+STACK_FRAME_NON_STANDARD _restore_lsx
+STACK_FRAME_NON_STANDARD _restore_lasx
+#endif
diff --git a/arch/loongarch/kernel/genex.S b/arch/loongarch/kernel/genex.S
index 2bb3aa2dcfcb..86d5d90ebefe 100644
--- a/arch/loongarch/kernel/genex.S
+++ b/arch/loongarch/kernel/genex.S
@@ -32,6 +32,7 @@ SYM_FUNC_START(__arch_cpu_idle)
 SYM_FUNC_END(__arch_cpu_idle)
 
 SYM_CODE_START(handle_vint)
+	UNWIND_HINT_UNDEFINED
 	BACKUP_T0T1
 	SAVE_ALL
 	la_abs	t1, __arch_cpu_idle
@@ -49,6 +50,7 @@ SYM_CODE_START(handle_vint)
 SYM_CODE_END(handle_vint)
 
 SYM_CODE_START(except_vec_cex)
+	UNWIND_HINT_UNDEFINED
 	b	cache_parity_error
 SYM_CODE_END(except_vec_cex)
 
@@ -67,6 +69,7 @@ SYM_CODE_END(except_vec_cex)
 	.macro	BUILD_HANDLER exception handler prep
 	.align	5
 	SYM_CODE_START(handle_\exception)
+	UNWIND_HINT_UNDEFINED
 	666:
 	BACKUP_T0T1
 	SAVE_ALL
@@ -77,7 +80,9 @@ SYM_CODE_END(except_vec_cex)
 	668:
 	RESTORE_ALL_AND_RET
 	SYM_CODE_END(handle_\exception)
+	.pushsection	".data", "aw", %progbits
 	SYM_DATA(unwind_hint_\exception, .word 668b - 666b)
+	.popsection
 	.endm
 
 	BUILD_HANDLER ade ade badv
@@ -94,6 +99,7 @@ SYM_CODE_END(except_vec_cex)
 	BUILD_HANDLER reserved reserved none	/* others */
 
 SYM_CODE_START(handle_sys)
+	UNWIND_HINT_UNDEFINED
 	la_abs	t0, handle_syscall
 	jr	t0
 SYM_CODE_END(handle_sys)
diff --git a/arch/loongarch/kernel/lbt.S b/arch/loongarch/kernel/lbt.S
index 9c75120a26d8..001f061d226a 100644
--- a/arch/loongarch/kernel/lbt.S
+++ b/arch/loongarch/kernel/lbt.S
@@ -11,6 +11,7 @@
 #include <asm/asm-offsets.h>
 #include <asm/errno.h>
 #include <asm/regdef.h>
+#include <asm/unwind_hints.h>
 
 #define SCR_REG_WIDTH 8
 
@@ -153,3 +154,5 @@ SYM_FUNC_END(_restore_ftop_context)
 .L_lbt_fault:
 	li.w		a0, -EFAULT		# failure
 	jr		ra
+
+STACK_FRAME_NON_STANDARD _restore_ftop_context
diff --git a/arch/loongarch/kernel/mcount_dyn.S b/arch/loongarch/kernel/mcount_dyn.S
index 482aa553aa2d..0c65cf09110c 100644
--- a/arch/loongarch/kernel/mcount_dyn.S
+++ b/arch/loongarch/kernel/mcount_dyn.S
@@ -73,6 +73,7 @@ SYM_FUNC_START(ftrace_stub)
 SYM_FUNC_END(ftrace_stub)
 
 SYM_CODE_START(ftrace_common)
+	UNWIND_HINT_UNDEFINED
 	PTR_ADDI	a0, ra, -8	/* arg0: ip */
 	move		a1, t0		/* arg1: parent_ip */
 	la.pcrel	t1, function_trace_op
@@ -113,12 +114,14 @@ ftrace_common_return:
 SYM_CODE_END(ftrace_common)
 
 SYM_CODE_START(ftrace_caller)
+	UNWIND_HINT_UNDEFINED
 	ftrace_regs_entry allregs=0
 	b		ftrace_common
 SYM_CODE_END(ftrace_caller)
 
 #ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
 SYM_CODE_START(ftrace_regs_caller)
+	UNWIND_HINT_UNDEFINED
 	ftrace_regs_entry allregs=1
 	b		ftrace_common
 SYM_CODE_END(ftrace_regs_caller)
@@ -126,6 +129,7 @@ SYM_CODE_END(ftrace_regs_caller)
 
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
 SYM_CODE_START(ftrace_graph_caller)
+	UNWIND_HINT_UNDEFINED
 	PTR_L		a0, sp, PT_ERA
 	PTR_ADDI	a0, a0, -8	/* arg0: self_addr */
 	PTR_ADDI	a1, sp, PT_R1	/* arg1: parent */
@@ -134,6 +138,7 @@ SYM_CODE_START(ftrace_graph_caller)
 SYM_CODE_END(ftrace_graph_caller)
 
 SYM_CODE_START(return_to_handler)
+	UNWIND_HINT_UNDEFINED
 	/* Save return value regs */
 	PTR_ADDI	sp, sp, -FGRET_REGS_SIZE
 	PTR_S		a0, sp, FGRET_REGS_A0
@@ -155,6 +160,7 @@ SYM_CODE_END(return_to_handler)
 
 #ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
 SYM_CODE_START(ftrace_stub_direct_tramp)
+	UNWIND_HINT_UNDEFINED
 	jr		t0
 SYM_CODE_END(ftrace_stub_direct_tramp)
 #endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */
diff --git a/arch/loongarch/kernel/module.c b/arch/loongarch/kernel/module.c
index b13b2858fe39..c7d0338d12c1 100644
--- a/arch/loongarch/kernel/module.c
+++ b/arch/loongarch/kernel/module.c
@@ -20,6 +20,7 @@
 #include <linux/kernel.h>
 #include <asm/alternative.h>
 #include <asm/inst.h>
+#include <asm/unwind.h>
 
 static int rela_stack_push(s64 stack_value, s64 *rela_stack, size_t *rela_stack_top)
 {
@@ -515,15 +516,28 @@ static void module_init_ftrace_plt(const Elf_Ehdr *hdr,
 int module_finalize(const Elf_Ehdr *hdr,
 		    const Elf_Shdr *sechdrs, struct module *mod)
 {
-	const Elf_Shdr *s, *se;
 	const char *secstrs = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
+	const Elf_Shdr *s, *alt = NULL, *orc = NULL, *orc_ip = NULL, *ftrace = NULL;
 
-	for (s = sechdrs, se = sechdrs + hdr->e_shnum; s < se; s++) {
+	for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) {
 		if (!strcmp(".altinstructions", secstrs + s->sh_name))
-			apply_alternatives((void *)s->sh_addr, (void *)s->sh_addr + s->sh_size);
+			alt = s;
+		if (!strcmp(".orc_unwind", secstrs + s->sh_name))
+			orc = s;
+		if (!strcmp(".orc_unwind_ip", secstrs + s->sh_name))
+			orc_ip = s;
 		if (!strcmp(".ftrace_trampoline", secstrs + s->sh_name))
-			module_init_ftrace_plt(hdr, s, mod);
+			ftrace = s;
 	}
 
+	if (alt)
+		apply_alternatives((void *)alt->sh_addr, (void *)alt->sh_addr + alt->sh_size);
+
+	if (orc && orc_ip)
+		unwind_module_init(mod, (void *)orc_ip->sh_addr, orc_ip->sh_size, (void *)orc->sh_addr, orc->sh_size);
+
+	if (ftrace)
+		module_init_ftrace_plt(hdr, ftrace, mod);
+
 	return 0;
 }
diff --git a/arch/loongarch/kernel/relocate_kernel.S b/arch/loongarch/kernel/relocate_kernel.S
index f49f6b053763..84e6de2fd973 100644
--- a/arch/loongarch/kernel/relocate_kernel.S
+++ b/arch/loongarch/kernel/relocate_kernel.S
@@ -15,6 +15,7 @@
 #include <asm/addrspace.h>
 
 SYM_CODE_START(relocate_new_kernel)
+	UNWIND_HINT_UNDEFINED
 	/*
 	 * a0: EFI boot flag for the new kernel
 	 * a1: Command line pointer for the new kernel
@@ -90,6 +91,7 @@ SYM_CODE_END(relocate_new_kernel)
  * then start at the entry point from LOONGARCH_IOCSR_MBUF0.
  */
 SYM_CODE_START(kexec_smp_wait)
+	UNWIND_HINT_UNDEFINED
 1:	li.w		t0, 0x100			/* wait for init loop */
 2:	addi.w		t0, t0, -1			/* limit mailbox access */
 	bnez		t0, 2b
@@ -106,6 +108,5 @@ SYM_CODE_END(kexec_smp_wait)
 
 relocate_new_kernel_end:
 
-SYM_DATA_START(relocate_new_kernel_size)
-	PTR		relocate_new_kernel_end - relocate_new_kernel
-SYM_DATA_END(relocate_new_kernel_size)
+	.section ".data"
+SYM_DATA(relocate_new_kernel_size, .long relocate_new_kernel_end - relocate_new_kernel)
diff --git a/arch/loongarch/kernel/rethook_trampoline.S b/arch/loongarch/kernel/rethook_trampoline.S
index bd5772c96338..d4ceb2fa2a5c 100644
--- a/arch/loongarch/kernel/rethook_trampoline.S
+++ b/arch/loongarch/kernel/rethook_trampoline.S
@@ -76,6 +76,7 @@
 	.endm
 
 SYM_CODE_START(arch_rethook_trampoline)
+	UNWIND_HINT_UNDEFINED
 	addi.d	sp, sp, -PT_SIZE
 	save_all_base_regs
 
diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c
index 2b72eb326b44..60e0fe97f61a 100644
--- a/arch/loongarch/kernel/setup.c
+++ b/arch/loongarch/kernel/setup.c
@@ -47,6 +47,7 @@
 #include <asm/sections.h>
 #include <asm/setup.h>
 #include <asm/time.h>
+#include <asm/unwind.h>
 
 #define SMBIOS_BIOSSIZE_OFFSET		0x09
 #define SMBIOS_BIOSEXTERN_OFFSET	0x13
@@ -587,6 +588,7 @@ static void __init prefill_possible_map(void)
 void __init setup_arch(char **cmdline_p)
 {
 	cpu_probe();
+	unwind_init();
 
 	init_environ();
 	efi_init();
diff --git a/arch/loongarch/kernel/stacktrace.c b/arch/loongarch/kernel/stacktrace.c
index f623feb2129f..9a038d1070d7 100644
--- a/arch/loongarch/kernel/stacktrace.c
+++ b/arch/loongarch/kernel/stacktrace.c
@@ -29,6 +29,7 @@ void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie,
 			regs->csr_era = thread_saved_ra(task);
 		}
 		regs->regs[1] = 0;
+		regs->regs[22] = 0;
 	}
 
 	for (unwind_start(&state, task, regs);
@@ -39,6 +40,46 @@ void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie,
 	}
 }
 
+int arch_stack_walk_reliable(stack_trace_consume_fn consume_entry,
+			     void *cookie, struct task_struct *task)
+{
+	unsigned long addr;
+	struct pt_regs dummyregs;
+	struct pt_regs *regs = &dummyregs;
+	struct unwind_state state;
+
+	if (task == current) {
+		regs->regs[3] = (unsigned long)__builtin_frame_address(0);
+		regs->csr_era = (unsigned long)__builtin_return_address(0);
+	} else {
+		regs->regs[3] = thread_saved_fp(task);
+		regs->csr_era = thread_saved_ra(task);
+	}
+	regs->regs[1] = 0;
+	regs->regs[22] = 0;
+
+	for (unwind_start(&state, task, regs);
+	     !unwind_done(&state) && !unwind_error(&state); unwind_next_frame(&state)) {
+		addr = unwind_get_return_address(&state);
+
+		/*
+		 * A NULL or invalid return address probably means there's some
+		 * generated code which __kernel_text_address() doesn't know about.
+		 */
+		if (!addr)
+			return -EINVAL;
+
+		if (!consume_entry(cookie, addr))
+			return -EINVAL;
+	}
+
+	/* Check for stack corruption */
+	if (unwind_error(&state))
+		return -EINVAL;
+
+	return 0;
+}
+
 static int
 copy_stack_frame(unsigned long fp, struct stack_frame *frame)
 {
diff --git a/arch/loongarch/kernel/traps.c b/arch/loongarch/kernel/traps.c
index aebfc3733a76..f9f4eb00c92e 100644
--- a/arch/loongarch/kernel/traps.c
+++ b/arch/loongarch/kernel/traps.c
@@ -53,6 +53,32 @@
 
 #include "access-helper.h"
 
+void *exception_table[EXCCODE_INT_START] = {
+	[0 ... EXCCODE_INT_START - 1] = handle_reserved,
+
+	[EXCCODE_TLBI]		= handle_tlb_load,
+	[EXCCODE_TLBL]		= handle_tlb_load,
+	[EXCCODE_TLBS]		= handle_tlb_store,
+	[EXCCODE_TLBM]		= handle_tlb_modify,
+	[EXCCODE_TLBNR]		= handle_tlb_protect,
+	[EXCCODE_TLBNX]		= handle_tlb_protect,
+	[EXCCODE_TLBPE]		= handle_tlb_protect,
+	[EXCCODE_ADE]		= handle_ade,
+	[EXCCODE_ALE]		= handle_ale,
+	[EXCCODE_BCE]		= handle_bce,
+	[EXCCODE_SYS]		= handle_sys,
+	[EXCCODE_BP]		= handle_bp,
+	[EXCCODE_INE]		= handle_ri,
+	[EXCCODE_IPE]		= handle_ri,
+	[EXCCODE_FPDIS]		= handle_fpu,
+	[EXCCODE_LSXDIS]	= handle_lsx,
+	[EXCCODE_LASXDIS]	= handle_lasx,
+	[EXCCODE_FPE]		= handle_fpe,
+	[EXCCODE_WATCH]		= handle_watch,
+	[EXCCODE_BTDIS]		= handle_lbt,
+};
+EXPORT_SYMBOL_GPL(exception_table);
+
 static void show_backtrace(struct task_struct *task, const struct pt_regs *regs,
 			   const char *loglvl, bool user)
 {
@@ -1150,19 +1176,9 @@ void __init trap_init(void)
 	for (i = EXCCODE_INT_START; i <= EXCCODE_INT_END; i++)
 		set_handler(i * VECSIZE, handle_vint, VECSIZE);
 
-	set_handler(EXCCODE_ADE * VECSIZE, handle_ade, VECSIZE);
-	set_handler(EXCCODE_ALE * VECSIZE, handle_ale, VECSIZE);
-	set_handler(EXCCODE_BCE * VECSIZE, handle_bce, VECSIZE);
-	set_handler(EXCCODE_SYS * VECSIZE, handle_sys, VECSIZE);
-	set_handler(EXCCODE_BP * VECSIZE, handle_bp, VECSIZE);
-	set_handler(EXCCODE_INE * VECSIZE, handle_ri, VECSIZE);
-	set_handler(EXCCODE_IPE * VECSIZE, handle_ri, VECSIZE);
-	set_handler(EXCCODE_FPDIS * VECSIZE, handle_fpu, VECSIZE);
-	set_handler(EXCCODE_LSXDIS * VECSIZE, handle_lsx, VECSIZE);
-	set_handler(EXCCODE_LASXDIS * VECSIZE, handle_lasx, VECSIZE);
-	set_handler(EXCCODE_FPE * VECSIZE, handle_fpe, VECSIZE);
-	set_handler(EXCCODE_BTDIS * VECSIZE, handle_lbt, VECSIZE);
-	set_handler(EXCCODE_WATCH * VECSIZE, handle_watch, VECSIZE);
+	/* Set exception vector handler */
+	for (i = EXCCODE_ADE; i <= EXCCODE_BTDIS; i++)
+		set_handler(i * VECSIZE, exception_table[i], VECSIZE);
 
 	cache_error_setup();
 
diff --git a/arch/loongarch/kernel/unwind_orc.c b/arch/loongarch/kernel/unwind_orc.c
new file mode 100644
index 000000000000..b25722876331
--- /dev/null
+++ b/arch/loongarch/kernel/unwind_orc.c
@@ -0,0 +1,528 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <linux/objtool.h>
+#include <linux/module.h>
+#include <linux/sort.h>
+#include <asm/exception.h>
+#include <asm/orc_header.h>
+#include <asm/orc_lookup.h>
+#include <asm/orc_types.h>
+#include <asm/ptrace.h>
+#include <asm/setup.h>
+#include <asm/stacktrace.h>
+#include <asm/tlb.h>
+#include <asm/unwind.h>
+
+ORC_HEADER;
+
+#define orc_warn(fmt, ...) \
+	printk_deferred_once(KERN_WARNING "WARNING: " fmt, ##__VA_ARGS__)
+
+extern int __start_orc_unwind_ip[];
+extern int __stop_orc_unwind_ip[];
+extern struct orc_entry __start_orc_unwind[];
+extern struct orc_entry __stop_orc_unwind[];
+
+static bool orc_init __ro_after_init;
+static unsigned int lookup_num_blocks __ro_after_init;
+
+/* Fake frame pointer entry -- used as a fallback for generated code */
+static struct orc_entry orc_fp_entry = {
+	.sp_reg		= ORC_REG_FP,
+	.sp_offset	= 16,
+	.fp_reg		= ORC_REG_PREV_SP,
+	.fp_offset	= -16,
+	.ra_reg		= ORC_REG_PREV_SP,
+	.ra_offset	= -8,
+	.type		= ORC_TYPE_CALL
+};
+
+/*
+ * If we crash with IP==0, the last successfully executed instruction
+ * was probably an indirect function call with a NULL function pointer,
+ * and we don't have unwind information for NULL.
+ * This hardcoded ORC entry for IP==0 allows us to unwind from a NULL function
+ * pointer into its parent and then continue normally from there.
+ */
+static struct orc_entry orc_null_entry = {
+	.sp_reg		= ORC_REG_SP,
+	.sp_offset	= sizeof(long),
+	.fp_reg		= ORC_REG_UNDEFINED,
+	.type		= ORC_TYPE_CALL
+};
+
+static inline unsigned long orc_ip(const int *ip)
+{
+	return (unsigned long)ip + *ip;
+}
+
+static struct orc_entry *__orc_find(int *ip_table, struct orc_entry *u_table,
+				    unsigned int num_entries, unsigned long ip)
+{
+	int *first = ip_table;
+	int *mid = first, *found = first;
+	int *last = ip_table + num_entries - 1;
+
+	if (!num_entries)
+		return NULL;
+
+	/*
+	 * Do a binary range search to find the rightmost duplicate of a given
+	 * starting address.  Some entries are section terminators which are
+	 * "weak" entries for ensuring there are no gaps.  They should be
+	 * ignored when they conflict with a real entry.
+	 */
+	while (first <= last) {
+		mid = first + ((last - first) / 2);
+
+		if (orc_ip(mid) <= ip) {
+			found = mid;
+			first = mid + 1;
+		} else
+			last = mid - 1;
+	}
+
+	return u_table + (found - ip_table);
+}
+
+#ifdef CONFIG_MODULES
+static struct orc_entry *orc_module_find(unsigned long ip)
+{
+	struct module *mod;
+
+	mod = __module_address(ip);
+	if (!mod || !mod->arch.orc_unwind || !mod->arch.orc_unwind_ip)
+		return NULL;
+
+	return __orc_find(mod->arch.orc_unwind_ip, mod->arch.orc_unwind, mod->arch.num_orcs, ip);
+}
+#else
+static struct orc_entry *orc_module_find(unsigned long ip)
+{
+	return NULL;
+}
+#endif
+
+#ifdef CONFIG_DYNAMIC_FTRACE
+static struct orc_entry *orc_find(unsigned long ip);
+
+/*
+ * Ftrace dynamic trampolines do not have orc entries of their own.
+ * But they are copies of the ftrace entries that are static and
+ * defined in ftrace_*.S, which do have orc entries.
+ *
+ * If the unwinder comes across a ftrace trampoline, then find the
+ * ftrace function that was used to create it, and use that ftrace
+ * function's orc entry, as the placement of the return code in
+ * the stack will be identical.
+ */
+static struct orc_entry *orc_ftrace_find(unsigned long ip)
+{
+	struct ftrace_ops *ops;
+	unsigned long tramp_addr, offset;
+
+	ops = ftrace_ops_trampoline(ip);
+	if (!ops)
+		return NULL;
+
+	/* Set tramp_addr to the start of the code copied by the trampoline */
+	if (ops->flags & FTRACE_OPS_FL_SAVE_REGS)
+		tramp_addr = (unsigned long)ftrace_regs_caller;
+	else
+		tramp_addr = (unsigned long)ftrace_caller;
+
+	/* Now place tramp_addr to the location within the trampoline ip is at */
+	offset = ip - ops->trampoline;
+	tramp_addr += offset;
+
+	/* Prevent unlikely recursion */
+	if (ip == tramp_addr)
+		return NULL;
+
+	return orc_find(tramp_addr);
+}
+#else
+static struct orc_entry *orc_ftrace_find(unsigned long ip)
+{
+	return NULL;
+}
+#endif
+
+static struct orc_entry *orc_find(unsigned long ip)
+{
+	static struct orc_entry *orc;
+
+	if (ip == 0)
+		return &orc_null_entry;
+
+	/* For non-init vmlinux addresses, use the fast lookup table: */
+	if (ip >= LOOKUP_START_IP && ip < LOOKUP_STOP_IP) {
+		unsigned int idx, start, stop;
+
+		idx = (ip - LOOKUP_START_IP) / LOOKUP_BLOCK_SIZE;
+
+		if (unlikely((idx >= lookup_num_blocks-1))) {
+			orc_warn("WARNING: bad lookup idx: idx=%u num=%u ip=%pB\n",
+				 idx, lookup_num_blocks, (void *)ip);
+			return NULL;
+		}
+
+		start = orc_lookup[idx];
+		stop = orc_lookup[idx + 1] + 1;
+
+		if (unlikely((__start_orc_unwind + start >= __stop_orc_unwind) ||
+			     (__start_orc_unwind + stop > __stop_orc_unwind))) {
+			orc_warn("WARNING: bad lookup value: idx=%u num=%u start=%u stop=%u ip=%pB\n",
+				 idx, lookup_num_blocks, start, stop, (void *)ip);
+			return NULL;
+		}
+
+		return __orc_find(__start_orc_unwind_ip + start,
+				  __start_orc_unwind + start, stop - start, ip);
+	}
+
+	/* vmlinux .init slow lookup: */
+	if (is_kernel_inittext(ip))
+		return __orc_find(__start_orc_unwind_ip, __start_orc_unwind,
+				  __stop_orc_unwind_ip - __start_orc_unwind_ip, ip);
+
+	/* Module lookup: */
+	orc = orc_module_find(ip);
+	if (orc)
+		return orc;
+
+	return orc_ftrace_find(ip);
+}
+
+#ifdef CONFIG_MODULES
+
+static DEFINE_MUTEX(sort_mutex);
+static int *cur_orc_ip_table = __start_orc_unwind_ip;
+static struct orc_entry *cur_orc_table = __start_orc_unwind;
+
+static void orc_sort_swap(void *_a, void *_b, int size)
+{
+	int delta = _b - _a;
+	int *a = _a, *b = _b, tmp;
+	struct orc_entry *orc_a, *orc_b;
+
+	/* Swap the .orc_unwind_ip entries: */
+	tmp = *a;
+	*a = *b + delta;
+	*b = tmp - delta;
+
+	/* Swap the corresponding .orc_unwind entries: */
+	orc_a = cur_orc_table + (a - cur_orc_ip_table);
+	orc_b = cur_orc_table + (b - cur_orc_ip_table);
+	swap(*orc_a, *orc_b);
+}
+
+static int orc_sort_cmp(const void *_a, const void *_b)
+{
+	const int *a = _a, *b = _b;
+	unsigned long a_val = orc_ip(a);
+	unsigned long b_val = orc_ip(b);
+	struct orc_entry *orc_a;
+
+	if (a_val > b_val)
+		return 1;
+	if (a_val < b_val)
+		return -1;
+
+	/*
+	 * The "weak" section terminator entries need to always be first
+	 * to ensure the lookup code skips them in favor of real entries.
+	 * These terminator entries exist to handle any gaps created by
+	 * whitelisted .o files which didn't get objtool generation.
+	 */
+	orc_a = cur_orc_table + (a - cur_orc_ip_table);
+
+	return orc_a->type == ORC_TYPE_UNDEFINED ? -1 : 1;
+}
+
+void unwind_module_init(struct module *mod, void *_orc_ip, size_t orc_ip_size,
+			void *_orc, size_t orc_size)
+{
+	int *orc_ip = _orc_ip;
+	struct orc_entry *orc = _orc;
+	unsigned int num_entries = orc_ip_size / sizeof(int);
+
+	WARN_ON_ONCE(orc_ip_size % sizeof(int) != 0 ||
+		     orc_size % sizeof(*orc) != 0 ||
+		     num_entries != orc_size / sizeof(*orc));
+
+	/*
+	 * The 'cur_orc_*' globals allow the orc_sort_swap() callback to
+	 * associate an .orc_unwind_ip table entry with its corresponding
+	 * .orc_unwind entry so they can both be swapped.
+	 */
+	mutex_lock(&sort_mutex);
+	cur_orc_ip_table = orc_ip;
+	cur_orc_table = orc;
+	sort(orc_ip, num_entries, sizeof(int), orc_sort_cmp, orc_sort_swap);
+	mutex_unlock(&sort_mutex);
+
+	mod->arch.orc_unwind_ip = orc_ip;
+	mod->arch.orc_unwind = orc;
+	mod->arch.num_orcs = num_entries;
+}
+#endif
+
+void __init unwind_init(void)
+{
+	int i;
+	size_t orc_size = (void *)__stop_orc_unwind - (void *)__start_orc_unwind;
+	size_t orc_ip_size = (void *)__stop_orc_unwind_ip - (void *)__start_orc_unwind_ip;
+	size_t num_entries = orc_ip_size / sizeof(int);
+	struct orc_entry *orc;
+
+	if (!num_entries || orc_ip_size % sizeof(int) != 0 ||
+	    orc_size % sizeof(struct orc_entry) != 0 ||
+	    num_entries != orc_size / sizeof(struct orc_entry)) {
+		orc_warn("WARNING: Bad or missing .orc_unwind table.  Disabling unwinder.\n");
+		return;
+	}
+
+	/*
+	 * Note, the orc_unwind and orc_unwind_ip tables were already
+	 * sorted at build time via the 'sorttable' tool.
+	 * It's ready for binary search straight away, no need to sort it.
+	 */
+
+	/* Initialize the fast lookup table: */
+	lookup_num_blocks = orc_lookup_end - orc_lookup;
+	for (i = 0; i < lookup_num_blocks-1; i++) {
+		orc = __orc_find(__start_orc_unwind_ip, __start_orc_unwind,
+				 num_entries, LOOKUP_START_IP + (LOOKUP_BLOCK_SIZE * i));
+		if (!orc) {
+			orc_warn("WARNING: Corrupt .orc_unwind table.  Disabling unwinder.\n");
+			return;
+		}
+
+		orc_lookup[i] = orc - __start_orc_unwind;
+	}
+
+	/* Initialize the ending block: */
+	orc = __orc_find(__start_orc_unwind_ip, __start_orc_unwind, num_entries, LOOKUP_STOP_IP);
+	if (!orc) {
+		orc_warn("WARNING: Corrupt .orc_unwind table.  Disabling unwinder.\n");
+		return;
+	}
+	orc_lookup[lookup_num_blocks-1] = orc - __start_orc_unwind;
+
+	orc_init = true;
+}
+
+static inline bool on_stack(struct stack_info *info, unsigned long addr, size_t len)
+{
+	unsigned long begin = info->begin;
+	unsigned long end   = info->end;
+
+	return (info->type != STACK_TYPE_UNKNOWN &&
+		addr >= begin && addr < end && addr + len > begin && addr + len <= end);
+}
+
+static bool stack_access_ok(struct unwind_state *state, unsigned long addr, size_t len)
+{
+	struct stack_info *info = &state->stack_info;
+
+	if (on_stack(info, addr, len))
+		return true;
+
+	return !get_stack_info(addr, state->task, info) && on_stack(info, addr, len);
+}
+
+unsigned long unwind_get_return_address(struct unwind_state *state)
+{
+	return __unwind_get_return_address(state);
+}
+EXPORT_SYMBOL_GPL(unwind_get_return_address);
+
+void unwind_start(struct unwind_state *state, struct task_struct *task,
+		    struct pt_regs *regs)
+{
+	__unwind_start(state, task, regs);
+	state->type = UNWINDER_ORC;
+	if (!unwind_done(state) && !__kernel_text_address(state->pc))
+		unwind_next_frame(state);
+}
+EXPORT_SYMBOL_GPL(unwind_start);
+
+static bool is_entry_func(unsigned long addr)
+{
+	extern u32 kernel_entry;
+	extern u32 kernel_entry_end;
+
+	return addr >= (unsigned long)&kernel_entry && addr < (unsigned long)&kernel_entry_end;
+}
+
+static inline unsigned long bt_address(unsigned long ra)
+{
+	extern unsigned long eentry;
+
+	if (__kernel_text_address(ra))
+		return ra;
+
+	if (__module_text_address(ra))
+		return ra;
+
+	if (ra >= eentry && ra < eentry +  EXCCODE_INT_END * VECSIZE) {
+		unsigned long func;
+		unsigned long type = (ra - eentry) / VECSIZE;
+		unsigned long offset = (ra - eentry) % VECSIZE;
+
+		switch (type) {
+		case 0 ... EXCCODE_INT_START - 1:
+			func = (unsigned long)exception_table[type];
+			break;
+		case EXCCODE_INT_START ... EXCCODE_INT_END:
+			func = (unsigned long)handle_vint;
+			break;
+		default:
+			func = (unsigned long)handle_reserved;
+			break;
+		}
+
+		return func + offset;
+	}
+
+	return ra;
+}
+
+bool unwind_next_frame(struct unwind_state *state)
+{
+	unsigned long *p, pc;
+	struct pt_regs *regs;
+	struct orc_entry *orc;
+	struct stack_info *info = &state->stack_info;
+
+	if (unwind_done(state))
+		return false;
+
+	/* Don't let modules unload while we're reading their ORC data. */
+	preempt_disable();
+
+	if (is_entry_func(state->pc))
+		goto end;
+
+	orc = orc_find(state->pc);
+	if (!orc) {
+		/*
+		 * As a fallback, try to assume this code uses a frame pointer.
+		 * This is useful for generated code, like BPF, which ORC
+		 * doesn't know about.  This is just a guess, so the rest of
+		 * the unwind is no longer considered reliable.
+		 */
+		orc = &orc_fp_entry;
+		state->error = true;
+	} else {
+		if (orc->type == ORC_TYPE_UNDEFINED)
+			goto err;
+
+		if (orc->type == ORC_TYPE_END_OF_STACK)
+			goto end;
+	}
+
+	switch (orc->sp_reg) {
+	case ORC_REG_SP:
+		if (info->type == STACK_TYPE_IRQ && state->sp == info->end)
+			orc->type = ORC_TYPE_REGS;
+		else
+			state->sp = state->sp + orc->sp_offset;
+		break;
+	case ORC_REG_FP:
+		state->sp = state->fp;
+		break;
+	default:
+		orc_warn("unknown SP base reg %d at %pB\n", orc->sp_reg, (void *)state->pc);
+		goto err;
+	}
+
+	switch (orc->fp_reg) {
+	case ORC_REG_PREV_SP:
+		p = (unsigned long *)(state->sp + orc->fp_offset);
+		if (!stack_access_ok(state, (unsigned long)p, sizeof(unsigned long)))
+			goto err;
+
+		state->fp = *p;
+		break;
+	case ORC_REG_UNDEFINED:
+		/* Nothing. */
+		break;
+	default:
+		orc_warn("unknown FP base reg %d at %pB\n", orc->fp_reg, (void *)state->pc);
+		goto err;
+	}
+
+	switch (orc->type) {
+	case ORC_TYPE_CALL:
+		if (orc->ra_reg == ORC_REG_PREV_SP) {
+			p = (unsigned long *)(state->sp + orc->ra_offset);
+			if (!stack_access_ok(state, (unsigned long)p, sizeof(unsigned long)))
+				goto err;
+
+			pc = unwind_graph_addr(state, *p, state->sp);
+			pc -= LOONGARCH_INSN_SIZE;
+		} else if (orc->ra_reg == ORC_REG_UNDEFINED) {
+			if (!state->ra || state->ra == state->pc)
+				goto err;
+
+			pc = unwind_graph_addr(state, state->ra, state->sp);
+			pc -=  LOONGARCH_INSN_SIZE;
+			state->ra = 0;
+		} else {
+			orc_warn("unknown ra base reg %d at %pB\n", orc->ra_reg, (void *)state->pc);
+			goto err;
+		}
+		break;
+	case ORC_TYPE_REGS:
+		if (info->type == STACK_TYPE_IRQ && state->sp == info->end)
+			regs = (struct pt_regs *)info->next_sp;
+		else
+			regs = (struct pt_regs *)state->sp;
+
+		if (!stack_access_ok(state, (unsigned long)regs, sizeof(*regs)))
+			goto err;
+
+		if ((info->end == (unsigned long)regs + sizeof(*regs)) &&
+		    !regs->regs[3] && !regs->regs[1])
+			goto end;
+
+		if (user_mode(regs))
+			goto end;
+
+		pc = regs->csr_era;
+		if (!__kernel_text_address(pc))
+			goto err;
+
+		state->sp = regs->regs[3];
+		state->ra = regs->regs[1];
+		state->fp = regs->regs[22];
+		get_stack_info(state->sp, state->task, info);
+
+		break;
+	default:
+		orc_warn("unknown .orc_unwind entry type %d at %pB\n", orc->type, (void *)state->pc);
+		goto err;
+	}
+
+	state->pc = bt_address(pc);
+	if (!state->pc) {
+		pr_err("cannot find unwind pc at %pK\n", (void *)pc);
+		goto err;
+	}
+
+	if (!__kernel_text_address(state->pc))
+		goto err;
+
+	preempt_enable();
+	return true;
+
+err:
+	state->error = true;
+
+end:
+	preempt_enable();
+	state->stack_info.type = STACK_TYPE_UNKNOWN;
+	return false;
+}
+EXPORT_SYMBOL_GPL(unwind_next_frame);
diff --git a/arch/loongarch/kernel/vmlinux.lds.S b/arch/loongarch/kernel/vmlinux.lds.S
index a5d0cd2035da..e8e97dbf9ca4 100644
--- a/arch/loongarch/kernel/vmlinux.lds.S
+++ b/arch/loongarch/kernel/vmlinux.lds.S
@@ -2,6 +2,7 @@
 #include <linux/sizes.h>
 #include <asm/asm-offsets.h>
 #include <asm/thread_info.h>
+#include <asm/orc_lookup.h>
 
 #define PAGE_SIZE _PAGE_SIZE
 #define RO_EXCEPTION_TABLE_ALIGN	4
@@ -122,6 +123,8 @@ SECTIONS
 	}
 #endif
 
+	ORC_UNWIND_TABLE
+
 	.sdata : {
 		*(.sdata)
 	}
diff --git a/arch/loongarch/kvm/switch.S b/arch/loongarch/kvm/switch.S
index 3634431db18a..80e988985a6a 100644
--- a/arch/loongarch/kvm/switch.S
+++ b/arch/loongarch/kvm/switch.S
@@ -8,7 +8,7 @@
 #include <asm/asmmacro.h>
 #include <asm/loongarch.h>
 #include <asm/regdef.h>
-#include <asm/stackframe.h>
+#include <asm/unwind_hints.h>
 
 #define HGPR_OFFSET(x)		(PT_R0 + 8*x)
 #define GGPR_OFFSET(x)		(KVM_ARCH_GGPR + 8*x)
@@ -112,6 +112,7 @@
 	.text
 	.cfi_sections	.debug_frame
 SYM_CODE_START(kvm_exc_entry)
+	UNWIND_HINT_UNDEFINED
 	csrwr	a2,   KVM_TEMP_KS
 	csrrd	a2,   KVM_VCPU_KS
 	addi.d	a2,   a2, KVM_VCPU_ARCH
@@ -273,3 +274,9 @@ SYM_FUNC_END(kvm_restore_lasx)
 	.section ".rodata"
 SYM_DATA(kvm_exception_size, .quad kvm_exc_entry_end - kvm_exc_entry)
 SYM_DATA(kvm_enter_guest_size, .quad kvm_enter_guest_end - kvm_enter_guest)
+
+#ifdef CONFIG_CPU_HAS_LBT
+STACK_FRAME_NON_STANDARD kvm_restore_fpu
+STACK_FRAME_NON_STANDARD kvm_restore_lsx
+STACK_FRAME_NON_STANDARD kvm_restore_lasx
+#endif
diff --git a/arch/loongarch/lib/clear_user.S b/arch/loongarch/lib/clear_user.S
index be741544e62b..7a0db643b286 100644
--- a/arch/loongarch/lib/clear_user.S
+++ b/arch/loongarch/lib/clear_user.S
@@ -10,6 +10,7 @@
 #include <asm/asm-extable.h>
 #include <asm/cpu.h>
 #include <asm/regdef.h>
+#include <asm/unwind_hints.h>
 
 SYM_FUNC_START(__clear_user)
 	/*
@@ -204,3 +205,5 @@ SYM_FUNC_START(__clear_user_fast)
 	_asm_extable 28b, .Lsmall_fixup
 	_asm_extable 29b, .Lexit
 SYM_FUNC_END(__clear_user_fast)
+
+STACK_FRAME_NON_STANDARD __clear_user_fast
diff --git a/arch/loongarch/lib/copy_user.S b/arch/loongarch/lib/copy_user.S
index feec3d362803..095ce9181c6c 100644
--- a/arch/loongarch/lib/copy_user.S
+++ b/arch/loongarch/lib/copy_user.S
@@ -10,6 +10,7 @@
 #include <asm/asm-extable.h>
 #include <asm/cpu.h>
 #include <asm/regdef.h>
+#include <asm/unwind_hints.h>
 
 SYM_FUNC_START(__copy_user)
 	/*
@@ -278,3 +279,5 @@ SYM_FUNC_START(__copy_user_fast)
 	_asm_extable 58b, .Lexit
 	_asm_extable 59b, .Lexit
 SYM_FUNC_END(__copy_user_fast)
+
+STACK_FRAME_NON_STANDARD __copy_user_fast
diff --git a/arch/loongarch/lib/memcpy.S b/arch/loongarch/lib/memcpy.S
index fa1148878d2b..9517a2f961af 100644
--- a/arch/loongarch/lib/memcpy.S
+++ b/arch/loongarch/lib/memcpy.S
@@ -9,6 +9,7 @@
 #include <asm/asmmacro.h>
 #include <asm/cpu.h>
 #include <asm/regdef.h>
+#include <asm/unwind_hints.h>
 
 .section .noinstr.text, "ax"
 
@@ -197,3 +198,5 @@ SYM_FUNC_START(__memcpy_fast)
 	jr	ra
 SYM_FUNC_END(__memcpy_fast)
 _ASM_NOKPROBE(__memcpy_fast)
+
+STACK_FRAME_NON_STANDARD __memcpy_small
diff --git a/arch/loongarch/lib/memset.S b/arch/loongarch/lib/memset.S
index 06d3ca54cbfe..df3846620553 100644
--- a/arch/loongarch/lib/memset.S
+++ b/arch/loongarch/lib/memset.S
@@ -9,6 +9,7 @@
 #include <asm/asmmacro.h>
 #include <asm/cpu.h>
 #include <asm/regdef.h>
+#include <asm/unwind_hints.h>
 
 .macro fill_to_64 r0
 	bstrins.d \r0, \r0, 15, 8
@@ -166,3 +167,5 @@ SYM_FUNC_START(__memset_fast)
 	jr	ra
 SYM_FUNC_END(__memset_fast)
 _ASM_NOKPROBE(__memset_fast)
+
+STACK_FRAME_NON_STANDARD __memset_fast
diff --git a/arch/loongarch/mm/tlb.c b/arch/loongarch/mm/tlb.c
index 0b95d32b30c9..5ac9beb5f093 100644
--- a/arch/loongarch/mm/tlb.c
+++ b/arch/loongarch/mm/tlb.c
@@ -9,8 +9,9 @@
 #include <linux/hugetlb.h>
 #include <linux/export.h>
 
-#include <asm/cpu.h>
 #include <asm/bootinfo.h>
+#include <asm/cpu.h>
+#include <asm/exception.h>
 #include <asm/mmu_context.h>
 #include <asm/pgtable.h>
 #include <asm/tlb.h>
@@ -266,24 +267,20 @@ static void setup_tlb_handler(int cpu)
 	setup_ptwalker();
 	local_flush_tlb_all();
 
+	if (cpu_has_ptw) {
+		exception_table[EXCCODE_TLBI] = handle_tlb_load_ptw;
+		exception_table[EXCCODE_TLBL] = handle_tlb_load_ptw;
+		exception_table[EXCCODE_TLBS] = handle_tlb_store_ptw;
+		exception_table[EXCCODE_TLBM] = handle_tlb_modify_ptw;
+	}
+
 	/* The tlb handlers are generated only once */
 	if (cpu == 0) {
 		memcpy((void *)tlbrentry, handle_tlb_refill, 0x80);
 		local_flush_icache_range(tlbrentry, tlbrentry + 0x80);
-		if (!cpu_has_ptw) {
-			set_handler(EXCCODE_TLBI * VECSIZE, handle_tlb_load, VECSIZE);
-			set_handler(EXCCODE_TLBL * VECSIZE, handle_tlb_load, VECSIZE);
-			set_handler(EXCCODE_TLBS * VECSIZE, handle_tlb_store, VECSIZE);
-			set_handler(EXCCODE_TLBM * VECSIZE, handle_tlb_modify, VECSIZE);
-		} else {
-			set_handler(EXCCODE_TLBI * VECSIZE, handle_tlb_load_ptw, VECSIZE);
-			set_handler(EXCCODE_TLBL * VECSIZE, handle_tlb_load_ptw, VECSIZE);
-			set_handler(EXCCODE_TLBS * VECSIZE, handle_tlb_store_ptw, VECSIZE);
-			set_handler(EXCCODE_TLBM * VECSIZE, handle_tlb_modify_ptw, VECSIZE);
-		}
-		set_handler(EXCCODE_TLBNR * VECSIZE, handle_tlb_protect, VECSIZE);
-		set_handler(EXCCODE_TLBNX * VECSIZE, handle_tlb_protect, VECSIZE);
-		set_handler(EXCCODE_TLBPE * VECSIZE, handle_tlb_protect, VECSIZE);
+
+		for (int i = EXCCODE_TLBL; i <= EXCCODE_TLBPE; i++)
+			set_handler(i * VECSIZE, exception_table[i], VECSIZE);
 	} else {
 		int vec_sz __maybe_unused;
 		void *addr __maybe_unused;
diff --git a/arch/loongarch/mm/tlbex.S b/arch/loongarch/mm/tlbex.S
index d5d682f3d29f..a44387b838af 100644
--- a/arch/loongarch/mm/tlbex.S
+++ b/arch/loongarch/mm/tlbex.S
@@ -18,6 +18,7 @@
 
 	.macro tlb_do_page_fault, write
 	SYM_CODE_START(tlb_do_page_fault_\write)
+	UNWIND_HINT_UNDEFINED
 	SAVE_ALL
 	csrrd		a2, LOONGARCH_CSR_BADV
 	move		a0, sp
@@ -32,6 +33,7 @@
 	tlb_do_page_fault 1
 
 SYM_CODE_START(handle_tlb_protect)
+	UNWIND_HINT_UNDEFINED
 	BACKUP_T0T1
 	SAVE_ALL
 	move		a0, sp
@@ -44,6 +46,7 @@ SYM_CODE_START(handle_tlb_protect)
 SYM_CODE_END(handle_tlb_protect)
 
 SYM_CODE_START(handle_tlb_load)
+	UNWIND_HINT_UNDEFINED
 	csrwr		t0, EXCEPTION_KS0
 	csrwr		t1, EXCEPTION_KS1
 	csrwr		ra, EXCEPTION_KS2
@@ -190,6 +193,7 @@ nopage_tlb_load:
 SYM_CODE_END(handle_tlb_load)
 
 SYM_CODE_START(handle_tlb_load_ptw)
+	UNWIND_HINT_UNDEFINED
 	csrwr		t0, LOONGARCH_CSR_KS0
 	csrwr		t1, LOONGARCH_CSR_KS1
 	la_abs		t0, tlb_do_page_fault_0
@@ -197,6 +201,7 @@ SYM_CODE_START(handle_tlb_load_ptw)
 SYM_CODE_END(handle_tlb_load_ptw)
 
 SYM_CODE_START(handle_tlb_store)
+	UNWIND_HINT_UNDEFINED
 	csrwr		t0, EXCEPTION_KS0
 	csrwr		t1, EXCEPTION_KS1
 	csrwr		ra, EXCEPTION_KS2
@@ -346,6 +351,7 @@ nopage_tlb_store:
 SYM_CODE_END(handle_tlb_store)
 
 SYM_CODE_START(handle_tlb_store_ptw)
+	UNWIND_HINT_UNDEFINED
 	csrwr		t0, LOONGARCH_CSR_KS0
 	csrwr		t1, LOONGARCH_CSR_KS1
 	la_abs		t0, tlb_do_page_fault_1
@@ -353,6 +359,7 @@ SYM_CODE_START(handle_tlb_store_ptw)
 SYM_CODE_END(handle_tlb_store_ptw)
 
 SYM_CODE_START(handle_tlb_modify)
+	UNWIND_HINT_UNDEFINED
 	csrwr		t0, EXCEPTION_KS0
 	csrwr		t1, EXCEPTION_KS1
 	csrwr		ra, EXCEPTION_KS2
@@ -500,6 +507,7 @@ nopage_tlb_modify:
 SYM_CODE_END(handle_tlb_modify)
 
 SYM_CODE_START(handle_tlb_modify_ptw)
+	UNWIND_HINT_UNDEFINED
 	csrwr		t0, LOONGARCH_CSR_KS0
 	csrwr		t1, LOONGARCH_CSR_KS1
 	la_abs		t0, tlb_do_page_fault_1
@@ -507,6 +515,7 @@ SYM_CODE_START(handle_tlb_modify_ptw)
 SYM_CODE_END(handle_tlb_modify_ptw)
 
 SYM_CODE_START(handle_tlb_refill)
+	UNWIND_HINT_UNDEFINED
 	csrwr		t0, LOONGARCH_CSR_TLBRSAVE
 	csrrd		t0, LOONGARCH_CSR_PGD
 	lddir		t0, t0, 3
diff --git a/arch/loongarch/vdso/Makefile b/arch/loongarch/vdso/Makefile
index f597cd08a96b..75c6726382c3 100644
--- a/arch/loongarch/vdso/Makefile
+++ b/arch/loongarch/vdso/Makefile
@@ -4,6 +4,7 @@
 KASAN_SANITIZE := n
 UBSAN_SANITIZE := n
 KCOV_INSTRUMENT := n
+OBJECT_FILES_NON_STANDARD := y
 
 # Include the generic Makefile to check the built vdso.
 include $(srctree)/lib/vdso/Makefile
diff --git a/arch/m68k/amiga/config.c b/arch/m68k/amiga/config.c
index 7791673e547b..99718f3dc686 100644
--- a/arch/m68k/amiga/config.c
+++ b/arch/m68k/amiga/config.c
@@ -846,6 +846,6 @@ static void amiga_get_hardware_list(struct seq_file *m)
  * The Amiga keyboard driver needs key_maps, but we cannot export it in
  * drivers/char/defkeymap.c, as it is autogenerated
  */
-#ifdef CONFIG_HW_CONSOLE
+#ifdef CONFIG_VT
 EXPORT_SYMBOL_GPL(key_maps);
 #endif
diff --git a/arch/m68k/hp300/config.c b/arch/m68k/hp300/config.c
index e4bd6913f50e..1a2739852351 100644
--- a/arch/m68k/hp300/config.c
+++ b/arch/m68k/hp300/config.c
@@ -10,6 +10,7 @@
 
 #include <linux/module.h>
 #include <linux/init.h>
+#include <linux/serial_8250.h>
 #include <linux/string.h>
 #include <linux/kernel.h>
 #include <linux/console.h>
@@ -67,9 +68,6 @@ static char *hp300_models[] __initdata = {
 static char hp300_model_name[13] = "HP9000/";
 
 extern void hp300_reset(void);
-#ifdef CONFIG_SERIAL_8250_CONSOLE
-extern int hp300_setup_serial_console(void) __init;
-#endif
 
 int __init hp300_parse_bootinfo(const struct bi_record *record)
 {
@@ -263,7 +261,5 @@ void __init config_hp300(void)
 	} else {
 		panic("Unknown HP9000 Model");
 	}
-#ifdef CONFIG_SERIAL_8250_CONSOLE
 	hp300_setup_serial_console();
-#endif
 }
diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig
index ee29c4c8d7c1..daafeb20f993 100644
--- a/arch/parisc/Kconfig
+++ b/arch/parisc/Kconfig
@@ -238,9 +238,9 @@ config PARISC_HUGE_KERNEL
 	def_bool y if !MODULES || UBSAN || FTRACE || COMPILE_TEST
 
 config MLONGCALLS
-	def_bool y if PARISC_HUGE_KERNEL
 	bool "Enable the -mlong-calls compiler option for big kernels" if !PARISC_HUGE_KERNEL
 	depends on PA8X00
+	default PARISC_HUGE_KERNEL
 	help
 	  If you configure the kernel to include many drivers built-in instead
 	  as modules, the kernel executable may become too big, so that the
@@ -255,9 +255,9 @@ config MLONGCALLS
 	  Enabling this option will probably slow down your kernel.
 
 config 64BIT
-	def_bool y if "$(ARCH)" = "parisc64"
 	bool "64-bit kernel" if "$(ARCH)" = "parisc"
 	depends on PA8X00
+	default "$(ARCH)" = "parisc64"
 	help
 	  Enable this if you want to support 64bit kernel on PA-RISC platform.
 
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index a68b9e637eda..1c4be3373686 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -607,11 +607,6 @@ config PPC64_SUPPORTS_MEMORY_FAILURE
 config ARCH_SUPPORTS_KEXEC
 	def_bool PPC_BOOK3S || PPC_E500 || (44x && !SMP)
 
-config ARCH_SELECTS_KEXEC
-	def_bool y
-	depends on KEXEC
-	select CRASH_DUMP
-
 config ARCH_SUPPORTS_KEXEC_FILE
 	def_bool PPC64
 
@@ -622,7 +617,6 @@ config ARCH_SELECTS_KEXEC_FILE
 	def_bool y
 	depends on KEXEC_FILE
 	select KEXEC_ELF
-	select CRASH_DUMP
 	select HAVE_IMA_KEXEC if IMA
 
 config PPC64_BIG_ENDIAN_ELF_ABI_V2
@@ -694,8 +688,7 @@ config ARCH_SELECTS_CRASH_DUMP
 
 config FA_DUMP
 	bool "Firmware-assisted dump"
-	depends on PPC64 && (PPC_RTAS || PPC_POWERNV)
-	select CRASH_DUMP
+	depends on CRASH_DUMP && PPC64 && (PPC_RTAS || PPC_POWERNV)
 	help
 	  A robust mechanism to get reliable kernel crash dump with
 	  assistance from firmware. This approach does not use kexec,
diff --git a/arch/powerpc/boot/dts/akebono.dts b/arch/powerpc/boot/dts/akebono.dts
index df18f8dc4642..343326c30380 100644
--- a/arch/powerpc/boot/dts/akebono.dts
+++ b/arch/powerpc/boot/dts/akebono.dts
@@ -126,7 +126,7 @@
 			interrupts = <93 2>;
 		};
 
-		EHCI0: ehci@30010000000 {
+		EHCI0: usb@30010000000 {
 			compatible = "ibm,476gtr-ehci", "generic-ehci";
 			reg = <0x300 0x10000000 0x0 0x10000>;
 			interrupt-parent = <&MPIC>;
@@ -140,14 +140,14 @@
 			interrupt-parent = <&MPIC>;
 		};
 
-		OHCI0: ohci@30010010000 {
+		OHCI0: usb@30010010000 {
 			compatible = "ibm,476gtr-ohci", "generic-ohci";
 			reg = <0x300 0x10010000 0x0 0x10000>;
 			interrupt-parent = <&MPIC>;
 			interrupts = <89 1>;
 			};
 
-		OHCI1: ohci@30010020000 {
+		OHCI1: usb@30010020000 {
 			compatible = "ibm,476gtr-ohci", "generic-ohci";
 			reg = <0x300 0x10020000 0x0 0x10000>;
 			interrupt-parent = <&MPIC>;
diff --git a/arch/powerpc/include/asm/kexec.h b/arch/powerpc/include/asm/kexec.h
index e1b43aa12175..fdb90e24dc74 100644
--- a/arch/powerpc/include/asm/kexec.h
+++ b/arch/powerpc/include/asm/kexec.h
@@ -55,59 +55,18 @@
 typedef void (*crash_shutdown_t)(void);
 
 #ifdef CONFIG_KEXEC_CORE
-
-/*
- * This function is responsible for capturing register states if coming
- * via panic or invoking dump using sysrq-trigger.
- */
-static inline void crash_setup_regs(struct pt_regs *newregs,
-					struct pt_regs *oldregs)
-{
-	if (oldregs)
-		memcpy(newregs, oldregs, sizeof(*newregs));
-	else
-		ppc_save_regs(newregs);
-}
+struct kimage;
+struct pt_regs;
 
 extern void kexec_smp_wait(void);	/* get and clear naca physid, wait for
 					  master to copy new code to 0 */
-extern int crashing_cpu;
-extern void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *));
-extern void crash_ipi_callback(struct pt_regs *);
-extern int crash_wake_offline;
-
-struct kimage;
-struct pt_regs;
 extern void default_machine_kexec(struct kimage *image);
-extern void default_machine_crash_shutdown(struct pt_regs *regs);
-extern int crash_shutdown_register(crash_shutdown_t handler);
-extern int crash_shutdown_unregister(crash_shutdown_t handler);
-
-extern void crash_kexec_prepare(void);
-extern void crash_kexec_secondary(struct pt_regs *regs);
-int __init overlaps_crashkernel(unsigned long start, unsigned long size);
-extern void reserve_crashkernel(void);
 extern void machine_kexec_mask_interrupts(void);
 
-static inline bool kdump_in_progress(void)
-{
-	return crashing_cpu >= 0;
-}
-
 void relocate_new_kernel(unsigned long indirection_page, unsigned long reboot_code_buffer,
 			 unsigned long start_address) __noreturn;
-
 void kexec_copy_flush(struct kimage *image);
 
-#if defined(CONFIG_CRASH_DUMP)
-bool is_kdump_kernel(void);
-#define is_kdump_kernel			is_kdump_kernel
-#if defined(CONFIG_PPC_RTAS)
-void crash_free_reserved_phys_range(unsigned long begin, unsigned long end);
-#define crash_free_reserved_phys_range crash_free_reserved_phys_range
-#endif /* CONFIG_PPC_RTAS */
-#endif /* CONFIG_CRASH_DUMP */
-
 #ifdef CONFIG_KEXEC_FILE
 extern const struct kexec_file_ops kexec_elf64_ops;
 
@@ -152,15 +111,56 @@ int setup_new_fdt_ppc64(const struct kimage *image, void *fdt,
 
 #endif /* CONFIG_KEXEC_FILE */
 
-#else /* !CONFIG_KEXEC_CORE */
-static inline void crash_kexec_secondary(struct pt_regs *regs) { }
+#endif /* CONFIG_KEXEC_CORE */
+
+#ifdef CONFIG_CRASH_RESERVE
+int __init overlaps_crashkernel(unsigned long start, unsigned long size);
+extern void reserve_crashkernel(void);
+#else
+static inline void reserve_crashkernel(void) {}
+static inline int overlaps_crashkernel(unsigned long start, unsigned long size) { return 0; }
+#endif
 
-static inline int overlaps_crashkernel(unsigned long start, unsigned long size)
+#if defined(CONFIG_CRASH_DUMP)
+/*
+ * This function is responsible for capturing register states if coming
+ * via panic or invoking dump using sysrq-trigger.
+ */
+static inline void crash_setup_regs(struct pt_regs *newregs,
+					struct pt_regs *oldregs)
 {
-	return 0;
+	if (oldregs)
+		memcpy(newregs, oldregs, sizeof(*newregs));
+	else
+		ppc_save_regs(newregs);
+}
+
+extern int crashing_cpu;
+extern void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *));
+extern void crash_ipi_callback(struct pt_regs *regs);
+extern int crash_wake_offline;
+
+extern int crash_shutdown_register(crash_shutdown_t handler);
+extern int crash_shutdown_unregister(crash_shutdown_t handler);
+extern void default_machine_crash_shutdown(struct pt_regs *regs);
+
+extern void crash_kexec_prepare(void);
+extern void crash_kexec_secondary(struct pt_regs *regs);
+
+static inline bool kdump_in_progress(void)
+{
+	return crashing_cpu >= 0;
 }
 
-static inline void reserve_crashkernel(void) { ; }
+bool is_kdump_kernel(void);
+#define is_kdump_kernel			is_kdump_kernel
+#if defined(CONFIG_PPC_RTAS)
+void crash_free_reserved_phys_range(unsigned long begin, unsigned long end);
+#define crash_free_reserved_phys_range crash_free_reserved_phys_range
+#endif /* CONFIG_PPC_RTAS */
+
+#else /* !CONFIG_CRASH_DUMP */
+static inline void crash_kexec_secondary(struct pt_regs *regs) { }
 
 static inline int crash_shutdown_register(crash_shutdown_t handler)
 {
@@ -183,7 +183,7 @@ static inline void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *))
 {
 }
 
-#endif /* CONFIG_KEXEC_CORE */
+#endif /* CONFIG_CRASH_DUMP */
 
 #ifdef CONFIG_PPC_BOOK3S_64
 #include <asm/book3s/64/kexec.h>
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index 1dc32a058156..cd8d8883de90 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -475,7 +475,7 @@ static int __init early_init_dt_scan_chosen_ppc(unsigned long node,
 		tce_alloc_end = *lprop;
 #endif
 
-#ifdef CONFIG_KEXEC_CORE
+#ifdef CONFIG_CRASH_RESERVE
 	lprop = of_get_flat_dt_prop(node, "linux,crashkernel-base", NULL);
 	if (lprop)
 		crashk_res.start = *lprop;
diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index 2add292da494..01ed1263e1a9 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -110,7 +110,7 @@ int ppc_do_canonicalize_irqs;
 EXPORT_SYMBOL(ppc_do_canonicalize_irqs);
 #endif
 
-#ifdef CONFIG_VMCORE_INFO
+#ifdef CONFIG_CRASH_DUMP
 /* This keeps a track of which one is the crashing cpu. */
 int crashing_cpu = -1;
 #endif
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index a60e4139214b..12e53b3d7923 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -588,7 +588,7 @@ void smp_send_debugger_break(void)
 }
 #endif
 
-#ifdef CONFIG_KEXEC_CORE
+#ifdef CONFIG_CRASH_DUMP
 void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *))
 {
 	int cpu;
@@ -631,7 +631,7 @@ void crash_smp_send_stop(void)
 
 	stopped = true;
 
-#ifdef CONFIG_KEXEC_CORE
+#ifdef CONFIG_CRASH_DUMP
 	if (kexec_crash_image) {
 		crash_kexec_prepare();
 		return;
diff --git a/arch/powerpc/kexec/Makefile b/arch/powerpc/kexec/Makefile
index 91e96f5168b7..8e469c4da3f8 100644
--- a/arch/powerpc/kexec/Makefile
+++ b/arch/powerpc/kexec/Makefile
@@ -3,12 +3,13 @@
 # Makefile for the linux kernel.
 #
 
-obj-y				+= core.o crash.o core_$(BITS).o
+obj-y				+= core.o core_$(BITS).o
 
 obj-$(CONFIG_PPC32)		+= relocate_32.o
 
 obj-$(CONFIG_KEXEC_FILE)	+= file_load.o ranges.o file_load_$(BITS).o elf_$(BITS).o
 obj-$(CONFIG_VMCORE_INFO)	+= vmcore_info.o
+obj-$(CONFIG_CRASH_DUMP)	+= crash.o
 
 # Disable GCOV, KCOV & sanitizers in odd or sensitive code
 GCOV_PROFILE_core_$(BITS).o := n
diff --git a/arch/powerpc/kexec/core.c b/arch/powerpc/kexec/core.c
index 3ff4411ed496..b8333a49ea5d 100644
--- a/arch/powerpc/kexec/core.c
+++ b/arch/powerpc/kexec/core.c
@@ -44,10 +44,12 @@ void machine_kexec_mask_interrupts(void) {
 	}
 }
 
+#ifdef CONFIG_CRASH_DUMP
 void machine_crash_shutdown(struct pt_regs *regs)
 {
 	default_machine_crash_shutdown(regs);
 }
+#endif
 
 void machine_kexec_cleanup(struct kimage *image)
 {
@@ -77,6 +79,7 @@ void machine_kexec(struct kimage *image)
 	for(;;);
 }
 
+#ifdef CONFIG_CRASH_RESERVE
 void __init reserve_crashkernel(void)
 {
 	unsigned long long crash_size, crash_base, total_mem_sz;
@@ -251,3 +254,4 @@ static int __init kexec_setup(void)
 	return 0;
 }
 late_initcall(kexec_setup);
+#endif /* CONFIG_CRASH_RESERVE */
diff --git a/arch/powerpc/kexec/elf_64.c b/arch/powerpc/kexec/elf_64.c
index 904016cf89ea..6d8951e8e966 100644
--- a/arch/powerpc/kexec/elf_64.c
+++ b/arch/powerpc/kexec/elf_64.c
@@ -47,7 +47,7 @@ static void *elf64_load(struct kimage *image, char *kernel_buf,
 	if (ret)
 		return ERR_PTR(ret);
 
-	if (image->type == KEXEC_TYPE_CRASH) {
+	if (IS_ENABLED(CONFIG_CRASH_DUMP) && image->type == KEXEC_TYPE_CRASH) {
 		/* min & max buffer values for kdump case */
 		kbuf.buf_min = pbuf.buf_min = crashk_res.start;
 		kbuf.buf_max = pbuf.buf_max =
@@ -70,7 +70,7 @@ static void *elf64_load(struct kimage *image, char *kernel_buf,
 	kexec_dprintk("Loaded purgatory at 0x%lx\n", pbuf.mem);
 
 	/* Load additional segments needed for panic kernel */
-	if (image->type == KEXEC_TYPE_CRASH) {
+	if (IS_ENABLED(CONFIG_CRASH_DUMP) && image->type == KEXEC_TYPE_CRASH) {
 		ret = load_crashdump_segments_ppc64(image, &kbuf);
 		if (ret) {
 			pr_err("Failed to load kdump kernel segments\n");
diff --git a/arch/powerpc/kexec/file_load_64.c b/arch/powerpc/kexec/file_load_64.c
index 5b4c5cb23354..1bc65de6174f 100644
--- a/arch/powerpc/kexec/file_load_64.c
+++ b/arch/powerpc/kexec/file_load_64.c
@@ -97,119 +97,6 @@ out:
 }
 
 /**
- * get_usable_memory_ranges - Get usable memory ranges. This list includes
- *                            regions like crashkernel, opal/rtas & tce-table,
- *                            that kdump kernel could use.
- * @mem_ranges:               Range list to add the memory ranges to.
- *
- * Returns 0 on success, negative errno on error.
- */
-static int get_usable_memory_ranges(struct crash_mem **mem_ranges)
-{
-	int ret;
-
-	/*
-	 * Early boot failure observed on guests when low memory (first memory
-	 * block?) is not added to usable memory. So, add [0, crashk_res.end]
-	 * instead of [crashk_res.start, crashk_res.end] to workaround it.
-	 * Also, crashed kernel's memory must be added to reserve map to
-	 * avoid kdump kernel from using it.
-	 */
-	ret = add_mem_range(mem_ranges, 0, crashk_res.end + 1);
-	if (ret)
-		goto out;
-
-	ret = add_rtas_mem_range(mem_ranges);
-	if (ret)
-		goto out;
-
-	ret = add_opal_mem_range(mem_ranges);
-	if (ret)
-		goto out;
-
-	ret = add_tce_mem_ranges(mem_ranges);
-out:
-	if (ret)
-		pr_err("Failed to setup usable memory ranges\n");
-	return ret;
-}
-
-/**
- * get_crash_memory_ranges - Get crash memory ranges. This list includes
- *                           first/crashing kernel's memory regions that
- *                           would be exported via an elfcore.
- * @mem_ranges:              Range list to add the memory ranges to.
- *
- * Returns 0 on success, negative errno on error.
- */
-static int get_crash_memory_ranges(struct crash_mem **mem_ranges)
-{
-	phys_addr_t base, end;
-	struct crash_mem *tmem;
-	u64 i;
-	int ret;
-
-	for_each_mem_range(i, &base, &end) {
-		u64 size = end - base;
-
-		/* Skip backup memory region, which needs a separate entry */
-		if (base == BACKUP_SRC_START) {
-			if (size > BACKUP_SRC_SIZE) {
-				base = BACKUP_SRC_END + 1;
-				size -= BACKUP_SRC_SIZE;
-			} else
-				continue;
-		}
-
-		ret = add_mem_range(mem_ranges, base, size);
-		if (ret)
-			goto out;
-
-		/* Try merging adjacent ranges before reallocation attempt */
-		if ((*mem_ranges)->nr_ranges == (*mem_ranges)->max_nr_ranges)
-			sort_memory_ranges(*mem_ranges, true);
-	}
-
-	/* Reallocate memory ranges if there is no space to split ranges */
-	tmem = *mem_ranges;
-	if (tmem && (tmem->nr_ranges == tmem->max_nr_ranges)) {
-		tmem = realloc_mem_ranges(mem_ranges);
-		if (!tmem)
-			goto out;
-	}
-
-	/* Exclude crashkernel region */
-	ret = crash_exclude_mem_range(tmem, crashk_res.start, crashk_res.end);
-	if (ret)
-		goto out;
-
-	/*
-	 * FIXME: For now, stay in parity with kexec-tools but if RTAS/OPAL
-	 *        regions are exported to save their context at the time of
-	 *        crash, they should actually be backed up just like the
-	 *        first 64K bytes of memory.
-	 */
-	ret = add_rtas_mem_range(mem_ranges);
-	if (ret)
-		goto out;
-
-	ret = add_opal_mem_range(mem_ranges);
-	if (ret)
-		goto out;
-
-	/* create a separate program header for the backup region */
-	ret = add_mem_range(mem_ranges, BACKUP_SRC_START, BACKUP_SRC_SIZE);
-	if (ret)
-		goto out;
-
-	sort_memory_ranges(*mem_ranges, false);
-out:
-	if (ret)
-		pr_err("Failed to setup crash memory ranges\n");
-	return ret;
-}
-
-/**
  * get_reserved_memory_ranges - Get reserve memory ranges. This list includes
  *                              memory regions that should be added to the
  *                              memory reserve map to ensure the region is
@@ -434,6 +321,120 @@ static int locate_mem_hole_bottom_up_ppc64(struct kexec_buf *kbuf,
 	return ret;
 }
 
+#ifdef CONFIG_CRASH_DUMP
+/**
+ * get_usable_memory_ranges - Get usable memory ranges. This list includes
+ *                            regions like crashkernel, opal/rtas & tce-table,
+ *                            that kdump kernel could use.
+ * @mem_ranges:               Range list to add the memory ranges to.
+ *
+ * Returns 0 on success, negative errno on error.
+ */
+static int get_usable_memory_ranges(struct crash_mem **mem_ranges)
+{
+	int ret;
+
+	/*
+	 * Early boot failure observed on guests when low memory (first memory
+	 * block?) is not added to usable memory. So, add [0, crashk_res.end]
+	 * instead of [crashk_res.start, crashk_res.end] to workaround it.
+	 * Also, crashed kernel's memory must be added to reserve map to
+	 * avoid kdump kernel from using it.
+	 */
+	ret = add_mem_range(mem_ranges, 0, crashk_res.end + 1);
+	if (ret)
+		goto out;
+
+	ret = add_rtas_mem_range(mem_ranges);
+	if (ret)
+		goto out;
+
+	ret = add_opal_mem_range(mem_ranges);
+	if (ret)
+		goto out;
+
+	ret = add_tce_mem_ranges(mem_ranges);
+out:
+	if (ret)
+		pr_err("Failed to setup usable memory ranges\n");
+	return ret;
+}
+
+/**
+ * get_crash_memory_ranges - Get crash memory ranges. This list includes
+ *                           first/crashing kernel's memory regions that
+ *                           would be exported via an elfcore.
+ * @mem_ranges:              Range list to add the memory ranges to.
+ *
+ * Returns 0 on success, negative errno on error.
+ */
+static int get_crash_memory_ranges(struct crash_mem **mem_ranges)
+{
+	phys_addr_t base, end;
+	struct crash_mem *tmem;
+	u64 i;
+	int ret;
+
+	for_each_mem_range(i, &base, &end) {
+		u64 size = end - base;
+
+		/* Skip backup memory region, which needs a separate entry */
+		if (base == BACKUP_SRC_START) {
+			if (size > BACKUP_SRC_SIZE) {
+				base = BACKUP_SRC_END + 1;
+				size -= BACKUP_SRC_SIZE;
+			} else
+				continue;
+		}
+
+		ret = add_mem_range(mem_ranges, base, size);
+		if (ret)
+			goto out;
+
+		/* Try merging adjacent ranges before reallocation attempt */
+		if ((*mem_ranges)->nr_ranges == (*mem_ranges)->max_nr_ranges)
+			sort_memory_ranges(*mem_ranges, true);
+	}
+
+	/* Reallocate memory ranges if there is no space to split ranges */
+	tmem = *mem_ranges;
+	if (tmem && (tmem->nr_ranges == tmem->max_nr_ranges)) {
+		tmem = realloc_mem_ranges(mem_ranges);
+		if (!tmem)
+			goto out;
+	}
+
+	/* Exclude crashkernel region */
+	ret = crash_exclude_mem_range(tmem, crashk_res.start, crashk_res.end);
+	if (ret)
+		goto out;
+
+	/*
+	 * FIXME: For now, stay in parity with kexec-tools but if RTAS/OPAL
+	 *        regions are exported to save their context at the time of
+	 *        crash, they should actually be backed up just like the
+	 *        first 64K bytes of memory.
+	 */
+	ret = add_rtas_mem_range(mem_ranges);
+	if (ret)
+		goto out;
+
+	ret = add_opal_mem_range(mem_ranges);
+	if (ret)
+		goto out;
+
+	/* create a separate program header for the backup region */
+	ret = add_mem_range(mem_ranges, BACKUP_SRC_START, BACKUP_SRC_SIZE);
+	if (ret)
+		goto out;
+
+	sort_memory_ranges(*mem_ranges, false);
+out:
+	if (ret)
+		pr_err("Failed to setup crash memory ranges\n");
+	return ret;
+}
+
 /**
  * check_realloc_usable_mem - Reallocate buffer if it can't accommodate entries
  * @um_info:                  Usable memory buffer and ranges info.
@@ -863,6 +864,7 @@ int load_crashdump_segments_ppc64(struct kimage *image,
 
 	return 0;
 }
+#endif
 
 /**
  * setup_purgatory_ppc64 - initialize PPC64 specific purgatory's global
@@ -972,26 +974,14 @@ static unsigned int cpu_node_size(void)
 	return size;
 }
 
-/**
- * kexec_extra_fdt_size_ppc64 - Return the estimated additional size needed to
- *                              setup FDT for kexec/kdump kernel.
- * @image:                      kexec image being loaded.
- *
- * Returns the estimated extra size needed for kexec/kdump kernel FDT.
- */
-unsigned int kexec_extra_fdt_size_ppc64(struct kimage *image)
+static unsigned int kdump_extra_fdt_size_ppc64(struct kimage *image)
 {
 	unsigned int cpu_nodes, extra_size = 0;
 	struct device_node *dn;
 	u64 usm_entries;
 
-	// Budget some space for the password blob. There's already extra space
-	// for the key name
-	if (plpks_is_available())
-		extra_size += (unsigned int)plpks_get_passwordlen();
-
-	if (image->type != KEXEC_TYPE_CRASH)
-		return extra_size;
+	if (!IS_ENABLED(CONFIG_CRASH_DUMP) || image->type != KEXEC_TYPE_CRASH)
+		return 0;
 
 	/*
 	 * For kdump kernel, account for linux,usable-memory and
@@ -1020,6 +1010,25 @@ unsigned int kexec_extra_fdt_size_ppc64(struct kimage *image)
 }
 
 /**
+ * kexec_extra_fdt_size_ppc64 - Return the estimated additional size needed to
+ *                              setup FDT for kexec/kdump kernel.
+ * @image:                      kexec image being loaded.
+ *
+ * Returns the estimated extra size needed for kexec/kdump kernel FDT.
+ */
+unsigned int kexec_extra_fdt_size_ppc64(struct kimage *image)
+{
+	unsigned int extra_size = 0;
+
+	// Budget some space for the password blob. There's already extra space
+	// for the key name
+	if (plpks_is_available())
+		extra_size += (unsigned int)plpks_get_passwordlen();
+
+	return extra_size + kdump_extra_fdt_size_ppc64(image);
+}
+
+/**
  * add_node_props - Reads node properties from device node structure and add
  *                  them to fdt.
  * @fdt:            Flattened device tree of the kernel
@@ -1171,6 +1180,7 @@ int setup_new_fdt_ppc64(const struct kimage *image, void *fdt,
 	struct crash_mem *umem = NULL, *rmem = NULL;
 	int i, nr_ranges, ret;
 
+#ifdef CONFIG_CRASH_DUMP
 	/*
 	 * Restrict memory usage for kdump kernel by setting up
 	 * usable memory ranges and memory reserve map.
@@ -1207,6 +1217,7 @@ int setup_new_fdt_ppc64(const struct kimage *image, void *fdt,
 			goto out;
 		}
 	}
+#endif
 
 	/* Update cpus nodes information to account hotplug CPUs. */
 	ret =  update_cpus_node(fdt);
@@ -1278,7 +1289,7 @@ int arch_kexec_locate_mem_hole(struct kexec_buf *kbuf)
 	buf_min = kbuf->buf_min;
 	buf_max = kbuf->buf_max;
 	/* Segments for kdump kernel should be within crashkernel region */
-	if (kbuf->image->type == KEXEC_TYPE_CRASH) {
+	if (IS_ENABLED(CONFIG_CRASH_DUMP) && kbuf->image->type == KEXEC_TYPE_CRASH) {
 		buf_min = (buf_min < crashk_res.start ?
 			   crashk_res.start : buf_min);
 		buf_max = (buf_max > crashk_res.end ?
diff --git a/arch/powerpc/mm/book3s32/mmu.c b/arch/powerpc/mm/book3s32/mmu.c
index 5445587bfe84..100f999871bc 100644
--- a/arch/powerpc/mm/book3s32/mmu.c
+++ b/arch/powerpc/mm/book3s32/mmu.c
@@ -193,7 +193,7 @@ static bool is_module_segment(unsigned long addr)
 	return true;
 }
 
-void mmu_mark_initmem_nx(void)
+int mmu_mark_initmem_nx(void)
 {
 	int nb = mmu_has_feature(MMU_FTR_USE_HIGH_BATS) ? 8 : 4;
 	int i;
@@ -230,9 +230,10 @@ void mmu_mark_initmem_nx(void)
 
 		mtsr(mfsr(i << 28) | 0x10000000, i << 28);
 	}
+	return 0;
 }
 
-void mmu_mark_rodata_ro(void)
+int mmu_mark_rodata_ro(void)
 {
 	int nb = mmu_has_feature(MMU_FTR_USE_HIGH_BATS) ? 8 : 4;
 	int i;
@@ -245,6 +246,8 @@ void mmu_mark_rodata_ro(void)
 	}
 
 	update_bats();
+
+	return 0;
 }
 
 /*
diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h
index 8e84bc214d13..6949c2c937e7 100644
--- a/arch/powerpc/mm/mmu_decl.h
+++ b/arch/powerpc/mm/mmu_decl.h
@@ -160,11 +160,11 @@ static inline unsigned long p_block_mapped(phys_addr_t pa) { return 0; }
 #endif
 
 #if defined(CONFIG_PPC_BOOK3S_32) || defined(CONFIG_PPC_8xx) || defined(CONFIG_PPC_E500)
-void mmu_mark_initmem_nx(void);
-void mmu_mark_rodata_ro(void);
+int mmu_mark_initmem_nx(void);
+int mmu_mark_rodata_ro(void);
 #else
-static inline void mmu_mark_initmem_nx(void) { }
-static inline void mmu_mark_rodata_ro(void) { }
+static inline int mmu_mark_initmem_nx(void) { return 0; }
+static inline int mmu_mark_rodata_ro(void) { return 0; }
 #endif
 
 #ifdef CONFIG_PPC_8xx
diff --git a/arch/powerpc/mm/nohash/8xx.c b/arch/powerpc/mm/nohash/8xx.c
index 6be6421086ed..43d4842bb1c7 100644
--- a/arch/powerpc/mm/nohash/8xx.c
+++ b/arch/powerpc/mm/nohash/8xx.c
@@ -119,23 +119,26 @@ void __init mmu_mapin_immr(void)
 				    PAGE_KERNEL_NCG, MMU_PAGE_512K, true);
 }
 
-static void mmu_mapin_ram_chunk(unsigned long offset, unsigned long top,
-				pgprot_t prot, bool new)
+static int mmu_mapin_ram_chunk(unsigned long offset, unsigned long top,
+			       pgprot_t prot, bool new)
 {
 	unsigned long v = PAGE_OFFSET + offset;
 	unsigned long p = offset;
+	int err = 0;
 
 	WARN_ON(!IS_ALIGNED(offset, SZ_512K) || !IS_ALIGNED(top, SZ_512K));
 
-	for (; p < ALIGN(p, SZ_8M) && p < top; p += SZ_512K, v += SZ_512K)
-		__early_map_kernel_hugepage(v, p, prot, MMU_PAGE_512K, new);
-	for (; p < ALIGN_DOWN(top, SZ_8M) && p < top; p += SZ_8M, v += SZ_8M)
-		__early_map_kernel_hugepage(v, p, prot, MMU_PAGE_8M, new);
-	for (; p < ALIGN_DOWN(top, SZ_512K) && p < top; p += SZ_512K, v += SZ_512K)
-		__early_map_kernel_hugepage(v, p, prot, MMU_PAGE_512K, new);
+	for (; p < ALIGN(p, SZ_8M) && p < top && !err; p += SZ_512K, v += SZ_512K)
+		err = __early_map_kernel_hugepage(v, p, prot, MMU_PAGE_512K, new);
+	for (; p < ALIGN_DOWN(top, SZ_8M) && p < top && !err; p += SZ_8M, v += SZ_8M)
+		err = __early_map_kernel_hugepage(v, p, prot, MMU_PAGE_8M, new);
+	for (; p < ALIGN_DOWN(top, SZ_512K) && p < top && !err; p += SZ_512K, v += SZ_512K)
+		err = __early_map_kernel_hugepage(v, p, prot, MMU_PAGE_512K, new);
 
 	if (!new)
 		flush_tlb_kernel_range(PAGE_OFFSET + v, PAGE_OFFSET + top);
+
+	return err;
 }
 
 unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top)
@@ -166,27 +169,33 @@ unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top)
 	return top;
 }
 
-void mmu_mark_initmem_nx(void)
+int mmu_mark_initmem_nx(void)
 {
 	unsigned long etext8 = ALIGN(__pa(_etext), SZ_8M);
 	unsigned long sinittext = __pa(_sinittext);
 	unsigned long boundary = strict_kernel_rwx_enabled() ? sinittext : etext8;
 	unsigned long einittext8 = ALIGN(__pa(_einittext), SZ_8M);
+	int err = 0;
 
 	if (!debug_pagealloc_enabled_or_kfence())
-		mmu_mapin_ram_chunk(boundary, einittext8, PAGE_KERNEL, false);
+		err = mmu_mapin_ram_chunk(boundary, einittext8, PAGE_KERNEL, false);
 
 	mmu_pin_tlb(block_mapped_ram, false);
+
+	return err;
 }
 
 #ifdef CONFIG_STRICT_KERNEL_RWX
-void mmu_mark_rodata_ro(void)
+int mmu_mark_rodata_ro(void)
 {
 	unsigned long sinittext = __pa(_sinittext);
+	int err;
 
-	mmu_mapin_ram_chunk(0, sinittext, PAGE_KERNEL_ROX, false);
+	err = mmu_mapin_ram_chunk(0, sinittext, PAGE_KERNEL_ROX, false);
 	if (IS_ENABLED(CONFIG_PIN_TLB_DATA))
 		mmu_pin_tlb(block_mapped_ram, true);
+
+	return err;
 }
 #endif
 
diff --git a/arch/powerpc/mm/nohash/e500.c b/arch/powerpc/mm/nohash/e500.c
index 921c3521ec11..266fb22131fc 100644
--- a/arch/powerpc/mm/nohash/e500.c
+++ b/arch/powerpc/mm/nohash/e500.c
@@ -285,19 +285,23 @@ void __init adjust_total_lowmem(void)
 }
 
 #ifdef CONFIG_STRICT_KERNEL_RWX
-void mmu_mark_rodata_ro(void)
+int mmu_mark_rodata_ro(void)
 {
 	unsigned long remapped;
 
 	remapped = map_mem_in_cams(__max_low_memory, CONFIG_LOWMEM_CAM_NUM, false, false);
 
-	WARN_ON(__max_low_memory != remapped);
+	if (WARN_ON(__max_low_memory != remapped))
+		return -EINVAL;
+
+	return 0;
 }
 #endif
 
-void mmu_mark_initmem_nx(void)
+int mmu_mark_initmem_nx(void)
 {
 	/* Everything is done in mmu_mark_rodata_ro() */
+	return 0;
 }
 
 void setup_initial_memory_limit(phys_addr_t first_memblock_base,
diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c
index face94977cb2..cfd622ebf774 100644
--- a/arch/powerpc/mm/pgtable_32.c
+++ b/arch/powerpc/mm/pgtable_32.c
@@ -130,31 +130,41 @@ void __init mapin_ram(void)
 	}
 }
 
-void mark_initmem_nx(void)
+static int __mark_initmem_nx(void)
 {
 	unsigned long numpages = PFN_UP((unsigned long)_einittext) -
 				 PFN_DOWN((unsigned long)_sinittext);
+	int err;
 
-	mmu_mark_initmem_nx();
+	err = mmu_mark_initmem_nx();
 
 	if (!v_block_mapped((unsigned long)_sinittext)) {
-		set_memory_nx((unsigned long)_sinittext, numpages);
-		set_memory_rw((unsigned long)_sinittext, numpages);
+		err = set_memory_nx((unsigned long)_sinittext, numpages);
+		if (err)
+			return err;
+		err = set_memory_rw((unsigned long)_sinittext, numpages);
 	}
+	return err;
+}
+
+void mark_initmem_nx(void)
+{
+	int err = __mark_initmem_nx();
+
+	if (err)
+		panic("%s() failed, err = %d\n", __func__, err);
 }
 
 #ifdef CONFIG_STRICT_KERNEL_RWX
-void mark_rodata_ro(void)
+static int __mark_rodata_ro(void)
 {
 	unsigned long numpages;
 
 	if (IS_ENABLED(CONFIG_STRICT_MODULE_RWX) && mmu_has_feature(MMU_FTR_HPTE_TABLE))
 		pr_warn("This platform has HASH MMU, STRICT_MODULE_RWX won't work\n");
 
-	if (v_block_mapped((unsigned long)_stext + 1)) {
-		mmu_mark_rodata_ro();
-		return;
-	}
+	if (v_block_mapped((unsigned long)_stext + 1))
+		return mmu_mark_rodata_ro();
 
 	/*
 	 * mark text and rodata as read only. __end_rodata is set by
@@ -164,6 +174,14 @@ void mark_rodata_ro(void)
 	numpages = PFN_UP((unsigned long)__end_rodata) -
 		   PFN_DOWN((unsigned long)_stext);
 
-	set_memory_ro((unsigned long)_stext, numpages);
+	return set_memory_ro((unsigned long)_stext, numpages);
+}
+
+void mark_rodata_ro(void)
+{
+	int err = __mark_rodata_ro();
+
+	if (err)
+		panic("%s() failed, err = %d\n", __func__, err);
 }
 #endif
diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c
index 9e1a25398f98..8f14f0581a21 100644
--- a/arch/powerpc/platforms/powernv/smp.c
+++ b/arch/powerpc/platforms/powernv/smp.c
@@ -434,7 +434,7 @@ void __init pnv_smp_init(void)
 	smp_ops = &pnv_smp_ops;
 
 #ifdef CONFIG_HOTPLUG_CPU
-#ifdef CONFIG_KEXEC_CORE
+#ifdef CONFIG_CRASH_DUMP
 	crash_wake_offline = 1;
 #endif
 #endif
diff --git a/arch/riscv/Kbuild b/arch/riscv/Kbuild
index d25ad1c19f88..2c585f7a0b6e 100644
--- a/arch/riscv/Kbuild
+++ b/arch/riscv/Kbuild
@@ -2,6 +2,7 @@
 
 obj-y += kernel/ mm/ net/
 obj-$(CONFIG_BUILTIN_DTB) += boot/dts/
+obj-$(CONFIG_CRYPTO) += crypto/
 obj-y += errata/
 obj-$(CONFIG_KVM) += kvm/
 
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index 92b1dbf55176..be09c8836d56 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -27,14 +27,18 @@ config RISCV
 	select ARCH_HAS_GCOV_PROFILE_ALL
 	select ARCH_HAS_GIGANTIC_PAGE
 	select ARCH_HAS_KCOV
+	select ARCH_HAS_MEMBARRIER_CALLBACKS
+	select ARCH_HAS_MEMBARRIER_SYNC_CORE
 	select ARCH_HAS_MMIOWB
 	select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
 	select ARCH_HAS_PMEM_API
+	select ARCH_HAS_PREPARE_SYNC_CORE_CMD
 	select ARCH_HAS_PTE_SPECIAL
 	select ARCH_HAS_SET_DIRECT_MAP if MMU
 	select ARCH_HAS_SET_MEMORY if MMU
 	select ARCH_HAS_STRICT_KERNEL_RWX if MMU && !XIP_KERNEL
 	select ARCH_HAS_STRICT_MODULE_RWX if MMU && !XIP_KERNEL
+	select ARCH_HAS_SYNC_CORE_BEFORE_USERMODE
 	select ARCH_HAS_SYSCALL_WRAPPER
 	select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
 	select ARCH_HAS_UBSAN
@@ -47,6 +51,9 @@ config RISCV
 	select ARCH_SUPPORTS_CFI_CLANG
 	select ARCH_SUPPORTS_DEBUG_PAGEALLOC if MMU
 	select ARCH_SUPPORTS_HUGETLBFS if MMU
+	# LLD >= 14: https://github.com/llvm/llvm-project/issues/50505
+	select ARCH_SUPPORTS_LTO_CLANG if LLD_VERSION >= 140000
+	select ARCH_SUPPORTS_LTO_CLANG_THIN if LLD_VERSION >= 140000
 	select ARCH_SUPPORTS_PAGE_TABLE_CHECK if MMU
 	select ARCH_SUPPORTS_PER_VMA_LOCK if MMU
 	select ARCH_SUPPORTS_SHADOW_CALL_STACK if HAVE_SHADOW_CALL_STACK
@@ -106,6 +113,7 @@ config RISCV
 	select HAVE_ARCH_KGDB_QXFER_PKT
 	select HAVE_ARCH_MMAP_RND_BITS if MMU
 	select HAVE_ARCH_MMAP_RND_COMPAT_BITS if COMPAT
+	select HAVE_ARCH_RANDOMIZE_KSTACK_OFFSET
 	select HAVE_ARCH_SECCOMP_FILTER
 	select HAVE_ARCH_THREAD_STRUCT_WHITELIST
 	select HAVE_ARCH_TRACEHOOK
@@ -124,6 +132,7 @@ config RISCV
 	select HAVE_FUNCTION_GRAPH_RETVAL if HAVE_FUNCTION_GRAPH_TRACER
 	select HAVE_FUNCTION_TRACER if !XIP_KERNEL && !PREEMPTION
 	select HAVE_EBPF_JIT if MMU
+	select HAVE_FAST_GUP if MMU
 	select HAVE_FUNCTION_ARG_ACCESS_API
 	select HAVE_FUNCTION_ERROR_INJECTION
 	select HAVE_GCC_PLUGINS
@@ -155,6 +164,7 @@ config RISCV
 	select IRQ_FORCED_THREADING
 	select KASAN_VMALLOC if KASAN
 	select LOCK_MM_AND_FIND_VMA
+	select MMU_GATHER_RCU_TABLE_FREE if SMP && MMU
 	select MODULES_USE_ELF_RELA if MODULES
 	select MODULE_SECTIONS if MODULES
 	select OF
@@ -576,6 +586,13 @@ config TOOLCHAIN_HAS_ZBB
 	depends on LLD_VERSION >= 150000 || LD_VERSION >= 23900
 	depends on AS_HAS_OPTION_ARCH
 
+# This symbol indicates that the toolchain supports all v1.0 vector crypto
+# extensions, including Zvk*, Zvbb, and Zvbc.  LLVM added all of these at once.
+# binutils added all except Zvkb, then added Zvkb.  So we just check for Zvkb.
+config TOOLCHAIN_HAS_VECTOR_CRYPTO
+	def_bool $(as-instr, .option arch$(comma) +v$(comma) +zvkb)
+	depends on AS_HAS_OPTION_ARCH
+
 config RISCV_ISA_ZBB
 	bool "Zbb extension support for bit manipulation instructions"
 	depends on TOOLCHAIN_HAS_ZBB
@@ -686,27 +703,61 @@ config THREAD_SIZE_ORDER
 	  affects irq stack size, which is equal to thread stack size.
 
 config RISCV_MISALIGNED
-	bool "Support misaligned load/store traps for kernel and userspace"
+	bool
 	select SYSCTL_ARCH_UNALIGN_ALLOW
-	default y
 	help
-	  Say Y here if you want the kernel to embed support for misaligned
-	  load/store for both kernel and userspace. When disable, misaligned
-	  accesses will generate SIGBUS in userspace and panic in kernel.
+	  Embed support for emulating misaligned loads and stores.
+
+choice
+	prompt "Unaligned Accesses Support"
+	default RISCV_PROBE_UNALIGNED_ACCESS
+	help
+	  This determines the level of support for unaligned accesses. This
+	  information is used by the kernel to perform optimizations. It is also
+	  exposed to user space via the hwprobe syscall. The hardware will be
+	  probed at boot by default.
+
+config RISCV_PROBE_UNALIGNED_ACCESS
+	bool "Probe for hardware unaligned access support"
+	select RISCV_MISALIGNED
+	help
+	  During boot, the kernel will run a series of tests to determine the
+	  speed of unaligned accesses. This probing will dynamically determine
+	  the speed of unaligned accesses on the underlying system. If unaligned
+	  memory accesses trap into the kernel as they are not supported by the
+	  system, the kernel will emulate the unaligned accesses to preserve the
+	  UABI.
+
+config RISCV_EMULATED_UNALIGNED_ACCESS
+	bool "Emulate unaligned access where system support is missing"
+	select RISCV_MISALIGNED
+	help
+	  If unaligned memory accesses trap into the kernel as they are not
+	  supported by the system, the kernel will emulate the unaligned
+	  accesses to preserve the UABI. When the underlying system does support
+	  unaligned accesses, the unaligned accesses are assumed to be slow.
+
+config RISCV_SLOW_UNALIGNED_ACCESS
+	bool "Assume the system supports slow unaligned memory accesses"
+	depends on NONPORTABLE
+	help
+	  Assume that the system supports slow unaligned memory accesses. The
+	  kernel and userspace programs may not be able to run at all on systems
+	  that do not support unaligned memory accesses.
 
 config RISCV_EFFICIENT_UNALIGNED_ACCESS
-	bool "Assume the CPU supports fast unaligned memory accesses"
+	bool "Assume the system supports fast unaligned memory accesses"
 	depends on NONPORTABLE
 	select DCACHE_WORD_ACCESS if MMU
 	select HAVE_EFFICIENT_UNALIGNED_ACCESS
 	help
-	  Say Y here if you want the kernel to assume that the CPU supports
-	  efficient unaligned memory accesses.  When enabled, this option
-	  improves the performance of the kernel on such CPUs.  However, the
-	  kernel will run much more slowly, or will not be able to run at all,
-	  on CPUs that do not support efficient unaligned memory accesses.
+	  Assume that the system supports fast unaligned memory accesses. When
+	  enabled, this option improves the performance of the kernel on such
+	  systems. However, the kernel and userspace programs will run much more
+	  slowly, or will not be able to run at all, on systems that do not
+	  support efficient unaligned memory accesses.
 
-	  If unsure what to do here, say N.
+endchoice
 
 endmenu # "Platform type"
 
@@ -1011,11 +1062,8 @@ menu "Power management options"
 
 source "kernel/power/Kconfig"
 
-# Hibernation is only possible on systems where the SBI implementation has
-# marked its reserved memory as not accessible from, or does not run
-# from the same memory as, Linux
 config ARCH_HIBERNATION_POSSIBLE
-	def_bool NONPORTABLE
+	def_bool y
 
 config ARCH_HIBERNATION_HEADER
 	def_bool HIBERNATION
diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile
index 0b7d109258e7..252d63942f34 100644
--- a/arch/riscv/Makefile
+++ b/arch/riscv/Makefile
@@ -50,6 +50,11 @@ ifndef CONFIG_AS_IS_LLVM
 	KBUILD_CFLAGS += -Wa,-mno-relax
 	KBUILD_AFLAGS += -Wa,-mno-relax
 endif
+# LLVM has an issue with target-features and LTO: https://github.com/llvm/llvm-project/issues/59350
+# Ensure it is aware of linker relaxation with LTO, otherwise relocations may
+# be incorrect: https://github.com/llvm/llvm-project/issues/65090
+else ifeq ($(CONFIG_LTO_CLANG),y)
+	KBUILD_LDFLAGS += -mllvm -mattr=+c -mllvm -mattr=+relax
 endif
 
 ifeq ($(CONFIG_SHADOW_CALL_STACK),y)
diff --git a/arch/riscv/boot/dts/microchip/mpfs.dtsi b/arch/riscv/boot/dts/microchip/mpfs.dtsi
index 59fd2d4ea523..9883ca3554c5 100644
--- a/arch/riscv/boot/dts/microchip/mpfs.dtsi
+++ b/arch/riscv/boot/dts/microchip/mpfs.dtsi
@@ -243,7 +243,7 @@
 		};
 
 		pdma: dma-controller@3000000 {
-			compatible = "sifive,fu540-c000-pdma", "sifive,pdma0";
+			compatible = "microchip,mpfs-pdma", "sifive,pdma0";
 			reg = <0x0 0x3000000 0x0 0x8000>;
 			interrupt-parent = <&plic>;
 			interrupts = <5 6>, <7 8>, <9 10>, <11 12>;
@@ -422,7 +422,7 @@
 		can0: can@2010c000 {
 			compatible = "microchip,mpfs-can";
 			reg = <0x0 0x2010c000 0x0 0x1000>;
-			clocks = <&clkcfg CLK_CAN0>;
+			clocks = <&clkcfg CLK_CAN0>, <&clkcfg CLK_MSSPLL3>;
 			interrupt-parent = <&plic>;
 			interrupts = <56>;
 			status = "disabled";
@@ -431,7 +431,7 @@
 		can1: can@2010d000 {
 			compatible = "microchip,mpfs-can";
 			reg = <0x0 0x2010d000 0x0 0x1000>;
-			clocks = <&clkcfg CLK_CAN1>;
+			clocks = <&clkcfg CLK_CAN1>, <&clkcfg CLK_MSSPLL3>;
 			interrupt-parent = <&plic>;
 			interrupts = <57>;
 			status = "disabled";
diff --git a/arch/riscv/boot/dts/renesas/r9a07g043f.dtsi b/arch/riscv/boot/dts/renesas/r9a07g043f.dtsi
index 09ef10b39f46..f35324b9173c 100644
--- a/arch/riscv/boot/dts/renesas/r9a07g043f.dtsi
+++ b/arch/riscv/boot/dts/renesas/r9a07g043f.dtsi
@@ -27,7 +27,7 @@
 			riscv,isa-base = "rv64i";
 			riscv,isa-extensions = "i", "m", "a", "f", "d", "c",
 					       "zicntr", "zicsr", "zifencei",
-					       "zihpm";
+					       "zihpm", "xandespmu";
 			mmu-type = "riscv,sv39";
 			i-cache-size = <0x8000>;
 			i-cache-line-size = <0x40>;
@@ -39,7 +39,7 @@
 
 			cpu0_intc: interrupt-controller {
 				#interrupt-cells = <1>;
-				compatible = "riscv,cpu-intc";
+				compatible = "andestech,cpu-intc", "riscv,cpu-intc";
 				interrupt-controller;
 			};
 		};
diff --git a/arch/riscv/boot/dts/starfive/jh7100-beaglev-starlight.dts b/arch/riscv/boot/dts/starfive/jh7100-beaglev-starlight.dts
index 7cda3a89020a..168f5d9895a9 100644
--- a/arch/riscv/boot/dts/starfive/jh7100-beaglev-starlight.dts
+++ b/arch/riscv/boot/dts/starfive/jh7100-beaglev-starlight.dts
@@ -11,3 +11,14 @@
 	model = "BeagleV Starlight Beta";
 	compatible = "beagle,beaglev-starlight-jh7100-r0", "starfive,jh7100";
 };
+
+&gmac {
+	phy-handle = <&phy>;
+};
+
+&mdio {
+	phy: ethernet-phy@7 {
+		reg = <7>;
+		reset-gpios = <&gpio 63 GPIO_ACTIVE_LOW>;
+	};
+};
diff --git a/arch/riscv/boot/dts/starfive/jh7100-common.dtsi b/arch/riscv/boot/dts/starfive/jh7100-common.dtsi
index 42fb61c36068..ae1a6aeb0aea 100644
--- a/arch/riscv/boot/dts/starfive/jh7100-common.dtsi
+++ b/arch/riscv/boot/dts/starfive/jh7100-common.dtsi
@@ -72,7 +72,91 @@
 	};
 };
 
+&gmac {
+	pinctrl-names = "default";
+	pinctrl-0 = <&gmac_pins>;
+	phy-mode = "rgmii-id";
+	status = "okay";
+
+	mdio: mdio {
+		#address-cells = <1>;
+		#size-cells = <0>;
+		compatible = "snps,dwmac-mdio";
+	};
+};
+
 &gpio {
+	gmac_pins: gmac-0 {
+		gtxclk-pins {
+			pins = <PAD_FUNC_SHARE(115)>;
+			bias-pull-up;
+			drive-strength = <35>;
+			input-enable;
+			input-schmitt-enable;
+			slew-rate = <0>;
+		};
+		miitxclk-pins {
+			pins = <PAD_FUNC_SHARE(116)>;
+			bias-pull-up;
+			drive-strength = <14>;
+			input-enable;
+			input-schmitt-disable;
+			slew-rate = <0>;
+		};
+		tx-pins {
+			pins = <PAD_FUNC_SHARE(117)>,
+			       <PAD_FUNC_SHARE(119)>,
+			       <PAD_FUNC_SHARE(120)>,
+			       <PAD_FUNC_SHARE(121)>,
+			       <PAD_FUNC_SHARE(122)>,
+			       <PAD_FUNC_SHARE(123)>,
+			       <PAD_FUNC_SHARE(124)>,
+			       <PAD_FUNC_SHARE(125)>,
+			       <PAD_FUNC_SHARE(126)>;
+			bias-pull-up;
+			drive-strength = <35>;
+			input-disable;
+			input-schmitt-disable;
+			slew-rate = <0>;
+		};
+		rxclk-pins {
+			pins = <PAD_FUNC_SHARE(127)>;
+			bias-pull-up;
+			drive-strength = <14>;
+			input-enable;
+			input-schmitt-disable;
+			slew-rate = <6>;
+		};
+		rxer-pins {
+			pins = <PAD_FUNC_SHARE(129)>;
+			bias-pull-up;
+			drive-strength = <14>;
+			input-enable;
+			input-schmitt-disable;
+			slew-rate = <0>;
+		};
+		rx-pins {
+			pins = <PAD_FUNC_SHARE(128)>,
+			       <PAD_FUNC_SHARE(130)>,
+			       <PAD_FUNC_SHARE(131)>,
+			       <PAD_FUNC_SHARE(132)>,
+			       <PAD_FUNC_SHARE(133)>,
+			       <PAD_FUNC_SHARE(134)>,
+			       <PAD_FUNC_SHARE(135)>,
+			       <PAD_FUNC_SHARE(136)>,
+			       <PAD_FUNC_SHARE(137)>,
+			       <PAD_FUNC_SHARE(138)>,
+			       <PAD_FUNC_SHARE(139)>,
+			       <PAD_FUNC_SHARE(140)>,
+			       <PAD_FUNC_SHARE(141)>;
+			bias-pull-up;
+			drive-strength = <14>;
+			input-enable;
+			input-schmitt-enable;
+			slew-rate = <0>;
+		};
+	};
+
 	i2c0_pins: i2c0-0 {
 		i2c-pins {
 			pinmux = <GPIOMUX(62, GPO_LOW,
@@ -115,6 +199,24 @@
 		};
 	};
 
+	pwm_pins: pwm-0 {
+		pwm-pins {
+			pinmux = <GPIOMUX(7,
+				  GPO_PWM_PAD_OUT_BIT0,
+				  GPO_PWM_PAD_OE_N_BIT0,
+				  GPI_NONE)>,
+				 <GPIOMUX(5,
+				  GPO_PWM_PAD_OUT_BIT1,
+				  GPO_PWM_PAD_OE_N_BIT1,
+				  GPI_NONE)>;
+			bias-disable;
+			drive-strength = <35>;
+			input-disable;
+			input-schmitt-disable;
+			slew-rate = <0>;
+		};
+	};
+
 	sdio0_pins: sdio0-0 {
 		clk-pins {
 			pinmux = <GPIOMUX(54, GPO_SDIO0_PAD_CCLK_OUT,
@@ -257,6 +359,12 @@
 	clock-frequency = <27000000>;
 };
 
+&pwm {
+	pinctrl-names = "default";
+	pinctrl-0 = <&pwm_pins>;
+	status = "okay";
+};
+
 &sdio0 {
 	broken-cd;
 	bus-width = <4>;
diff --git a/arch/riscv/boot/dts/starfive/jh7100-starfive-visionfive-v1.dts b/arch/riscv/boot/dts/starfive/jh7100-starfive-visionfive-v1.dts
index e82af72f1aaf..692c696e1ab4 100644
--- a/arch/riscv/boot/dts/starfive/jh7100-starfive-visionfive-v1.dts
+++ b/arch/riscv/boot/dts/starfive/jh7100-starfive-visionfive-v1.dts
@@ -6,7 +6,6 @@
 
 /dts-v1/;
 #include "jh7100-common.dtsi"
-#include <dt-bindings/gpio/gpio.h>
 
 / {
 	model = "StarFive VisionFive V1";
@@ -18,3 +17,24 @@
 		priority = <224>;
 	};
 };
+
+&gmac {
+	phy-handle = <&phy>;
+};
+
+/*
+ * The board uses a Motorcomm YT8521 PHY supporting RGMII-ID, but requires
+ * manual adjustment of the RX internal delay to work properly.  The default
+ * RX delay provided by the driver (1.95ns) is too high, but applying a 50%
+ * reduction seems to mitigate the issue.
+ *
+ * It is worth noting the adjustment is not necessary on BeagleV Starlight SBC,
+ * which uses a Microchip PHY.  Hence, most likely the Motorcomm PHY is the one
+ * responsible for the misbehaviour, not the GMAC.
+ */
+&mdio {
+	phy: ethernet-phy@0 {
+		reg = <0>;
+		rx-internal-delay-ps = <900>;
+	};
+};
diff --git a/arch/riscv/boot/dts/starfive/jh7100.dtsi b/arch/riscv/boot/dts/starfive/jh7100.dtsi
index 5d499d8aa804..9a2e9583af88 100644
--- a/arch/riscv/boot/dts/starfive/jh7100.dtsi
+++ b/arch/riscv/boot/dts/starfive/jh7100.dtsi
@@ -208,6 +208,37 @@
 			status = "disabled";
 		};
 
+		gmac: ethernet@10020000 {
+			compatible = "starfive,jh7100-dwmac", "snps,dwmac";
+			reg = <0x0 0x10020000 0x0 0x10000>;
+			clocks = <&clkgen JH7100_CLK_GMAC_ROOT_DIV>,
+				 <&clkgen JH7100_CLK_GMAC_AHB>,
+				 <&clkgen JH7100_CLK_GMAC_PTP_REF>,
+				 <&clkgen JH7100_CLK_GMAC_TX_INV>,
+				 <&clkgen JH7100_CLK_GMAC_GTX>;
+			clock-names = "stmmaceth", "pclk", "ptp_ref", "tx", "gtx";
+			resets = <&rstgen JH7100_RSTN_GMAC_AHB>;
+			reset-names = "ahb";
+			interrupts = <6>, <7>;
+			interrupt-names = "macirq", "eth_wake_irq";
+			max-frame-size = <9000>;
+			snps,multicast-filter-bins = <32>;
+			snps,perfect-filter-entries = <128>;
+			starfive,syscon = <&sysmain 0x70 0>;
+			rx-fifo-depth = <32768>;
+			tx-fifo-depth = <16384>;
+			snps,axi-config = <&stmmac_axi_setup>;
+			snps,fixed-burst;
+			snps,force_thresh_dma_mode;
+			status = "disabled";
+
+			stmmac_axi_setup: stmmac-axi-config {
+				snps,wr_osr_lmt = <16>;
+				snps,rd_osr_lmt = <16>;
+				snps,blen = <256 128 64 32 0 0 0>;
+			};
+		};
+
 		clkgen: clock-controller@11800000 {
 			compatible = "starfive,jh7100-clkgen";
 			reg = <0x0 0x11800000 0x0 0x10000>;
@@ -222,6 +253,11 @@
 			#reset-cells = <1>;
 		};
 
+		sysmain: syscon@11850000 {
+			compatible = "starfive,jh7100-sysmain", "syscon";
+			reg = <0x0 0x11850000 0x0 0x10000>;
+		};
+
 		i2c0: i2c@118b0000 {
 			compatible = "snps,designware-i2c";
 			reg = <0x0 0x118b0000 0x0 0x10000>;
@@ -324,6 +360,15 @@
 				 <&rstgen JH7100_RSTN_WDT>;
 		};
 
+		pwm: pwm@12490000 {
+			compatible = "starfive,jh7100-pwm", "opencores,pwm-v1";
+			reg = <0x0 0x12490000 0x0 0x10000>;
+			clocks = <&clkgen JH7100_CLK_PWM_APB>;
+			resets = <&rstgen JH7100_RSTN_PWM_APB>;
+			#pwm-cells = <3>;
+			status = "disabled";
+		};
+
 		sfctemp: temperature-sensor@124a0000 {
 			compatible = "starfive,jh7100-temp";
 			reg = <0x0 0x124a0000 0x0 0x10000>;
diff --git a/arch/riscv/boot/dts/starfive/jh7110-starfive-visionfive-2.dtsi b/arch/riscv/boot/dts/starfive/jh7110-starfive-visionfive-2.dtsi
index b89e9791efa7..45b58b6f3df8 100644
--- a/arch/riscv/boot/dts/starfive/jh7110-starfive-visionfive-2.dtsi
+++ b/arch/riscv/boot/dts/starfive/jh7110-starfive-visionfive-2.dtsi
@@ -125,6 +125,55 @@
 	clock-frequency = <49152000>;
 };
 
+&camss {
+	assigned-clocks = <&ispcrg JH7110_ISPCLK_DOM4_APB_FUNC>,
+			  <&ispcrg JH7110_ISPCLK_MIPI_RX0_PXL>;
+	assigned-clock-rates = <49500000>, <198000000>;
+	status = "okay";
+
+	ports {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		port@0 {
+			reg = <0>;
+		};
+
+		port@1 {
+			reg = <1>;
+
+			camss_from_csi2rx: endpoint {
+				remote-endpoint = <&csi2rx_to_camss>;
+			};
+		};
+	};
+};
+
+&csi2rx {
+	assigned-clocks = <&ispcrg JH7110_ISPCLK_VIN_SYS>;
+	assigned-clock-rates = <297000000>;
+	status = "okay";
+
+	ports {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		port@0 {
+			reg = <0>;
+
+			/* remote MIPI sensor endpoint */
+		};
+
+		port@1 {
+			reg = <1>;
+
+			csi2rx_to_camss: endpoint {
+				remote-endpoint = <&camss_from_csi2rx>;
+			};
+		};
+	};
+};
+
 &gmac0 {
 	phy-handle = <&phy0>;
 	phy-mode = "rgmii-id";
@@ -323,6 +372,12 @@
 	};
 };
 
+&pwm {
+	pinctrl-names = "default";
+	pinctrl-0 = <&pwm_pins>;
+	status = "okay";
+};
+
 &spi0 {
 	pinctrl-names = "default";
 	pinctrl-0 = <&spi0_pins>;
@@ -513,6 +568,22 @@
 		};
 	};
 
+	pwm_pins: pwm-0 {
+		pwm-pins {
+			pinmux = <GPIOMUX(46, GPOUT_SYS_PWM_CHANNEL0,
+					      GPOEN_SYS_PWM0_CHANNEL0,
+					      GPI_NONE)>,
+				 <GPIOMUX(59, GPOUT_SYS_PWM_CHANNEL1,
+					      GPOEN_SYS_PWM0_CHANNEL1,
+					      GPI_NONE)>;
+			bias-disable;
+			drive-strength = <12>;
+			input-disable;
+			input-schmitt-disable;
+			slew-rate = <0>;
+		};
+	};
+
 	spi0_pins: spi0-0 {
 		mosi-pins {
 			pinmux = <GPIOMUX(52, GPOUT_SYS_SPI0_TXD,
diff --git a/arch/riscv/boot/dts/starfive/jh7110.dtsi b/arch/riscv/boot/dts/starfive/jh7110.dtsi
index 74ed3b9264d8..4a5708f7fcf7 100644
--- a/arch/riscv/boot/dts/starfive/jh7110.dtsi
+++ b/arch/riscv/boot/dts/starfive/jh7110.dtsi
@@ -829,6 +829,15 @@
 			status = "disabled";
 		};
 
+		pwm: pwm@120d0000 {
+			compatible = "starfive,jh7110-pwm", "opencores,pwm-v1";
+			reg = <0x0 0x120d0000 0x0 0x10000>;
+			clocks = <&syscrg JH7110_SYSCLK_PWM_APB>;
+			resets = <&syscrg JH7110_SYSRST_PWM_APB>;
+			#pwm-cells = <3>;
+			status = "disabled";
+		};
+
 		sfctemp: temperature-sensor@120e0000 {
 			compatible = "starfive,jh7110-temp";
 			reg = <0x0 0x120e0000 0x0 0x10000>;
@@ -1104,6 +1113,32 @@
 			#power-domain-cells = <1>;
 		};
 
+		csi2rx: csi@19800000 {
+			compatible = "starfive,jh7110-csi2rx", "cdns,csi2rx";
+			reg = <0x0 0x19800000 0x0 0x10000>;
+			clocks = <&ispcrg JH7110_ISPCLK_VIN_SYS>,
+				 <&ispcrg JH7110_ISPCLK_VIN_APB>,
+				 <&ispcrg JH7110_ISPCLK_VIN_PIXEL_IF0>,
+				 <&ispcrg JH7110_ISPCLK_VIN_PIXEL_IF1>,
+				 <&ispcrg JH7110_ISPCLK_VIN_PIXEL_IF2>,
+				 <&ispcrg JH7110_ISPCLK_VIN_PIXEL_IF3>;
+			clock-names = "sys_clk", "p_clk",
+				      "pixel_if0_clk", "pixel_if1_clk",
+				      "pixel_if2_clk", "pixel_if3_clk";
+			resets = <&ispcrg JH7110_ISPRST_VIN_SYS>,
+				 <&ispcrg JH7110_ISPRST_VIN_APB>,
+				 <&ispcrg JH7110_ISPRST_VIN_PIXEL_IF0>,
+				 <&ispcrg JH7110_ISPRST_VIN_PIXEL_IF1>,
+				 <&ispcrg JH7110_ISPRST_VIN_PIXEL_IF2>,
+				 <&ispcrg JH7110_ISPRST_VIN_PIXEL_IF3>;
+			reset-names = "sys", "reg_bank",
+				      "pixel_if0", "pixel_if1",
+				      "pixel_if2", "pixel_if3";
+			phys = <&csi_phy>;
+			phy-names = "dphy";
+			status = "disabled";
+		};
+
 		ispcrg: clock-controller@19810000 {
 			compatible = "starfive,jh7110-ispcrg";
 			reg = <0x0 0x19810000 0x0 0x10000>;
@@ -1121,6 +1156,47 @@
 			power-domains = <&pwrc JH7110_PD_ISP>;
 		};
 
+		csi_phy: phy@19820000 {
+			compatible = "starfive,jh7110-dphy-rx";
+			reg = <0x0 0x19820000 0x0 0x10000>;
+			clocks = <&ispcrg JH7110_ISPCLK_M31DPHY_CFG_IN>,
+				 <&ispcrg JH7110_ISPCLK_M31DPHY_REF_IN>,
+				 <&ispcrg JH7110_ISPCLK_M31DPHY_TX_ESC_LAN0>;
+			clock-names = "cfg", "ref", "tx";
+			resets = <&ispcrg JH7110_ISPRST_M31DPHY_HW>,
+				 <&ispcrg JH7110_ISPRST_M31DPHY_B09_AON>;
+			power-domains = <&aon_syscon JH7110_AON_PD_DPHY_RX>;
+			#phy-cells = <0>;
+		};
+
+		camss: isp@19840000 {
+			compatible = "starfive,jh7110-camss";
+			reg = <0x0 0x19840000 0x0 0x10000>,
+			      <0x0 0x19870000 0x0 0x30000>;
+			reg-names = "syscon", "isp";
+			clocks = <&ispcrg JH7110_ISPCLK_DOM4_APB_FUNC>,
+				 <&ispcrg JH7110_ISPCLK_ISPV2_TOP_WRAPPER_C>,
+				 <&ispcrg JH7110_ISPCLK_DVP_INV>,
+				 <&ispcrg JH7110_ISPCLK_VIN_P_AXI_WR>,
+				 <&ispcrg JH7110_ISPCLK_MIPI_RX0_PXL>,
+				 <&syscrg JH7110_SYSCLK_ISP_TOP_CORE>,
+				 <&syscrg JH7110_SYSCLK_ISP_TOP_AXI>;
+			clock-names = "apb_func", "wrapper_clk_c", "dvp_inv",
+				      "axiwr", "mipi_rx0_pxl", "ispcore_2x",
+				      "isp_axi";
+			resets = <&ispcrg JH7110_ISPRST_ISPV2_TOP_WRAPPER_P>,
+				 <&ispcrg JH7110_ISPRST_ISPV2_TOP_WRAPPER_C>,
+				 <&ispcrg JH7110_ISPRST_VIN_P_AXI_RD>,
+				 <&ispcrg JH7110_ISPRST_VIN_P_AXI_WR>,
+				 <&syscrg JH7110_SYSRST_ISP_TOP>,
+				 <&syscrg JH7110_SYSRST_ISP_TOP_AXI>;
+			reset-names = "wrapper_p", "wrapper_c", "axird",
+				      "axiwr", "isp_top_n", "isp_top_axi";
+			power-domains = <&pwrc JH7110_PD_ISP>;
+			interrupts = <92>, <87>, <90>, <88>;
+			status = "disabled";
+		};
+
 		voutcrg: clock-controller@295c0000 {
 			compatible = "starfive,jh7110-voutcrg";
 			reg = <0x0 0x295c0000 0x0 0x10000>;
diff --git a/arch/riscv/configs/defconfig b/arch/riscv/configs/defconfig
index eaf34e871e30..fc0ec2ee13bc 100644
--- a/arch/riscv/configs/defconfig
+++ b/arch/riscv/configs/defconfig
@@ -44,6 +44,7 @@ CONFIG_CPU_FREQ_GOV_USERSPACE=y
 CONFIG_CPU_FREQ_GOV_ONDEMAND=y
 CONFIG_CPU_FREQ_GOV_CONSERVATIVE=m
 CONFIG_CPUFREQ_DT=y
+CONFIG_ACPI_CPPC_CPUFREQ=m
 CONFIG_VIRTUALIZATION=y
 CONFIG_KVM=m
 CONFIG_ACPI=y
@@ -215,6 +216,7 @@ CONFIG_MMC=y
 CONFIG_MMC_SDHCI=y
 CONFIG_MMC_SDHCI_PLTFM=y
 CONFIG_MMC_SDHCI_CADENCE=y
+CONFIG_MMC_SDHCI_OF_DWCMSHC=y
 CONFIG_MMC_SPI=y
 CONFIG_MMC_DW=y
 CONFIG_MMC_DW_STARFIVE=y
@@ -224,6 +226,7 @@ CONFIG_RTC_CLASS=y
 CONFIG_RTC_DRV_SUN6I=y
 CONFIG_DMADEVICES=y
 CONFIG_DMA_SUN6I=m
+CONFIG_DW_AXI_DMAC=y
 CONFIG_RZ_DMAC=y
 CONFIG_VIRTIO_PCI=y
 CONFIG_VIRTIO_BALLOON=y
diff --git a/arch/riscv/crypto/Kconfig b/arch/riscv/crypto/Kconfig
new file mode 100644
index 000000000000..ad58dad9a580
--- /dev/null
+++ b/arch/riscv/crypto/Kconfig
@@ -0,0 +1,93 @@
+# SPDX-License-Identifier: GPL-2.0
+
+menu "Accelerated Cryptographic Algorithms for CPU (riscv)"
+
+config CRYPTO_AES_RISCV64
+	tristate "Ciphers: AES, modes: ECB, CBC, CTS, CTR, XTS"
+	depends on 64BIT && RISCV_ISA_V && TOOLCHAIN_HAS_VECTOR_CRYPTO
+	select CRYPTO_ALGAPI
+	select CRYPTO_LIB_AES
+	select CRYPTO_SKCIPHER
+	help
+	  Block cipher: AES cipher algorithms
+	  Length-preserving ciphers: AES with ECB, CBC, CTS, CTR, XTS
+
+	  Architecture: riscv64 using:
+	  - Zvkned vector crypto extension
+	  - Zvbb vector extension (XTS)
+	  - Zvkb vector crypto extension (CTR)
+	  - Zvkg vector crypto extension (XTS)
+
+config CRYPTO_CHACHA_RISCV64
+	tristate "Ciphers: ChaCha"
+	depends on 64BIT && RISCV_ISA_V && TOOLCHAIN_HAS_VECTOR_CRYPTO
+	select CRYPTO_SKCIPHER
+	select CRYPTO_LIB_CHACHA_GENERIC
+	help
+	  Length-preserving ciphers: ChaCha20 stream cipher algorithm
+
+	  Architecture: riscv64 using:
+	  - Zvkb vector crypto extension
+
+config CRYPTO_GHASH_RISCV64
+	tristate "Hash functions: GHASH"
+	depends on 64BIT && RISCV_ISA_V && TOOLCHAIN_HAS_VECTOR_CRYPTO
+	select CRYPTO_GCM
+	help
+	  GCM GHASH function (NIST SP 800-38D)
+
+	  Architecture: riscv64 using:
+	  - Zvkg vector crypto extension
+
+config CRYPTO_SHA256_RISCV64
+	tristate "Hash functions: SHA-224 and SHA-256"
+	depends on 64BIT && RISCV_ISA_V && TOOLCHAIN_HAS_VECTOR_CRYPTO
+	select CRYPTO_SHA256
+	help
+	  SHA-224 and SHA-256 secure hash algorithm (FIPS 180)
+
+	  Architecture: riscv64 using:
+	  - Zvknha or Zvknhb vector crypto extensions
+	  - Zvkb vector crypto extension
+
+config CRYPTO_SHA512_RISCV64
+	tristate "Hash functions: SHA-384 and SHA-512"
+	depends on 64BIT && RISCV_ISA_V && TOOLCHAIN_HAS_VECTOR_CRYPTO
+	select CRYPTO_SHA512
+	help
+	  SHA-384 and SHA-512 secure hash algorithm (FIPS 180)
+
+	  Architecture: riscv64 using:
+	  - Zvknhb vector crypto extension
+	  - Zvkb vector crypto extension
+
+config CRYPTO_SM3_RISCV64
+	tristate "Hash functions: SM3 (ShangMi 3)"
+	depends on 64BIT && RISCV_ISA_V && TOOLCHAIN_HAS_VECTOR_CRYPTO
+	select CRYPTO_HASH
+	select CRYPTO_SM3
+	help
+	  SM3 (ShangMi 3) secure hash function (OSCCA GM/T 0004-2012)
+
+	  Architecture: riscv64 using:
+	  - Zvksh vector crypto extension
+	  - Zvkb vector crypto extension
+
+config CRYPTO_SM4_RISCV64
+	tristate "Ciphers: SM4 (ShangMi 4)"
+	depends on 64BIT && RISCV_ISA_V && TOOLCHAIN_HAS_VECTOR_CRYPTO
+	select CRYPTO_ALGAPI
+	select CRYPTO_SM4
+	help
+	  SM4 block cipher algorithm (OSCCA GB/T 32907-2016,
+	  ISO/IEC 18033-3:2010/Amd 1:2021)
+
+	  SM4 (GBT.32907-2016) is a cryptographic standard issued by the
+	  Organization of State Commercial Administration of China (OSCCA)
+	  as an authorized cryptographic algorithm for use within China.
+
+	  Architecture: riscv64 using:
+	  - Zvksed vector crypto extension
+	  - Zvkb vector crypto extension
+
+endmenu
diff --git a/arch/riscv/crypto/Makefile b/arch/riscv/crypto/Makefile
new file mode 100644
index 000000000000..247c7bc7288c
--- /dev/null
+++ b/arch/riscv/crypto/Makefile
@@ -0,0 +1,23 @@
+# SPDX-License-Identifier: GPL-2.0-only
+
+obj-$(CONFIG_CRYPTO_AES_RISCV64) += aes-riscv64.o
+aes-riscv64-y := aes-riscv64-glue.o aes-riscv64-zvkned.o \
+		 aes-riscv64-zvkned-zvbb-zvkg.o aes-riscv64-zvkned-zvkb.o
+
+obj-$(CONFIG_CRYPTO_CHACHA_RISCV64) += chacha-riscv64.o
+chacha-riscv64-y := chacha-riscv64-glue.o chacha-riscv64-zvkb.o
+
+obj-$(CONFIG_CRYPTO_GHASH_RISCV64) += ghash-riscv64.o
+ghash-riscv64-y := ghash-riscv64-glue.o ghash-riscv64-zvkg.o
+
+obj-$(CONFIG_CRYPTO_SHA256_RISCV64) += sha256-riscv64.o
+sha256-riscv64-y := sha256-riscv64-glue.o sha256-riscv64-zvknha_or_zvknhb-zvkb.o
+
+obj-$(CONFIG_CRYPTO_SHA512_RISCV64) += sha512-riscv64.o
+sha512-riscv64-y := sha512-riscv64-glue.o sha512-riscv64-zvknhb-zvkb.o
+
+obj-$(CONFIG_CRYPTO_SM3_RISCV64) += sm3-riscv64.o
+sm3-riscv64-y := sm3-riscv64-glue.o sm3-riscv64-zvksh-zvkb.o
+
+obj-$(CONFIG_CRYPTO_SM4_RISCV64) += sm4-riscv64.o
+sm4-riscv64-y := sm4-riscv64-glue.o sm4-riscv64-zvksed-zvkb.o
diff --git a/arch/riscv/crypto/aes-macros.S b/arch/riscv/crypto/aes-macros.S
new file mode 100644
index 000000000000..d1a258d04bc7
--- /dev/null
+++ b/arch/riscv/crypto/aes-macros.S
@@ -0,0 +1,156 @@
+/* SPDX-License-Identifier: Apache-2.0 OR BSD-2-Clause */
+//
+// This file is dual-licensed, meaning that you can use it under your
+// choice of either of the following two licenses:
+//
+// Copyright 2023 The OpenSSL Project Authors. All Rights Reserved.
+//
+// Licensed under the Apache License 2.0 (the "License"). You can obtain
+// a copy in the file LICENSE in the source distribution or at
+// https://www.openssl.org/source/license.html
+//
+// or
+//
+// Copyright (c) 2023, Christoph Müllner <christoph.muellner@vrull.eu>
+// Copyright (c) 2023, Phoebe Chen <phoebe.chen@sifive.com>
+// Copyright (c) 2023, Jerry Shih <jerry.shih@sifive.com>
+// Copyright 2024 Google LLC
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+// 1. Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+// 2. Redistributions in binary form must reproduce the above copyright
+//    notice, this list of conditions and the following disclaimer in the
+//    documentation and/or other materials provided with the distribution.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// This file contains macros that are shared by the other aes-*.S files.  The
+// generated code of these macros depends on the following RISC-V extensions:
+// - RV64I
+// - RISC-V Vector ('V') with VLEN >= 128
+// - RISC-V Vector AES block cipher extension ('Zvkned')
+
+// Loads the AES round keys from \keyp into vector registers and jumps to code
+// specific to the length of the key.  Specifically:
+//   - If AES-128, loads round keys into v1-v11 and jumps to \label128.
+//   - If AES-192, loads round keys into v1-v13 and jumps to \label192.
+//   - If AES-256, loads round keys into v1-v15 and continues onwards.
+//
+// Also sets vl=4 and vtype=e32,m1,ta,ma.  Clobbers t0 and t1.
+.macro	aes_begin	keyp, label128, label192
+	lwu		t0, 480(\keyp)	// t0 = key length in bytes
+	li		t1, 24		// t1 = key length for AES-192
+	vsetivli	zero, 4, e32, m1, ta, ma
+	vle32.v		v1, (\keyp)
+	addi		\keyp, \keyp, 16
+	vle32.v		v2, (\keyp)
+	addi		\keyp, \keyp, 16
+	vle32.v		v3, (\keyp)
+	addi		\keyp, \keyp, 16
+	vle32.v		v4, (\keyp)
+	addi		\keyp, \keyp, 16
+	vle32.v		v5, (\keyp)
+	addi		\keyp, \keyp, 16
+	vle32.v		v6, (\keyp)
+	addi		\keyp, \keyp, 16
+	vle32.v		v7, (\keyp)
+	addi		\keyp, \keyp, 16
+	vle32.v		v8, (\keyp)
+	addi		\keyp, \keyp, 16
+	vle32.v		v9, (\keyp)
+	addi		\keyp, \keyp, 16
+	vle32.v		v10, (\keyp)
+	addi		\keyp, \keyp, 16
+	vle32.v		v11, (\keyp)
+	blt		t0, t1, \label128	// If AES-128, goto label128.
+	addi		\keyp, \keyp, 16
+	vle32.v		v12, (\keyp)
+	addi		\keyp, \keyp, 16
+	vle32.v		v13, (\keyp)
+	beq		t0, t1, \label192	// If AES-192, goto label192.
+	// Else, it's AES-256.
+	addi		\keyp, \keyp, 16
+	vle32.v		v14, (\keyp)
+	addi		\keyp, \keyp, 16
+	vle32.v		v15, (\keyp)
+.endm
+
+// Encrypts \data using zvkned instructions, using the round keys loaded into
+// v1-v11 (for AES-128), v1-v13 (for AES-192), or v1-v15 (for AES-256).  \keylen
+// is the AES key length in bits.  vl and vtype must already be set
+// appropriately.  Note that if vl > 4, multiple blocks are encrypted.
+.macro	aes_encrypt	data, keylen
+	vaesz.vs	\data, v1
+	vaesem.vs	\data, v2
+	vaesem.vs	\data, v3
+	vaesem.vs	\data, v4
+	vaesem.vs	\data, v5
+	vaesem.vs	\data, v6
+	vaesem.vs	\data, v7
+	vaesem.vs	\data, v8
+	vaesem.vs	\data, v9
+	vaesem.vs	\data, v10
+.if \keylen == 128
+	vaesef.vs	\data, v11
+.elseif \keylen == 192
+	vaesem.vs	\data, v11
+	vaesem.vs	\data, v12
+	vaesef.vs	\data, v13
+.else
+	vaesem.vs	\data, v11
+	vaesem.vs	\data, v12
+	vaesem.vs	\data, v13
+	vaesem.vs	\data, v14
+	vaesef.vs	\data, v15
+.endif
+.endm
+
+// Same as aes_encrypt, but decrypts instead of encrypts.
+.macro	aes_decrypt	data, keylen
+.if \keylen == 128
+	vaesz.vs	\data, v11
+.elseif \keylen == 192
+	vaesz.vs	\data, v13
+	vaesdm.vs	\data, v12
+	vaesdm.vs	\data, v11
+.else
+	vaesz.vs	\data, v15
+	vaesdm.vs	\data, v14
+	vaesdm.vs	\data, v13
+	vaesdm.vs	\data, v12
+	vaesdm.vs	\data, v11
+.endif
+	vaesdm.vs	\data, v10
+	vaesdm.vs	\data, v9
+	vaesdm.vs	\data, v8
+	vaesdm.vs	\data, v7
+	vaesdm.vs	\data, v6
+	vaesdm.vs	\data, v5
+	vaesdm.vs	\data, v4
+	vaesdm.vs	\data, v3
+	vaesdm.vs	\data, v2
+	vaesdf.vs	\data, v1
+.endm
+
+// Expands to aes_encrypt or aes_decrypt according to \enc, which is 1 or 0.
+.macro	aes_crypt	data, enc, keylen
+.if \enc
+	aes_encrypt	\data, \keylen
+.else
+	aes_decrypt	\data, \keylen
+.endif
+.endm
diff --git a/arch/riscv/crypto/aes-riscv64-glue.c b/arch/riscv/crypto/aes-riscv64-glue.c
new file mode 100644
index 000000000000..f814ee048555
--- /dev/null
+++ b/arch/riscv/crypto/aes-riscv64-glue.c
@@ -0,0 +1,637 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * AES using the RISC-V vector crypto extensions.  Includes the bare block
+ * cipher and the ECB, CBC, CBC-CTS, CTR, and XTS modes.
+ *
+ * Copyright (C) 2023 VRULL GmbH
+ * Author: Heiko Stuebner <heiko.stuebner@vrull.eu>
+ *
+ * Copyright (C) 2023 SiFive, Inc.
+ * Author: Jerry Shih <jerry.shih@sifive.com>
+ *
+ * Copyright 2024 Google LLC
+ */
+
+#include <asm/simd.h>
+#include <asm/vector.h>
+#include <crypto/aes.h>
+#include <crypto/internal/cipher.h>
+#include <crypto/internal/simd.h>
+#include <crypto/internal/skcipher.h>
+#include <crypto/scatterwalk.h>
+#include <crypto/xts.h>
+#include <linux/linkage.h>
+#include <linux/module.h>
+
+asmlinkage void aes_encrypt_zvkned(const struct crypto_aes_ctx *key,
+				   const u8 in[AES_BLOCK_SIZE],
+				   u8 out[AES_BLOCK_SIZE]);
+asmlinkage void aes_decrypt_zvkned(const struct crypto_aes_ctx *key,
+				   const u8 in[AES_BLOCK_SIZE],
+				   u8 out[AES_BLOCK_SIZE]);
+
+asmlinkage void aes_ecb_encrypt_zvkned(const struct crypto_aes_ctx *key,
+				       const u8 *in, u8 *out, size_t len);
+asmlinkage void aes_ecb_decrypt_zvkned(const struct crypto_aes_ctx *key,
+				       const u8 *in, u8 *out, size_t len);
+
+asmlinkage void aes_cbc_encrypt_zvkned(const struct crypto_aes_ctx *key,
+				       const u8 *in, u8 *out, size_t len,
+				       u8 iv[AES_BLOCK_SIZE]);
+asmlinkage void aes_cbc_decrypt_zvkned(const struct crypto_aes_ctx *key,
+				       const u8 *in, u8 *out, size_t len,
+				       u8 iv[AES_BLOCK_SIZE]);
+
+asmlinkage void aes_cbc_cts_crypt_zvkned(const struct crypto_aes_ctx *key,
+					 const u8 *in, u8 *out, size_t len,
+					 const u8 iv[AES_BLOCK_SIZE], bool enc);
+
+asmlinkage void aes_ctr32_crypt_zvkned_zvkb(const struct crypto_aes_ctx *key,
+					    const u8 *in, u8 *out, size_t len,
+					    u8 iv[AES_BLOCK_SIZE]);
+
+asmlinkage void aes_xts_encrypt_zvkned_zvbb_zvkg(
+			const struct crypto_aes_ctx *key,
+			const u8 *in, u8 *out, size_t len,
+			u8 tweak[AES_BLOCK_SIZE]);
+
+asmlinkage void aes_xts_decrypt_zvkned_zvbb_zvkg(
+			const struct crypto_aes_ctx *key,
+			const u8 *in, u8 *out, size_t len,
+			u8 tweak[AES_BLOCK_SIZE]);
+
+static int riscv64_aes_setkey(struct crypto_aes_ctx *ctx,
+			      const u8 *key, unsigned int keylen)
+{
+	/*
+	 * For now we just use the generic key expansion, for these reasons:
+	 *
+	 * - zvkned's key expansion instructions don't support AES-192.
+	 *   So, non-zvkned fallback code would be needed anyway.
+	 *
+	 * - Users of AES in Linux usually don't change keys frequently.
+	 *   So, key expansion isn't performance-critical.
+	 *
+	 * - For single-block AES exposed as a "cipher" algorithm, it's
+	 *   necessary to use struct crypto_aes_ctx and initialize its 'key_dec'
+	 *   field with the round keys for the Equivalent Inverse Cipher.  This
+	 *   is because with "cipher", decryption can be requested from a
+	 *   context where the vector unit isn't usable, necessitating a
+	 *   fallback to aes_decrypt().  But, zvkned can only generate and use
+	 *   the normal round keys.  Of course, it's preferable to not have
+	 *   special code just for "cipher", as e.g. XTS also uses a
+	 *   single-block AES encryption.  It's simplest to just use
+	 *   struct crypto_aes_ctx and aes_expandkey() everywhere.
+	 */
+	return aes_expandkey(ctx, key, keylen);
+}
+
+static int riscv64_aes_setkey_cipher(struct crypto_tfm *tfm,
+				     const u8 *key, unsigned int keylen)
+{
+	struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	return riscv64_aes_setkey(ctx, key, keylen);
+}
+
+static int riscv64_aes_setkey_skcipher(struct crypto_skcipher *tfm,
+				       const u8 *key, unsigned int keylen)
+{
+	struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+	return riscv64_aes_setkey(ctx, key, keylen);
+}
+
+/* Bare AES, without a mode of operation */
+
+static void riscv64_aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
+{
+	const struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	if (crypto_simd_usable()) {
+		kernel_vector_begin();
+		aes_encrypt_zvkned(ctx, src, dst);
+		kernel_vector_end();
+	} else {
+		aes_encrypt(ctx, dst, src);
+	}
+}
+
+static void riscv64_aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
+{
+	const struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	if (crypto_simd_usable()) {
+		kernel_vector_begin();
+		aes_decrypt_zvkned(ctx, src, dst);
+		kernel_vector_end();
+	} else {
+		aes_decrypt(ctx, dst, src);
+	}
+}
+
+/* AES-ECB */
+
+static inline int riscv64_aes_ecb_crypt(struct skcipher_request *req, bool enc)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	const struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct skcipher_walk walk;
+	unsigned int nbytes;
+	int err;
+
+	err = skcipher_walk_virt(&walk, req, false);
+	while ((nbytes = walk.nbytes) != 0) {
+		kernel_vector_begin();
+		if (enc)
+			aes_ecb_encrypt_zvkned(ctx, walk.src.virt.addr,
+					       walk.dst.virt.addr,
+					       nbytes & ~(AES_BLOCK_SIZE - 1));
+		else
+			aes_ecb_decrypt_zvkned(ctx, walk.src.virt.addr,
+					       walk.dst.virt.addr,
+					       nbytes & ~(AES_BLOCK_SIZE - 1));
+		kernel_vector_end();
+		err = skcipher_walk_done(&walk, nbytes & (AES_BLOCK_SIZE - 1));
+	}
+
+	return err;
+}
+
+static int riscv64_aes_ecb_encrypt(struct skcipher_request *req)
+{
+	return riscv64_aes_ecb_crypt(req, true);
+}
+
+static int riscv64_aes_ecb_decrypt(struct skcipher_request *req)
+{
+	return riscv64_aes_ecb_crypt(req, false);
+}
+
+/* AES-CBC */
+
+static int riscv64_aes_cbc_crypt(struct skcipher_request *req, bool enc)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	const struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct skcipher_walk walk;
+	unsigned int nbytes;
+	int err;
+
+	err = skcipher_walk_virt(&walk, req, false);
+	while ((nbytes = walk.nbytes) != 0) {
+		kernel_vector_begin();
+		if (enc)
+			aes_cbc_encrypt_zvkned(ctx, walk.src.virt.addr,
+					       walk.dst.virt.addr,
+					       nbytes & ~(AES_BLOCK_SIZE - 1),
+					       walk.iv);
+		else
+			aes_cbc_decrypt_zvkned(ctx, walk.src.virt.addr,
+					       walk.dst.virt.addr,
+					       nbytes & ~(AES_BLOCK_SIZE - 1),
+					       walk.iv);
+		kernel_vector_end();
+		err = skcipher_walk_done(&walk, nbytes & (AES_BLOCK_SIZE - 1));
+	}
+
+	return err;
+}
+
+static int riscv64_aes_cbc_encrypt(struct skcipher_request *req)
+{
+	return riscv64_aes_cbc_crypt(req, true);
+}
+
+static int riscv64_aes_cbc_decrypt(struct skcipher_request *req)
+{
+	return riscv64_aes_cbc_crypt(req, false);
+}
+
+/* AES-CBC-CTS */
+
+static int riscv64_aes_cbc_cts_crypt(struct skcipher_request *req, bool enc)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	const struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct scatterlist sg_src[2], sg_dst[2];
+	struct skcipher_request subreq;
+	struct scatterlist *src, *dst;
+	struct skcipher_walk walk;
+	unsigned int cbc_len;
+	int err;
+
+	if (req->cryptlen < AES_BLOCK_SIZE)
+		return -EINVAL;
+
+	err = skcipher_walk_virt(&walk, req, false);
+	if (err)
+		return err;
+	/*
+	 * If the full message is available in one step, decrypt it in one call
+	 * to the CBC-CTS assembly function.  This reduces overhead, especially
+	 * on short messages.  Otherwise, fall back to doing CBC up to the last
+	 * two blocks, then invoke CTS just for the ciphertext stealing.
+	 */
+	if (unlikely(walk.nbytes != req->cryptlen)) {
+		cbc_len = round_down(req->cryptlen - AES_BLOCK_SIZE - 1,
+				     AES_BLOCK_SIZE);
+		skcipher_walk_abort(&walk);
+		skcipher_request_set_tfm(&subreq, tfm);
+		skcipher_request_set_callback(&subreq,
+					      skcipher_request_flags(req),
+					      NULL, NULL);
+		skcipher_request_set_crypt(&subreq, req->src, req->dst,
+					   cbc_len, req->iv);
+		err = riscv64_aes_cbc_crypt(&subreq, enc);
+		if (err)
+			return err;
+		dst = src = scatterwalk_ffwd(sg_src, req->src, cbc_len);
+		if (req->dst != req->src)
+			dst = scatterwalk_ffwd(sg_dst, req->dst, cbc_len);
+		skcipher_request_set_crypt(&subreq, src, dst,
+					   req->cryptlen - cbc_len, req->iv);
+		err = skcipher_walk_virt(&walk, &subreq, false);
+		if (err)
+			return err;
+	}
+	kernel_vector_begin();
+	aes_cbc_cts_crypt_zvkned(ctx, walk.src.virt.addr, walk.dst.virt.addr,
+				 walk.nbytes, req->iv, enc);
+	kernel_vector_end();
+	return skcipher_walk_done(&walk, 0);
+}
+
+static int riscv64_aes_cbc_cts_encrypt(struct skcipher_request *req)
+{
+	return riscv64_aes_cbc_cts_crypt(req, true);
+}
+
+static int riscv64_aes_cbc_cts_decrypt(struct skcipher_request *req)
+{
+	return riscv64_aes_cbc_cts_crypt(req, false);
+}
+
+/* AES-CTR */
+
+static int riscv64_aes_ctr_crypt(struct skcipher_request *req)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	const struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
+	unsigned int nbytes, p1_nbytes;
+	struct skcipher_walk walk;
+	u32 ctr32, nblocks;
+	int err;
+
+	/* Get the low 32-bit word of the 128-bit big endian counter. */
+	ctr32 = get_unaligned_be32(req->iv + 12);
+
+	err = skcipher_walk_virt(&walk, req, false);
+	while ((nbytes = walk.nbytes) != 0) {
+		if (nbytes < walk.total) {
+			/* Not the end yet, so keep the length block-aligned. */
+			nbytes = round_down(nbytes, AES_BLOCK_SIZE);
+			nblocks = nbytes / AES_BLOCK_SIZE;
+		} else {
+			/* It's the end, so include any final partial block. */
+			nblocks = DIV_ROUND_UP(nbytes, AES_BLOCK_SIZE);
+		}
+		ctr32 += nblocks;
+
+		kernel_vector_begin();
+		if (ctr32 >= nblocks) {
+			/* The low 32-bit word of the counter won't overflow. */
+			aes_ctr32_crypt_zvkned_zvkb(ctx, walk.src.virt.addr,
+						    walk.dst.virt.addr, nbytes,
+						    req->iv);
+		} else {
+			/*
+			 * The low 32-bit word of the counter will overflow.
+			 * The assembly doesn't handle this case, so split the
+			 * operation into two at the point where the overflow
+			 * will occur.  After the first part, add the carry bit.
+			 */
+			p1_nbytes = min_t(unsigned int, nbytes,
+					  (nblocks - ctr32) * AES_BLOCK_SIZE);
+			aes_ctr32_crypt_zvkned_zvkb(ctx, walk.src.virt.addr,
+						    walk.dst.virt.addr,
+						    p1_nbytes, req->iv);
+			crypto_inc(req->iv, 12);
+
+			if (ctr32) {
+				aes_ctr32_crypt_zvkned_zvkb(
+					ctx,
+					walk.src.virt.addr + p1_nbytes,
+					walk.dst.virt.addr + p1_nbytes,
+					nbytes - p1_nbytes, req->iv);
+			}
+		}
+		kernel_vector_end();
+
+		err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
+	}
+
+	return err;
+}
+
+/* AES-XTS */
+
+struct riscv64_aes_xts_ctx {
+	struct crypto_aes_ctx ctx1;
+	struct crypto_aes_ctx ctx2;
+};
+
+static int riscv64_aes_xts_setkey(struct crypto_skcipher *tfm, const u8 *key,
+				  unsigned int keylen)
+{
+	struct riscv64_aes_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+	return xts_verify_key(tfm, key, keylen) ?:
+	       riscv64_aes_setkey(&ctx->ctx1, key, keylen / 2) ?:
+	       riscv64_aes_setkey(&ctx->ctx2, key + keylen / 2, keylen / 2);
+}
+
+static int riscv64_aes_xts_crypt(struct skcipher_request *req, bool enc)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	const struct riscv64_aes_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
+	int tail = req->cryptlen % AES_BLOCK_SIZE;
+	struct scatterlist sg_src[2], sg_dst[2];
+	struct skcipher_request subreq;
+	struct scatterlist *src, *dst;
+	struct skcipher_walk walk;
+	int err;
+
+	if (req->cryptlen < AES_BLOCK_SIZE)
+		return -EINVAL;
+
+	/* Encrypt the IV with the tweak key to get the first tweak. */
+	kernel_vector_begin();
+	aes_encrypt_zvkned(&ctx->ctx2, req->iv, req->iv);
+	kernel_vector_end();
+
+	err = skcipher_walk_virt(&walk, req, false);
+
+	/*
+	 * If the message length isn't divisible by the AES block size and the
+	 * full message isn't available in one step of the scatterlist walk,
+	 * then separate off the last full block and the partial block.  This
+	 * ensures that they are processed in the same call to the assembly
+	 * function, which is required for ciphertext stealing.
+	 */
+	if (unlikely(tail > 0 && walk.nbytes < walk.total)) {
+		skcipher_walk_abort(&walk);
+
+		skcipher_request_set_tfm(&subreq, tfm);
+		skcipher_request_set_callback(&subreq,
+					      skcipher_request_flags(req),
+					      NULL, NULL);
+		skcipher_request_set_crypt(&subreq, req->src, req->dst,
+					   req->cryptlen - tail - AES_BLOCK_SIZE,
+					   req->iv);
+		req = &subreq;
+		err = skcipher_walk_virt(&walk, req, false);
+	} else {
+		tail = 0;
+	}
+
+	while (walk.nbytes) {
+		unsigned int nbytes = walk.nbytes;
+
+		if (nbytes < walk.total)
+			nbytes = round_down(nbytes, AES_BLOCK_SIZE);
+
+		kernel_vector_begin();
+		if (enc)
+			aes_xts_encrypt_zvkned_zvbb_zvkg(
+				&ctx->ctx1, walk.src.virt.addr,
+				walk.dst.virt.addr, nbytes, req->iv);
+		else
+			aes_xts_decrypt_zvkned_zvbb_zvkg(
+				&ctx->ctx1, walk.src.virt.addr,
+				walk.dst.virt.addr, nbytes, req->iv);
+		kernel_vector_end();
+		err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
+	}
+
+	if (err || likely(!tail))
+		return err;
+
+	/* Do ciphertext stealing with the last full block and partial block. */
+
+	dst = src = scatterwalk_ffwd(sg_src, req->src, req->cryptlen);
+	if (req->dst != req->src)
+		dst = scatterwalk_ffwd(sg_dst, req->dst, req->cryptlen);
+
+	skcipher_request_set_crypt(req, src, dst, AES_BLOCK_SIZE + tail,
+				   req->iv);
+
+	err = skcipher_walk_virt(&walk, req, false);
+	if (err)
+		return err;
+
+	kernel_vector_begin();
+	if (enc)
+		aes_xts_encrypt_zvkned_zvbb_zvkg(
+			&ctx->ctx1, walk.src.virt.addr,
+			walk.dst.virt.addr, walk.nbytes, req->iv);
+	else
+		aes_xts_decrypt_zvkned_zvbb_zvkg(
+			&ctx->ctx1, walk.src.virt.addr,
+			walk.dst.virt.addr, walk.nbytes, req->iv);
+	kernel_vector_end();
+
+	return skcipher_walk_done(&walk, 0);
+}
+
+static int riscv64_aes_xts_encrypt(struct skcipher_request *req)
+{
+	return riscv64_aes_xts_crypt(req, true);
+}
+
+static int riscv64_aes_xts_decrypt(struct skcipher_request *req)
+{
+	return riscv64_aes_xts_crypt(req, false);
+}
+
+/* Algorithm definitions */
+
+static struct crypto_alg riscv64_zvkned_aes_cipher_alg = {
+	.cra_flags = CRYPTO_ALG_TYPE_CIPHER,
+	.cra_blocksize = AES_BLOCK_SIZE,
+	.cra_ctxsize = sizeof(struct crypto_aes_ctx),
+	.cra_priority = 300,
+	.cra_name = "aes",
+	.cra_driver_name = "aes-riscv64-zvkned",
+	.cra_cipher = {
+		.cia_min_keysize = AES_MIN_KEY_SIZE,
+		.cia_max_keysize = AES_MAX_KEY_SIZE,
+		.cia_setkey = riscv64_aes_setkey_cipher,
+		.cia_encrypt = riscv64_aes_encrypt,
+		.cia_decrypt = riscv64_aes_decrypt,
+	},
+	.cra_module = THIS_MODULE,
+};
+
+static struct skcipher_alg riscv64_zvkned_aes_skcipher_algs[] = {
+	{
+		.setkey = riscv64_aes_setkey_skcipher,
+		.encrypt = riscv64_aes_ecb_encrypt,
+		.decrypt = riscv64_aes_ecb_decrypt,
+		.min_keysize = AES_MIN_KEY_SIZE,
+		.max_keysize = AES_MAX_KEY_SIZE,
+		.walksize = 8 * AES_BLOCK_SIZE, /* matches LMUL=8 */
+		.base = {
+			.cra_blocksize = AES_BLOCK_SIZE,
+			.cra_ctxsize = sizeof(struct crypto_aes_ctx),
+			.cra_priority = 300,
+			.cra_name = "ecb(aes)",
+			.cra_driver_name = "ecb-aes-riscv64-zvkned",
+			.cra_module = THIS_MODULE,
+		},
+	}, {
+		.setkey = riscv64_aes_setkey_skcipher,
+		.encrypt = riscv64_aes_cbc_encrypt,
+		.decrypt = riscv64_aes_cbc_decrypt,
+		.min_keysize = AES_MIN_KEY_SIZE,
+		.max_keysize = AES_MAX_KEY_SIZE,
+		.ivsize = AES_BLOCK_SIZE,
+		.base = {
+			.cra_blocksize = AES_BLOCK_SIZE,
+			.cra_ctxsize = sizeof(struct crypto_aes_ctx),
+			.cra_priority = 300,
+			.cra_name = "cbc(aes)",
+			.cra_driver_name = "cbc-aes-riscv64-zvkned",
+			.cra_module = THIS_MODULE,
+		},
+	}, {
+		.setkey = riscv64_aes_setkey_skcipher,
+		.encrypt = riscv64_aes_cbc_cts_encrypt,
+		.decrypt = riscv64_aes_cbc_cts_decrypt,
+		.min_keysize = AES_MIN_KEY_SIZE,
+		.max_keysize = AES_MAX_KEY_SIZE,
+		.ivsize = AES_BLOCK_SIZE,
+		.walksize = 4 * AES_BLOCK_SIZE, /* matches LMUL=4 */
+		.base = {
+			.cra_blocksize = AES_BLOCK_SIZE,
+			.cra_ctxsize = sizeof(struct crypto_aes_ctx),
+			.cra_priority = 300,
+			.cra_name = "cts(cbc(aes))",
+			.cra_driver_name = "cts-cbc-aes-riscv64-zvkned",
+			.cra_module = THIS_MODULE,
+		},
+	}
+};
+
+static struct skcipher_alg riscv64_zvkned_zvkb_aes_skcipher_alg = {
+	.setkey = riscv64_aes_setkey_skcipher,
+	.encrypt = riscv64_aes_ctr_crypt,
+	.decrypt = riscv64_aes_ctr_crypt,
+	.min_keysize = AES_MIN_KEY_SIZE,
+	.max_keysize = AES_MAX_KEY_SIZE,
+	.ivsize = AES_BLOCK_SIZE,
+	.chunksize = AES_BLOCK_SIZE,
+	.walksize = 4 * AES_BLOCK_SIZE, /* matches LMUL=4 */
+	.base = {
+		.cra_blocksize = 1,
+		.cra_ctxsize = sizeof(struct crypto_aes_ctx),
+		.cra_priority = 300,
+		.cra_name = "ctr(aes)",
+		.cra_driver_name = "ctr-aes-riscv64-zvkned-zvkb",
+		.cra_module = THIS_MODULE,
+	},
+};
+
+static struct skcipher_alg riscv64_zvkned_zvbb_zvkg_aes_skcipher_alg = {
+	.setkey = riscv64_aes_xts_setkey,
+	.encrypt = riscv64_aes_xts_encrypt,
+	.decrypt = riscv64_aes_xts_decrypt,
+	.min_keysize = 2 * AES_MIN_KEY_SIZE,
+	.max_keysize = 2 * AES_MAX_KEY_SIZE,
+	.ivsize = AES_BLOCK_SIZE,
+	.chunksize = AES_BLOCK_SIZE,
+	.walksize = 4 * AES_BLOCK_SIZE, /* matches LMUL=4 */
+	.base = {
+		.cra_blocksize = AES_BLOCK_SIZE,
+		.cra_ctxsize = sizeof(struct riscv64_aes_xts_ctx),
+		.cra_priority = 300,
+		.cra_name = "xts(aes)",
+		.cra_driver_name = "xts-aes-riscv64-zvkned-zvbb-zvkg",
+		.cra_module = THIS_MODULE,
+	},
+};
+
+static inline bool riscv64_aes_xts_supported(void)
+{
+	return riscv_isa_extension_available(NULL, ZVBB) &&
+	       riscv_isa_extension_available(NULL, ZVKG) &&
+	       riscv_vector_vlen() < 2048 /* Implementation limitation */;
+}
+
+static int __init riscv64_aes_mod_init(void)
+{
+	int err = -ENODEV;
+
+	if (riscv_isa_extension_available(NULL, ZVKNED) &&
+	    riscv_vector_vlen() >= 128) {
+		err = crypto_register_alg(&riscv64_zvkned_aes_cipher_alg);
+		if (err)
+			return err;
+
+		err = crypto_register_skciphers(
+			riscv64_zvkned_aes_skcipher_algs,
+			ARRAY_SIZE(riscv64_zvkned_aes_skcipher_algs));
+		if (err)
+			goto unregister_zvkned_cipher_alg;
+
+		if (riscv_isa_extension_available(NULL, ZVKB)) {
+			err = crypto_register_skcipher(
+				&riscv64_zvkned_zvkb_aes_skcipher_alg);
+			if (err)
+				goto unregister_zvkned_skcipher_algs;
+		}
+
+		if (riscv64_aes_xts_supported()) {
+			err = crypto_register_skcipher(
+				&riscv64_zvkned_zvbb_zvkg_aes_skcipher_alg);
+			if (err)
+				goto unregister_zvkned_zvkb_skcipher_alg;
+		}
+	}
+
+	return err;
+
+unregister_zvkned_zvkb_skcipher_alg:
+	if (riscv_isa_extension_available(NULL, ZVKB))
+		crypto_unregister_skcipher(&riscv64_zvkned_zvkb_aes_skcipher_alg);
+unregister_zvkned_skcipher_algs:
+	crypto_unregister_skciphers(riscv64_zvkned_aes_skcipher_algs,
+				    ARRAY_SIZE(riscv64_zvkned_aes_skcipher_algs));
+unregister_zvkned_cipher_alg:
+	crypto_unregister_alg(&riscv64_zvkned_aes_cipher_alg);
+	return err;
+}
+
+static void __exit riscv64_aes_mod_exit(void)
+{
+	if (riscv64_aes_xts_supported())
+		crypto_unregister_skcipher(&riscv64_zvkned_zvbb_zvkg_aes_skcipher_alg);
+	if (riscv_isa_extension_available(NULL, ZVKB))
+		crypto_unregister_skcipher(&riscv64_zvkned_zvkb_aes_skcipher_alg);
+	crypto_unregister_skciphers(riscv64_zvkned_aes_skcipher_algs,
+				    ARRAY_SIZE(riscv64_zvkned_aes_skcipher_algs));
+	crypto_unregister_alg(&riscv64_zvkned_aes_cipher_alg);
+}
+
+module_init(riscv64_aes_mod_init);
+module_exit(riscv64_aes_mod_exit);
+
+MODULE_DESCRIPTION("AES-ECB/CBC/CTS/CTR/XTS (RISC-V accelerated)");
+MODULE_AUTHOR("Jerry Shih <jerry.shih@sifive.com>");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_CRYPTO("aes");
+MODULE_ALIAS_CRYPTO("ecb(aes)");
+MODULE_ALIAS_CRYPTO("cbc(aes)");
+MODULE_ALIAS_CRYPTO("cts(cbc(aes))");
+MODULE_ALIAS_CRYPTO("ctr(aes)");
+MODULE_ALIAS_CRYPTO("xts(aes)");
diff --git a/arch/riscv/crypto/aes-riscv64-zvkned-zvbb-zvkg.S b/arch/riscv/crypto/aes-riscv64-zvkned-zvbb-zvkg.S
new file mode 100644
index 000000000000..146fc9cfb268
--- /dev/null
+++ b/arch/riscv/crypto/aes-riscv64-zvkned-zvbb-zvkg.S
@@ -0,0 +1,312 @@
+/* SPDX-License-Identifier: Apache-2.0 OR BSD-2-Clause */
+//
+// This file is dual-licensed, meaning that you can use it under your
+// choice of either of the following two licenses:
+//
+// Copyright 2023 The OpenSSL Project Authors. All Rights Reserved.
+//
+// Licensed under the Apache License 2.0 (the "License"). You can obtain
+// a copy in the file LICENSE in the source distribution or at
+// https://www.openssl.org/source/license.html
+//
+// or
+//
+// Copyright (c) 2023, Jerry Shih <jerry.shih@sifive.com>
+// Copyright 2024 Google LLC
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+// 1. Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+// 2. Redistributions in binary form must reproduce the above copyright
+//    notice, this list of conditions and the following disclaimer in the
+//    documentation and/or other materials provided with the distribution.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// The generated code of this file depends on the following RISC-V extensions:
+// - RV64I
+// - RISC-V Vector ('V') with VLEN >= 128 && VLEN < 2048
+// - RISC-V Vector AES block cipher extension ('Zvkned')
+// - RISC-V Vector Bit-manipulation extension ('Zvbb')
+// - RISC-V Vector GCM/GMAC extension ('Zvkg')
+
+#include <linux/linkage.h>
+
+.text
+.option arch, +zvkned, +zvbb, +zvkg
+
+#include "aes-macros.S"
+
+#define KEYP		a0
+#define INP		a1
+#define OUTP		a2
+#define LEN		a3
+#define TWEAKP		a4
+
+#define LEN32		a5
+#define TAIL_LEN	a6
+#define VL		a7
+#define VLMAX		t4
+
+// v1-v15 contain the AES round keys, but they are used for temporaries before
+// the AES round keys have been loaded.
+#define TWEAKS		v16	// LMUL=4 (most of the time)
+#define TWEAKS_BREV	v20	// LMUL=4 (most of the time)
+#define MULTS_BREV	v24	// LMUL=4 (most of the time)
+#define TMP0		v28
+#define TMP1		v29
+#define TMP2		v30
+#define TMP3		v31
+
+// xts_init initializes the following values:
+//
+//	TWEAKS: N 128-bit tweaks T*(x^i) for i in 0..(N - 1)
+//	TWEAKS_BREV: same as TWEAKS, but bit-reversed
+//	MULTS_BREV: N 128-bit values x^N, bit-reversed.  Only if N > 1.
+//
+// N is the maximum number of blocks that will be processed per loop iteration,
+// computed using vsetvli.
+//
+// The field convention used by XTS is the same as that of GHASH, but with the
+// bits reversed within each byte.  The zvkg extension provides the vgmul
+// instruction which does multiplication in this field.  Therefore, for tweak
+// computation we use vgmul to do multiplications in parallel, instead of
+// serially multiplying by x using shifting+xoring.  Note that for this to work,
+// the inputs and outputs to vgmul must be bit-reversed (we do it with vbrev8).
+.macro	xts_init
+
+	// Load the first tweak T.
+	vsetivli	zero, 4, e32, m1, ta, ma
+	vle32.v		TWEAKS, (TWEAKP)
+
+	// If there's only one block (or no blocks at all), then skip the tweak
+	// sequence computation because (at most) T itself is needed.
+	li		t0, 16
+	ble		LEN, t0, .Linit_single_block\@
+
+	// Save a copy of T bit-reversed in v12.
+	vbrev8.v	v12, TWEAKS
+
+	//
+	// Generate x^i for i in 0..(N - 1), i.e. 128-bit values 1 << i assuming
+	// that N <= 128.  Though, this code actually requires N < 64 (or
+	// equivalently VLEN < 2048) due to the use of 64-bit intermediate
+	// values here and in the x^N computation later.
+	//
+	vsetvli		VL, LEN32, e32, m4, ta, ma
+	srli		t0, VL, 2	// t0 = N (num blocks)
+	// Generate two sequences, each with N 32-bit values:
+	// v0=[1, 1, 1, ...] and v1=[0, 1, 2, ...].
+	vsetvli		zero, t0, e32, m1, ta, ma
+	vmv.v.i		v0, 1
+	vid.v		v1
+	// Use vzext to zero-extend the sequences to 64 bits.  Reinterpret them
+	// as two sequences, each with 2*N 32-bit values:
+	// v2=[1, 0, 1, 0, 1, 0, ...] and v4=[0, 0, 1, 0, 2, 0, ...].
+	vsetvli		zero, t0, e64, m2, ta, ma
+	vzext.vf2	v2, v0
+	vzext.vf2	v4, v1
+	slli		t1, t0, 1	// t1 = 2*N
+	vsetvli		zero, t1, e32, m2, ta, ma
+	// Use vwsll to compute [1<<0, 0<<0, 1<<1, 0<<0, 1<<2, 0<<0, ...],
+	// widening to 64 bits per element.  When reinterpreted as N 128-bit
+	// values, this is the needed sequence of 128-bit values 1 << i (x^i).
+	vwsll.vv	v8, v2, v4
+
+	// Copy the bit-reversed T to all N elements of TWEAKS_BREV, then
+	// multiply by x^i.  This gives the sequence T*(x^i), bit-reversed.
+	vsetvli		zero, LEN32, e32, m4, ta, ma
+	vmv.v.i		TWEAKS_BREV, 0
+	vaesz.vs	TWEAKS_BREV, v12
+	vbrev8.v	v8, v8
+	vgmul.vv	TWEAKS_BREV, v8
+
+	// Save a copy of the sequence T*(x^i) with the bit reversal undone.
+	vbrev8.v	TWEAKS, TWEAKS_BREV
+
+	// Generate N copies of x^N, i.e. 128-bit values 1 << N, bit-reversed.
+	li		t1, 1
+	sll		t1, t1, t0	// t1 = 1 << N
+	vsetivli	zero, 2, e64, m1, ta, ma
+	vmv.v.i		v0, 0
+	vsetivli	zero, 1, e64, m1, tu, ma
+	vmv.v.x		v0, t1
+	vbrev8.v	v0, v0
+	vsetvli		zero, LEN32, e32, m4, ta, ma
+	vmv.v.i		MULTS_BREV, 0
+	vaesz.vs	MULTS_BREV, v0
+
+	j		.Linit_done\@
+
+.Linit_single_block\@:
+	vbrev8.v	TWEAKS_BREV, TWEAKS
+.Linit_done\@:
+.endm
+
+// Set the first 128 bits of MULTS_BREV to 0x40, i.e. 'x' bit-reversed.  This is
+// the multiplier required to advance the tweak by one.
+.macro	load_x
+	li		t0, 0x40
+	vsetivli	zero, 4, e32, m1, ta, ma
+	vmv.v.i		MULTS_BREV, 0
+	vsetivli	zero, 1, e8, m1, tu, ma
+	vmv.v.x		MULTS_BREV, t0
+.endm
+
+.macro	__aes_xts_crypt	enc, keylen
+	// With 16 < len <= 31, there's no main loop, just ciphertext stealing.
+	beqz		LEN32, .Lcts_without_main_loop\@
+
+	vsetvli		VLMAX, zero, e32, m4, ta, ma
+1:
+	vsetvli		VL, LEN32, e32, m4, ta, ma
+2:
+	// Encrypt or decrypt VL/4 blocks.
+	vle32.v		TMP0, (INP)
+	vxor.vv		TMP0, TMP0, TWEAKS
+	aes_crypt	TMP0, \enc, \keylen
+	vxor.vv		TMP0, TMP0, TWEAKS
+	vse32.v		TMP0, (OUTP)
+
+	// Update the pointers and the remaining length.
+	slli		t0, VL, 2
+	add		INP, INP, t0
+	add		OUTP, OUTP, t0
+	sub		LEN32, LEN32, VL
+
+	// Check whether more blocks remain.
+	beqz		LEN32, .Lmain_loop_done\@
+
+	// Compute the next sequence of tweaks by multiplying the previous
+	// sequence by x^N.  Store the result in both bit-reversed order and
+	// regular order (i.e. with the bit reversal undone).
+	vgmul.vv	TWEAKS_BREV, MULTS_BREV
+	vbrev8.v	TWEAKS, TWEAKS_BREV
+
+	// Since we compute the tweak multipliers x^N in advance, we require
+	// that each iteration process the same length except possibly the last.
+	// This conflicts slightly with the behavior allowed by RISC-V Vector
+	// Extension, where CPUs can select a lower length for both of the last
+	// two iterations.  E.g., vl might take the sequence of values
+	// [16, 16, 16, 12, 12], whereas we need [16, 16, 16, 16, 8] so that we
+	// can use x^4 again instead of computing x^3.  Therefore, we explicitly
+	// keep the vl at VLMAX if there is at least VLMAX remaining.
+	bge		LEN32, VLMAX, 2b
+	j		1b
+
+.Lmain_loop_done\@:
+	load_x
+
+	// Compute the next tweak.
+	addi		t0, VL, -4
+	vsetivli	zero, 4, e32, m4, ta, ma
+	vslidedown.vx	TWEAKS_BREV, TWEAKS_BREV, t0	// Extract last tweak
+	vsetivli	zero, 4, e32, m1, ta, ma
+	vgmul.vv	TWEAKS_BREV, MULTS_BREV		// Advance to next tweak
+
+	bnez		TAIL_LEN, .Lcts\@
+
+	// Update *TWEAKP to contain the next tweak.
+	vbrev8.v	TWEAKS, TWEAKS_BREV
+	vse32.v		TWEAKS, (TWEAKP)
+	ret
+
+.Lcts_without_main_loop\@:
+	load_x
+.Lcts\@:
+	// TWEAKS_BREV now contains the next tweak.  Compute the one after that.
+	vsetivli	zero, 4, e32, m1, ta, ma
+	vmv.v.v		TMP0, TWEAKS_BREV
+	vgmul.vv	TMP0, MULTS_BREV
+	// Undo the bit reversal of the next two tweaks and store them in TMP1
+	// and TMP2, such that TMP1 is the first needed and TMP2 the second.
+.if \enc
+	vbrev8.v	TMP1, TWEAKS_BREV
+	vbrev8.v	TMP2, TMP0
+.else
+	vbrev8.v	TMP1, TMP0
+	vbrev8.v	TMP2, TWEAKS_BREV
+.endif
+
+	// Encrypt/decrypt the last full block.
+	vle32.v		TMP0, (INP)
+	vxor.vv		TMP0, TMP0, TMP1
+	aes_crypt	TMP0, \enc, \keylen
+	vxor.vv		TMP0, TMP0, TMP1
+
+	// Swap the first TAIL_LEN bytes of the above result with the tail.
+	// Note that to support in-place encryption/decryption, the load from
+	// the input tail must happen before the store to the output tail.
+	addi		t0, INP, 16
+	addi		t1, OUTP, 16
+	vmv.v.v		TMP3, TMP0
+	vsetvli		zero, TAIL_LEN, e8, m1, tu, ma
+	vle8.v		TMP0, (t0)
+	vse8.v		TMP3, (t1)
+
+	// Encrypt/decrypt again and store the last full block.
+	vsetivli	zero, 4, e32, m1, ta, ma
+	vxor.vv		TMP0, TMP0, TMP2
+	aes_crypt	TMP0, \enc, \keylen
+	vxor.vv		TMP0, TMP0, TMP2
+	vse32.v		TMP0, (OUTP)
+
+	ret
+.endm
+
+.macro	aes_xts_crypt	enc
+
+	// Check whether the length is a multiple of the AES block size.
+	andi		TAIL_LEN, LEN, 15
+	beqz		TAIL_LEN, 1f
+
+	// The length isn't a multiple of the AES block size, so ciphertext
+	// stealing will be required.  Ciphertext stealing involves special
+	// handling of the partial block and the last full block, so subtract
+	// the length of both from the length to be processed in the main loop.
+	sub		LEN, LEN, TAIL_LEN
+	addi		LEN, LEN, -16
+1:
+	srli		LEN32, LEN, 2
+	// LEN and LEN32 now contain the total length of the blocks that will be
+	// processed in the main loop, in bytes and 32-bit words respectively.
+
+	xts_init
+	aes_begin	KEYP, 128f, 192f
+	__aes_xts_crypt	\enc, 256
+128:
+	__aes_xts_crypt	\enc, 128
+192:
+	__aes_xts_crypt	\enc, 192
+.endm
+
+// void aes_xts_encrypt_zvkned_zvbb_zvkg(const struct crypto_aes_ctx *key,
+//					 const u8 *in, u8 *out, size_t len,
+//					 u8 tweak[16]);
+//
+// |key| is the data key.  |tweak| contains the next tweak; the encryption of
+// the original IV with the tweak key was already done.  This function supports
+// incremental computation, but |len| must always be >= 16 (AES_BLOCK_SIZE), and
+// |len| must be a multiple of 16 except on the last call.  If |len| is a
+// multiple of 16, then this function updates |tweak| to contain the next tweak.
+SYM_FUNC_START(aes_xts_encrypt_zvkned_zvbb_zvkg)
+	aes_xts_crypt	1
+SYM_FUNC_END(aes_xts_encrypt_zvkned_zvbb_zvkg)
+
+// Same prototype and calling convention as the encryption function
+SYM_FUNC_START(aes_xts_decrypt_zvkned_zvbb_zvkg)
+	aes_xts_crypt	0
+SYM_FUNC_END(aes_xts_decrypt_zvkned_zvbb_zvkg)
diff --git a/arch/riscv/crypto/aes-riscv64-zvkned-zvkb.S b/arch/riscv/crypto/aes-riscv64-zvkned-zvkb.S
new file mode 100644
index 000000000000..9962d4500587
--- /dev/null
+++ b/arch/riscv/crypto/aes-riscv64-zvkned-zvkb.S
@@ -0,0 +1,146 @@
+/* SPDX-License-Identifier: Apache-2.0 OR BSD-2-Clause */
+//
+// This file is dual-licensed, meaning that you can use it under your
+// choice of either of the following two licenses:
+//
+// Copyright 2023 The OpenSSL Project Authors. All Rights Reserved.
+//
+// Licensed under the Apache License 2.0 (the "License"). You can obtain
+// a copy in the file LICENSE in the source distribution or at
+// https://www.openssl.org/source/license.html
+//
+// or
+//
+// Copyright (c) 2023, Jerry Shih <jerry.shih@sifive.com>
+// Copyright 2024 Google LLC
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+// 1. Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+// 2. Redistributions in binary form must reproduce the above copyright
+//    notice, this list of conditions and the following disclaimer in the
+//    documentation and/or other materials provided with the distribution.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// The generated code of this file depends on the following RISC-V extensions:
+// - RV64I
+// - RISC-V Vector ('V') with VLEN >= 128
+// - RISC-V Vector AES block cipher extension ('Zvkned')
+// - RISC-V Vector Cryptography Bit-manipulation extension ('Zvkb')
+
+#include <linux/linkage.h>
+
+.text
+.option arch, +zvkned, +zvkb
+
+#include "aes-macros.S"
+
+#define KEYP		a0
+#define INP		a1
+#define OUTP		a2
+#define LEN		a3
+#define IVP		a4
+
+#define LEN32		a5
+#define VL_E32		a6
+#define VL_BLOCKS	a7
+
+.macro	aes_ctr32_crypt	keylen
+	// LEN32 = number of blocks, rounded up, in 32-bit words.
+	addi		t0, LEN, 15
+	srli		t0, t0, 4
+	slli		LEN32, t0, 2
+
+	// Create a mask that selects the last 32-bit word of each 128-bit
+	// block.  This is the word that contains the (big-endian) counter.
+	li		t0, 0x88
+	vsetvli		t1, zero, e8, m1, ta, ma
+	vmv.v.x		v0, t0
+
+	// Load the IV into v31.  The last 32-bit word contains the counter.
+	vsetivli	zero, 4, e32, m1, ta, ma
+	vle32.v		v31, (IVP)
+
+	// Convert the big-endian counter into little-endian.
+	vsetivli	zero, 4, e32, m1, ta, mu
+	vrev8.v		v31, v31, v0.t
+
+	// Splat the IV to v16 (with LMUL=4).  The number of copies is the
+	// maximum number of blocks that will be processed per iteration.
+	vsetvli		zero, LEN32, e32, m4, ta, ma
+	vmv.v.i		v16, 0
+	vaesz.vs	v16, v31
+
+	// v20 = [x, x, x, 0, x, x, x, 1, ...]
+	viota.m		v20, v0, v0.t
+	// v16 = [IV0, IV1, IV2, counter+0, IV0, IV1, IV2, counter+1, ...]
+	vsetvli		VL_E32, LEN32, e32, m4, ta, mu
+	vadd.vv		v16, v16, v20, v0.t
+
+	j 2f
+1:
+	// Set the number of blocks to process in this iteration.  vl=VL_E32 is
+	// the length in 32-bit words, i.e. 4 times the number of blocks.
+	vsetvli		VL_E32, LEN32, e32, m4, ta, mu
+
+	// Increment the counters by the number of blocks processed in the
+	// previous iteration.
+	vadd.vx		v16, v16, VL_BLOCKS, v0.t
+2:
+	// Prepare the AES inputs into v24.
+	vmv.v.v		v24, v16
+	vrev8.v		v24, v24, v0.t	// Convert counters back to big-endian.
+
+	// Encrypt the AES inputs to create the next portion of the keystream.
+	aes_encrypt	v24, \keylen
+
+	// XOR the data with the keystream.
+	vsetvli		t0, LEN, e8, m4, ta, ma
+	vle8.v		v20, (INP)
+	vxor.vv		v20, v20, v24
+	vse8.v		v20, (OUTP)
+
+	// Advance the pointers and update the remaining length.
+	add		INP, INP, t0
+	add		OUTP, OUTP, t0
+	sub		LEN, LEN, t0
+	sub		LEN32, LEN32, VL_E32
+	srli		VL_BLOCKS, VL_E32, 2
+
+	// Repeat if more data remains.
+	bnez		LEN, 1b
+
+	// Update *IVP to contain the next counter.
+	vsetivli	zero, 4, e32, m1, ta, mu
+	vadd.vx		v16, v16, VL_BLOCKS, v0.t
+	vrev8.v		v16, v16, v0.t	// Convert counters back to big-endian.
+	vse32.v		v16, (IVP)
+
+	ret
+.endm
+
+// void aes_ctr32_crypt_zvkned_zvkb(const struct crypto_aes_ctx *key,
+//				    const u8 *in, u8 *out, size_t len,
+//				    u8 iv[16]);
+SYM_FUNC_START(aes_ctr32_crypt_zvkned_zvkb)
+	aes_begin	KEYP, 128f, 192f
+	aes_ctr32_crypt	256
+128:
+	aes_ctr32_crypt	128
+192:
+	aes_ctr32_crypt	192
+SYM_FUNC_END(aes_ctr32_crypt_zvkned_zvkb)
diff --git a/arch/riscv/crypto/aes-riscv64-zvkned.S b/arch/riscv/crypto/aes-riscv64-zvkned.S
new file mode 100644
index 000000000000..23d063f94ce6
--- /dev/null
+++ b/arch/riscv/crypto/aes-riscv64-zvkned.S
@@ -0,0 +1,339 @@
+/* SPDX-License-Identifier: Apache-2.0 OR BSD-2-Clause */
+//
+// This file is dual-licensed, meaning that you can use it under your
+// choice of either of the following two licenses:
+//
+// Copyright 2023 The OpenSSL Project Authors. All Rights Reserved.
+//
+// Licensed under the Apache License 2.0 (the "License"). You can obtain
+// a copy in the file LICENSE in the source distribution or at
+// https://www.openssl.org/source/license.html
+//
+// or
+//
+// Copyright (c) 2023, Christoph Müllner <christoph.muellner@vrull.eu>
+// Copyright (c) 2023, Phoebe Chen <phoebe.chen@sifive.com>
+// Copyright (c) 2023, Jerry Shih <jerry.shih@sifive.com>
+// Copyright 2024 Google LLC
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+// 1. Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+// 2. Redistributions in binary form must reproduce the above copyright
+//    notice, this list of conditions and the following disclaimer in the
+//    documentation and/or other materials provided with the distribution.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// The generated code of this file depends on the following RISC-V extensions:
+// - RV64I
+// - RISC-V Vector ('V') with VLEN >= 128
+// - RISC-V Vector AES block cipher extension ('Zvkned')
+
+#include <linux/linkage.h>
+
+.text
+.option arch, +zvkned
+
+#include "aes-macros.S"
+
+#define KEYP		a0
+#define INP		a1
+#define OUTP		a2
+#define LEN		a3
+#define IVP		a4
+
+.macro	__aes_crypt_zvkned	enc, keylen
+	vle32.v		v16, (INP)
+	aes_crypt	v16, \enc, \keylen
+	vse32.v		v16, (OUTP)
+	ret
+.endm
+
+.macro	aes_crypt_zvkned	enc
+	aes_begin	KEYP, 128f, 192f
+	__aes_crypt_zvkned	\enc, 256
+128:
+	__aes_crypt_zvkned	\enc, 128
+192:
+	__aes_crypt_zvkned	\enc, 192
+.endm
+
+// void aes_encrypt_zvkned(const struct crypto_aes_ctx *key,
+//			   const u8 in[16], u8 out[16]);
+SYM_FUNC_START(aes_encrypt_zvkned)
+	aes_crypt_zvkned	1
+SYM_FUNC_END(aes_encrypt_zvkned)
+
+// Same prototype and calling convention as the encryption function
+SYM_FUNC_START(aes_decrypt_zvkned)
+	aes_crypt_zvkned	0
+SYM_FUNC_END(aes_decrypt_zvkned)
+
+.macro	__aes_ecb_crypt	enc, keylen
+	srli		t0, LEN, 2
+	// t0 is the remaining length in 32-bit words.  It's a multiple of 4.
+1:
+	vsetvli		t1, t0, e32, m8, ta, ma
+	sub		t0, t0, t1	// Subtract number of words processed
+	slli		t1, t1, 2	// Words to bytes
+	vle32.v		v16, (INP)
+	aes_crypt	v16, \enc, \keylen
+	vse32.v		v16, (OUTP)
+	add		INP, INP, t1
+	add		OUTP, OUTP, t1
+	bnez		t0, 1b
+
+	ret
+.endm
+
+.macro	aes_ecb_crypt	enc
+	aes_begin	KEYP, 128f, 192f
+	__aes_ecb_crypt	\enc, 256
+128:
+	__aes_ecb_crypt	\enc, 128
+192:
+	__aes_ecb_crypt	\enc, 192
+.endm
+
+// void aes_ecb_encrypt_zvkned(const struct crypto_aes_ctx *key,
+//			       const u8 *in, u8 *out, size_t len);
+//
+// |len| must be nonzero and a multiple of 16 (AES_BLOCK_SIZE).
+SYM_FUNC_START(aes_ecb_encrypt_zvkned)
+	aes_ecb_crypt	1
+SYM_FUNC_END(aes_ecb_encrypt_zvkned)
+
+// Same prototype and calling convention as the encryption function
+SYM_FUNC_START(aes_ecb_decrypt_zvkned)
+	aes_ecb_crypt	0
+SYM_FUNC_END(aes_ecb_decrypt_zvkned)
+
+.macro	aes_cbc_encrypt	keylen
+	vle32.v		v16, (IVP)	// Load IV
+1:
+	vle32.v		v17, (INP)	// Load plaintext block
+	vxor.vv		v16, v16, v17	// XOR with IV or prev ciphertext block
+	aes_encrypt	v16, \keylen	// Encrypt
+	vse32.v		v16, (OUTP)	// Store ciphertext block
+	addi		INP, INP, 16
+	addi		OUTP, OUTP, 16
+	addi		LEN, LEN, -16
+	bnez		LEN, 1b
+
+	vse32.v		v16, (IVP)	// Store next IV
+	ret
+.endm
+
+.macro	aes_cbc_decrypt	keylen
+	srli		LEN, LEN, 2	// Convert LEN from bytes to words
+	vle32.v		v16, (IVP)	// Load IV
+1:
+	vsetvli		t0, LEN, e32, m4, ta, ma
+	vle32.v		v20, (INP)	// Load ciphertext blocks
+	vslideup.vi	v16, v20, 4	// Setup prev ciphertext blocks
+	addi		t1, t0, -4
+	vslidedown.vx	v24, v20, t1	// Save last ciphertext block
+	aes_decrypt	v20, \keylen	// Decrypt the blocks
+	vxor.vv		v20, v20, v16	// XOR with prev ciphertext blocks
+	vse32.v		v20, (OUTP)	// Store plaintext blocks
+	vmv.v.v		v16, v24	// Next "IV" is last ciphertext block
+	slli		t1, t0, 2	// Words to bytes
+	add		INP, INP, t1
+	add		OUTP, OUTP, t1
+	sub		LEN, LEN, t0
+	bnez		LEN, 1b
+
+	vsetivli	zero, 4, e32, m1, ta, ma
+	vse32.v		v16, (IVP)	// Store next IV
+	ret
+.endm
+
+// void aes_cbc_encrypt_zvkned(const struct crypto_aes_ctx *key,
+//			       const u8 *in, u8 *out, size_t len, u8 iv[16]);
+//
+// |len| must be nonzero and a multiple of 16 (AES_BLOCK_SIZE).
+SYM_FUNC_START(aes_cbc_encrypt_zvkned)
+	aes_begin	KEYP, 128f, 192f
+	aes_cbc_encrypt	256
+128:
+	aes_cbc_encrypt	128
+192:
+	aes_cbc_encrypt	192
+SYM_FUNC_END(aes_cbc_encrypt_zvkned)
+
+// Same prototype and calling convention as the encryption function
+SYM_FUNC_START(aes_cbc_decrypt_zvkned)
+	aes_begin	KEYP, 128f, 192f
+	aes_cbc_decrypt	256
+128:
+	aes_cbc_decrypt	128
+192:
+	aes_cbc_decrypt	192
+SYM_FUNC_END(aes_cbc_decrypt_zvkned)
+
+.macro	aes_cbc_cts_encrypt	keylen
+
+	// CBC-encrypt all blocks except the last.  But don't store the
+	// second-to-last block to the output buffer yet, since it will be
+	// handled specially in the ciphertext stealing step.  Exception: if the
+	// message is single-block, still encrypt the last (and only) block.
+	li		t0, 16
+	j		2f
+1:
+	vse32.v		v16, (OUTP)	// Store ciphertext block
+	addi		OUTP, OUTP, 16
+2:
+	vle32.v		v17, (INP)	// Load plaintext block
+	vxor.vv		v16, v16, v17	// XOR with IV or prev ciphertext block
+	aes_encrypt	v16, \keylen	// Encrypt
+	addi		INP, INP, 16
+	addi		LEN, LEN, -16
+	bgt		LEN, t0, 1b	// Repeat if more than one block remains
+
+	// Special case: if the message is a single block, just do CBC.
+	beqz		LEN, .Lcts_encrypt_done\@
+
+	// Encrypt the last two blocks using ciphertext stealing as follows:
+	//	C[n-1] = Encrypt(Encrypt(P[n-1] ^ C[n-2]) ^ P[n])
+	//	C[n] = Encrypt(P[n-1] ^ C[n-2])[0..LEN]
+	//
+	// C[i] denotes the i'th ciphertext block, and likewise P[i] the i'th
+	// plaintext block.  Block n, the last block, may be partial; its length
+	// is 1 <= LEN <= 16.  If there are only 2 blocks, C[n-2] means the IV.
+	//
+	// v16 already contains Encrypt(P[n-1] ^ C[n-2]).
+	// INP points to P[n].  OUTP points to where C[n-1] should go.
+	// To support in-place encryption, load P[n] before storing C[n].
+	addi		t0, OUTP, 16	// Get pointer to where C[n] should go
+	vsetvli		zero, LEN, e8, m1, tu, ma
+	vle8.v		v17, (INP)	// Load P[n]
+	vse8.v		v16, (t0)	// Store C[n]
+	vxor.vv		v16, v16, v17	// v16 = Encrypt(P[n-1] ^ C[n-2]) ^ P[n]
+	vsetivli	zero, 4, e32, m1, ta, ma
+	aes_encrypt	v16, \keylen
+.Lcts_encrypt_done\@:
+	vse32.v		v16, (OUTP)	// Store C[n-1] (or C[n] in single-block case)
+	ret
+.endm
+
+#define LEN32		t4 // Length of remaining full blocks in 32-bit words
+#define LEN_MOD16	t5 // Length of message in bytes mod 16
+
+.macro	aes_cbc_cts_decrypt	keylen
+	andi		LEN32, LEN, ~15
+	srli		LEN32, LEN32, 2
+	andi		LEN_MOD16, LEN, 15
+
+	// Save C[n-2] in v28 so that it's available later during the ciphertext
+	// stealing step.  If there are fewer than three blocks, C[n-2] means
+	// the IV, otherwise it means the third-to-last ciphertext block.
+	vmv.v.v		v28, v16	// IV
+	add		t0, LEN, -33
+	bltz		t0, .Lcts_decrypt_loop\@
+	andi		t0, t0, ~15
+	add		t0, t0, INP
+	vle32.v		v28, (t0)
+
+	// CBC-decrypt all full blocks.  For the last full block, or the last 2
+	// full blocks if the message is block-aligned, this doesn't write the
+	// correct output blocks (unless the message is only a single block),
+	// because it XORs the wrong values with the raw AES plaintexts.  But we
+	// fix this after this loop without redoing the AES decryptions.  This
+	// approach allows more of the AES decryptions to be parallelized.
+.Lcts_decrypt_loop\@:
+	vsetvli		t0, LEN32, e32, m4, ta, ma
+	addi		t1, t0, -4
+	vle32.v		v20, (INP)	// Load next set of ciphertext blocks
+	vmv.v.v		v24, v16	// Get IV or last ciphertext block of prev set
+	vslideup.vi	v24, v20, 4	// Setup prev ciphertext blocks
+	vslidedown.vx	v16, v20, t1	// Save last ciphertext block of this set
+	aes_decrypt	v20, \keylen	// Decrypt this set of blocks
+	vxor.vv		v24, v24, v20	// XOR prev ciphertext blocks with decrypted blocks
+	vse32.v		v24, (OUTP)	// Store this set of plaintext blocks
+	sub		LEN32, LEN32, t0
+	slli		t0, t0, 2	// Words to bytes
+	add		INP, INP, t0
+	add		OUTP, OUTP, t0
+	bnez		LEN32, .Lcts_decrypt_loop\@
+
+	vsetivli	zero, 4, e32, m4, ta, ma
+	vslidedown.vx	v20, v20, t1	// Extract raw plaintext of last full block
+	addi		t0, OUTP, -16	// Get pointer to last full plaintext block
+	bnez		LEN_MOD16, .Lcts_decrypt_non_block_aligned\@
+
+	// Special case: if the message is a single block, just do CBC.
+	li		t1, 16
+	beq		LEN, t1, .Lcts_decrypt_done\@
+
+	// Block-aligned message.  Just fix up the last 2 blocks.  We need:
+	//
+	//	P[n-1] = Decrypt(C[n]) ^ C[n-2]
+	//	P[n] = Decrypt(C[n-1]) ^ C[n]
+	//
+	// We have C[n] in v16, Decrypt(C[n]) in v20, and C[n-2] in v28.
+	// Together with Decrypt(C[n-1]) ^ C[n-2] from the output buffer, this
+	// is everything needed to fix the output without re-decrypting blocks.
+	addi		t1, OUTP, -32	// Get pointer to where P[n-1] should go
+	vxor.vv		v20, v20, v28	// Decrypt(C[n]) ^ C[n-2] == P[n-1]
+	vle32.v		v24, (t1)	// Decrypt(C[n-1]) ^ C[n-2]
+	vse32.v		v20, (t1)	// Store P[n-1]
+	vxor.vv		v20, v24, v16	// Decrypt(C[n-1]) ^ C[n-2] ^ C[n] == P[n] ^ C[n-2]
+	j		.Lcts_decrypt_finish\@
+
+.Lcts_decrypt_non_block_aligned\@:
+	// Decrypt the last two blocks using ciphertext stealing as follows:
+	//
+	//	P[n-1] = Decrypt(C[n] || Decrypt(C[n-1])[LEN_MOD16..16]) ^ C[n-2]
+	//	P[n] = (Decrypt(C[n-1]) ^ C[n])[0..LEN_MOD16]
+	//
+	// We already have Decrypt(C[n-1]) in v20 and C[n-2] in v28.
+	vmv.v.v		v16, v20	// v16 = Decrypt(C[n-1])
+	vsetvli		zero, LEN_MOD16, e8, m1, tu, ma
+	vle8.v		v20, (INP)	// v20 = C[n] || Decrypt(C[n-1])[LEN_MOD16..16]
+	vxor.vv		v16, v16, v20	// v16 = Decrypt(C[n-1]) ^ C[n]
+	vse8.v		v16, (OUTP)	// Store P[n]
+	vsetivli	zero, 4, e32, m1, ta, ma
+	aes_decrypt	v20, \keylen	// v20 = Decrypt(C[n] || Decrypt(C[n-1])[LEN_MOD16..16])
+.Lcts_decrypt_finish\@:
+	vxor.vv		v20, v20, v28	// XOR with C[n-2]
+	vse32.v		v20, (t0)	// Store last full plaintext block
+.Lcts_decrypt_done\@:
+	ret
+.endm
+
+.macro	aes_cbc_cts_crypt	keylen
+	vle32.v		v16, (IVP)	// Load IV
+	beqz		a5, .Lcts_decrypt\@
+	aes_cbc_cts_encrypt \keylen
+.Lcts_decrypt\@:
+	aes_cbc_cts_decrypt \keylen
+.endm
+
+// void aes_cbc_cts_crypt_zvkned(const struct crypto_aes_ctx *key,
+//			         const u8 *in, u8 *out, size_t len,
+//				 const u8 iv[16], bool enc);
+//
+// Encrypts or decrypts a message with the CS3 variant of AES-CBC-CTS.
+// This is the variant that unconditionally swaps the last two blocks.
+SYM_FUNC_START(aes_cbc_cts_crypt_zvkned)
+	aes_begin	KEYP, 128f, 192f
+	aes_cbc_cts_crypt 256
+128:
+	aes_cbc_cts_crypt 128
+192:
+	aes_cbc_cts_crypt 192
+SYM_FUNC_END(aes_cbc_cts_crypt_zvkned)
diff --git a/arch/riscv/crypto/chacha-riscv64-glue.c b/arch/riscv/crypto/chacha-riscv64-glue.c
new file mode 100644
index 000000000000..10b46f36375a
--- /dev/null
+++ b/arch/riscv/crypto/chacha-riscv64-glue.c
@@ -0,0 +1,101 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * ChaCha20 using the RISC-V vector crypto extensions
+ *
+ * Copyright (C) 2023 SiFive, Inc.
+ * Author: Jerry Shih <jerry.shih@sifive.com>
+ */
+
+#include <asm/simd.h>
+#include <asm/vector.h>
+#include <crypto/internal/chacha.h>
+#include <crypto/internal/skcipher.h>
+#include <linux/linkage.h>
+#include <linux/module.h>
+
+asmlinkage void chacha20_zvkb(const u32 key[8], const u8 *in, u8 *out,
+			      size_t len, const u32 iv[4]);
+
+static int riscv64_chacha20_crypt(struct skcipher_request *req)
+{
+	u32 iv[CHACHA_IV_SIZE / sizeof(u32)];
+	u8 block_buffer[CHACHA_BLOCK_SIZE];
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	const struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct skcipher_walk walk;
+	unsigned int nbytes;
+	unsigned int tail_bytes;
+	int err;
+
+	iv[0] = get_unaligned_le32(req->iv);
+	iv[1] = get_unaligned_le32(req->iv + 4);
+	iv[2] = get_unaligned_le32(req->iv + 8);
+	iv[3] = get_unaligned_le32(req->iv + 12);
+
+	err = skcipher_walk_virt(&walk, req, false);
+	while (walk.nbytes) {
+		nbytes = walk.nbytes & ~(CHACHA_BLOCK_SIZE - 1);
+		tail_bytes = walk.nbytes & (CHACHA_BLOCK_SIZE - 1);
+		kernel_vector_begin();
+		if (nbytes) {
+			chacha20_zvkb(ctx->key, walk.src.virt.addr,
+				      walk.dst.virt.addr, nbytes, iv);
+			iv[0] += nbytes / CHACHA_BLOCK_SIZE;
+		}
+		if (walk.nbytes == walk.total && tail_bytes > 0) {
+			memcpy(block_buffer, walk.src.virt.addr + nbytes,
+			       tail_bytes);
+			chacha20_zvkb(ctx->key, block_buffer, block_buffer,
+				      CHACHA_BLOCK_SIZE, iv);
+			memcpy(walk.dst.virt.addr + nbytes, block_buffer,
+			       tail_bytes);
+			tail_bytes = 0;
+		}
+		kernel_vector_end();
+
+		err = skcipher_walk_done(&walk, tail_bytes);
+	}
+
+	return err;
+}
+
+static struct skcipher_alg riscv64_chacha_alg = {
+	.setkey = chacha20_setkey,
+	.encrypt = riscv64_chacha20_crypt,
+	.decrypt = riscv64_chacha20_crypt,
+	.min_keysize = CHACHA_KEY_SIZE,
+	.max_keysize = CHACHA_KEY_SIZE,
+	.ivsize = CHACHA_IV_SIZE,
+	.chunksize = CHACHA_BLOCK_SIZE,
+	.walksize = 4 * CHACHA_BLOCK_SIZE,
+	.base = {
+		.cra_blocksize = 1,
+		.cra_ctxsize = sizeof(struct chacha_ctx),
+		.cra_priority = 300,
+		.cra_name = "chacha20",
+		.cra_driver_name = "chacha20-riscv64-zvkb",
+		.cra_module = THIS_MODULE,
+	},
+};
+
+static int __init riscv64_chacha_mod_init(void)
+{
+	if (riscv_isa_extension_available(NULL, ZVKB) &&
+	    riscv_vector_vlen() >= 128)
+		return crypto_register_skcipher(&riscv64_chacha_alg);
+
+	return -ENODEV;
+}
+
+static void __exit riscv64_chacha_mod_exit(void)
+{
+	crypto_unregister_skcipher(&riscv64_chacha_alg);
+}
+
+module_init(riscv64_chacha_mod_init);
+module_exit(riscv64_chacha_mod_exit);
+
+MODULE_DESCRIPTION("ChaCha20 (RISC-V accelerated)");
+MODULE_AUTHOR("Jerry Shih <jerry.shih@sifive.com>");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_CRYPTO("chacha20");
diff --git a/arch/riscv/crypto/chacha-riscv64-zvkb.S b/arch/riscv/crypto/chacha-riscv64-zvkb.S
new file mode 100644
index 000000000000..bf057737ac69
--- /dev/null
+++ b/arch/riscv/crypto/chacha-riscv64-zvkb.S
@@ -0,0 +1,294 @@
+/* SPDX-License-Identifier: Apache-2.0 OR BSD-2-Clause */
+//
+// This file is dual-licensed, meaning that you can use it under your
+// choice of either of the following two licenses:
+//
+// Copyright 2023 The OpenSSL Project Authors. All Rights Reserved.
+//
+// Licensed under the Apache License 2.0 (the "License"). You can obtain
+// a copy in the file LICENSE in the source distribution or at
+// https://www.openssl.org/source/license.html
+//
+// or
+//
+// Copyright (c) 2023, Jerry Shih <jerry.shih@sifive.com>
+// Copyright 2024 Google LLC
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+// 1. Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+// 2. Redistributions in binary form must reproduce the above copyright
+//    notice, this list of conditions and the following disclaimer in the
+//    documentation and/or other materials provided with the distribution.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// The generated code of this file depends on the following RISC-V extensions:
+// - RV64I
+// - RISC-V Vector ('V') with VLEN >= 128
+// - RISC-V Vector Cryptography Bit-manipulation extension ('Zvkb')
+
+#include <linux/linkage.h>
+
+.text
+.option arch, +zvkb
+
+#define KEYP		a0
+#define INP		a1
+#define OUTP		a2
+#define LEN		a3
+#define IVP		a4
+
+#define CONSTS0		a5
+#define CONSTS1		a6
+#define CONSTS2		a7
+#define CONSTS3		t0
+#define TMP		t1
+#define VL		t2
+#define STRIDE		t3
+#define NROUNDS		t4
+#define KEY0		s0
+#define KEY1		s1
+#define KEY2		s2
+#define KEY3		s3
+#define KEY4		s4
+#define KEY5		s5
+#define KEY6		s6
+#define KEY7		s7
+#define COUNTER		s8
+#define NONCE0		s9
+#define NONCE1		s10
+#define NONCE2		s11
+
+.macro	chacha_round	a0, b0, c0, d0,  a1, b1, c1, d1, \
+			a2, b2, c2, d2,  a3, b3, c3, d3
+	// a += b; d ^= a; d = rol(d, 16);
+	vadd.vv		\a0, \a0, \b0
+	vadd.vv		\a1, \a1, \b1
+	vadd.vv		\a2, \a2, \b2
+	vadd.vv		\a3, \a3, \b3
+	vxor.vv		\d0, \d0, \a0
+	vxor.vv		\d1, \d1, \a1
+	vxor.vv		\d2, \d2, \a2
+	vxor.vv		\d3, \d3, \a3
+	vror.vi		\d0, \d0, 32 - 16
+	vror.vi		\d1, \d1, 32 - 16
+	vror.vi		\d2, \d2, 32 - 16
+	vror.vi		\d3, \d3, 32 - 16
+
+	// c += d; b ^= c; b = rol(b, 12);
+	vadd.vv		\c0, \c0, \d0
+	vadd.vv		\c1, \c1, \d1
+	vadd.vv		\c2, \c2, \d2
+	vadd.vv		\c3, \c3, \d3
+	vxor.vv		\b0, \b0, \c0
+	vxor.vv		\b1, \b1, \c1
+	vxor.vv		\b2, \b2, \c2
+	vxor.vv		\b3, \b3, \c3
+	vror.vi		\b0, \b0, 32 - 12
+	vror.vi		\b1, \b1, 32 - 12
+	vror.vi		\b2, \b2, 32 - 12
+	vror.vi		\b3, \b3, 32 - 12
+
+	// a += b; d ^= a; d = rol(d, 8);
+	vadd.vv		\a0, \a0, \b0
+	vadd.vv		\a1, \a1, \b1
+	vadd.vv		\a2, \a2, \b2
+	vadd.vv		\a3, \a3, \b3
+	vxor.vv		\d0, \d0, \a0
+	vxor.vv		\d1, \d1, \a1
+	vxor.vv		\d2, \d2, \a2
+	vxor.vv		\d3, \d3, \a3
+	vror.vi		\d0, \d0, 32 - 8
+	vror.vi		\d1, \d1, 32 - 8
+	vror.vi		\d2, \d2, 32 - 8
+	vror.vi		\d3, \d3, 32 - 8
+
+	// c += d; b ^= c; b = rol(b, 7);
+	vadd.vv		\c0, \c0, \d0
+	vadd.vv		\c1, \c1, \d1
+	vadd.vv		\c2, \c2, \d2
+	vadd.vv		\c3, \c3, \d3
+	vxor.vv		\b0, \b0, \c0
+	vxor.vv		\b1, \b1, \c1
+	vxor.vv		\b2, \b2, \c2
+	vxor.vv		\b3, \b3, \c3
+	vror.vi		\b0, \b0, 32 - 7
+	vror.vi		\b1, \b1, 32 - 7
+	vror.vi		\b2, \b2, 32 - 7
+	vror.vi		\b3, \b3, 32 - 7
+.endm
+
+// void chacha20_zvkb(const u32 key[8], const u8 *in, u8 *out, size_t len,
+//		      const u32 iv[4]);
+//
+// |len| must be nonzero and a multiple of 64 (CHACHA_BLOCK_SIZE).
+// The counter is treated as 32-bit, following the RFC7539 convention.
+SYM_FUNC_START(chacha20_zvkb)
+	srli		LEN, LEN, 6	// Bytes to blocks
+
+	addi		sp, sp, -96
+	sd		s0, 0(sp)
+	sd		s1, 8(sp)
+	sd		s2, 16(sp)
+	sd		s3, 24(sp)
+	sd		s4, 32(sp)
+	sd		s5, 40(sp)
+	sd		s6, 48(sp)
+	sd		s7, 56(sp)
+	sd		s8, 64(sp)
+	sd		s9, 72(sp)
+	sd		s10, 80(sp)
+	sd		s11, 88(sp)
+
+	li		STRIDE, 64
+
+	// Set up the initial state matrix in scalar registers.
+	li		CONSTS0, 0x61707865	// "expa" little endian
+	li		CONSTS1, 0x3320646e	// "nd 3" little endian
+	li		CONSTS2, 0x79622d32	// "2-by" little endian
+	li		CONSTS3, 0x6b206574	// "te k" little endian
+	lw		KEY0, 0(KEYP)
+	lw		KEY1, 4(KEYP)
+	lw		KEY2, 8(KEYP)
+	lw		KEY3, 12(KEYP)
+	lw		KEY4, 16(KEYP)
+	lw		KEY5, 20(KEYP)
+	lw		KEY6, 24(KEYP)
+	lw		KEY7, 28(KEYP)
+	lw		COUNTER, 0(IVP)
+	lw		NONCE0, 4(IVP)
+	lw		NONCE1, 8(IVP)
+	lw		NONCE2, 12(IVP)
+
+.Lblock_loop:
+	// Set vl to the number of blocks to process in this iteration.
+	vsetvli		VL, LEN, e32, m1, ta, ma
+
+	// Set up the initial state matrix for the next VL blocks in v0-v15.
+	// v{i} holds the i'th 32-bit word of the state matrix for all blocks.
+	// Note that only the counter word, at index 12, differs across blocks.
+	vmv.v.x		v0, CONSTS0
+	vmv.v.x		v1, CONSTS1
+	vmv.v.x		v2, CONSTS2
+	vmv.v.x		v3, CONSTS3
+	vmv.v.x		v4, KEY0
+	vmv.v.x		v5, KEY1
+	vmv.v.x		v6, KEY2
+	vmv.v.x		v7, KEY3
+	vmv.v.x		v8, KEY4
+	vmv.v.x		v9, KEY5
+	vmv.v.x		v10, KEY6
+	vmv.v.x		v11, KEY7
+	vid.v		v12
+	vadd.vx		v12, v12, COUNTER
+	vmv.v.x		v13, NONCE0
+	vmv.v.x		v14, NONCE1
+	vmv.v.x		v15, NONCE2
+
+	// Load the first half of the input data for each block into v16-v23.
+	// v{16+i} holds the i'th 32-bit word for all blocks.
+	vlsseg8e32.v	v16, (INP), STRIDE
+
+	li		NROUNDS, 20
+.Lnext_doubleround:
+	addi		NROUNDS, NROUNDS, -2
+	// column round
+	chacha_round	v0, v4, v8, v12, v1, v5, v9, v13, \
+			v2, v6, v10, v14, v3, v7, v11, v15
+	// diagonal round
+	chacha_round	v0, v5, v10, v15, v1, v6, v11, v12, \
+			v2, v7, v8, v13, v3, v4, v9, v14
+	bnez		NROUNDS, .Lnext_doubleround
+
+	// Load the second half of the input data for each block into v24-v31.
+	// v{24+i} holds the {8+i}'th 32-bit word for all blocks.
+	addi		TMP, INP, 32
+	vlsseg8e32.v	v24, (TMP), STRIDE
+
+	// Finalize the first half of the keystream for each block.
+	vadd.vx		v0, v0, CONSTS0
+	vadd.vx		v1, v1, CONSTS1
+	vadd.vx		v2, v2, CONSTS2
+	vadd.vx		v3, v3, CONSTS3
+	vadd.vx		v4, v4, KEY0
+	vadd.vx		v5, v5, KEY1
+	vadd.vx		v6, v6, KEY2
+	vadd.vx		v7, v7, KEY3
+
+	// Encrypt/decrypt the first half of the data for each block.
+	vxor.vv		v16, v16, v0
+	vxor.vv		v17, v17, v1
+	vxor.vv		v18, v18, v2
+	vxor.vv		v19, v19, v3
+	vxor.vv		v20, v20, v4
+	vxor.vv		v21, v21, v5
+	vxor.vv		v22, v22, v6
+	vxor.vv		v23, v23, v7
+
+	// Store the first half of the output data for each block.
+	vssseg8e32.v	v16, (OUTP), STRIDE
+
+	// Finalize the second half of the keystream for each block.
+	vadd.vx		v8, v8, KEY4
+	vadd.vx		v9, v9, KEY5
+	vadd.vx		v10, v10, KEY6
+	vadd.vx		v11, v11, KEY7
+	vid.v		v0
+	vadd.vx		v12, v12, COUNTER
+	vadd.vx		v13, v13, NONCE0
+	vadd.vx		v14, v14, NONCE1
+	vadd.vx		v15, v15, NONCE2
+	vadd.vv		v12, v12, v0
+
+	// Encrypt/decrypt the second half of the data for each block.
+	vxor.vv		v24, v24, v8
+	vxor.vv		v25, v25, v9
+	vxor.vv		v26, v26, v10
+	vxor.vv		v27, v27, v11
+	vxor.vv		v29, v29, v13
+	vxor.vv		v28, v28, v12
+	vxor.vv		v30, v30, v14
+	vxor.vv		v31, v31, v15
+
+	// Store the second half of the output data for each block.
+	addi		TMP, OUTP, 32
+	vssseg8e32.v	v24, (TMP), STRIDE
+
+	// Update the counter, the remaining number of blocks, and the input and
+	// output pointers according to the number of blocks processed (VL).
+	add		COUNTER, COUNTER, VL
+	sub		LEN, LEN, VL
+	slli		TMP, VL, 6
+	add		OUTP, OUTP, TMP
+	add		INP, INP, TMP
+	bnez		LEN, .Lblock_loop
+
+	ld		s0, 0(sp)
+	ld		s1, 8(sp)
+	ld		s2, 16(sp)
+	ld		s3, 24(sp)
+	ld		s4, 32(sp)
+	ld		s5, 40(sp)
+	ld		s6, 48(sp)
+	ld		s7, 56(sp)
+	ld		s8, 64(sp)
+	ld		s9, 72(sp)
+	ld		s10, 80(sp)
+	ld		s11, 88(sp)
+	addi		sp, sp, 96
+	ret
+SYM_FUNC_END(chacha20_zvkb)
diff --git a/arch/riscv/crypto/ghash-riscv64-glue.c b/arch/riscv/crypto/ghash-riscv64-glue.c
new file mode 100644
index 000000000000..312e7891fd0a
--- /dev/null
+++ b/arch/riscv/crypto/ghash-riscv64-glue.c
@@ -0,0 +1,168 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * GHASH using the RISC-V vector crypto extensions
+ *
+ * Copyright (C) 2023 VRULL GmbH
+ * Author: Heiko Stuebner <heiko.stuebner@vrull.eu>
+ *
+ * Copyright (C) 2023 SiFive, Inc.
+ * Author: Jerry Shih <jerry.shih@sifive.com>
+ */
+
+#include <asm/simd.h>
+#include <asm/vector.h>
+#include <crypto/ghash.h>
+#include <crypto/internal/hash.h>
+#include <crypto/internal/simd.h>
+#include <linux/linkage.h>
+#include <linux/module.h>
+
+asmlinkage void ghash_zvkg(be128 *accumulator, const be128 *key, const u8 *data,
+			   size_t len);
+
+struct riscv64_ghash_tfm_ctx {
+	be128 key;
+};
+
+struct riscv64_ghash_desc_ctx {
+	be128 accumulator;
+	u8 buffer[GHASH_BLOCK_SIZE];
+	u32 bytes;
+};
+
+static int riscv64_ghash_setkey(struct crypto_shash *tfm, const u8 *key,
+				unsigned int keylen)
+{
+	struct riscv64_ghash_tfm_ctx *tctx = crypto_shash_ctx(tfm);
+
+	if (keylen != GHASH_BLOCK_SIZE)
+		return -EINVAL;
+
+	memcpy(&tctx->key, key, GHASH_BLOCK_SIZE);
+
+	return 0;
+}
+
+static int riscv64_ghash_init(struct shash_desc *desc)
+{
+	struct riscv64_ghash_desc_ctx *dctx = shash_desc_ctx(desc);
+
+	*dctx = (struct riscv64_ghash_desc_ctx){};
+
+	return 0;
+}
+
+static inline void
+riscv64_ghash_blocks(const struct riscv64_ghash_tfm_ctx *tctx,
+		     struct riscv64_ghash_desc_ctx *dctx,
+		     const u8 *src, size_t srclen)
+{
+	/* The srclen is nonzero and a multiple of 16. */
+	if (crypto_simd_usable()) {
+		kernel_vector_begin();
+		ghash_zvkg(&dctx->accumulator, &tctx->key, src, srclen);
+		kernel_vector_end();
+	} else {
+		do {
+			crypto_xor((u8 *)&dctx->accumulator, src,
+				   GHASH_BLOCK_SIZE);
+			gf128mul_lle(&dctx->accumulator, &tctx->key);
+			src += GHASH_BLOCK_SIZE;
+			srclen -= GHASH_BLOCK_SIZE;
+		} while (srclen);
+	}
+}
+
+static int riscv64_ghash_update(struct shash_desc *desc, const u8 *src,
+				unsigned int srclen)
+{
+	const struct riscv64_ghash_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm);
+	struct riscv64_ghash_desc_ctx *dctx = shash_desc_ctx(desc);
+	unsigned int len;
+
+	if (dctx->bytes) {
+		if (dctx->bytes + srclen < GHASH_BLOCK_SIZE) {
+			memcpy(dctx->buffer + dctx->bytes, src, srclen);
+			dctx->bytes += srclen;
+			return 0;
+		}
+		memcpy(dctx->buffer + dctx->bytes, src,
+		       GHASH_BLOCK_SIZE - dctx->bytes);
+		riscv64_ghash_blocks(tctx, dctx, dctx->buffer,
+				     GHASH_BLOCK_SIZE);
+		src += GHASH_BLOCK_SIZE - dctx->bytes;
+		srclen -= GHASH_BLOCK_SIZE - dctx->bytes;
+		dctx->bytes = 0;
+	}
+
+	len = round_down(srclen, GHASH_BLOCK_SIZE);
+	if (len) {
+		riscv64_ghash_blocks(tctx, dctx, src, len);
+		src += len;
+		srclen -= len;
+	}
+
+	if (srclen) {
+		memcpy(dctx->buffer, src, srclen);
+		dctx->bytes = srclen;
+	}
+
+	return 0;
+}
+
+static int riscv64_ghash_final(struct shash_desc *desc, u8 *out)
+{
+	const struct riscv64_ghash_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm);
+	struct riscv64_ghash_desc_ctx *dctx = shash_desc_ctx(desc);
+	int i;
+
+	if (dctx->bytes) {
+		for (i = dctx->bytes; i < GHASH_BLOCK_SIZE; i++)
+			dctx->buffer[i] = 0;
+
+		riscv64_ghash_blocks(tctx, dctx, dctx->buffer,
+				     GHASH_BLOCK_SIZE);
+	}
+
+	memcpy(out, &dctx->accumulator, GHASH_DIGEST_SIZE);
+	return 0;
+}
+
+static struct shash_alg riscv64_ghash_alg = {
+	.init = riscv64_ghash_init,
+	.update = riscv64_ghash_update,
+	.final = riscv64_ghash_final,
+	.setkey = riscv64_ghash_setkey,
+	.descsize = sizeof(struct riscv64_ghash_desc_ctx),
+	.digestsize = GHASH_DIGEST_SIZE,
+	.base = {
+		.cra_blocksize = GHASH_BLOCK_SIZE,
+		.cra_ctxsize = sizeof(struct riscv64_ghash_tfm_ctx),
+		.cra_priority = 300,
+		.cra_name = "ghash",
+		.cra_driver_name = "ghash-riscv64-zvkg",
+		.cra_module = THIS_MODULE,
+	},
+};
+
+static int __init riscv64_ghash_mod_init(void)
+{
+	if (riscv_isa_extension_available(NULL, ZVKG) &&
+	    riscv_vector_vlen() >= 128)
+		return crypto_register_shash(&riscv64_ghash_alg);
+
+	return -ENODEV;
+}
+
+static void __exit riscv64_ghash_mod_exit(void)
+{
+	crypto_unregister_shash(&riscv64_ghash_alg);
+}
+
+module_init(riscv64_ghash_mod_init);
+module_exit(riscv64_ghash_mod_exit);
+
+MODULE_DESCRIPTION("GHASH (RISC-V accelerated)");
+MODULE_AUTHOR("Heiko Stuebner <heiko.stuebner@vrull.eu>");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_CRYPTO("ghash");
diff --git a/arch/riscv/crypto/ghash-riscv64-zvkg.S b/arch/riscv/crypto/ghash-riscv64-zvkg.S
new file mode 100644
index 000000000000..f2b43fb4d434
--- /dev/null
+++ b/arch/riscv/crypto/ghash-riscv64-zvkg.S
@@ -0,0 +1,72 @@
+/* SPDX-License-Identifier: Apache-2.0 OR BSD-2-Clause */
+//
+// This file is dual-licensed, meaning that you can use it under your
+// choice of either of the following two licenses:
+//
+// Copyright 2023 The OpenSSL Project Authors. All Rights Reserved.
+//
+// Licensed under the Apache License 2.0 (the "License"). You can obtain
+// a copy in the file LICENSE in the source distribution or at
+// https://www.openssl.org/source/license.html
+//
+// or
+//
+// Copyright (c) 2023, Christoph Müllner <christoph.muellner@vrull.eu>
+// Copyright (c) 2023, Jerry Shih <jerry.shih@sifive.com>
+// Copyright 2024 Google LLC
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+// 1. Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+// 2. Redistributions in binary form must reproduce the above copyright
+//    notice, this list of conditions and the following disclaimer in the
+//    documentation and/or other materials provided with the distribution.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// The generated code of this file depends on the following RISC-V extensions:
+// - RV64I
+// - RISC-V Vector ('V') with VLEN >= 128
+// - RISC-V Vector GCM/GMAC extension ('Zvkg')
+
+#include <linux/linkage.h>
+
+.text
+.option arch, +zvkg
+
+#define ACCUMULATOR	a0
+#define KEY		a1
+#define DATA		a2
+#define LEN		a3
+
+// void ghash_zvkg(be128 *accumulator, const be128 *key, const u8 *data,
+//		   size_t len);
+//
+// |len| must be nonzero and a multiple of 16 (GHASH_BLOCK_SIZE).
+SYM_FUNC_START(ghash_zvkg)
+	vsetivli	zero, 4, e32, m1, ta, ma
+	vle32.v		v1, (ACCUMULATOR)
+	vle32.v		v2, (KEY)
+.Lnext_block:
+	vle32.v		v3, (DATA)
+	vghsh.vv	v1, v2, v3
+	addi		DATA, DATA, 16
+	addi		LEN, LEN, -16
+	bnez		LEN, .Lnext_block
+
+	vse32.v		v1, (ACCUMULATOR)
+	ret
+SYM_FUNC_END(ghash_zvkg)
diff --git a/arch/riscv/crypto/sha256-riscv64-glue.c b/arch/riscv/crypto/sha256-riscv64-glue.c
new file mode 100644
index 000000000000..71e051e40a64
--- /dev/null
+++ b/arch/riscv/crypto/sha256-riscv64-glue.c
@@ -0,0 +1,137 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * SHA-256 and SHA-224 using the RISC-V vector crypto extensions
+ *
+ * Copyright (C) 2022 VRULL GmbH
+ * Author: Heiko Stuebner <heiko.stuebner@vrull.eu>
+ *
+ * Copyright (C) 2023 SiFive, Inc.
+ * Author: Jerry Shih <jerry.shih@sifive.com>
+ */
+
+#include <asm/simd.h>
+#include <asm/vector.h>
+#include <crypto/internal/hash.h>
+#include <crypto/internal/simd.h>
+#include <crypto/sha256_base.h>
+#include <linux/linkage.h>
+#include <linux/module.h>
+
+/*
+ * Note: the asm function only uses the 'state' field of struct sha256_state.
+ * It is assumed to be the first field.
+ */
+asmlinkage void sha256_transform_zvknha_or_zvknhb_zvkb(
+	struct sha256_state *state, const u8 *data, int num_blocks);
+
+static int riscv64_sha256_update(struct shash_desc *desc, const u8 *data,
+				 unsigned int len)
+{
+	/*
+	 * Ensure struct sha256_state begins directly with the SHA-256
+	 * 256-bit internal state, as this is what the asm function expects.
+	 */
+	BUILD_BUG_ON(offsetof(struct sha256_state, state) != 0);
+
+	if (crypto_simd_usable()) {
+		kernel_vector_begin();
+		sha256_base_do_update(desc, data, len,
+				      sha256_transform_zvknha_or_zvknhb_zvkb);
+		kernel_vector_end();
+	} else {
+		crypto_sha256_update(desc, data, len);
+	}
+	return 0;
+}
+
+static int riscv64_sha256_finup(struct shash_desc *desc, const u8 *data,
+				unsigned int len, u8 *out)
+{
+	if (crypto_simd_usable()) {
+		kernel_vector_begin();
+		if (len)
+			sha256_base_do_update(
+				desc, data, len,
+				sha256_transform_zvknha_or_zvknhb_zvkb);
+		sha256_base_do_finalize(
+			desc, sha256_transform_zvknha_or_zvknhb_zvkb);
+		kernel_vector_end();
+
+		return sha256_base_finish(desc, out);
+	}
+
+	return crypto_sha256_finup(desc, data, len, out);
+}
+
+static int riscv64_sha256_final(struct shash_desc *desc, u8 *out)
+{
+	return riscv64_sha256_finup(desc, NULL, 0, out);
+}
+
+static int riscv64_sha256_digest(struct shash_desc *desc, const u8 *data,
+				 unsigned int len, u8 *out)
+{
+	return sha256_base_init(desc) ?:
+	       riscv64_sha256_finup(desc, data, len, out);
+}
+
+static struct shash_alg riscv64_sha256_algs[] = {
+	{
+		.init = sha256_base_init,
+		.update = riscv64_sha256_update,
+		.final = riscv64_sha256_final,
+		.finup = riscv64_sha256_finup,
+		.digest = riscv64_sha256_digest,
+		.descsize = sizeof(struct sha256_state),
+		.digestsize = SHA256_DIGEST_SIZE,
+		.base = {
+			.cra_blocksize = SHA256_BLOCK_SIZE,
+			.cra_priority = 300,
+			.cra_name = "sha256",
+			.cra_driver_name = "sha256-riscv64-zvknha_or_zvknhb-zvkb",
+			.cra_module = THIS_MODULE,
+		},
+	}, {
+		.init = sha224_base_init,
+		.update = riscv64_sha256_update,
+		.final = riscv64_sha256_final,
+		.finup = riscv64_sha256_finup,
+		.descsize = sizeof(struct sha256_state),
+		.digestsize = SHA224_DIGEST_SIZE,
+		.base = {
+			.cra_blocksize = SHA224_BLOCK_SIZE,
+			.cra_priority = 300,
+			.cra_name = "sha224",
+			.cra_driver_name = "sha224-riscv64-zvknha_or_zvknhb-zvkb",
+			.cra_module = THIS_MODULE,
+		},
+	},
+};
+
+static int __init riscv64_sha256_mod_init(void)
+{
+	/* Both zvknha and zvknhb provide the SHA-256 instructions. */
+	if ((riscv_isa_extension_available(NULL, ZVKNHA) ||
+	     riscv_isa_extension_available(NULL, ZVKNHB)) &&
+	    riscv_isa_extension_available(NULL, ZVKB) &&
+	    riscv_vector_vlen() >= 128)
+		return crypto_register_shashes(riscv64_sha256_algs,
+					       ARRAY_SIZE(riscv64_sha256_algs));
+
+	return -ENODEV;
+}
+
+static void __exit riscv64_sha256_mod_exit(void)
+{
+	crypto_unregister_shashes(riscv64_sha256_algs,
+				  ARRAY_SIZE(riscv64_sha256_algs));
+}
+
+module_init(riscv64_sha256_mod_init);
+module_exit(riscv64_sha256_mod_exit);
+
+MODULE_DESCRIPTION("SHA-256 (RISC-V accelerated)");
+MODULE_AUTHOR("Heiko Stuebner <heiko.stuebner@vrull.eu>");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_CRYPTO("sha256");
+MODULE_ALIAS_CRYPTO("sha224");
diff --git a/arch/riscv/crypto/sha256-riscv64-zvknha_or_zvknhb-zvkb.S b/arch/riscv/crypto/sha256-riscv64-zvknha_or_zvknhb-zvkb.S
new file mode 100644
index 000000000000..8ebcc17de4dc
--- /dev/null
+++ b/arch/riscv/crypto/sha256-riscv64-zvknha_or_zvknhb-zvkb.S
@@ -0,0 +1,225 @@
+/* SPDX-License-Identifier: Apache-2.0 OR BSD-2-Clause */
+//
+// This file is dual-licensed, meaning that you can use it under your
+// choice of either of the following two licenses:
+//
+// Copyright 2023 The OpenSSL Project Authors. All Rights Reserved.
+//
+// Licensed under the Apache License 2.0 (the "License"). You can obtain
+// a copy in the file LICENSE in the source distribution or at
+// https://www.openssl.org/source/license.html
+//
+// or
+//
+// Copyright (c) 2023, Christoph Müllner <christoph.muellner@vrull.eu>
+// Copyright (c) 2023, Phoebe Chen <phoebe.chen@sifive.com>
+// Copyright 2024 Google LLC
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+// 1. Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+// 2. Redistributions in binary form must reproduce the above copyright
+//    notice, this list of conditions and the following disclaimer in the
+//    documentation and/or other materials provided with the distribution.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// The generated code of this file depends on the following RISC-V extensions:
+// - RV64I
+// - RISC-V Vector ('V') with VLEN >= 128
+// - RISC-V Vector SHA-2 Secure Hash extension ('Zvknha' or 'Zvknhb')
+// - RISC-V Vector Cryptography Bit-manipulation extension ('Zvkb')
+
+#include <linux/cfi_types.h>
+
+.text
+.option arch, +zvknha, +zvkb
+
+#define STATEP		a0
+#define DATA		a1
+#define NUM_BLOCKS	a2
+
+#define STATEP_C	a3
+
+#define MASK		v0
+#define INDICES		v1
+#define W0		v2
+#define W1		v3
+#define W2		v4
+#define W3		v5
+#define VTMP		v6
+#define FEBA		v7
+#define HGDC		v8
+#define K0		v10
+#define K1		v11
+#define K2		v12
+#define K3		v13
+#define K4		v14
+#define K5		v15
+#define K6		v16
+#define K7		v17
+#define K8		v18
+#define K9		v19
+#define K10		v20
+#define K11		v21
+#define K12		v22
+#define K13		v23
+#define K14		v24
+#define K15		v25
+#define PREV_FEBA	v26
+#define PREV_HGDC	v27
+
+// Do 4 rounds of SHA-256.  w0 contains the current 4 message schedule words.
+//
+// If not all the message schedule words have been computed yet, then this also
+// computes 4 more message schedule words.  w1-w3 contain the next 3 groups of 4
+// message schedule words; this macro computes the group after w3 and writes it
+// to w0.  This means that the next (w0, w1, w2, w3) is the current (w1, w2, w3,
+// w0), so the caller must cycle through the registers accordingly.
+.macro	sha256_4rounds	last, k, w0, w1, w2, w3
+	vadd.vv		VTMP, \k, \w0
+	vsha2cl.vv	HGDC, FEBA, VTMP
+	vsha2ch.vv	FEBA, HGDC, VTMP
+.if !\last
+	vmerge.vvm	VTMP, \w2, \w1, MASK
+	vsha2ms.vv	\w0, VTMP, \w3
+.endif
+.endm
+
+.macro	sha256_16rounds	last, k0, k1, k2, k3
+	sha256_4rounds	\last, \k0, W0, W1, W2, W3
+	sha256_4rounds	\last, \k1, W1, W2, W3, W0
+	sha256_4rounds	\last, \k2, W2, W3, W0, W1
+	sha256_4rounds	\last, \k3, W3, W0, W1, W2
+.endm
+
+// void sha256_transform_zvknha_or_zvknhb_zvkb(u32 state[8], const u8 *data,
+//					       int num_blocks);
+SYM_TYPED_FUNC_START(sha256_transform_zvknha_or_zvknhb_zvkb)
+
+	// Load the round constants into K0-K15.
+	vsetivli	zero, 4, e32, m1, ta, ma
+	la		t0, K256
+	vle32.v		K0, (t0)
+	addi		t0, t0, 16
+	vle32.v		K1, (t0)
+	addi		t0, t0, 16
+	vle32.v		K2, (t0)
+	addi		t0, t0, 16
+	vle32.v		K3, (t0)
+	addi		t0, t0, 16
+	vle32.v		K4, (t0)
+	addi		t0, t0, 16
+	vle32.v		K5, (t0)
+	addi		t0, t0, 16
+	vle32.v		K6, (t0)
+	addi		t0, t0, 16
+	vle32.v		K7, (t0)
+	addi		t0, t0, 16
+	vle32.v		K8, (t0)
+	addi		t0, t0, 16
+	vle32.v		K9, (t0)
+	addi		t0, t0, 16
+	vle32.v		K10, (t0)
+	addi		t0, t0, 16
+	vle32.v		K11, (t0)
+	addi		t0, t0, 16
+	vle32.v		K12, (t0)
+	addi		t0, t0, 16
+	vle32.v		K13, (t0)
+	addi		t0, t0, 16
+	vle32.v		K14, (t0)
+	addi		t0, t0, 16
+	vle32.v		K15, (t0)
+
+	// Setup mask for the vmerge to replace the first word (idx==0) in
+	// message scheduling.  There are 4 words, so an 8-bit mask suffices.
+	vsetivli	zero, 1, e8, m1, ta, ma
+	vmv.v.i		MASK, 0x01
+
+	// Load the state.  The state is stored as {a,b,c,d,e,f,g,h}, but we
+	// need {f,e,b,a},{h,g,d,c}.  The dst vtype is e32m1 and the index vtype
+	// is e8mf4.  We use index-load with the i8 indices {20, 16, 4, 0},
+	// loaded using the 32-bit little endian value 0x00041014.
+	li		t0, 0x00041014
+	vsetivli	zero, 1, e32, m1, ta, ma
+	vmv.v.x		INDICES, t0
+	addi		STATEP_C, STATEP, 8
+	vsetivli	zero, 4, e32, m1, ta, ma
+	vluxei8.v	FEBA, (STATEP), INDICES
+	vluxei8.v	HGDC, (STATEP_C), INDICES
+
+.Lnext_block:
+	addi		NUM_BLOCKS, NUM_BLOCKS, -1
+
+	// Save the previous state, as it's needed later.
+	vmv.v.v		PREV_FEBA, FEBA
+	vmv.v.v		PREV_HGDC, HGDC
+
+	// Load the next 512-bit message block and endian-swap each 32-bit word.
+	vle32.v		W0, (DATA)
+	vrev8.v		W0, W0
+	addi		DATA, DATA, 16
+	vle32.v		W1, (DATA)
+	vrev8.v		W1, W1
+	addi		DATA, DATA, 16
+	vle32.v		W2, (DATA)
+	vrev8.v		W2, W2
+	addi		DATA, DATA, 16
+	vle32.v		W3, (DATA)
+	vrev8.v		W3, W3
+	addi		DATA, DATA, 16
+
+	// Do the 64 rounds of SHA-256.
+	sha256_16rounds	0, K0, K1, K2, K3
+	sha256_16rounds	0, K4, K5, K6, K7
+	sha256_16rounds	0, K8, K9, K10, K11
+	sha256_16rounds	1, K12, K13, K14, K15
+
+	// Add the previous state.
+	vadd.vv		FEBA, FEBA, PREV_FEBA
+	vadd.vv		HGDC, HGDC, PREV_HGDC
+
+	// Repeat if more blocks remain.
+	bnez		NUM_BLOCKS, .Lnext_block
+
+	// Store the new state and return.
+	vsuxei8.v	FEBA, (STATEP), INDICES
+	vsuxei8.v	HGDC, (STATEP_C), INDICES
+	ret
+SYM_FUNC_END(sha256_transform_zvknha_or_zvknhb_zvkb)
+
+.section ".rodata"
+.p2align 2
+.type K256, @object
+K256:
+	.word		0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5
+	.word		0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5
+	.word		0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3
+	.word		0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174
+	.word		0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc
+	.word		0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da
+	.word		0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7
+	.word		0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967
+	.word		0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13
+	.word		0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85
+	.word		0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3
+	.word		0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070
+	.word		0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5
+	.word		0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3
+	.word		0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208
+	.word		0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
+.size K256, . - K256
diff --git a/arch/riscv/crypto/sha512-riscv64-glue.c b/arch/riscv/crypto/sha512-riscv64-glue.c
new file mode 100644
index 000000000000..43b56a08aeb5
--- /dev/null
+++ b/arch/riscv/crypto/sha512-riscv64-glue.c
@@ -0,0 +1,133 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * SHA-512 and SHA-384 using the RISC-V vector crypto extensions
+ *
+ * Copyright (C) 2023 VRULL GmbH
+ * Author: Heiko Stuebner <heiko.stuebner@vrull.eu>
+ *
+ * Copyright (C) 2023 SiFive, Inc.
+ * Author: Jerry Shih <jerry.shih@sifive.com>
+ */
+
+#include <asm/simd.h>
+#include <asm/vector.h>
+#include <crypto/internal/hash.h>
+#include <crypto/internal/simd.h>
+#include <crypto/sha512_base.h>
+#include <linux/linkage.h>
+#include <linux/module.h>
+
+/*
+ * Note: the asm function only uses the 'state' field of struct sha512_state.
+ * It is assumed to be the first field.
+ */
+asmlinkage void sha512_transform_zvknhb_zvkb(
+	struct sha512_state *state, const u8 *data, int num_blocks);
+
+static int riscv64_sha512_update(struct shash_desc *desc, const u8 *data,
+				 unsigned int len)
+{
+	/*
+	 * Ensure struct sha512_state begins directly with the SHA-512
+	 * 512-bit internal state, as this is what the asm function expects.
+	 */
+	BUILD_BUG_ON(offsetof(struct sha512_state, state) != 0);
+
+	if (crypto_simd_usable()) {
+		kernel_vector_begin();
+		sha512_base_do_update(desc, data, len,
+				      sha512_transform_zvknhb_zvkb);
+		kernel_vector_end();
+	} else {
+		crypto_sha512_update(desc, data, len);
+	}
+	return 0;
+}
+
+static int riscv64_sha512_finup(struct shash_desc *desc, const u8 *data,
+				unsigned int len, u8 *out)
+{
+	if (crypto_simd_usable()) {
+		kernel_vector_begin();
+		if (len)
+			sha512_base_do_update(desc, data, len,
+					      sha512_transform_zvknhb_zvkb);
+		sha512_base_do_finalize(desc, sha512_transform_zvknhb_zvkb);
+		kernel_vector_end();
+
+		return sha512_base_finish(desc, out);
+	}
+
+	return crypto_sha512_finup(desc, data, len, out);
+}
+
+static int riscv64_sha512_final(struct shash_desc *desc, u8 *out)
+{
+	return riscv64_sha512_finup(desc, NULL, 0, out);
+}
+
+static int riscv64_sha512_digest(struct shash_desc *desc, const u8 *data,
+				 unsigned int len, u8 *out)
+{
+	return sha512_base_init(desc) ?:
+	       riscv64_sha512_finup(desc, data, len, out);
+}
+
+static struct shash_alg riscv64_sha512_algs[] = {
+	{
+		.init = sha512_base_init,
+		.update = riscv64_sha512_update,
+		.final = riscv64_sha512_final,
+		.finup = riscv64_sha512_finup,
+		.digest = riscv64_sha512_digest,
+		.descsize = sizeof(struct sha512_state),
+		.digestsize = SHA512_DIGEST_SIZE,
+		.base = {
+			.cra_blocksize = SHA512_BLOCK_SIZE,
+			.cra_priority = 300,
+			.cra_name = "sha512",
+			.cra_driver_name = "sha512-riscv64-zvknhb-zvkb",
+			.cra_module = THIS_MODULE,
+		},
+	}, {
+		.init = sha384_base_init,
+		.update = riscv64_sha512_update,
+		.final = riscv64_sha512_final,
+		.finup = riscv64_sha512_finup,
+		.descsize = sizeof(struct sha512_state),
+		.digestsize = SHA384_DIGEST_SIZE,
+		.base = {
+			.cra_blocksize = SHA384_BLOCK_SIZE,
+			.cra_priority = 300,
+			.cra_name = "sha384",
+			.cra_driver_name = "sha384-riscv64-zvknhb-zvkb",
+			.cra_module = THIS_MODULE,
+		},
+	},
+};
+
+static int __init riscv64_sha512_mod_init(void)
+{
+	if (riscv_isa_extension_available(NULL, ZVKNHB) &&
+	    riscv_isa_extension_available(NULL, ZVKB) &&
+	    riscv_vector_vlen() >= 128)
+		return crypto_register_shashes(riscv64_sha512_algs,
+					       ARRAY_SIZE(riscv64_sha512_algs));
+
+	return -ENODEV;
+}
+
+static void __exit riscv64_sha512_mod_exit(void)
+{
+	crypto_unregister_shashes(riscv64_sha512_algs,
+				  ARRAY_SIZE(riscv64_sha512_algs));
+}
+
+module_init(riscv64_sha512_mod_init);
+module_exit(riscv64_sha512_mod_exit);
+
+MODULE_DESCRIPTION("SHA-512 (RISC-V accelerated)");
+MODULE_AUTHOR("Heiko Stuebner <heiko.stuebner@vrull.eu>");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_CRYPTO("sha512");
+MODULE_ALIAS_CRYPTO("sha384");
diff --git a/arch/riscv/crypto/sha512-riscv64-zvknhb-zvkb.S b/arch/riscv/crypto/sha512-riscv64-zvknhb-zvkb.S
new file mode 100644
index 000000000000..3a9ae210f915
--- /dev/null
+++ b/arch/riscv/crypto/sha512-riscv64-zvknhb-zvkb.S
@@ -0,0 +1,203 @@
+/* SPDX-License-Identifier: Apache-2.0 OR BSD-2-Clause */
+//
+// This file is dual-licensed, meaning that you can use it under your
+// choice of either of the following two licenses:
+//
+// Copyright 2023 The OpenSSL Project Authors. All Rights Reserved.
+//
+// Licensed under the Apache License 2.0 (the "License"). You can obtain
+// a copy in the file LICENSE in the source distribution or at
+// https://www.openssl.org/source/license.html
+//
+// or
+//
+// Copyright (c) 2023, Christoph Müllner <christoph.muellner@vrull.eu>
+// Copyright (c) 2023, Phoebe Chen <phoebe.chen@sifive.com>
+// Copyright 2024 Google LLC
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+// 1. Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+// 2. Redistributions in binary form must reproduce the above copyright
+//    notice, this list of conditions and the following disclaimer in the
+//    documentation and/or other materials provided with the distribution.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// The generated code of this file depends on the following RISC-V extensions:
+// - RV64I
+// - RISC-V Vector ('V') with VLEN >= 128
+// - RISC-V Vector SHA-2 Secure Hash extension ('Zvknhb')
+// - RISC-V Vector Cryptography Bit-manipulation extension ('Zvkb')
+
+#include <linux/cfi_types.h>
+
+.text
+.option arch, +zvknhb, +zvkb
+
+#define STATEP		a0
+#define DATA		a1
+#define NUM_BLOCKS	a2
+
+#define STATEP_C	a3
+#define K		a4
+
+#define MASK		v0
+#define INDICES		v1
+#define W0		v10	// LMUL=2
+#define W1		v12	// LMUL=2
+#define W2		v14	// LMUL=2
+#define W3		v16	// LMUL=2
+#define VTMP		v20	// LMUL=2
+#define FEBA		v22	// LMUL=2
+#define HGDC		v24	// LMUL=2
+#define PREV_FEBA	v26	// LMUL=2
+#define PREV_HGDC	v28	// LMUL=2
+
+// Do 4 rounds of SHA-512.  w0 contains the current 4 message schedule words.
+//
+// If not all the message schedule words have been computed yet, then this also
+// computes 4 more message schedule words.  w1-w3 contain the next 3 groups of 4
+// message schedule words; this macro computes the group after w3 and writes it
+// to w0.  This means that the next (w0, w1, w2, w3) is the current (w1, w2, w3,
+// w0), so the caller must cycle through the registers accordingly.
+.macro	sha512_4rounds	last, w0, w1, w2, w3
+	vle64.v		VTMP, (K)
+	addi		K, K, 32
+	vadd.vv		VTMP, VTMP, \w0
+	vsha2cl.vv	HGDC, FEBA, VTMP
+	vsha2ch.vv	FEBA, HGDC, VTMP
+.if !\last
+	vmerge.vvm	VTMP, \w2, \w1, MASK
+	vsha2ms.vv	\w0, VTMP, \w3
+.endif
+.endm
+
+.macro	sha512_16rounds	last
+	sha512_4rounds	\last, W0, W1, W2, W3
+	sha512_4rounds	\last, W1, W2, W3, W0
+	sha512_4rounds	\last, W2, W3, W0, W1
+	sha512_4rounds	\last, W3, W0, W1, W2
+.endm
+
+// void sha512_transform_zvknhb_zvkb(u64 state[8], const u8 *data,
+//				     int num_blocks);
+SYM_TYPED_FUNC_START(sha512_transform_zvknhb_zvkb)
+
+	// Setup mask for the vmerge to replace the first word (idx==0) in
+	// message scheduling.  There are 4 words, so an 8-bit mask suffices.
+	vsetivli	zero, 1, e8, m1, ta, ma
+	vmv.v.i		MASK, 0x01
+
+	// Load the state.  The state is stored as {a,b,c,d,e,f,g,h}, but we
+	// need {f,e,b,a},{h,g,d,c}.  The dst vtype is e64m2 and the index vtype
+	// is e8mf4.  We use index-load with the i8 indices {40, 32, 8, 0},
+	// loaded using the 32-bit little endian value 0x00082028.
+	li		t0, 0x00082028
+	vsetivli	zero, 1, e32, m1, ta, ma
+	vmv.v.x		INDICES, t0
+	addi		STATEP_C, STATEP, 16
+	vsetivli	zero, 4, e64, m2, ta, ma
+	vluxei8.v	FEBA, (STATEP), INDICES
+	vluxei8.v	HGDC, (STATEP_C), INDICES
+
+.Lnext_block:
+	la		K, K512
+	addi		NUM_BLOCKS, NUM_BLOCKS, -1
+
+	// Save the previous state, as it's needed later.
+	vmv.v.v		PREV_FEBA, FEBA
+	vmv.v.v		PREV_HGDC, HGDC
+
+	// Load the next 1024-bit message block and endian-swap each 64-bit word
+	vle64.v		W0, (DATA)
+	vrev8.v		W0, W0
+	addi		DATA, DATA, 32
+	vle64.v		W1, (DATA)
+	vrev8.v		W1, W1
+	addi		DATA, DATA, 32
+	vle64.v		W2, (DATA)
+	vrev8.v		W2, W2
+	addi		DATA, DATA, 32
+	vle64.v		W3, (DATA)
+	vrev8.v		W3, W3
+	addi		DATA, DATA, 32
+
+	// Do the 80 rounds of SHA-512.
+	sha512_16rounds 0
+	sha512_16rounds 0
+	sha512_16rounds 0
+	sha512_16rounds 0
+	sha512_16rounds 1
+
+	// Add the previous state.
+	vadd.vv		FEBA, FEBA, PREV_FEBA
+	vadd.vv		HGDC, HGDC, PREV_HGDC
+
+	// Repeat if more blocks remain.
+	bnez		NUM_BLOCKS, .Lnext_block
+
+	// Store the new state and return.
+	vsuxei8.v	FEBA, (STATEP), INDICES
+	vsuxei8.v	HGDC, (STATEP_C), INDICES
+	ret
+SYM_FUNC_END(sha512_transform_zvknhb_zvkb)
+
+.section ".rodata"
+.p2align 3
+.type K512, @object
+K512:
+	.dword		0x428a2f98d728ae22, 0x7137449123ef65cd
+	.dword		0xb5c0fbcfec4d3b2f, 0xe9b5dba58189dbbc
+	.dword		0x3956c25bf348b538, 0x59f111f1b605d019
+	.dword		0x923f82a4af194f9b, 0xab1c5ed5da6d8118
+	.dword		0xd807aa98a3030242, 0x12835b0145706fbe
+	.dword		0x243185be4ee4b28c, 0x550c7dc3d5ffb4e2
+	.dword		0x72be5d74f27b896f, 0x80deb1fe3b1696b1
+	.dword		0x9bdc06a725c71235, 0xc19bf174cf692694
+	.dword		0xe49b69c19ef14ad2, 0xefbe4786384f25e3
+	.dword		0x0fc19dc68b8cd5b5, 0x240ca1cc77ac9c65
+	.dword		0x2de92c6f592b0275, 0x4a7484aa6ea6e483
+	.dword		0x5cb0a9dcbd41fbd4, 0x76f988da831153b5
+	.dword		0x983e5152ee66dfab, 0xa831c66d2db43210
+	.dword		0xb00327c898fb213f, 0xbf597fc7beef0ee4
+	.dword		0xc6e00bf33da88fc2, 0xd5a79147930aa725
+	.dword		0x06ca6351e003826f, 0x142929670a0e6e70
+	.dword		0x27b70a8546d22ffc, 0x2e1b21385c26c926
+	.dword		0x4d2c6dfc5ac42aed, 0x53380d139d95b3df
+	.dword		0x650a73548baf63de, 0x766a0abb3c77b2a8
+	.dword		0x81c2c92e47edaee6, 0x92722c851482353b
+	.dword		0xa2bfe8a14cf10364, 0xa81a664bbc423001
+	.dword		0xc24b8b70d0f89791, 0xc76c51a30654be30
+	.dword		0xd192e819d6ef5218, 0xd69906245565a910
+	.dword		0xf40e35855771202a, 0x106aa07032bbd1b8
+	.dword		0x19a4c116b8d2d0c8, 0x1e376c085141ab53
+	.dword		0x2748774cdf8eeb99, 0x34b0bcb5e19b48a8
+	.dword		0x391c0cb3c5c95a63, 0x4ed8aa4ae3418acb
+	.dword		0x5b9cca4f7763e373, 0x682e6ff3d6b2b8a3
+	.dword		0x748f82ee5defb2fc, 0x78a5636f43172f60
+	.dword		0x84c87814a1f0ab72, 0x8cc702081a6439ec
+	.dword		0x90befffa23631e28, 0xa4506cebde82bde9
+	.dword		0xbef9a3f7b2c67915, 0xc67178f2e372532b
+	.dword		0xca273eceea26619c, 0xd186b8c721c0c207
+	.dword		0xeada7dd6cde0eb1e, 0xf57d4f7fee6ed178
+	.dword		0x06f067aa72176fba, 0x0a637dc5a2c898a6
+	.dword		0x113f9804bef90dae, 0x1b710b35131c471b
+	.dword		0x28db77f523047d84, 0x32caab7b40c72493
+	.dword		0x3c9ebe0a15c9bebc, 0x431d67c49c100d4c
+	.dword		0x4cc5d4becb3e42b6, 0x597f299cfc657e2a
+	.dword		0x5fcb6fab3ad6faec, 0x6c44198c4a475817
+.size K512, . - K512
diff --git a/arch/riscv/crypto/sm3-riscv64-glue.c b/arch/riscv/crypto/sm3-riscv64-glue.c
new file mode 100644
index 000000000000..e1737a970c7c
--- /dev/null
+++ b/arch/riscv/crypto/sm3-riscv64-glue.c
@@ -0,0 +1,112 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * SM3 using the RISC-V vector crypto extensions
+ *
+ * Copyright (C) 2023 VRULL GmbH
+ * Author: Heiko Stuebner <heiko.stuebner@vrull.eu>
+ *
+ * Copyright (C) 2023 SiFive, Inc.
+ * Author: Jerry Shih <jerry.shih@sifive.com>
+ */
+
+#include <asm/simd.h>
+#include <asm/vector.h>
+#include <crypto/internal/hash.h>
+#include <crypto/internal/simd.h>
+#include <crypto/sm3_base.h>
+#include <linux/linkage.h>
+#include <linux/module.h>
+
+/*
+ * Note: the asm function only uses the 'state' field of struct sm3_state.
+ * It is assumed to be the first field.
+ */
+asmlinkage void sm3_transform_zvksh_zvkb(
+	struct sm3_state *state, const u8 *data, int num_blocks);
+
+static int riscv64_sm3_update(struct shash_desc *desc, const u8 *data,
+			      unsigned int len)
+{
+	/*
+	 * Ensure struct sm3_state begins directly with the SM3
+	 * 256-bit internal state, as this is what the asm function expects.
+	 */
+	BUILD_BUG_ON(offsetof(struct sm3_state, state) != 0);
+
+	if (crypto_simd_usable()) {
+		kernel_vector_begin();
+		sm3_base_do_update(desc, data, len, sm3_transform_zvksh_zvkb);
+		kernel_vector_end();
+	} else {
+		sm3_update(shash_desc_ctx(desc), data, len);
+	}
+	return 0;
+}
+
+static int riscv64_sm3_finup(struct shash_desc *desc, const u8 *data,
+			     unsigned int len, u8 *out)
+{
+	struct sm3_state *ctx;
+
+	if (crypto_simd_usable()) {
+		kernel_vector_begin();
+		if (len)
+			sm3_base_do_update(desc, data, len,
+					   sm3_transform_zvksh_zvkb);
+		sm3_base_do_finalize(desc, sm3_transform_zvksh_zvkb);
+		kernel_vector_end();
+
+		return sm3_base_finish(desc, out);
+	}
+
+	ctx = shash_desc_ctx(desc);
+	if (len)
+		sm3_update(ctx, data, len);
+	sm3_final(ctx, out);
+
+	return 0;
+}
+
+static int riscv64_sm3_final(struct shash_desc *desc, u8 *out)
+{
+	return riscv64_sm3_finup(desc, NULL, 0, out);
+}
+
+static struct shash_alg riscv64_sm3_alg = {
+	.init = sm3_base_init,
+	.update = riscv64_sm3_update,
+	.final = riscv64_sm3_final,
+	.finup = riscv64_sm3_finup,
+	.descsize = sizeof(struct sm3_state),
+	.digestsize = SM3_DIGEST_SIZE,
+	.base = {
+		.cra_blocksize = SM3_BLOCK_SIZE,
+		.cra_priority = 300,
+		.cra_name = "sm3",
+		.cra_driver_name = "sm3-riscv64-zvksh-zvkb",
+		.cra_module = THIS_MODULE,
+	},
+};
+
+static int __init riscv64_sm3_mod_init(void)
+{
+	if (riscv_isa_extension_available(NULL, ZVKSH) &&
+	    riscv_isa_extension_available(NULL, ZVKB) &&
+	    riscv_vector_vlen() >= 128)
+		return crypto_register_shash(&riscv64_sm3_alg);
+
+	return -ENODEV;
+}
+
+static void __exit riscv64_sm3_mod_exit(void)
+{
+	crypto_unregister_shash(&riscv64_sm3_alg);
+}
+
+module_init(riscv64_sm3_mod_init);
+module_exit(riscv64_sm3_mod_exit);
+
+MODULE_DESCRIPTION("SM3 (RISC-V accelerated)");
+MODULE_AUTHOR("Heiko Stuebner <heiko.stuebner@vrull.eu>");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_CRYPTO("sm3");
diff --git a/arch/riscv/crypto/sm3-riscv64-zvksh-zvkb.S b/arch/riscv/crypto/sm3-riscv64-zvksh-zvkb.S
new file mode 100644
index 000000000000..a2b65d961c04
--- /dev/null
+++ b/arch/riscv/crypto/sm3-riscv64-zvksh-zvkb.S
@@ -0,0 +1,123 @@
+/* SPDX-License-Identifier: Apache-2.0 OR BSD-2-Clause */
+//
+// This file is dual-licensed, meaning that you can use it under your
+// choice of either of the following two licenses:
+//
+// Copyright 2023 The OpenSSL Project Authors. All Rights Reserved.
+//
+// Licensed under the Apache License 2.0 (the "License"). You can obtain
+// a copy in the file LICENSE in the source distribution or at
+// https://www.openssl.org/source/license.html
+//
+// or
+//
+// Copyright (c) 2023, Christoph Müllner <christoph.muellner@vrull.eu>
+// Copyright (c) 2023, Jerry Shih <jerry.shih@sifive.com>
+// Copyright 2024 Google LLC
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+// 1. Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+// 2. Redistributions in binary form must reproduce the above copyright
+//    notice, this list of conditions and the following disclaimer in the
+//    documentation and/or other materials provided with the distribution.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// The generated code of this file depends on the following RISC-V extensions:
+// - RV64I
+// - RISC-V Vector ('V') with VLEN >= 128
+// - RISC-V Vector SM3 Secure Hash extension ('Zvksh')
+// - RISC-V Vector Cryptography Bit-manipulation extension ('Zvkb')
+
+#include <linux/cfi_types.h>
+
+.text
+.option arch, +zvksh, +zvkb
+
+#define STATEP		a0
+#define DATA		a1
+#define NUM_BLOCKS	a2
+
+#define STATE		v0	// LMUL=2
+#define PREV_STATE	v2	// LMUL=2
+#define W0		v4	// LMUL=2
+#define W1		v6	// LMUL=2
+#define VTMP		v8	// LMUL=2
+
+.macro	sm3_8rounds	i, w0, w1
+	// Do 4 rounds using W_{0+i}..W_{7+i}.
+	vsm3c.vi	STATE, \w0, \i + 0
+	vslidedown.vi	VTMP, \w0, 2
+	vsm3c.vi	STATE, VTMP, \i + 1
+
+	// Compute W_{4+i}..W_{11+i}.
+	vslidedown.vi	VTMP, \w0, 4
+	vslideup.vi	VTMP, \w1, 4
+
+	// Do 4 rounds using W_{4+i}..W_{11+i}.
+	vsm3c.vi	STATE, VTMP, \i + 2
+	vslidedown.vi	VTMP, VTMP, 2
+	vsm3c.vi	STATE, VTMP, \i + 3
+
+.if \i < 28
+	// Compute W_{16+i}..W_{23+i}.
+	vsm3me.vv	\w0, \w1, \w0
+.endif
+	// For the next 8 rounds, w0 and w1 are swapped.
+.endm
+
+// void sm3_transform_zvksh_zvkb(u32 state[8], const u8 *data, int num_blocks);
+SYM_TYPED_FUNC_START(sm3_transform_zvksh_zvkb)
+
+	// Load the state and endian-swap each 32-bit word.
+	vsetivli	zero, 8, e32, m2, ta, ma
+	vle32.v		STATE, (STATEP)
+	vrev8.v		STATE, STATE
+
+.Lnext_block:
+	addi		NUM_BLOCKS, NUM_BLOCKS, -1
+
+	// Save the previous state, as it's needed later.
+	vmv.v.v		PREV_STATE, STATE
+
+	// Load the next 512-bit message block into W0-W1.
+	vle32.v		W0, (DATA)
+	addi		DATA, DATA, 32
+	vle32.v		W1, (DATA)
+	addi		DATA, DATA, 32
+
+	// Do the 64 rounds of SM3.
+	sm3_8rounds	0, W0, W1
+	sm3_8rounds	4, W1, W0
+	sm3_8rounds	8, W0, W1
+	sm3_8rounds	12, W1, W0
+	sm3_8rounds	16, W0, W1
+	sm3_8rounds	20, W1, W0
+	sm3_8rounds	24, W0, W1
+	sm3_8rounds	28, W1, W0
+
+	// XOR in the previous state.
+	vxor.vv		STATE, STATE, PREV_STATE
+
+	// Repeat if more blocks remain.
+	bnez		NUM_BLOCKS, .Lnext_block
+
+	// Store the new state and return.
+	vrev8.v		STATE, STATE
+	vse32.v		STATE, (STATEP)
+	ret
+SYM_FUNC_END(sm3_transform_zvksh_zvkb)
diff --git a/arch/riscv/crypto/sm4-riscv64-glue.c b/arch/riscv/crypto/sm4-riscv64-glue.c
new file mode 100644
index 000000000000..47fb84ebe577
--- /dev/null
+++ b/arch/riscv/crypto/sm4-riscv64-glue.c
@@ -0,0 +1,107 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * SM4 using the RISC-V vector crypto extensions
+ *
+ * Copyright (C) 2023 VRULL GmbH
+ * Author: Heiko Stuebner <heiko.stuebner@vrull.eu>
+ *
+ * Copyright (C) 2023 SiFive, Inc.
+ * Author: Jerry Shih <jerry.shih@sifive.com>
+ */
+
+#include <asm/simd.h>
+#include <asm/vector.h>
+#include <crypto/internal/cipher.h>
+#include <crypto/internal/simd.h>
+#include <crypto/sm4.h>
+#include <linux/linkage.h>
+#include <linux/module.h>
+
+asmlinkage void sm4_expandkey_zvksed_zvkb(const u8 user_key[SM4_KEY_SIZE],
+					  u32 rkey_enc[SM4_RKEY_WORDS],
+					  u32 rkey_dec[SM4_RKEY_WORDS]);
+asmlinkage void sm4_crypt_zvksed_zvkb(const u32 rkey[SM4_RKEY_WORDS],
+				      const u8 in[SM4_BLOCK_SIZE],
+				      u8 out[SM4_BLOCK_SIZE]);
+
+static int riscv64_sm4_setkey(struct crypto_tfm *tfm, const u8 *key,
+			      unsigned int keylen)
+{
+	struct sm4_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	if (crypto_simd_usable()) {
+		if (keylen != SM4_KEY_SIZE)
+			return -EINVAL;
+		kernel_vector_begin();
+		sm4_expandkey_zvksed_zvkb(key, ctx->rkey_enc, ctx->rkey_dec);
+		kernel_vector_end();
+		return 0;
+	}
+	return sm4_expandkey(ctx, key, keylen);
+}
+
+static void riscv64_sm4_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
+{
+	const struct sm4_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	if (crypto_simd_usable()) {
+		kernel_vector_begin();
+		sm4_crypt_zvksed_zvkb(ctx->rkey_enc, src, dst);
+		kernel_vector_end();
+	} else {
+		sm4_crypt_block(ctx->rkey_enc, dst, src);
+	}
+}
+
+static void riscv64_sm4_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
+{
+	const struct sm4_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	if (crypto_simd_usable()) {
+		kernel_vector_begin();
+		sm4_crypt_zvksed_zvkb(ctx->rkey_dec, src, dst);
+		kernel_vector_end();
+	} else {
+		sm4_crypt_block(ctx->rkey_dec, dst, src);
+	}
+}
+
+static struct crypto_alg riscv64_sm4_alg = {
+	.cra_flags = CRYPTO_ALG_TYPE_CIPHER,
+	.cra_blocksize = SM4_BLOCK_SIZE,
+	.cra_ctxsize = sizeof(struct sm4_ctx),
+	.cra_priority = 300,
+	.cra_name = "sm4",
+	.cra_driver_name = "sm4-riscv64-zvksed-zvkb",
+	.cra_cipher = {
+		.cia_min_keysize = SM4_KEY_SIZE,
+		.cia_max_keysize = SM4_KEY_SIZE,
+		.cia_setkey = riscv64_sm4_setkey,
+		.cia_encrypt = riscv64_sm4_encrypt,
+		.cia_decrypt = riscv64_sm4_decrypt,
+	},
+	.cra_module = THIS_MODULE,
+};
+
+static int __init riscv64_sm4_mod_init(void)
+{
+	if (riscv_isa_extension_available(NULL, ZVKSED) &&
+	    riscv_isa_extension_available(NULL, ZVKB) &&
+	    riscv_vector_vlen() >= 128)
+		return crypto_register_alg(&riscv64_sm4_alg);
+
+	return -ENODEV;
+}
+
+static void __exit riscv64_sm4_mod_exit(void)
+{
+	crypto_unregister_alg(&riscv64_sm4_alg);
+}
+
+module_init(riscv64_sm4_mod_init);
+module_exit(riscv64_sm4_mod_exit);
+
+MODULE_DESCRIPTION("SM4 (RISC-V accelerated)");
+MODULE_AUTHOR("Heiko Stuebner <heiko.stuebner@vrull.eu>");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_CRYPTO("sm4");
diff --git a/arch/riscv/crypto/sm4-riscv64-zvksed-zvkb.S b/arch/riscv/crypto/sm4-riscv64-zvksed-zvkb.S
new file mode 100644
index 000000000000..fae62179a4a3
--- /dev/null
+++ b/arch/riscv/crypto/sm4-riscv64-zvksed-zvkb.S
@@ -0,0 +1,117 @@
+/* SPDX-License-Identifier: Apache-2.0 OR BSD-2-Clause */
+//
+// This file is dual-licensed, meaning that you can use it under your
+// choice of either of the following two licenses:
+//
+// Copyright 2023 The OpenSSL Project Authors. All Rights Reserved.
+//
+// Licensed under the Apache License 2.0 (the "License"). You can obtain
+// a copy in the file LICENSE in the source distribution or at
+// https://www.openssl.org/source/license.html
+//
+// or
+//
+// Copyright (c) 2023, Christoph Müllner <christoph.muellner@vrull.eu>
+// Copyright (c) 2023, Jerry Shih <jerry.shih@sifive.com>
+// Copyright 2024 Google LLC
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+// 1. Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+// 2. Redistributions in binary form must reproduce the above copyright
+//    notice, this list of conditions and the following disclaimer in the
+//    documentation and/or other materials provided with the distribution.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// The generated code of this file depends on the following RISC-V extensions:
+// - RV64I
+// - RISC-V Vector ('V') with VLEN >= 128
+// - RISC-V Vector SM4 Block Cipher extension ('Zvksed')
+// - RISC-V Vector Cryptography Bit-manipulation extension ('Zvkb')
+
+#include <linux/linkage.h>
+
+.text
+.option arch, +zvksed, +zvkb
+
+// void sm4_expandkey_zksed_zvkb(const u8 user_key[16], u32 rkey_enc[32],
+//				 u32 rkey_dec[32]);
+SYM_FUNC_START(sm4_expandkey_zvksed_zvkb)
+	vsetivli	zero, 4, e32, m1, ta, ma
+
+	// Load the user key.
+	vle32.v		v1, (a0)
+	vrev8.v		v1, v1
+
+	// XOR the user key with the family key.
+	la		t0, FAMILY_KEY
+	vle32.v		v2, (t0)
+	vxor.vv		v1, v1, v2
+
+	// Compute the round keys.  Store them in forwards order in rkey_enc
+	// and in reverse order in rkey_dec.
+	addi		a2, a2, 31*4
+	li		t0, -4
+	.set		i, 0
+.rept 8
+	vsm4k.vi	v1, v1, i
+	vse32.v		v1, (a1)	// Store to rkey_enc.
+	vsse32.v	v1, (a2), t0	// Store to rkey_dec.
+.if i < 7
+	addi		a1, a1, 16
+	addi		a2, a2, -16
+.endif
+	.set		i, i + 1
+.endr
+
+	ret
+SYM_FUNC_END(sm4_expandkey_zvksed_zvkb)
+
+// void sm4_crypt_zvksed_zvkb(const u32 rkey[32], const u8 in[16], u8 out[16]);
+SYM_FUNC_START(sm4_crypt_zvksed_zvkb)
+	vsetivli	zero, 4, e32, m1, ta, ma
+
+	// Load the input data.
+	vle32.v		v1, (a1)
+	vrev8.v		v1, v1
+
+	// Do the 32 rounds of SM4, 4 at a time.
+	.set		i, 0
+.rept 8
+	vle32.v		v2, (a0)
+	vsm4r.vs	v1, v2
+.if i < 7
+	addi		a0, a0, 16
+.endif
+	.set		i, i + 1
+.endr
+
+	// Store the output data (in reverse element order).
+	vrev8.v		v1, v1
+	li		t0, -4
+	addi		a2, a2, 12
+	vsse32.v	v1, (a2), t0
+
+	ret
+SYM_FUNC_END(sm4_crypt_zvksed_zvkb)
+
+.section ".rodata"
+.p2align 2
+.type FAMILY_KEY, @object
+FAMILY_KEY:
+	.word 0xA3B1BAC6, 0x56AA3350, 0x677D9197, 0xB27022DC
+.size FAMILY_KEY, . - FAMILY_KEY
diff --git a/arch/riscv/errata/andes/errata.c b/arch/riscv/errata/andes/errata.c
index 17a904869724..f2708a9494a1 100644
--- a/arch/riscv/errata/andes/errata.c
+++ b/arch/riscv/errata/andes/errata.c
@@ -18,9 +18,9 @@
 #include <asm/sbi.h>
 #include <asm/vendorid_list.h>
 
-#define ANDESTECH_AX45MP_MARCHID	0x8000000000008a45UL
-#define ANDESTECH_AX45MP_MIMPID		0x500UL
-#define ANDESTECH_SBI_EXT_ANDES		0x0900031E
+#define ANDES_AX45MP_MARCHID		0x8000000000008a45UL
+#define ANDES_AX45MP_MIMPID		0x500UL
+#define ANDES_SBI_EXT_ANDES		0x0900031E
 
 #define ANDES_SBI_EXT_IOCP_SW_WORKAROUND	1
 
@@ -32,7 +32,7 @@ static long ax45mp_iocp_sw_workaround(void)
 	 * ANDES_SBI_EXT_IOCP_SW_WORKAROUND SBI EXT checks if the IOCP is missing and
 	 * cache is controllable only then CMO will be applied to the platform.
 	 */
-	ret = sbi_ecall(ANDESTECH_SBI_EXT_ANDES, ANDES_SBI_EXT_IOCP_SW_WORKAROUND,
+	ret = sbi_ecall(ANDES_SBI_EXT_ANDES, ANDES_SBI_EXT_IOCP_SW_WORKAROUND,
 			0, 0, 0, 0, 0, 0);
 
 	return ret.error ? 0 : ret.value;
@@ -50,7 +50,7 @@ static void errata_probe_iocp(unsigned int stage, unsigned long arch_id, unsigne
 
 	done = true;
 
-	if (arch_id != ANDESTECH_AX45MP_MARCHID || impid != ANDESTECH_AX45MP_MIMPID)
+	if (arch_id != ANDES_AX45MP_MARCHID || impid != ANDES_AX45MP_MIMPID)
 		return;
 
 	if (!ax45mp_iocp_sw_workaround())
diff --git a/arch/riscv/include/asm/asm.h b/arch/riscv/include/asm/asm.h
index b0487b39e674..776354895b81 100644
--- a/arch/riscv/include/asm/asm.h
+++ b/arch/riscv/include/asm/asm.h
@@ -183,6 +183,16 @@
 	REG_L x31, PT_T6(sp)
 	.endm
 
+/* Annotate a function as being unsuitable for kprobes. */
+#ifdef CONFIG_KPROBES
+#define ASM_NOKPROBE(name)				\
+	.pushsection "_kprobe_blacklist", "aw";		\
+	RISCV_PTR name;					\
+	.popsection
+#else
+#define ASM_NOKPROBE(name)
+#endif
+
 #endif /* __ASSEMBLY__ */
 
 #endif /* _ASM_RISCV_ASM_H */
diff --git a/arch/riscv/include/asm/atomic.h b/arch/riscv/include/asm/atomic.h
index f5dfef6c2153..0e0522e588ca 100644
--- a/arch/riscv/include/asm/atomic.h
+++ b/arch/riscv/include/asm/atomic.h
@@ -17,7 +17,6 @@
 #endif
 
 #include <asm/cmpxchg.h>
-#include <asm/barrier.h>
 
 #define __atomic_acquire_fence()					\
 	__asm__ __volatile__(RISCV_ACQUIRE_BARRIER "" ::: "memory")
@@ -207,7 +206,7 @@ static __always_inline int arch_atomic_fetch_add_unless(atomic_t *v, int a, int
 		"	add      %[rc], %[p], %[a]\n"
 		"	sc.w.rl  %[rc], %[rc], %[c]\n"
 		"	bnez     %[rc], 0b\n"
-		"	fence    rw, rw\n"
+		RISCV_FULL_BARRIER
 		"1:\n"
 		: [p]"=&r" (prev), [rc]"=&r" (rc), [c]"+A" (v->counter)
 		: [a]"r" (a), [u]"r" (u)
@@ -228,7 +227,7 @@ static __always_inline s64 arch_atomic64_fetch_add_unless(atomic64_t *v, s64 a,
 		"	add      %[rc], %[p], %[a]\n"
 		"	sc.d.rl  %[rc], %[rc], %[c]\n"
 		"	bnez     %[rc], 0b\n"
-		"	fence    rw, rw\n"
+		RISCV_FULL_BARRIER
 		"1:\n"
 		: [p]"=&r" (prev), [rc]"=&r" (rc), [c]"+A" (v->counter)
 		: [a]"r" (a), [u]"r" (u)
@@ -248,7 +247,7 @@ static __always_inline bool arch_atomic_inc_unless_negative(atomic_t *v)
 		"	addi      %[rc], %[p], 1\n"
 		"	sc.w.rl   %[rc], %[rc], %[c]\n"
 		"	bnez      %[rc], 0b\n"
-		"	fence     rw, rw\n"
+		RISCV_FULL_BARRIER
 		"1:\n"
 		: [p]"=&r" (prev), [rc]"=&r" (rc), [c]"+A" (v->counter)
 		:
@@ -268,7 +267,7 @@ static __always_inline bool arch_atomic_dec_unless_positive(atomic_t *v)
 		"	addi      %[rc], %[p], -1\n"
 		"	sc.w.rl   %[rc], %[rc], %[c]\n"
 		"	bnez      %[rc], 0b\n"
-		"	fence     rw, rw\n"
+		RISCV_FULL_BARRIER
 		"1:\n"
 		: [p]"=&r" (prev), [rc]"=&r" (rc), [c]"+A" (v->counter)
 		:
@@ -288,7 +287,7 @@ static __always_inline int arch_atomic_dec_if_positive(atomic_t *v)
 		"	bltz     %[rc], 1f\n"
 		"	sc.w.rl  %[rc], %[rc], %[c]\n"
 		"	bnez     %[rc], 0b\n"
-		"	fence    rw, rw\n"
+		RISCV_FULL_BARRIER
 		"1:\n"
 		: [p]"=&r" (prev), [rc]"=&r" (rc), [c]"+A" (v->counter)
 		:
@@ -310,7 +309,7 @@ static __always_inline bool arch_atomic64_inc_unless_negative(atomic64_t *v)
 		"	addi      %[rc], %[p], 1\n"
 		"	sc.d.rl   %[rc], %[rc], %[c]\n"
 		"	bnez      %[rc], 0b\n"
-		"	fence     rw, rw\n"
+		RISCV_FULL_BARRIER
 		"1:\n"
 		: [p]"=&r" (prev), [rc]"=&r" (rc), [c]"+A" (v->counter)
 		:
@@ -331,7 +330,7 @@ static __always_inline bool arch_atomic64_dec_unless_positive(atomic64_t *v)
 		"	addi      %[rc], %[p], -1\n"
 		"	sc.d.rl   %[rc], %[rc], %[c]\n"
 		"	bnez      %[rc], 0b\n"
-		"	fence     rw, rw\n"
+		RISCV_FULL_BARRIER
 		"1:\n"
 		: [p]"=&r" (prev), [rc]"=&r" (rc), [c]"+A" (v->counter)
 		:
@@ -352,7 +351,7 @@ static __always_inline s64 arch_atomic64_dec_if_positive(atomic64_t *v)
 		"	bltz     %[rc], 1f\n"
 		"	sc.d.rl  %[rc], %[rc], %[c]\n"
 		"	bnez     %[rc], 0b\n"
-		"	fence    rw, rw\n"
+		RISCV_FULL_BARRIER
 		"1:\n"
 		: [p]"=&r" (prev), [rc]"=&r" (rc), [c]"+A" (v->counter)
 		:
diff --git a/arch/riscv/include/asm/barrier.h b/arch/riscv/include/asm/barrier.h
index 110752594228..880b56d8480d 100644
--- a/arch/riscv/include/asm/barrier.h
+++ b/arch/riscv/include/asm/barrier.h
@@ -11,28 +11,27 @@
 #define _ASM_RISCV_BARRIER_H
 
 #ifndef __ASSEMBLY__
+#include <asm/fence.h>
 
 #define nop()		__asm__ __volatile__ ("nop")
 #define __nops(n)	".rept	" #n "\nnop\n.endr\n"
 #define nops(n)		__asm__ __volatile__ (__nops(n))
 
-#define RISCV_FENCE(p, s) \
-	__asm__ __volatile__ ("fence " #p "," #s : : : "memory")
 
 /* These barriers need to enforce ordering on both devices or memory. */
-#define mb()		RISCV_FENCE(iorw,iorw)
-#define rmb()		RISCV_FENCE(ir,ir)
-#define wmb()		RISCV_FENCE(ow,ow)
+#define __mb()		RISCV_FENCE(iorw, iorw)
+#define __rmb()		RISCV_FENCE(ir, ir)
+#define __wmb()		RISCV_FENCE(ow, ow)
 
 /* These barriers do not need to enforce ordering on devices, just memory. */
-#define __smp_mb()	RISCV_FENCE(rw,rw)
-#define __smp_rmb()	RISCV_FENCE(r,r)
-#define __smp_wmb()	RISCV_FENCE(w,w)
+#define __smp_mb()	RISCV_FENCE(rw, rw)
+#define __smp_rmb()	RISCV_FENCE(r, r)
+#define __smp_wmb()	RISCV_FENCE(w, w)
 
 #define __smp_store_release(p, v)					\
 do {									\
 	compiletime_assert_atomic_type(*p);				\
-	RISCV_FENCE(rw,w);						\
+	RISCV_FENCE(rw, w);						\
 	WRITE_ONCE(*p, v);						\
 } while (0)
 
@@ -40,7 +39,7 @@ do {									\
 ({									\
 	typeof(*p) ___p1 = READ_ONCE(*p);				\
 	compiletime_assert_atomic_type(*p);				\
-	RISCV_FENCE(r,rw);						\
+	RISCV_FENCE(r, rw);						\
 	___p1;								\
 })
 
@@ -69,7 +68,7 @@ do {									\
  * instances the scheduler pairs this with an mb(), so nothing is necessary on
  * the new hart.
  */
-#define smp_mb__after_spinlock()	RISCV_FENCE(iorw,iorw)
+#define smp_mb__after_spinlock()	RISCV_FENCE(iorw, iorw)
 
 #include <asm-generic/barrier.h>
 
diff --git a/arch/riscv/include/asm/bitops.h b/arch/riscv/include/asm/bitops.h
index 329d8244a9b3..880606b0469a 100644
--- a/arch/riscv/include/asm/bitops.h
+++ b/arch/riscv/include/asm/bitops.h
@@ -22,6 +22,16 @@
 #include <asm-generic/bitops/fls.h>
 
 #else
+#define __HAVE_ARCH___FFS
+#define __HAVE_ARCH___FLS
+#define __HAVE_ARCH_FFS
+#define __HAVE_ARCH_FLS
+
+#include <asm-generic/bitops/__ffs.h>
+#include <asm-generic/bitops/__fls.h>
+#include <asm-generic/bitops/ffs.h>
+#include <asm-generic/bitops/fls.h>
+
 #include <asm/alternative-macros.h>
 #include <asm/hwcap.h>
 
@@ -37,8 +47,6 @@
 
 static __always_inline unsigned long variable__ffs(unsigned long word)
 {
-	int num;
-
 	asm goto(ALTERNATIVE("j %l[legacy]", "nop", 0,
 				      RISCV_ISA_EXT_ZBB, 1)
 			  : : : : legacy);
@@ -52,32 +60,7 @@ static __always_inline unsigned long variable__ffs(unsigned long word)
 	return word;
 
 legacy:
-	num = 0;
-#if BITS_PER_LONG == 64
-	if ((word & 0xffffffff) == 0) {
-		num += 32;
-		word >>= 32;
-	}
-#endif
-	if ((word & 0xffff) == 0) {
-		num += 16;
-		word >>= 16;
-	}
-	if ((word & 0xff) == 0) {
-		num += 8;
-		word >>= 8;
-	}
-	if ((word & 0xf) == 0) {
-		num += 4;
-		word >>= 4;
-	}
-	if ((word & 0x3) == 0) {
-		num += 2;
-		word >>= 2;
-	}
-	if ((word & 0x1) == 0)
-		num += 1;
-	return num;
+	return generic___ffs(word);
 }
 
 /**
@@ -93,8 +76,6 @@ legacy:
 
 static __always_inline unsigned long variable__fls(unsigned long word)
 {
-	int num;
-
 	asm goto(ALTERNATIVE("j %l[legacy]", "nop", 0,
 				      RISCV_ISA_EXT_ZBB, 1)
 			  : : : : legacy);
@@ -108,32 +89,7 @@ static __always_inline unsigned long variable__fls(unsigned long word)
 	return BITS_PER_LONG - 1 - word;
 
 legacy:
-	num = BITS_PER_LONG - 1;
-#if BITS_PER_LONG == 64
-	if (!(word & (~0ul << 32))) {
-		num -= 32;
-		word <<= 32;
-	}
-#endif
-	if (!(word & (~0ul << (BITS_PER_LONG - 16)))) {
-		num -= 16;
-		word <<= 16;
-	}
-	if (!(word & (~0ul << (BITS_PER_LONG - 8)))) {
-		num -= 8;
-		word <<= 8;
-	}
-	if (!(word & (~0ul << (BITS_PER_LONG - 4)))) {
-		num -= 4;
-		word <<= 4;
-	}
-	if (!(word & (~0ul << (BITS_PER_LONG - 2)))) {
-		num -= 2;
-		word <<= 2;
-	}
-	if (!(word & (~0ul << (BITS_PER_LONG - 1))))
-		num -= 1;
-	return num;
+	return generic___fls(word);
 }
 
 /**
@@ -149,46 +105,23 @@ legacy:
 
 static __always_inline int variable_ffs(int x)
 {
-	int r;
-
-	if (!x)
-		return 0;
-
 	asm goto(ALTERNATIVE("j %l[legacy]", "nop", 0,
 				      RISCV_ISA_EXT_ZBB, 1)
 			  : : : : legacy);
 
+	if (!x)
+		return 0;
+
 	asm volatile (".option push\n"
 		      ".option arch,+zbb\n"
 		      CTZW "%0, %1\n"
 		      ".option pop\n"
-		      : "=r" (r) : "r" (x) :);
+		      : "=r" (x) : "r" (x) :);
 
-	return r + 1;
+	return x + 1;
 
 legacy:
-	r = 1;
-	if (!(x & 0xffff)) {
-		x >>= 16;
-		r += 16;
-	}
-	if (!(x & 0xff)) {
-		x >>= 8;
-		r += 8;
-	}
-	if (!(x & 0xf)) {
-		x >>= 4;
-		r += 4;
-	}
-	if (!(x & 3)) {
-		x >>= 2;
-		r += 2;
-	}
-	if (!(x & 1)) {
-		x >>= 1;
-		r += 1;
-	}
-	return r;
+	return generic_ffs(x);
 }
 
 /**
@@ -204,46 +137,23 @@ legacy:
 
 static __always_inline int variable_fls(unsigned int x)
 {
-	int r;
-
-	if (!x)
-		return 0;
-
 	asm goto(ALTERNATIVE("j %l[legacy]", "nop", 0,
 				      RISCV_ISA_EXT_ZBB, 1)
 			  : : : : legacy);
 
+	if (!x)
+		return 0;
+
 	asm volatile (".option push\n"
 		      ".option arch,+zbb\n"
 		      CLZW "%0, %1\n"
 		      ".option pop\n"
-		      : "=r" (r) : "r" (x) :);
+		      : "=r" (x) : "r" (x) :);
 
-	return 32 - r;
+	return 32 - x;
 
 legacy:
-	r = 32;
-	if (!(x & 0xffff0000u)) {
-		x <<= 16;
-		r -= 16;
-	}
-	if (!(x & 0xff000000u)) {
-		x <<= 8;
-		r -= 8;
-	}
-	if (!(x & 0xf0000000u)) {
-		x <<= 4;
-		r -= 4;
-	}
-	if (!(x & 0xc0000000u)) {
-		x <<= 2;
-		r -= 2;
-	}
-	if (!(x & 0x80000000u)) {
-		x <<= 1;
-		r -= 1;
-	}
-	return r;
+	return generic_fls(x);
 }
 
 /**
diff --git a/arch/riscv/include/asm/cmpxchg.h b/arch/riscv/include/asm/cmpxchg.h
index 2f4726d3cfcc..2fee65cc8443 100644
--- a/arch/riscv/include/asm/cmpxchg.h
+++ b/arch/riscv/include/asm/cmpxchg.h
@@ -8,7 +8,6 @@
 
 #include <linux/bug.h>
 
-#include <asm/barrier.h>
 #include <asm/fence.h>
 
 #define __xchg_relaxed(ptr, new, size)					\
@@ -313,7 +312,7 @@
 			"	bne  %0, %z3, 1f\n"			\
 			"	sc.w.rl %1, %z4, %2\n"			\
 			"	bnez %1, 0b\n"				\
-			"	fence rw, rw\n"				\
+			RISCV_FULL_BARRIER				\
 			"1:\n"						\
 			: "=&r" (__ret), "=&r" (__rc), "+A" (*__ptr)	\
 			: "rJ" ((long)__old), "rJ" (__new)		\
@@ -325,7 +324,7 @@
 			"	bne %0, %z3, 1f\n"			\
 			"	sc.d.rl %1, %z4, %2\n"			\
 			"	bnez %1, 0b\n"				\
-			"	fence rw, rw\n"				\
+			RISCV_FULL_BARRIER				\
 			"1:\n"						\
 			: "=&r" (__ret), "=&r" (__rc), "+A" (*__ptr)	\
 			: "rJ" (__old), "rJ" (__new)			\
diff --git a/arch/riscv/include/asm/compat.h b/arch/riscv/include/asm/compat.h
index 2ac955b51148..aa103530a5c8 100644
--- a/arch/riscv/include/asm/compat.h
+++ b/arch/riscv/include/asm/compat.h
@@ -14,9 +14,28 @@
 
 static inline int is_compat_task(void)
 {
+	if (!IS_ENABLED(CONFIG_COMPAT))
+		return 0;
+
 	return test_thread_flag(TIF_32BIT);
 }
 
+static inline int is_compat_thread(struct thread_info *thread)
+{
+	if (!IS_ENABLED(CONFIG_COMPAT))
+		return 0;
+
+	return test_ti_thread_flag(thread, TIF_32BIT);
+}
+
+static inline void set_compat_task(bool is_compat)
+{
+	if (is_compat)
+		set_thread_flag(TIF_32BIT);
+	else
+		clear_thread_flag(TIF_32BIT);
+}
+
 struct compat_user_regs_struct {
 	compat_ulong_t pc;
 	compat_ulong_t ra;
diff --git a/arch/riscv/include/asm/cpufeature.h b/arch/riscv/include/asm/cpufeature.h
index 0bd11862b760..347805446151 100644
--- a/arch/riscv/include/asm/cpufeature.h
+++ b/arch/riscv/include/asm/cpufeature.h
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0-only */
 /*
- * Copyright 2022-2023 Rivos, Inc
+ * Copyright 2022-2024 Rivos, Inc
  */
 
 #ifndef _ASM_CPUFEATURE_H
@@ -28,29 +28,38 @@ struct riscv_isainfo {
 
 DECLARE_PER_CPU(struct riscv_cpuinfo, riscv_cpuinfo);
 
-DECLARE_PER_CPU(long, misaligned_access_speed);
-
 /* Per-cpu ISA extensions. */
 extern struct riscv_isainfo hart_isa[NR_CPUS];
 
 void riscv_user_isa_enable(void);
 
-#ifdef CONFIG_RISCV_MISALIGNED
-bool unaligned_ctl_available(void);
-bool check_unaligned_access_emulated(int cpu);
+#if defined(CONFIG_RISCV_MISALIGNED)
+bool check_unaligned_access_emulated_all_cpus(void);
 void unaligned_emulation_finish(void);
+bool unaligned_ctl_available(void);
+DECLARE_PER_CPU(long, misaligned_access_speed);
 #else
 static inline bool unaligned_ctl_available(void)
 {
 	return false;
 }
+#endif
+
+#if defined(CONFIG_RISCV_PROBE_UNALIGNED_ACCESS)
+DECLARE_STATIC_KEY_FALSE(fast_unaligned_access_speed_key);
 
-static inline bool check_unaligned_access_emulated(int cpu)
+static __always_inline bool has_fast_unaligned_accesses(void)
 {
-	return false;
+	return static_branch_likely(&fast_unaligned_access_speed_key);
+}
+#else
+static __always_inline bool has_fast_unaligned_accesses(void)
+{
+	if (IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS))
+		return true;
+	else
+		return false;
 }
-
-static inline void unaligned_emulation_finish(void) {}
 #endif
 
 unsigned long riscv_get_elf_hwcap(void);
@@ -135,6 +144,4 @@ static __always_inline bool riscv_cpu_has_extension_unlikely(int cpu, const unsi
 	return __riscv_isa_extension_available(hart_isa[cpu].isa, ext);
 }
 
-DECLARE_STATIC_KEY_FALSE(fast_misaligned_access_speed_key);
-
 #endif
diff --git a/arch/riscv/include/asm/elf.h b/arch/riscv/include/asm/elf.h
index 06c236bfab53..c7aea7886d22 100644
--- a/arch/riscv/include/asm/elf.h
+++ b/arch/riscv/include/asm/elf.h
@@ -53,13 +53,9 @@ extern bool compat_elf_check_arch(Elf32_Ehdr *hdr);
 #define ELF_ET_DYN_BASE		((DEFAULT_MAP_WINDOW / 3) * 2)
 
 #ifdef CONFIG_64BIT
-#ifdef CONFIG_COMPAT
-#define STACK_RND_MASK		(test_thread_flag(TIF_32BIT) ? \
+#define STACK_RND_MASK		(is_compat_task() ? \
 				 0x7ff >> (PAGE_SHIFT - 12) : \
 				 0x3ffff >> (PAGE_SHIFT - 12))
-#else
-#define STACK_RND_MASK		(0x3ffff >> (PAGE_SHIFT - 12))
-#endif
 #endif
 
 /*
@@ -139,10 +135,7 @@ do {							\
 #ifdef CONFIG_COMPAT
 
 #define SET_PERSONALITY(ex)					\
-do {    if ((ex).e_ident[EI_CLASS] == ELFCLASS32)		\
-		set_thread_flag(TIF_32BIT);			\
-	else							\
-		clear_thread_flag(TIF_32BIT);			\
+do {	set_compat_task((ex).e_ident[EI_CLASS] == ELFCLASS32);	\
 	if (personality(current->personality) != PER_LINUX32)	\
 		set_personality(PER_LINUX |			\
 			(current->personality & (~PER_MASK)));	\
diff --git a/arch/riscv/include/asm/errata_list.h b/arch/riscv/include/asm/errata_list.h
index ea33288f8a25..1f2dbfb8a8bf 100644
--- a/arch/riscv/include/asm/errata_list.h
+++ b/arch/riscv/include/asm/errata_list.h
@@ -12,8 +12,8 @@
 #include <asm/vendorid_list.h>
 
 #ifdef CONFIG_ERRATA_ANDES
-#define ERRATA_ANDESTECH_NO_IOCP	0
-#define ERRATA_ANDESTECH_NUMBER		1
+#define ERRATA_ANDES_NO_IOCP 0
+#define ERRATA_ANDES_NUMBER 1
 #endif
 
 #ifdef CONFIG_ERRATA_SIFIVE
@@ -112,15 +112,6 @@ asm volatile(ALTERNATIVE(						\
 #define THEAD_C9XX_RV_IRQ_PMU			17
 #define THEAD_C9XX_CSR_SCOUNTEROF		0x5c5
 
-#define ALT_SBI_PMU_OVERFLOW(__ovl)					\
-asm volatile(ALTERNATIVE(						\
-	"csrr %0, " __stringify(CSR_SSCOUNTOVF),			\
-	"csrr %0, " __stringify(THEAD_C9XX_CSR_SCOUNTEROF),		\
-		THEAD_VENDOR_ID, ERRATA_THEAD_PMU,			\
-		CONFIG_ERRATA_THEAD_PMU)				\
-	: "=r" (__ovl) :						\
-	: "memory")
-
 #endif /* __ASSEMBLY__ */
 
 #endif
diff --git a/arch/riscv/include/asm/fence.h b/arch/riscv/include/asm/fence.h
index 2b443a3a487f..6bcd80325dfc 100644
--- a/arch/riscv/include/asm/fence.h
+++ b/arch/riscv/include/asm/fence.h
@@ -1,12 +1,18 @@
 #ifndef _ASM_RISCV_FENCE_H
 #define _ASM_RISCV_FENCE_H
 
+#define RISCV_FENCE_ASM(p, s)		"\tfence " #p "," #s "\n"
+#define RISCV_FENCE(p, s) \
+	({ __asm__ __volatile__ (RISCV_FENCE_ASM(p, s) : : : "memory"); })
+
 #ifdef CONFIG_SMP
-#define RISCV_ACQUIRE_BARRIER		"\tfence r , rw\n"
-#define RISCV_RELEASE_BARRIER		"\tfence rw,  w\n"
+#define RISCV_ACQUIRE_BARRIER		RISCV_FENCE_ASM(r, rw)
+#define RISCV_RELEASE_BARRIER		RISCV_FENCE_ASM(rw, w)
+#define RISCV_FULL_BARRIER		RISCV_FENCE_ASM(rw, rw)
 #else
 #define RISCV_ACQUIRE_BARRIER
 #define RISCV_RELEASE_BARRIER
+#define RISCV_FULL_BARRIER
 #endif
 
 #endif	/* _ASM_RISCV_FENCE_H */
diff --git a/arch/riscv/include/asm/hwcap.h b/arch/riscv/include/asm/hwcap.h
index 1f2d2599c655..e17d0078a651 100644
--- a/arch/riscv/include/asm/hwcap.h
+++ b/arch/riscv/include/asm/hwcap.h
@@ -80,6 +80,7 @@
 #define RISCV_ISA_EXT_ZFA		71
 #define RISCV_ISA_EXT_ZTSO		72
 #define RISCV_ISA_EXT_ZACAS		73
+#define RISCV_ISA_EXT_XANDESPMU		74
 
 #define RISCV_ISA_EXT_XLINUXENVCFG	127
 
diff --git a/arch/riscv/include/asm/io.h b/arch/riscv/include/asm/io.h
index 42497d487a17..1c5c641075d2 100644
--- a/arch/riscv/include/asm/io.h
+++ b/arch/riscv/include/asm/io.h
@@ -47,10 +47,10 @@
  * sufficient to ensure this works sanely on controllers that support I/O
  * writes.
  */
-#define __io_pbr()	__asm__ __volatile__ ("fence io,i"  : : : "memory");
-#define __io_par(v)	__asm__ __volatile__ ("fence i,ior" : : : "memory");
-#define __io_pbw()	__asm__ __volatile__ ("fence iow,o" : : : "memory");
-#define __io_paw()	__asm__ __volatile__ ("fence o,io"  : : : "memory");
+#define __io_pbr()	RISCV_FENCE(io, i)
+#define __io_par(v)	RISCV_FENCE(i, ior)
+#define __io_pbw()	RISCV_FENCE(iow, o)
+#define __io_paw()	RISCV_FENCE(o, io)
 
 /*
  * Accesses from a single hart to a single I/O address must be ordered.  This
diff --git a/arch/riscv/include/asm/membarrier.h b/arch/riscv/include/asm/membarrier.h
new file mode 100644
index 000000000000..47b240d0d596
--- /dev/null
+++ b/arch/riscv/include/asm/membarrier.h
@@ -0,0 +1,50 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef _ASM_RISCV_MEMBARRIER_H
+#define _ASM_RISCV_MEMBARRIER_H
+
+static inline void membarrier_arch_switch_mm(struct mm_struct *prev,
+					     struct mm_struct *next,
+					     struct task_struct *tsk)
+{
+	/*
+	 * Only need the full barrier when switching between processes.
+	 * Barrier when switching from kernel to userspace is not
+	 * required here, given that it is implied by mmdrop(). Barrier
+	 * when switching from userspace to kernel is not needed after
+	 * store to rq->curr.
+	 */
+	if (IS_ENABLED(CONFIG_SMP) &&
+	    likely(!(atomic_read(&next->membarrier_state) &
+		     (MEMBARRIER_STATE_PRIVATE_EXPEDITED |
+		      MEMBARRIER_STATE_GLOBAL_EXPEDITED)) || !prev))
+		return;
+
+	/*
+	 * The membarrier system call requires a full memory barrier
+	 * after storing to rq->curr, before going back to user-space.
+	 *
+	 * This barrier is also needed for the SYNC_CORE command when
+	 * switching between processes; in particular, on a transition
+	 * from a thread belonging to another mm to a thread belonging
+	 * to the mm for which a membarrier SYNC_CORE is done on CPU0:
+	 *
+	 *   - [CPU0] sets all bits in the mm icache_stale_mask (in
+	 *     prepare_sync_core_cmd());
+	 *
+	 *   - [CPU1] stores to rq->curr (by the scheduler);
+	 *
+	 *   - [CPU0] loads rq->curr within membarrier and observes
+	 *     cpu_rq(1)->curr->mm != mm, so the IPI is skipped on
+	 *     CPU1; this means membarrier relies on switch_mm() to
+	 *     issue the sync-core;
+	 *
+	 *   - [CPU1] switch_mm() loads icache_stale_mask; if the bit
+	 *     is zero, switch_mm() may incorrectly skip the sync-core.
+	 *
+	 * Matches a full barrier in the proximity of the membarrier
+	 * system call entry.
+	 */
+	smp_mb();
+}
+
+#endif /* _ASM_RISCV_MEMBARRIER_H */
diff --git a/arch/riscv/include/asm/mmio.h b/arch/riscv/include/asm/mmio.h
index 4c58ee7f95ec..06cadfd7a237 100644
--- a/arch/riscv/include/asm/mmio.h
+++ b/arch/riscv/include/asm/mmio.h
@@ -12,6 +12,7 @@
 #define _ASM_RISCV_MMIO_H
 
 #include <linux/types.h>
+#include <asm/fence.h>
 #include <asm/mmiowb.h>
 
 /* Generic IO read/write.  These perform native-endian accesses. */
@@ -131,8 +132,8 @@ static inline u64 __raw_readq(const volatile void __iomem *addr)
  * doesn't define any ordering between the memory space and the I/O space.
  */
 #define __io_br()	do {} while (0)
-#define __io_ar(v)	({ __asm__ __volatile__ ("fence i,ir" : : : "memory"); })
-#define __io_bw()	({ __asm__ __volatile__ ("fence w,o" : : : "memory"); })
+#define __io_ar(v)	RISCV_FENCE(i, ir)
+#define __io_bw()	RISCV_FENCE(w, o)
 #define __io_aw()	mmiowb_set_pending()
 
 #define readb(c)	({ u8  __v; __io_br(); __v = readb_cpu(c); __io_ar(__v); __v; })
diff --git a/arch/riscv/include/asm/mmiowb.h b/arch/riscv/include/asm/mmiowb.h
index 0b2333e71fdc..52ce4a399d9b 100644
--- a/arch/riscv/include/asm/mmiowb.h
+++ b/arch/riscv/include/asm/mmiowb.h
@@ -7,7 +7,7 @@
  * "o,w" is sufficient to ensure that all writes to the device have completed
  * before the write to the spinlock is allowed to commit.
  */
-#define mmiowb()	__asm__ __volatile__ ("fence o,w" : : : "memory");
+#define mmiowb()	RISCV_FENCE(o, w)
 
 #include <linux/smp.h>
 #include <asm-generic/mmiowb.h>
diff --git a/arch/riscv/include/asm/pgalloc.h b/arch/riscv/include/asm/pgalloc.h
index c80bb9990d32..deaf971253a2 100644
--- a/arch/riscv/include/asm/pgalloc.h
+++ b/arch/riscv/include/asm/pgalloc.h
@@ -95,13 +95,19 @@ static inline void pud_free(struct mm_struct *mm, pud_t *pud)
 		__pud_free(mm, pud);
 }
 
-#define __pud_free_tlb(tlb, pud, addr)					\
-do {									\
-	if (pgtable_l4_enabled) {					\
-		pagetable_pud_dtor(virt_to_ptdesc(pud));		\
-		tlb_remove_page_ptdesc((tlb), virt_to_ptdesc(pud));	\
-	}								\
-} while (0)
+static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud,
+				  unsigned long addr)
+{
+	if (pgtable_l4_enabled) {
+		struct ptdesc *ptdesc = virt_to_ptdesc(pud);
+
+		pagetable_pud_dtor(ptdesc);
+		if (riscv_use_ipi_for_rfence())
+			tlb_remove_page_ptdesc(tlb, ptdesc);
+		else
+			tlb_remove_ptdesc(tlb, ptdesc);
+	}
+}
 
 #define p4d_alloc_one p4d_alloc_one
 static inline p4d_t *p4d_alloc_one(struct mm_struct *mm, unsigned long addr)
@@ -130,11 +136,16 @@ static inline void p4d_free(struct mm_struct *mm, p4d_t *p4d)
 		__p4d_free(mm, p4d);
 }
 
-#define __p4d_free_tlb(tlb, p4d, addr)					\
-do {									\
-	if (pgtable_l5_enabled)						\
-		tlb_remove_page_ptdesc((tlb), virt_to_ptdesc(p4d));	\
-} while (0)
+static inline void __p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d,
+				  unsigned long addr)
+{
+	if (pgtable_l5_enabled) {
+		if (riscv_use_ipi_for_rfence())
+			tlb_remove_page_ptdesc(tlb, virt_to_ptdesc(p4d));
+		else
+			tlb_remove_ptdesc(tlb, virt_to_ptdesc(p4d));
+	}
+}
 #endif /* __PAGETABLE_PMD_FOLDED */
 
 static inline void sync_kernel_mappings(pgd_t *pgd)
@@ -159,19 +170,31 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm)
 
 #ifndef __PAGETABLE_PMD_FOLDED
 
-#define __pmd_free_tlb(tlb, pmd, addr)				\
-do {								\
-	pagetable_pmd_dtor(virt_to_ptdesc(pmd));		\
-	tlb_remove_page_ptdesc((tlb), virt_to_ptdesc(pmd));	\
-} while (0)
+static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd,
+				  unsigned long addr)
+{
+	struct ptdesc *ptdesc = virt_to_ptdesc(pmd);
+
+	pagetable_pmd_dtor(ptdesc);
+	if (riscv_use_ipi_for_rfence())
+		tlb_remove_page_ptdesc(tlb, ptdesc);
+	else
+		tlb_remove_ptdesc(tlb, ptdesc);
+}
 
 #endif /* __PAGETABLE_PMD_FOLDED */
 
-#define __pte_free_tlb(tlb, pte, buf)			\
-do {							\
-	pagetable_pte_dtor(page_ptdesc(pte));		\
-	tlb_remove_page_ptdesc((tlb), page_ptdesc(pte));\
-} while (0)
+static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte,
+				  unsigned long addr)
+{
+	struct ptdesc *ptdesc = page_ptdesc(pte);
+
+	pagetable_pte_dtor(ptdesc);
+	if (riscv_use_ipi_for_rfence())
+		tlb_remove_page_ptdesc(tlb, ptdesc);
+	else
+		tlb_remove_ptdesc(tlb, ptdesc);
+}
 #endif /* CONFIG_MMU */
 
 #endif /* _ASM_RISCV_PGALLOC_H */
diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
index 20242402fc11..97fcde30e247 100644
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -127,17 +127,11 @@
 #define VA_USER_SV48 (UL(1) << (VA_BITS_SV48 - 1))
 #define VA_USER_SV57 (UL(1) << (VA_BITS_SV57 - 1))
 
-#ifdef CONFIG_COMPAT
 #define MMAP_VA_BITS_64 ((VA_BITS >= VA_BITS_SV48) ? VA_BITS_SV48 : VA_BITS)
 #define MMAP_MIN_VA_BITS_64 (VA_BITS_SV39)
 #define MMAP_VA_BITS (is_compat_task() ? VA_BITS_SV32 : MMAP_VA_BITS_64)
 #define MMAP_MIN_VA_BITS (is_compat_task() ? VA_BITS_SV32 : MMAP_MIN_VA_BITS_64)
 #else
-#define MMAP_VA_BITS ((VA_BITS >= VA_BITS_SV48) ? VA_BITS_SV48 : VA_BITS)
-#define MMAP_MIN_VA_BITS (VA_BITS_SV39)
-#endif /* CONFIG_COMPAT */
-
-#else
 #include <asm/pgtable-32.h>
 #endif /* CONFIG_64BIT */
 
@@ -439,9 +433,11 @@ static inline pte_t pte_mkhuge(pte_t pte)
 	return pte;
 }
 
+#ifdef CONFIG_RISCV_ISA_SVNAPOT
 #define pte_leaf_size(pte)	(pte_napot(pte) ?				\
 					napot_cont_size(napot_cont_order(pte)) :\
 					PAGE_SIZE)
+#endif
 
 #ifdef CONFIG_NUMA_BALANCING
 /*
@@ -517,12 +513,12 @@ static inline void set_pte(pte_t *ptep, pte_t pteval)
 	WRITE_ONCE(*ptep, pteval);
 }
 
-void flush_icache_pte(pte_t pte);
+void flush_icache_pte(struct mm_struct *mm, pte_t pte);
 
-static inline void __set_pte_at(pte_t *ptep, pte_t pteval)
+static inline void __set_pte_at(struct mm_struct *mm, pte_t *ptep, pte_t pteval)
 {
 	if (pte_present(pteval) && pte_exec(pteval))
-		flush_icache_pte(pteval);
+		flush_icache_pte(mm, pteval);
 
 	set_pte(ptep, pteval);
 }
@@ -535,7 +531,7 @@ static inline void set_ptes(struct mm_struct *mm, unsigned long addr,
 	page_table_check_ptes_set(mm, ptep, pteval, nr);
 
 	for (;;) {
-		__set_pte_at(ptep, pteval);
+		__set_pte_at(mm, ptep, pteval);
 		if (--nr == 0)
 			break;
 		ptep++;
@@ -547,7 +543,7 @@ static inline void set_ptes(struct mm_struct *mm, unsigned long addr,
 static inline void pte_clear(struct mm_struct *mm,
 	unsigned long addr, pte_t *ptep)
 {
-	__set_pte_at(ptep, __pte(0));
+	__set_pte_at(mm, ptep, __pte(0));
 }
 
 #define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS	/* defined in mm/pgtable.c */
@@ -662,6 +658,12 @@ static inline int pmd_write(pmd_t pmd)
 	return pte_write(pmd_pte(pmd));
 }
 
+#define pud_write pud_write
+static inline int pud_write(pud_t pud)
+{
+	return pte_write(pud_pte(pud));
+}
+
 #define pmd_dirty pmd_dirty
 static inline int pmd_dirty(pmd_t pmd)
 {
@@ -713,14 +715,14 @@ static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
 				pmd_t *pmdp, pmd_t pmd)
 {
 	page_table_check_pmd_set(mm, pmdp, pmd);
-	return __set_pte_at((pte_t *)pmdp, pmd_pte(pmd));
+	return __set_pte_at(mm, (pte_t *)pmdp, pmd_pte(pmd));
 }
 
 static inline void set_pud_at(struct mm_struct *mm, unsigned long addr,
 				pud_t *pudp, pud_t pud)
 {
 	page_table_check_pud_set(mm, pudp, pud);
-	return __set_pte_at((pte_t *)pudp, pud_pte(pud));
+	return __set_pte_at(mm, (pte_t *)pudp, pud_pte(pud));
 }
 
 #ifdef CONFIG_PAGE_TABLE_CHECK
@@ -871,8 +873,8 @@ static inline pte_t pte_swp_clear_exclusive(pte_t pte)
 #define TASK_SIZE_MIN	(PGDIR_SIZE_L3 * PTRS_PER_PGD / 2)
 
 #ifdef CONFIG_COMPAT
-#define TASK_SIZE_32	(_AC(0x80000000, UL))
-#define TASK_SIZE	(test_thread_flag(TIF_32BIT) ? \
+#define TASK_SIZE_32	(_AC(0x80000000, UL) - PAGE_SIZE)
+#define TASK_SIZE	(is_compat_task() ? \
 			 TASK_SIZE_32 : TASK_SIZE_64)
 #else
 #define TASK_SIZE	TASK_SIZE_64
diff --git a/arch/riscv/include/asm/processor.h b/arch/riscv/include/asm/processor.h
index a8509cc31ab2..0faf5f161f1e 100644
--- a/arch/riscv/include/asm/processor.h
+++ b/arch/riscv/include/asm/processor.h
@@ -14,22 +14,21 @@
 
 #include <asm/ptrace.h>
 
-#ifdef CONFIG_64BIT
-#define DEFAULT_MAP_WINDOW	(UL(1) << (MMAP_VA_BITS - 1))
-#define STACK_TOP_MAX		TASK_SIZE
-
+/*
+ * addr is a hint to the maximum userspace address that mmap should provide, so
+ * this macro needs to return the largest address space available so that
+ * mmap_end < addr, being mmap_end the top of that address space.
+ * See Documentation/arch/riscv/vm-layout.rst for more details.
+ */
 #define arch_get_mmap_end(addr, len, flags)			\
 ({								\
 	unsigned long mmap_end;					\
 	typeof(addr) _addr = (addr);				\
-	if ((_addr) == 0 || (IS_ENABLED(CONFIG_COMPAT) && is_compat_task())) \
-		mmap_end = STACK_TOP_MAX;			\
-	else if ((_addr) >= VA_USER_SV57)			\
+	if ((_addr) == 0 || is_compat_task() ||			\
+	    ((_addr + len) > BIT(VA_BITS - 1)))			\
 		mmap_end = STACK_TOP_MAX;			\
-	else if ((((_addr) >= VA_USER_SV48)) && (VA_BITS >= VA_BITS_SV48)) \
-		mmap_end = VA_USER_SV48;			\
 	else							\
-		mmap_end = VA_USER_SV39;			\
+		mmap_end = (_addr + len);			\
 	mmap_end;						\
 })
 
@@ -39,17 +38,17 @@
 	typeof(addr) _addr = (addr);				\
 	typeof(base) _base = (base);				\
 	unsigned long rnd_gap = DEFAULT_MAP_WINDOW - (_base);	\
-	if ((_addr) == 0 || (IS_ENABLED(CONFIG_COMPAT) && is_compat_task())) \
+	if ((_addr) == 0 || is_compat_task() || 		\
+	    ((_addr + len) > BIT(VA_BITS - 1)))			\
 		mmap_base = (_base);				\
-	else if (((_addr) >= VA_USER_SV57) && (VA_BITS >= VA_BITS_SV57)) \
-		mmap_base = VA_USER_SV57 - rnd_gap;		\
-	else if ((((_addr) >= VA_USER_SV48)) && (VA_BITS >= VA_BITS_SV48)) \
-		mmap_base = VA_USER_SV48 - rnd_gap;		\
 	else							\
-		mmap_base = VA_USER_SV39 - rnd_gap;		\
+		mmap_base = (_addr + len) - rnd_gap;		\
 	mmap_base;						\
 })
 
+#ifdef CONFIG_64BIT
+#define DEFAULT_MAP_WINDOW	(UL(1) << (MMAP_VA_BITS - 1))
+#define STACK_TOP_MAX		TASK_SIZE_64
 #else
 #define DEFAULT_MAP_WINDOW	TASK_SIZE
 #define STACK_TOP_MAX		TASK_SIZE
diff --git a/arch/riscv/include/asm/simd.h b/arch/riscv/include/asm/simd.h
index 54efbf523d49..adb50f3ec205 100644
--- a/arch/riscv/include/asm/simd.h
+++ b/arch/riscv/include/asm/simd.h
@@ -34,9 +34,9 @@ static __must_check inline bool may_use_simd(void)
 		return false;
 
 	/*
-	 * Nesting is acheived in preempt_v by spreading the control for
+	 * Nesting is achieved in preempt_v by spreading the control for
 	 * preemptible and non-preemptible kernel-mode Vector into two fields.
-	 * Always try to match with prempt_v if kernel V-context exists. Then,
+	 * Always try to match with preempt_v if kernel V-context exists. Then,
 	 * fallback to check non preempt_v if nesting happens, or if the config
 	 * is not set.
 	 */
diff --git a/arch/riscv/include/asm/suspend.h b/arch/riscv/include/asm/suspend.h
index 491296a335d0..4718096fa5e3 100644
--- a/arch/riscv/include/asm/suspend.h
+++ b/arch/riscv/include/asm/suspend.h
@@ -56,4 +56,7 @@ int hibernate_resume_nonboot_cpu_disable(void);
 asmlinkage void hibernate_restore_image(unsigned long resume_satp, unsigned long satp_temp,
 					unsigned long cpu_resume);
 asmlinkage int hibernate_core_restore_code(void);
+bool riscv_sbi_hsm_is_supported(void);
+bool riscv_sbi_suspend_state_is_valid(u32 state);
+int riscv_sbi_hart_suspend(u32 state);
 #endif
diff --git a/arch/riscv/include/asm/sync_core.h b/arch/riscv/include/asm/sync_core.h
new file mode 100644
index 000000000000..9153016da8f1
--- /dev/null
+++ b/arch/riscv/include/asm/sync_core.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_RISCV_SYNC_CORE_H
+#define _ASM_RISCV_SYNC_CORE_H
+
+/*
+ * RISC-V implements return to user-space through an xRET instruction,
+ * which is not core serializing.
+ */
+static inline void sync_core_before_usermode(void)
+{
+	asm volatile ("fence.i" ::: "memory");
+}
+
+#ifdef CONFIG_SMP
+/*
+ * Ensure the next switch_mm() on every CPU issues a core serializing
+ * instruction for the given @mm.
+ */
+static inline void prepare_sync_core_cmd(struct mm_struct *mm)
+{
+	cpumask_setall(&mm->context.icache_stale_mask);
+}
+#else
+static inline void prepare_sync_core_cmd(struct mm_struct *mm)
+{
+}
+#endif /* CONFIG_SMP */
+
+#endif /* _ASM_RISCV_SYNC_CORE_H */
diff --git a/arch/riscv/include/asm/syscall_wrapper.h b/arch/riscv/include/asm/syscall_wrapper.h
index eeec04b7dae6..980094c2e976 100644
--- a/arch/riscv/include/asm/syscall_wrapper.h
+++ b/arch/riscv/include/asm/syscall_wrapper.h
@@ -12,25 +12,51 @@
 
 asmlinkage long __riscv_sys_ni_syscall(const struct pt_regs *);
 
-#define SC_RISCV_REGS_TO_ARGS(x, ...)				\
-	__MAP(x,__SC_ARGS					\
-	      ,,regs->orig_a0,,regs->a1,,regs->a2		\
+#ifdef CONFIG_64BIT
+
+#define __SYSCALL_SE_DEFINEx(x, prefix, name, ...)					\
+	static long __se_##prefix##name(__MAP(x,__SC_LONG,__VA_ARGS__));		\
+	static long __se_##prefix##name(__MAP(x,__SC_LONG,__VA_ARGS__))
+
+#define SC_RISCV_REGS_TO_ARGS(x, ...)							\
+	__MAP(x,__SC_ARGS								\
+	      ,,regs->orig_a0,,regs->a1,,regs->a2					\
 	      ,,regs->a3,,regs->a4,,regs->a5,,regs->a6)
 
+#else
+/*
+ * Use type aliasing to ensure registers a0-a6 are correctly passed to the syscall
+ * implementation when >word-size arguments are used.
+ */
+#define __SYSCALL_SE_DEFINEx(x, prefix, name, ...)					\
+	__diag_push();									\
+	__diag_ignore(GCC, 8, "-Wattribute-alias",					\
+			"Type aliasing is used to sanitize syscall arguments");		\
+	static long __se_##prefix##name(ulong, ulong, ulong, ulong, ulong, ulong, 	\
+					ulong)						\
+			__attribute__((alias(__stringify(___se_##prefix##name))));	\
+	__diag_pop();									\
+	static long noinline ___se_##prefix##name(__MAP(x,__SC_LONG,__VA_ARGS__));	\
+	static long ___se_##prefix##name(__MAP(x,__SC_LONG,__VA_ARGS__))
+
+#define SC_RISCV_REGS_TO_ARGS(x, ...) \
+	regs->orig_a0,regs->a1,regs->a2,regs->a3,regs->a4,regs->a5,regs->a6
+
+#endif /* CONFIG_64BIT */
+
 #ifdef CONFIG_COMPAT
 
 #define COMPAT_SYSCALL_DEFINEx(x, name, ...)						\
 	asmlinkage long __riscv_compat_sys##name(const struct pt_regs *regs);		\
 	ALLOW_ERROR_INJECTION(__riscv_compat_sys##name, ERRNO);				\
-	static long __se_compat_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__));		\
 	static inline long __do_compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__));	\
-	asmlinkage long __riscv_compat_sys##name(const struct pt_regs *regs)		\
+	__SYSCALL_SE_DEFINEx(x, compat_sys, name, __VA_ARGS__)				\
 	{										\
-		return __se_compat_sys##name(SC_RISCV_REGS_TO_ARGS(x,__VA_ARGS__));	\
+		return __do_compat_sys##name(__MAP(x,__SC_DELOUSE,__VA_ARGS__));	\
 	}										\
-	static long __se_compat_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__))		\
+	asmlinkage long __riscv_compat_sys##name(const struct pt_regs *regs)		\
 	{										\
-		return __do_compat_sys##name(__MAP(x,__SC_DELOUSE,__VA_ARGS__));	\
+		return __se_compat_sys##name(SC_RISCV_REGS_TO_ARGS(x,__VA_ARGS__));	\
 	}										\
 	static inline long __do_compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__))
 
@@ -51,19 +77,18 @@ asmlinkage long __riscv_sys_ni_syscall(const struct pt_regs *);
 #define __SYSCALL_DEFINEx(x, name, ...)						\
 	asmlinkage long __riscv_sys##name(const struct pt_regs *regs);		\
 	ALLOW_ERROR_INJECTION(__riscv_sys##name, ERRNO);			\
-	static long __se_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__));		\
 	static inline long __do_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__));	\
-	asmlinkage long __riscv_sys##name(const struct pt_regs *regs)		\
-	{									\
-		return __se_sys##name(SC_RISCV_REGS_TO_ARGS(x,__VA_ARGS__));	\
-	}									\
-	static long __se_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__))		\
+	__SYSCALL_SE_DEFINEx(x, sys, name, __VA_ARGS__)				\
 	{									\
 		long ret = __do_sys##name(__MAP(x,__SC_CAST,__VA_ARGS__));	\
 		__MAP(x,__SC_TEST,__VA_ARGS__);					\
 		__PROTECT(x, ret,__MAP(x,__SC_ARGS,__VA_ARGS__));		\
 		return ret;							\
 	}									\
+	asmlinkage long __riscv_sys##name(const struct pt_regs *regs)		\
+	{									\
+		return __se_sys##name(SC_RISCV_REGS_TO_ARGS(x,__VA_ARGS__));	\
+	}									\
 	static inline long __do_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__))
 
 #define SYSCALL_DEFINE0(sname)							\
diff --git a/arch/riscv/include/asm/tlb.h b/arch/riscv/include/asm/tlb.h
index 50b63b5c15bd..1f6c38420d8e 100644
--- a/arch/riscv/include/asm/tlb.h
+++ b/arch/riscv/include/asm/tlb.h
@@ -10,6 +10,24 @@ struct mmu_gather;
 
 static void tlb_flush(struct mmu_gather *tlb);
 
+#ifdef CONFIG_MMU
+#include <linux/swap.h>
+
+/*
+ * While riscv platforms with riscv_ipi_for_rfence as true require an IPI to
+ * perform TLB shootdown, some platforms with riscv_ipi_for_rfence as false use
+ * SBI to perform TLB shootdown. To keep software pagetable walkers safe in this
+ * case we switch to RCU based table free (MMU_GATHER_RCU_TABLE_FREE). See the
+ * comment below 'ifdef CONFIG_MMU_GATHER_RCU_TABLE_FREE' in include/asm-generic/tlb.h
+ * for more details.
+ */
+static inline void __tlb_remove_table(void *table)
+{
+	free_page_and_swap_cache(table);
+}
+
+#endif /* CONFIG_MMU */
+
 #define tlb_flush tlb_flush
 #include <asm-generic/tlb.h>
 
diff --git a/arch/riscv/include/asm/vector.h b/arch/riscv/include/asm/vector.h
index 0cd6f0a027d1..731dcd0ed4de 100644
--- a/arch/riscv/include/asm/vector.h
+++ b/arch/riscv/include/asm/vector.h
@@ -284,4 +284,15 @@ static inline bool riscv_v_vstate_ctrl_user_allowed(void) { return false; }
 
 #endif /* CONFIG_RISCV_ISA_V */
 
+/*
+ * Return the implementation's vlen value.
+ *
+ * riscv_v_vsize contains the value of "32 vector registers with vlenb length"
+ * so rebuild the vlen value in bits from it.
+ */
+static inline int riscv_vector_vlen(void)
+{
+	return riscv_v_vsize / 32 * 8;
+}
+
 #endif /* ! __ASM_RISCV_VECTOR_H */
diff --git a/arch/riscv/include/asm/vendorid_list.h b/arch/riscv/include/asm/vendorid_list.h
index e55407ace0c3..2f2bb0c84f9a 100644
--- a/arch/riscv/include/asm/vendorid_list.h
+++ b/arch/riscv/include/asm/vendorid_list.h
@@ -5,7 +5,7 @@
 #ifndef ASM_VENDOR_LIST_H
 #define ASM_VENDOR_LIST_H
 
-#define ANDESTECH_VENDOR_ID	0x31e
+#define ANDES_VENDOR_ID		0x31e
 #define SIFIVE_VENDOR_ID	0x489
 #define THEAD_VENDOR_ID		0x5b7
 
diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile
index 5e591f831638..81d94a8ee10f 100644
--- a/arch/riscv/kernel/Makefile
+++ b/arch/riscv/kernel/Makefile
@@ -39,7 +39,6 @@ extra-y += vmlinux.lds
 obj-y	+= head.o
 obj-y	+= soc.o
 obj-$(CONFIG_RISCV_ALTERNATIVE) += alternative.o
-obj-y	+= copy-unaligned.o
 obj-y	+= cpu.o
 obj-y	+= cpufeature.o
 obj-y	+= entry.o
@@ -64,6 +63,9 @@ obj-y	+= tests/
 obj-$(CONFIG_MMU) += vdso.o vdso/
 
 obj-$(CONFIG_RISCV_MISALIGNED)	+= traps_misaligned.o
+obj-$(CONFIG_RISCV_MISALIGNED)	+= unaligned_access_speed.o
+obj-$(CONFIG_RISCV_PROBE_UNALIGNED_ACCESS)	+= copy-unaligned.o
+
 obj-$(CONFIG_FPU)		+= fpu.o
 obj-$(CONFIG_RISCV_ISA_V)	+= vector.o
 obj-$(CONFIG_RISCV_ISA_V)	+= kernel_mode_vector.o
diff --git a/arch/riscv/kernel/alternative.c b/arch/riscv/kernel/alternative.c
index 319a1da0358b..0128b161bfda 100644
--- a/arch/riscv/kernel/alternative.c
+++ b/arch/riscv/kernel/alternative.c
@@ -43,7 +43,7 @@ static void riscv_fill_cpu_mfr_info(struct cpu_manufacturer_info_t *cpu_mfr_info
 
 	switch (cpu_mfr_info->vendor_id) {
 #ifdef CONFIG_ERRATA_ANDES
-	case ANDESTECH_VENDOR_ID:
+	case ANDES_VENDOR_ID:
 		cpu_mfr_info->patch_func = andes_errata_patch_func;
 		break;
 #endif
diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c
index 79a5a35fab96..3ed2359eae35 100644
--- a/arch/riscv/kernel/cpufeature.c
+++ b/arch/riscv/kernel/cpufeature.c
@@ -11,7 +11,6 @@
 #include <linux/cpu.h>
 #include <linux/cpuhotplug.h>
 #include <linux/ctype.h>
-#include <linux/jump_label.h>
 #include <linux/log2.h>
 #include <linux/memory.h>
 #include <linux/module.h>
@@ -21,21 +20,13 @@
 #include <asm/cacheflush.h>
 #include <asm/cpufeature.h>
 #include <asm/hwcap.h>
-#include <asm/hwprobe.h>
 #include <asm/patch.h>
 #include <asm/processor.h>
 #include <asm/sbi.h>
 #include <asm/vector.h>
 
-#include "copy-unaligned.h"
-
 #define NUM_ALPHA_EXTS ('z' - 'a' + 1)
 
-#define MISALIGNED_ACCESS_JIFFIES_LG2 1
-#define MISALIGNED_BUFFER_SIZE 0x4000
-#define MISALIGNED_BUFFER_ORDER get_order(MISALIGNED_BUFFER_SIZE)
-#define MISALIGNED_COPY_SIZE ((MISALIGNED_BUFFER_SIZE / 2) - 0x80)
-
 unsigned long elf_hwcap __read_mostly;
 
 /* Host ISA bitmap */
@@ -44,11 +35,6 @@ static DECLARE_BITMAP(riscv_isa, RISCV_ISA_EXT_MAX) __read_mostly;
 /* Per-cpu ISA extensions. */
 struct riscv_isainfo hart_isa[NR_CPUS];
 
-/* Performance information */
-DEFINE_PER_CPU(long, misaligned_access_speed);
-
-static cpumask_t fast_misaligned_access;
-
 /**
  * riscv_isa_extension_base() - Get base extension word
  *
@@ -318,6 +304,7 @@ const struct riscv_isa_ext_data riscv_isa_ext[] = {
 	__RISCV_ISA_EXT_DATA(svinval, RISCV_ISA_EXT_SVINVAL),
 	__RISCV_ISA_EXT_DATA(svnapot, RISCV_ISA_EXT_SVNAPOT),
 	__RISCV_ISA_EXT_DATA(svpbmt, RISCV_ISA_EXT_SVPBMT),
+	__RISCV_ISA_EXT_DATA(xandespmu, RISCV_ISA_EXT_XANDESPMU),
 };
 
 const size_t riscv_isa_ext_count = ARRAY_SIZE(riscv_isa_ext);
@@ -731,247 +718,6 @@ unsigned long riscv_get_elf_hwcap(void)
 	return hwcap;
 }
 
-static int check_unaligned_access(void *param)
-{
-	int cpu = smp_processor_id();
-	u64 start_cycles, end_cycles;
-	u64 word_cycles;
-	u64 byte_cycles;
-	int ratio;
-	unsigned long start_jiffies, now;
-	struct page *page = param;
-	void *dst;
-	void *src;
-	long speed = RISCV_HWPROBE_MISALIGNED_SLOW;
-
-	if (check_unaligned_access_emulated(cpu))
-		return 0;
-
-	/* Make an unaligned destination buffer. */
-	dst = (void *)((unsigned long)page_address(page) | 0x1);
-	/* Unalign src as well, but differently (off by 1 + 2 = 3). */
-	src = dst + (MISALIGNED_BUFFER_SIZE / 2);
-	src += 2;
-	word_cycles = -1ULL;
-	/* Do a warmup. */
-	__riscv_copy_words_unaligned(dst, src, MISALIGNED_COPY_SIZE);
-	preempt_disable();
-	start_jiffies = jiffies;
-	while ((now = jiffies) == start_jiffies)
-		cpu_relax();
-
-	/*
-	 * For a fixed amount of time, repeatedly try the function, and take
-	 * the best time in cycles as the measurement.
-	 */
-	while (time_before(jiffies, now + (1 << MISALIGNED_ACCESS_JIFFIES_LG2))) {
-		start_cycles = get_cycles64();
-		/* Ensure the CSR read can't reorder WRT to the copy. */
-		mb();
-		__riscv_copy_words_unaligned(dst, src, MISALIGNED_COPY_SIZE);
-		/* Ensure the copy ends before the end time is snapped. */
-		mb();
-		end_cycles = get_cycles64();
-		if ((end_cycles - start_cycles) < word_cycles)
-			word_cycles = end_cycles - start_cycles;
-	}
-
-	byte_cycles = -1ULL;
-	__riscv_copy_bytes_unaligned(dst, src, MISALIGNED_COPY_SIZE);
-	start_jiffies = jiffies;
-	while ((now = jiffies) == start_jiffies)
-		cpu_relax();
-
-	while (time_before(jiffies, now + (1 << MISALIGNED_ACCESS_JIFFIES_LG2))) {
-		start_cycles = get_cycles64();
-		mb();
-		__riscv_copy_bytes_unaligned(dst, src, MISALIGNED_COPY_SIZE);
-		mb();
-		end_cycles = get_cycles64();
-		if ((end_cycles - start_cycles) < byte_cycles)
-			byte_cycles = end_cycles - start_cycles;
-	}
-
-	preempt_enable();
-
-	/* Don't divide by zero. */
-	if (!word_cycles || !byte_cycles) {
-		pr_warn("cpu%d: rdtime lacks granularity needed to measure unaligned access speed\n",
-			cpu);
-
-		return 0;
-	}
-
-	if (word_cycles < byte_cycles)
-		speed = RISCV_HWPROBE_MISALIGNED_FAST;
-
-	ratio = div_u64((byte_cycles * 100), word_cycles);
-	pr_info("cpu%d: Ratio of byte access time to unaligned word access is %d.%02d, unaligned accesses are %s\n",
-		cpu,
-		ratio / 100,
-		ratio % 100,
-		(speed == RISCV_HWPROBE_MISALIGNED_FAST) ? "fast" : "slow");
-
-	per_cpu(misaligned_access_speed, cpu) = speed;
-
-	/*
-	 * Set the value of fast_misaligned_access of a CPU. These operations
-	 * are atomic to avoid race conditions.
-	 */
-	if (speed == RISCV_HWPROBE_MISALIGNED_FAST)
-		cpumask_set_cpu(cpu, &fast_misaligned_access);
-	else
-		cpumask_clear_cpu(cpu, &fast_misaligned_access);
-
-	return 0;
-}
-
-static void check_unaligned_access_nonboot_cpu(void *param)
-{
-	unsigned int cpu = smp_processor_id();
-	struct page **pages = param;
-
-	if (smp_processor_id() != 0)
-		check_unaligned_access(pages[cpu]);
-}
-
-DEFINE_STATIC_KEY_FALSE(fast_misaligned_access_speed_key);
-
-static void modify_unaligned_access_branches(cpumask_t *mask, int weight)
-{
-	if (cpumask_weight(mask) == weight)
-		static_branch_enable_cpuslocked(&fast_misaligned_access_speed_key);
-	else
-		static_branch_disable_cpuslocked(&fast_misaligned_access_speed_key);
-}
-
-static void set_unaligned_access_static_branches_except_cpu(int cpu)
-{
-	/*
-	 * Same as set_unaligned_access_static_branches, except excludes the
-	 * given CPU from the result. When a CPU is hotplugged into an offline
-	 * state, this function is called before the CPU is set to offline in
-	 * the cpumask, and thus the CPU needs to be explicitly excluded.
-	 */
-
-	cpumask_t fast_except_me;
-
-	cpumask_and(&fast_except_me, &fast_misaligned_access, cpu_online_mask);
-	cpumask_clear_cpu(cpu, &fast_except_me);
-
-	modify_unaligned_access_branches(&fast_except_me, num_online_cpus() - 1);
-}
-
-static void set_unaligned_access_static_branches(void)
-{
-	/*
-	 * This will be called after check_unaligned_access_all_cpus so the
-	 * result of unaligned access speed for all CPUs will be available.
-	 *
-	 * To avoid the number of online cpus changing between reading
-	 * cpu_online_mask and calling num_online_cpus, cpus_read_lock must be
-	 * held before calling this function.
-	 */
-
-	cpumask_t fast_and_online;
-
-	cpumask_and(&fast_and_online, &fast_misaligned_access, cpu_online_mask);
-
-	modify_unaligned_access_branches(&fast_and_online, num_online_cpus());
-}
-
-static int lock_and_set_unaligned_access_static_branch(void)
-{
-	cpus_read_lock();
-	set_unaligned_access_static_branches();
-	cpus_read_unlock();
-
-	return 0;
-}
-
-arch_initcall_sync(lock_and_set_unaligned_access_static_branch);
-
-static int riscv_online_cpu(unsigned int cpu)
-{
-	static struct page *buf;
-
-	/* We are already set since the last check */
-	if (per_cpu(misaligned_access_speed, cpu) != RISCV_HWPROBE_MISALIGNED_UNKNOWN)
-		goto exit;
-
-	buf = alloc_pages(GFP_KERNEL, MISALIGNED_BUFFER_ORDER);
-	if (!buf) {
-		pr_warn("Allocation failure, not measuring misaligned performance\n");
-		return -ENOMEM;
-	}
-
-	check_unaligned_access(buf);
-	__free_pages(buf, MISALIGNED_BUFFER_ORDER);
-
-exit:
-	set_unaligned_access_static_branches();
-
-	return 0;
-}
-
-static int riscv_offline_cpu(unsigned int cpu)
-{
-	set_unaligned_access_static_branches_except_cpu(cpu);
-
-	return 0;
-}
-
-/* Measure unaligned access on all CPUs present at boot in parallel. */
-static int check_unaligned_access_all_cpus(void)
-{
-	unsigned int cpu;
-	unsigned int cpu_count = num_possible_cpus();
-	struct page **bufs = kzalloc(cpu_count * sizeof(struct page *),
-				     GFP_KERNEL);
-
-	if (!bufs) {
-		pr_warn("Allocation failure, not measuring misaligned performance\n");
-		return 0;
-	}
-
-	/*
-	 * Allocate separate buffers for each CPU so there's no fighting over
-	 * cache lines.
-	 */
-	for_each_cpu(cpu, cpu_online_mask) {
-		bufs[cpu] = alloc_pages(GFP_KERNEL, MISALIGNED_BUFFER_ORDER);
-		if (!bufs[cpu]) {
-			pr_warn("Allocation failure, not measuring misaligned performance\n");
-			goto out;
-		}
-	}
-
-	/* Check everybody except 0, who stays behind to tend jiffies. */
-	on_each_cpu(check_unaligned_access_nonboot_cpu, bufs, 1);
-
-	/* Check core 0. */
-	smp_call_on_cpu(0, check_unaligned_access, bufs[0], true);
-
-	/*
-	 * Setup hotplug callbacks for any new CPUs that come online or go
-	 * offline.
-	 */
-	cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "riscv:online",
-				  riscv_online_cpu, riscv_offline_cpu);
-
-out:
-	unaligned_emulation_finish();
-	for_each_cpu(cpu, cpu_online_mask) {
-		if (bufs[cpu])
-			__free_pages(bufs[cpu], MISALIGNED_BUFFER_ORDER);
-	}
-
-	kfree(bufs);
-	return 0;
-}
-
-arch_initcall(check_unaligned_access_all_cpus);
-
 void riscv_user_isa_enable(void)
 {
 	if (riscv_cpu_has_extension_unlikely(smp_processor_id(), RISCV_ISA_EXT_ZICBOZ))
diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S
index 9d1a305d5508..68a24cf9481a 100644
--- a/arch/riscv/kernel/entry.S
+++ b/arch/riscv/kernel/entry.S
@@ -111,6 +111,7 @@ SYM_CODE_START(handle_exception)
 1:
 	tail do_trap_unknown
 SYM_CODE_END(handle_exception)
+ASM_NOKPROBE(handle_exception)
 
 /*
  * The ret_from_exception must be called with interrupt disabled. Here is the
@@ -184,6 +185,7 @@ SYM_CODE_START_NOALIGN(ret_from_exception)
 	sret
 #endif
 SYM_CODE_END(ret_from_exception)
+ASM_NOKPROBE(ret_from_exception)
 
 #ifdef CONFIG_VMAP_STACK
 SYM_CODE_START_LOCAL(handle_kernel_stack_overflow)
@@ -219,6 +221,7 @@ SYM_CODE_START_LOCAL(handle_kernel_stack_overflow)
 	move a0, sp
 	tail handle_bad_stack
 SYM_CODE_END(handle_kernel_stack_overflow)
+ASM_NOKPROBE(handle_kernel_stack_overflow)
 #endif
 
 SYM_CODE_START(ret_from_fork)
diff --git a/arch/riscv/kernel/pi/Makefile b/arch/riscv/kernel/pi/Makefile
index 07915dc9279e..b75f150b923d 100644
--- a/arch/riscv/kernel/pi/Makefile
+++ b/arch/riscv/kernel/pi/Makefile
@@ -9,6 +9,9 @@ KBUILD_CFLAGS	:= $(subst $(CC_FLAGS_FTRACE),,$(KBUILD_CFLAGS)) -fpie \
 		   -fno-asynchronous-unwind-tables -fno-unwind-tables \
 		   $(call cc-option,-fno-addrsig)
 
+# Disable LTO
+KBUILD_CFLAGS	:= $(filter-out $(CC_FLAGS_LTO), $(KBUILD_CFLAGS))
+
 KBUILD_CFLAGS	+= -mcmodel=medany
 
 CFLAGS_cmdline_early.o += -D__NO_FORTIFY
diff --git a/arch/riscv/kernel/ptrace.c b/arch/riscv/kernel/ptrace.c
index e8515aa9d80b..92731ff8c79a 100644
--- a/arch/riscv/kernel/ptrace.c
+++ b/arch/riscv/kernel/ptrace.c
@@ -377,14 +377,14 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
 
 	return ret;
 }
+#else
+static const struct user_regset_view compat_riscv_user_native_view = {};
 #endif /* CONFIG_COMPAT */
 
 const struct user_regset_view *task_user_regset_view(struct task_struct *task)
 {
-#ifdef CONFIG_COMPAT
-	if (test_tsk_thread_flag(task, TIF_32BIT))
+	if (is_compat_thread(&task->thread_info))
 		return &compat_riscv_user_native_view;
 	else
-#endif
 		return &riscv_user_native_view;
 }
diff --git a/arch/riscv/kernel/smpboot.c b/arch/riscv/kernel/smpboot.c
index c4ed7d977f57..d41090fc3203 100644
--- a/arch/riscv/kernel/smpboot.c
+++ b/arch/riscv/kernel/smpboot.c
@@ -28,7 +28,6 @@
 
 #include <asm/cpufeature.h>
 #include <asm/cpu_ops.h>
-#include <asm/cpufeature.h>
 #include <asm/irq.h>
 #include <asm/mmu_context.h>
 #include <asm/numa.h>
diff --git a/arch/riscv/kernel/suspend.c b/arch/riscv/kernel/suspend.c
index 299795341e8a..8a327b485b90 100644
--- a/arch/riscv/kernel/suspend.c
+++ b/arch/riscv/kernel/suspend.c
@@ -132,4 +132,53 @@ static int __init sbi_system_suspend_init(void)
 }
 
 arch_initcall(sbi_system_suspend_init);
+
+static int sbi_suspend_finisher(unsigned long suspend_type,
+				unsigned long resume_addr,
+				unsigned long opaque)
+{
+	struct sbiret ret;
+
+	ret = sbi_ecall(SBI_EXT_HSM, SBI_EXT_HSM_HART_SUSPEND,
+			suspend_type, resume_addr, opaque, 0, 0, 0);
+
+	return (ret.error) ? sbi_err_map_linux_errno(ret.error) : 0;
+}
+
+int riscv_sbi_hart_suspend(u32 state)
+{
+	if (state & SBI_HSM_SUSP_NON_RET_BIT)
+		return cpu_suspend(state, sbi_suspend_finisher);
+	else
+		return sbi_suspend_finisher(state, 0, 0);
+}
+
+bool riscv_sbi_suspend_state_is_valid(u32 state)
+{
+	if (state > SBI_HSM_SUSPEND_RET_DEFAULT &&
+	    state < SBI_HSM_SUSPEND_RET_PLATFORM)
+		return false;
+
+	if (state > SBI_HSM_SUSPEND_NON_RET_DEFAULT &&
+	    state < SBI_HSM_SUSPEND_NON_RET_PLATFORM)
+		return false;
+
+	return true;
+}
+
+bool riscv_sbi_hsm_is_supported(void)
+{
+	/*
+	 * The SBI HSM suspend function is only available when:
+	 * 1) SBI version is 0.3 or higher
+	 * 2) SBI HSM extension is available
+	 */
+	if (sbi_spec_version < sbi_mk_version(0, 3) ||
+	    !sbi_probe_extension(SBI_EXT_HSM)) {
+		pr_info("HSM suspend not available\n");
+		return false;
+	}
+
+	return true;
+}
 #endif /* CONFIG_RISCV_SBI */
diff --git a/arch/riscv/kernel/sys_hwprobe.c b/arch/riscv/kernel/sys_hwprobe.c
index a7c56b41efd2..8cae41a502dd 100644
--- a/arch/riscv/kernel/sys_hwprobe.c
+++ b/arch/riscv/kernel/sys_hwprobe.c
@@ -147,6 +147,7 @@ static bool hwprobe_ext0_has(const struct cpumask *cpus, unsigned long ext)
 	return (pair.value & ext);
 }
 
+#if defined(CONFIG_RISCV_PROBE_UNALIGNED_ACCESS)
 static u64 hwprobe_misaligned(const struct cpumask *cpus)
 {
 	int cpu;
@@ -169,6 +170,18 @@ static u64 hwprobe_misaligned(const struct cpumask *cpus)
 
 	return perf;
 }
+#else
+static u64 hwprobe_misaligned(const struct cpumask *cpus)
+{
+	if (IS_ENABLED(CONFIG_RISCV_EFFICIENT_UNALIGNED_ACCESS))
+		return RISCV_HWPROBE_MISALIGNED_FAST;
+
+	if (IS_ENABLED(CONFIG_RISCV_EMULATED_UNALIGNED_ACCESS) && unaligned_ctl_available())
+		return RISCV_HWPROBE_MISALIGNED_EMULATED;
+
+	return RISCV_HWPROBE_MISALIGNED_SLOW;
+}
+#endif
 
 static void hwprobe_one_pair(struct riscv_hwprobe *pair,
 			     const struct cpumask *cpus)
diff --git a/arch/riscv/kernel/tests/Kconfig.debug b/arch/riscv/kernel/tests/Kconfig.debug
index 5dba64e8e977..78cea5d2c270 100644
--- a/arch/riscv/kernel/tests/Kconfig.debug
+++ b/arch/riscv/kernel/tests/Kconfig.debug
@@ -6,7 +6,7 @@ config AS_HAS_ULEB128
 
 menuconfig RUNTIME_KERNEL_TESTING_MENU
        bool "arch/riscv/kernel runtime Testing"
-       def_bool y
+       default y
        help
          Enable riscv kernel runtime testing.
 
diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c
index a1b9be3c4332..868d6280cf66 100644
--- a/arch/riscv/kernel/traps.c
+++ b/arch/riscv/kernel/traps.c
@@ -6,6 +6,7 @@
 #include <linux/cpu.h>
 #include <linux/kernel.h>
 #include <linux/init.h>
+#include <linux/randomize_kstack.h>
 #include <linux/sched.h>
 #include <linux/sched/debug.h>
 #include <linux/sched/signal.h>
@@ -310,7 +311,8 @@ asmlinkage __visible __trap_section void do_trap_break(struct pt_regs *regs)
 	}
 }
 
-asmlinkage __visible __trap_section void do_trap_ecall_u(struct pt_regs *regs)
+asmlinkage __visible __trap_section  __no_stack_protector
+void do_trap_ecall_u(struct pt_regs *regs)
 {
 	if (user_mode(regs)) {
 		long syscall = regs->a7;
@@ -322,10 +324,23 @@ asmlinkage __visible __trap_section void do_trap_ecall_u(struct pt_regs *regs)
 
 		syscall = syscall_enter_from_user_mode(regs, syscall);
 
+		add_random_kstack_offset();
+
 		if (syscall >= 0 && syscall < NR_syscalls)
 			syscall_handler(regs, syscall);
 		else if (syscall != -1)
 			regs->a0 = -ENOSYS;
+		/*
+		 * Ultimately, this value will get limited by KSTACK_OFFSET_MAX(),
+		 * so the maximum stack offset is 1k bytes (10 bits).
+		 *
+		 * The actual entropy will be further reduced by the compiler when
+		 * applying stack alignment constraints: 16-byte (i.e. 4-bit) aligned
+		 * for RV32I or RV64I.
+		 *
+		 * The resulting 6 bits of entropy is seen in SP[9:4].
+		 */
+		choose_random_kstack_offset(get_random_u16());
 
 		syscall_exit_to_user_mode(regs);
 	} else {
diff --git a/arch/riscv/kernel/traps_misaligned.c b/arch/riscv/kernel/traps_misaligned.c
index 8ded225e8c5b..2adb7c3e4dd5 100644
--- a/arch/riscv/kernel/traps_misaligned.c
+++ b/arch/riscv/kernel/traps_misaligned.c
@@ -413,7 +413,9 @@ int handle_misaligned_load(struct pt_regs *regs)
 
 	perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, addr);
 
+#ifdef CONFIG_RISCV_PROBE_UNALIGNED_ACCESS
 	*this_cpu_ptr(&misaligned_access_speed) = RISCV_HWPROBE_MISALIGNED_EMULATED;
+#endif
 
 	if (!unaligned_enabled)
 		return -1;
@@ -596,7 +598,7 @@ int handle_misaligned_store(struct pt_regs *regs)
 	return 0;
 }
 
-bool check_unaligned_access_emulated(int cpu)
+static bool check_unaligned_access_emulated(int cpu)
 {
 	long *mas_ptr = per_cpu_ptr(&misaligned_access_speed, cpu);
 	unsigned long tmp_var, tmp_val;
@@ -623,7 +625,7 @@ bool check_unaligned_access_emulated(int cpu)
 	return misaligned_emu_detected;
 }
 
-void unaligned_emulation_finish(void)
+bool check_unaligned_access_emulated_all_cpus(void)
 {
 	int cpu;
 
@@ -632,13 +634,12 @@ void unaligned_emulation_finish(void)
 	 * accesses emulated since tasks requesting such control can run on any
 	 * CPU.
 	 */
-	for_each_present_cpu(cpu) {
-		if (per_cpu(misaligned_access_speed, cpu) !=
-					RISCV_HWPROBE_MISALIGNED_EMULATED) {
-			return;
-		}
-	}
+	for_each_online_cpu(cpu)
+		if (!check_unaligned_access_emulated(cpu))
+			return false;
+
 	unaligned_ctl = true;
+	return true;
 }
 
 bool unaligned_ctl_available(void)
diff --git a/arch/riscv/kernel/unaligned_access_speed.c b/arch/riscv/kernel/unaligned_access_speed.c
new file mode 100644
index 000000000000..a9a6bcb02acf
--- /dev/null
+++ b/arch/riscv/kernel/unaligned_access_speed.c
@@ -0,0 +1,281 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2024 Rivos Inc.
+ */
+
+#include <linux/cpu.h>
+#include <linux/cpumask.h>
+#include <linux/jump_label.h>
+#include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/types.h>
+#include <asm/cpufeature.h>
+#include <asm/hwprobe.h>
+
+#include "copy-unaligned.h"
+
+#define MISALIGNED_ACCESS_JIFFIES_LG2 1
+#define MISALIGNED_BUFFER_SIZE 0x4000
+#define MISALIGNED_BUFFER_ORDER get_order(MISALIGNED_BUFFER_SIZE)
+#define MISALIGNED_COPY_SIZE ((MISALIGNED_BUFFER_SIZE / 2) - 0x80)
+
+DEFINE_PER_CPU(long, misaligned_access_speed);
+
+#ifdef CONFIG_RISCV_PROBE_UNALIGNED_ACCESS
+static cpumask_t fast_misaligned_access;
+static int check_unaligned_access(void *param)
+{
+	int cpu = smp_processor_id();
+	u64 start_cycles, end_cycles;
+	u64 word_cycles;
+	u64 byte_cycles;
+	int ratio;
+	unsigned long start_jiffies, now;
+	struct page *page = param;
+	void *dst;
+	void *src;
+	long speed = RISCV_HWPROBE_MISALIGNED_SLOW;
+
+	if (per_cpu(misaligned_access_speed, cpu) != RISCV_HWPROBE_MISALIGNED_UNKNOWN)
+		return 0;
+
+	/* Make an unaligned destination buffer. */
+	dst = (void *)((unsigned long)page_address(page) | 0x1);
+	/* Unalign src as well, but differently (off by 1 + 2 = 3). */
+	src = dst + (MISALIGNED_BUFFER_SIZE / 2);
+	src += 2;
+	word_cycles = -1ULL;
+	/* Do a warmup. */
+	__riscv_copy_words_unaligned(dst, src, MISALIGNED_COPY_SIZE);
+	preempt_disable();
+	start_jiffies = jiffies;
+	while ((now = jiffies) == start_jiffies)
+		cpu_relax();
+
+	/*
+	 * For a fixed amount of time, repeatedly try the function, and take
+	 * the best time in cycles as the measurement.
+	 */
+	while (time_before(jiffies, now + (1 << MISALIGNED_ACCESS_JIFFIES_LG2))) {
+		start_cycles = get_cycles64();
+		/* Ensure the CSR read can't reorder WRT to the copy. */
+		mb();
+		__riscv_copy_words_unaligned(dst, src, MISALIGNED_COPY_SIZE);
+		/* Ensure the copy ends before the end time is snapped. */
+		mb();
+		end_cycles = get_cycles64();
+		if ((end_cycles - start_cycles) < word_cycles)
+			word_cycles = end_cycles - start_cycles;
+	}
+
+	byte_cycles = -1ULL;
+	__riscv_copy_bytes_unaligned(dst, src, MISALIGNED_COPY_SIZE);
+	start_jiffies = jiffies;
+	while ((now = jiffies) == start_jiffies)
+		cpu_relax();
+
+	while (time_before(jiffies, now + (1 << MISALIGNED_ACCESS_JIFFIES_LG2))) {
+		start_cycles = get_cycles64();
+		mb();
+		__riscv_copy_bytes_unaligned(dst, src, MISALIGNED_COPY_SIZE);
+		mb();
+		end_cycles = get_cycles64();
+		if ((end_cycles - start_cycles) < byte_cycles)
+			byte_cycles = end_cycles - start_cycles;
+	}
+
+	preempt_enable();
+
+	/* Don't divide by zero. */
+	if (!word_cycles || !byte_cycles) {
+		pr_warn("cpu%d: rdtime lacks granularity needed to measure unaligned access speed\n",
+			cpu);
+
+		return 0;
+	}
+
+	if (word_cycles < byte_cycles)
+		speed = RISCV_HWPROBE_MISALIGNED_FAST;
+
+	ratio = div_u64((byte_cycles * 100), word_cycles);
+	pr_info("cpu%d: Ratio of byte access time to unaligned word access is %d.%02d, unaligned accesses are %s\n",
+		cpu,
+		ratio / 100,
+		ratio % 100,
+		(speed == RISCV_HWPROBE_MISALIGNED_FAST) ? "fast" : "slow");
+
+	per_cpu(misaligned_access_speed, cpu) = speed;
+
+	/*
+	 * Set the value of fast_misaligned_access of a CPU. These operations
+	 * are atomic to avoid race conditions.
+	 */
+	if (speed == RISCV_HWPROBE_MISALIGNED_FAST)
+		cpumask_set_cpu(cpu, &fast_misaligned_access);
+	else
+		cpumask_clear_cpu(cpu, &fast_misaligned_access);
+
+	return 0;
+}
+
+static void check_unaligned_access_nonboot_cpu(void *param)
+{
+	unsigned int cpu = smp_processor_id();
+	struct page **pages = param;
+
+	if (smp_processor_id() != 0)
+		check_unaligned_access(pages[cpu]);
+}
+
+DEFINE_STATIC_KEY_FALSE(fast_unaligned_access_speed_key);
+
+static void modify_unaligned_access_branches(cpumask_t *mask, int weight)
+{
+	if (cpumask_weight(mask) == weight)
+		static_branch_enable_cpuslocked(&fast_unaligned_access_speed_key);
+	else
+		static_branch_disable_cpuslocked(&fast_unaligned_access_speed_key);
+}
+
+static void set_unaligned_access_static_branches_except_cpu(int cpu)
+{
+	/*
+	 * Same as set_unaligned_access_static_branches, except excludes the
+	 * given CPU from the result. When a CPU is hotplugged into an offline
+	 * state, this function is called before the CPU is set to offline in
+	 * the cpumask, and thus the CPU needs to be explicitly excluded.
+	 */
+
+	cpumask_t fast_except_me;
+
+	cpumask_and(&fast_except_me, &fast_misaligned_access, cpu_online_mask);
+	cpumask_clear_cpu(cpu, &fast_except_me);
+
+	modify_unaligned_access_branches(&fast_except_me, num_online_cpus() - 1);
+}
+
+static void set_unaligned_access_static_branches(void)
+{
+	/*
+	 * This will be called after check_unaligned_access_all_cpus so the
+	 * result of unaligned access speed for all CPUs will be available.
+	 *
+	 * To avoid the number of online cpus changing between reading
+	 * cpu_online_mask and calling num_online_cpus, cpus_read_lock must be
+	 * held before calling this function.
+	 */
+
+	cpumask_t fast_and_online;
+
+	cpumask_and(&fast_and_online, &fast_misaligned_access, cpu_online_mask);
+
+	modify_unaligned_access_branches(&fast_and_online, num_online_cpus());
+}
+
+static int lock_and_set_unaligned_access_static_branch(void)
+{
+	cpus_read_lock();
+	set_unaligned_access_static_branches();
+	cpus_read_unlock();
+
+	return 0;
+}
+
+arch_initcall_sync(lock_and_set_unaligned_access_static_branch);
+
+static int riscv_online_cpu(unsigned int cpu)
+{
+	static struct page *buf;
+
+	/* We are already set since the last check */
+	if (per_cpu(misaligned_access_speed, cpu) != RISCV_HWPROBE_MISALIGNED_UNKNOWN)
+		goto exit;
+
+	buf = alloc_pages(GFP_KERNEL, MISALIGNED_BUFFER_ORDER);
+	if (!buf) {
+		pr_warn("Allocation failure, not measuring misaligned performance\n");
+		return -ENOMEM;
+	}
+
+	check_unaligned_access(buf);
+	__free_pages(buf, MISALIGNED_BUFFER_ORDER);
+
+exit:
+	set_unaligned_access_static_branches();
+
+	return 0;
+}
+
+static int riscv_offline_cpu(unsigned int cpu)
+{
+	set_unaligned_access_static_branches_except_cpu(cpu);
+
+	return 0;
+}
+
+/* Measure unaligned access speed on all CPUs present at boot in parallel. */
+static int check_unaligned_access_speed_all_cpus(void)
+{
+	unsigned int cpu;
+	unsigned int cpu_count = num_possible_cpus();
+	struct page **bufs = kcalloc(cpu_count, sizeof(*bufs), GFP_KERNEL);
+
+	if (!bufs) {
+		pr_warn("Allocation failure, not measuring misaligned performance\n");
+		return 0;
+	}
+
+	/*
+	 * Allocate separate buffers for each CPU so there's no fighting over
+	 * cache lines.
+	 */
+	for_each_cpu(cpu, cpu_online_mask) {
+		bufs[cpu] = alloc_pages(GFP_KERNEL, MISALIGNED_BUFFER_ORDER);
+		if (!bufs[cpu]) {
+			pr_warn("Allocation failure, not measuring misaligned performance\n");
+			goto out;
+		}
+	}
+
+	/* Check everybody except 0, who stays behind to tend jiffies. */
+	on_each_cpu(check_unaligned_access_nonboot_cpu, bufs, 1);
+
+	/* Check core 0. */
+	smp_call_on_cpu(0, check_unaligned_access, bufs[0], true);
+
+	/*
+	 * Setup hotplug callbacks for any new CPUs that come online or go
+	 * offline.
+	 */
+	cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "riscv:online",
+				  riscv_online_cpu, riscv_offline_cpu);
+
+out:
+	for_each_cpu(cpu, cpu_online_mask) {
+		if (bufs[cpu])
+			__free_pages(bufs[cpu], MISALIGNED_BUFFER_ORDER);
+	}
+
+	kfree(bufs);
+	return 0;
+}
+
+static int check_unaligned_access_all_cpus(void)
+{
+	bool all_cpus_emulated = check_unaligned_access_emulated_all_cpus();
+
+	if (!all_cpus_emulated)
+		return check_unaligned_access_speed_all_cpus();
+
+	return 0;
+}
+#else /* CONFIG_RISCV_PROBE_UNALIGNED_ACCESS */
+static int check_unaligned_access_all_cpus(void)
+{
+	check_unaligned_access_emulated_all_cpus();
+
+	return 0;
+}
+#endif
+
+arch_initcall(check_unaligned_access_all_cpus);
diff --git a/arch/riscv/lib/csum.c b/arch/riscv/lib/csum.c
index 74af3ab520b6..7fb12c59e571 100644
--- a/arch/riscv/lib/csum.c
+++ b/arch/riscv/lib/csum.c
@@ -3,7 +3,7 @@
  * Checksum library
  *
  * Influenced by arch/arm64/lib/csum.c
- * Copyright (C) 2023 Rivos Inc.
+ * Copyright (C) 2023-2024 Rivos Inc.
  */
 #include <linux/bitops.h>
 #include <linux/compiler.h>
@@ -318,10 +318,7 @@ unsigned int do_csum(const unsigned char *buff, int len)
 	 * branches. The largest chunk of overlap was delegated into the
 	 * do_csum_common function.
 	 */
-	if (static_branch_likely(&fast_misaligned_access_speed_key))
-		return do_csum_no_alignment(buff, len);
-
-	if (((unsigned long)buff & OFFSET_MASK) == 0)
+	if (has_fast_unaligned_accesses() || (((unsigned long)buff & OFFSET_MASK) == 0))
 		return do_csum_no_alignment(buff, len);
 
 	return do_csum_with_alignment(buff, len);
diff --git a/arch/riscv/lib/uaccess_vector.S b/arch/riscv/lib/uaccess_vector.S
index 51ab5588e9ff..7c45f26de4f7 100644
--- a/arch/riscv/lib/uaccess_vector.S
+++ b/arch/riscv/lib/uaccess_vector.S
@@ -1,7 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0-only */
 
 #include <linux/linkage.h>
-#include <asm-generic/export.h>
 #include <asm/asm.h>
 #include <asm/asm-extable.h>
 #include <asm/csr.h>
diff --git a/arch/riscv/mm/cacheflush.c b/arch/riscv/mm/cacheflush.c
index 55a34f2020a8..bc61ee5975e4 100644
--- a/arch/riscv/mm/cacheflush.c
+++ b/arch/riscv/mm/cacheflush.c
@@ -82,12 +82,12 @@ void flush_icache_mm(struct mm_struct *mm, bool local)
 #endif /* CONFIG_SMP */
 
 #ifdef CONFIG_MMU
-void flush_icache_pte(pte_t pte)
+void flush_icache_pte(struct mm_struct *mm, pte_t pte)
 {
 	struct folio *folio = page_folio(pte_page(pte));
 
 	if (!test_bit(PG_dcache_clean, &folio->flags)) {
-		flush_icache_all();
+		flush_icache_mm(mm, false);
 		set_bit(PG_dcache_clean, &folio->flags);
 	}
 }
diff --git a/arch/riscv/mm/context.c b/arch/riscv/mm/context.c
index 217fd4de6134..ba8eb3944687 100644
--- a/arch/riscv/mm/context.c
+++ b/arch/riscv/mm/context.c
@@ -323,6 +323,8 @@ void switch_mm(struct mm_struct *prev, struct mm_struct *next,
 	if (unlikely(prev == next))
 		return;
 
+	membarrier_arch_switch_mm(prev, next, task);
+
 	/*
 	 * Mark the current MM context as inactive, and the next as
 	 * active.  This is at least used by the icache flushing
diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
index b5ffb2ef54ad..fe8e159394d8 100644
--- a/arch/riscv/mm/init.c
+++ b/arch/riscv/mm/init.c
@@ -764,6 +764,11 @@ static int __init print_no5lvl(char *p)
 }
 early_param("no5lvl", print_no5lvl);
 
+static void __init set_mmap_rnd_bits_max(void)
+{
+	mmap_rnd_bits_max = MMAP_VA_BITS - PAGE_SHIFT - 3;
+}
+
 /*
  * There is a simple way to determine if 4-level is supported by the
  * underlying hardware: establish 1:1 mapping in 4-level page table mode
@@ -1078,6 +1083,7 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa)
 
 #if defined(CONFIG_64BIT) && !defined(CONFIG_XIP_KERNEL)
 	set_satp_mode(dtb_pa);
+	set_mmap_rnd_bits_max();
 #endif
 
 	/*
diff --git a/arch/riscv/mm/pgtable.c b/arch/riscv/mm/pgtable.c
index ef887efcb679..533ec9055fa0 100644
--- a/arch/riscv/mm/pgtable.c
+++ b/arch/riscv/mm/pgtable.c
@@ -10,7 +10,7 @@ int ptep_set_access_flags(struct vm_area_struct *vma,
 			  pte_t entry, int dirty)
 {
 	if (!pte_same(ptep_get(ptep), entry))
-		__set_pte_at(ptep, entry);
+		__set_pte_at(vma->vm_mm, ptep, entry);
 	/*
 	 * update_mmu_cache will unconditionally execute, handling both
 	 * the case that the PTE changed and the spurious fault case.
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 367bf5bc4a5b..8f01ada6845e 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -63,6 +63,7 @@ config S390
 	select ARCH_ENABLE_MEMORY_HOTREMOVE
 	select ARCH_ENABLE_SPLIT_PMD_PTLOCK if PGTABLE_LEVELS > 2
 	select ARCH_HAS_CURRENT_STACK_POINTER
+	select ARCH_HAS_DEBUG_VIRTUAL
 	select ARCH_HAS_DEBUG_VM_PGTABLE
 	select ARCH_HAS_DEBUG_WX
 	select ARCH_HAS_DEVMEM_IS_ALLOWED
diff --git a/arch/s390/Makefile b/arch/s390/Makefile
index 2a58e1864931..2dbb2d2f22f9 100644
--- a/arch/s390/Makefile
+++ b/arch/s390/Makefile
@@ -29,6 +29,7 @@ KBUILD_AFLAGS_DECOMPRESSOR += $(if $(CONFIG_DEBUG_INFO),$(aflags_dwarf))
 endif
 KBUILD_CFLAGS_DECOMPRESSOR := $(CLANG_FLAGS) -m64 -O2 -mpacked-stack
 KBUILD_CFLAGS_DECOMPRESSOR += -DDISABLE_BRANCH_PROFILING -D__NO_FORTIFY
+KBUILD_CFLAGS_DECOMPRESSOR += -D__DECOMPRESSOR
 KBUILD_CFLAGS_DECOMPRESSOR += -fno-delete-null-pointer-checks -msoft-float -mbackchain
 KBUILD_CFLAGS_DECOMPRESSOR += -fno-asynchronous-unwind-tables
 KBUILD_CFLAGS_DECOMPRESSOR += -ffreestanding
diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig
index 4032e6e136ac..f1fb01cd5a25 100644
--- a/arch/s390/configs/debug_defconfig
+++ b/arch/s390/configs/debug_defconfig
@@ -810,6 +810,7 @@ CONFIG_DEBUG_OBJECTS_PERCPU_COUNTER=y
 CONFIG_DEBUG_STACK_USAGE=y
 CONFIG_DEBUG_VM=y
 CONFIG_DEBUG_VM_PGFLAGS=y
+CONFIG_DEBUG_VIRTUAL=y
 CONFIG_DEBUG_MEMORY_INIT=y
 CONFIG_MEMORY_NOTIFIER_ERROR_INJECT=m
 CONFIG_DEBUG_PER_CPU_MAPS=y
diff --git a/arch/s390/include/asm/ccwdev.h b/arch/s390/include/asm/ccwdev.h
index 91d261751d25..436365ff6c19 100644
--- a/arch/s390/include/asm/ccwdev.h
+++ b/arch/s390/include/asm/ccwdev.h
@@ -217,7 +217,8 @@ extern void ccw_device_destroy_console(struct ccw_device *);
 extern int ccw_device_enable_console(struct ccw_device *);
 extern void ccw_device_wait_idle(struct ccw_device *);
 
-extern void *ccw_device_dma_zalloc(struct ccw_device *cdev, size_t size);
+extern void *ccw_device_dma_zalloc(struct ccw_device *cdev, size_t size,
+				   dma32_t *dma_handle);
 extern void ccw_device_dma_free(struct ccw_device *cdev,
 				void *cpu_addr, size_t size);
 
diff --git a/arch/s390/include/asm/cio.h b/arch/s390/include/asm/cio.h
index 1c4f585dd39b..b6b619f340a5 100644
--- a/arch/s390/include/asm/cio.h
+++ b/arch/s390/include/asm/cio.h
@@ -7,6 +7,7 @@
 
 #include <linux/bitops.h>
 #include <linux/genalloc.h>
+#include <asm/dma-types.h>
 #include <asm/types.h>
 #include <asm/tpi.h>
 
@@ -32,7 +33,7 @@ struct ccw1 {
 	__u8  cmd_code;
 	__u8  flags;
 	__u16 count;
-	__u32 cda;
+	dma32_t cda;
 } __attribute__ ((packed,aligned(8)));
 
 /**
@@ -152,8 +153,8 @@ struct sublog {
 struct esw0 {
 	struct sublog sublog;
 	struct erw erw;
-	__u32  faddr[2];
-	__u32  saddr;
+	dma32_t faddr[2];
+	dma32_t saddr;
 } __attribute__ ((packed));
 
 /**
@@ -364,6 +365,8 @@ extern struct device *cio_get_dma_css_dev(void);
 
 void *cio_gp_dma_zalloc(struct gen_pool *gp_dma, struct device *dma_dev,
 			size_t size);
+void *__cio_gp_dma_zalloc(struct gen_pool *gp_dma, struct device *dma_dev,
+			  size_t size, dma32_t *dma_handle);
 void cio_gp_dma_free(struct gen_pool *gp_dma, void *cpu_addr, size_t size);
 void cio_gp_dma_destroy(struct gen_pool *gp_dma, struct device *dma_dev);
 struct gen_pool *cio_gp_dma_create(struct device *dma_dev, int nr_pages);
diff --git a/arch/s390/include/asm/dma-types.h b/arch/s390/include/asm/dma-types.h
new file mode 100644
index 000000000000..5c5734e6946c
--- /dev/null
+++ b/arch/s390/include/asm/dma-types.h
@@ -0,0 +1,103 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _ASM_S390_DMA_TYPES_H_
+#define _ASM_S390_DMA_TYPES_H_
+
+#include <linux/types.h>
+#include <linux/io.h>
+
+/*
+ * typedef dma32_t
+ * Contains a 31 bit absolute address to a DMA capable piece of storage.
+ *
+ * For CIO, DMA addresses are always absolute addresses. These addresses tend
+ * to be used in architectured memory blocks (like ORB, IDAW, MIDAW). Under
+ * certain circumstances 31 bit wide addresses must be used because the
+ * address must fit in 31 bits.
+ *
+ * This type is to be used when such fields can be modelled as 32 bit wide.
+ */
+typedef u32 __bitwise dma32_t;
+
+/*
+ * typedef dma64_t
+ * Contains a 64 bit absolute address to a DMA capable piece of storage.
+ *
+ * For CIO, DMA addresses are always absolute addresses. These addresses tend
+ * to be used in architectured memory blocks (like ORB, IDAW, MIDAW).
+ *
+ * This type is to be used to model such 64 bit wide fields.
+ */
+typedef u64 __bitwise dma64_t;
+
+/*
+ * Although DMA addresses should be obtained using the DMA API, in cases when
+ * it is known that the first argument holds a virtual address that points to
+ * DMA-able 31 bit addressable storage, then this function can be safely used.
+ */
+static inline dma32_t virt_to_dma32(void *ptr)
+{
+	return (__force dma32_t)__pa32(ptr);
+}
+
+static inline void *dma32_to_virt(dma32_t addr)
+{
+	return __va((__force unsigned long)addr);
+}
+
+static inline dma32_t u32_to_dma32(u32 addr)
+{
+	return (__force dma32_t)addr;
+}
+
+static inline u32 dma32_to_u32(dma32_t addr)
+{
+	return (__force u32)addr;
+}
+
+static inline dma32_t dma32_add(dma32_t a, u32 b)
+{
+	return (__force dma32_t)((__force u32)a + b);
+}
+
+static inline dma32_t dma32_and(dma32_t a, u32 b)
+{
+	return (__force dma32_t)((__force u32)a & b);
+}
+
+/*
+ * Although DMA addresses should be obtained using the DMA API, in cases when
+ * it is known that the first argument holds a virtual address that points to
+ * DMA-able storage, then this function can be safely used.
+ */
+static inline dma64_t virt_to_dma64(void *ptr)
+{
+	return (__force dma64_t)__pa(ptr);
+}
+
+static inline void *dma64_to_virt(dma64_t addr)
+{
+	return __va((__force unsigned long)addr);
+}
+
+static inline dma64_t u64_to_dma64(u64 addr)
+{
+	return (__force dma64_t)addr;
+}
+
+static inline u64 dma64_to_u64(dma64_t addr)
+{
+	return (__force u64)addr;
+}
+
+static inline dma64_t dma64_add(dma64_t a, u64 b)
+{
+	return (__force dma64_t)((__force u64)a + b);
+}
+
+static inline dma64_t dma64_and(dma64_t a, u64 b)
+{
+	return (__force dma64_t)((__force u64)a & b);
+}
+
+#endif /* _ASM_S390_DMA_TYPES_H_ */
diff --git a/arch/s390/include/asm/eadm.h b/arch/s390/include/asm/eadm.h
index 06f795855af7..c4589ec4505e 100644
--- a/arch/s390/include/asm/eadm.h
+++ b/arch/s390/include/asm/eadm.h
@@ -5,6 +5,7 @@
 #include <linux/types.h>
 #include <linux/device.h>
 #include <linux/blk_types.h>
+#include <asm/dma-types.h>
 
 struct arqb {
 	u64 data;
@@ -45,7 +46,7 @@ struct msb {
 	u16:12;
 	u16 bs:4;
 	u32 blk_count;
-	u64 data_addr;
+	dma64_t data_addr;
 	u64 scm_addr;
 	u64:64;
 } __packed;
@@ -54,7 +55,7 @@ struct aidaw {
 	u8 flags;
 	u32 :24;
 	u32 :32;
-	u64 data_addr;
+	dma64_t data_addr;
 } __packed;
 
 #define MSB_OC_CLEAR	0
diff --git a/arch/s390/include/asm/fcx.h b/arch/s390/include/asm/fcx.h
index 29784b4b44f6..80f82a739b45 100644
--- a/arch/s390/include/asm/fcx.h
+++ b/arch/s390/include/asm/fcx.h
@@ -10,6 +10,7 @@
 #define _ASM_S390_FCX_H
 
 #include <linux/types.h>
+#include <asm/dma-types.h>
 
 #define TCW_FORMAT_DEFAULT		0
 #define TCW_TIDAW_FORMAT_DEFAULT	0
@@ -43,16 +44,16 @@ struct tcw {
 	u32 r:1;
 	u32 w:1;
 	u32 :16;
-	u64 output;
-	u64 input;
-	u64 tsb;
-	u64 tccb;
+	dma64_t output;
+	dma64_t input;
+	dma64_t tsb;
+	dma64_t tccb;
 	u32 output_count;
 	u32 input_count;
 	u32 :32;
 	u32 :32;
 	u32 :32;
-	u32 intrg;
+	dma32_t intrg;
 } __attribute__ ((packed, aligned(64)));
 
 #define TIDAW_FLAGS_LAST		(1 << (7 - 0))
@@ -73,7 +74,7 @@ struct tidaw {
 	u32 flags:8;
 	u32 :24;
 	u32 count;
-	u64 addr;
+	dma64_t addr;
 } __attribute__ ((packed, aligned(16)));
 
 /**
diff --git a/arch/s390/include/asm/idals.h b/arch/s390/include/asm/idals.h
index 59fcc3c72edf..ac68c657b28c 100644
--- a/arch/s390/include/asm/idals.h
+++ b/arch/s390/include/asm/idals.h
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* 
+/*
  * Author(s)......: Holger Smolinski <Holger.Smolinski@de.ibm.com>
  *		    Martin Schwidefsky <schwidefsky@de.ibm.com>
  * Bugreports.to..: <Linux390@de.ibm.com>
@@ -17,32 +17,37 @@
 #include <linux/err.h>
 #include <linux/types.h>
 #include <linux/slab.h>
-#include <asm/cio.h>
 #include <linux/uaccess.h>
+#include <asm/dma-types.h>
+#include <asm/cio.h>
 
-#define IDA_SIZE_LOG 12 /* 11 for 2k , 12 for 4k */
-#define IDA_BLOCK_SIZE (1L<<IDA_SIZE_LOG)
+#define IDA_SIZE_SHIFT		12
+#define IDA_BLOCK_SIZE		(1UL << IDA_SIZE_SHIFT)
 
-#define IDA_2K_SIZE_LOG 11
-#define IDA_2K_BLOCK_SIZE (1L << IDA_2K_SIZE_LOG)
+#define IDA_2K_SIZE_SHIFT	11
+#define IDA_2K_BLOCK_SIZE	(1UL << IDA_2K_SIZE_SHIFT)
 
 /*
  * Test if an address/length pair needs an idal list.
  */
-static inline int
-idal_is_needed(void *vaddr, unsigned int length)
+static inline bool idal_is_needed(void *vaddr, unsigned int length)
 {
-	return ((__pa(vaddr) + length - 1) >> 31) != 0;
-}
+	dma64_t paddr = virt_to_dma64(vaddr);
 
+	return (((__force unsigned long)(paddr) + length - 1) >> 31) != 0;
+}
 
 /*
  * Return the number of idal words needed for an address/length pair.
  */
 static inline unsigned int idal_nr_words(void *vaddr, unsigned int length)
 {
-	return ((__pa(vaddr) & (IDA_BLOCK_SIZE-1)) + length +
-		(IDA_BLOCK_SIZE-1)) >> IDA_SIZE_LOG;
+	unsigned int cidaw;
+
+	cidaw = (unsigned long)vaddr & (IDA_BLOCK_SIZE - 1);
+	cidaw += length + IDA_BLOCK_SIZE - 1;
+	cidaw >>= IDA_SIZE_SHIFT;
+	return cidaw;
 }
 
 /*
@@ -50,26 +55,27 @@ static inline unsigned int idal_nr_words(void *vaddr, unsigned int length)
  */
 static inline unsigned int idal_2k_nr_words(void *vaddr, unsigned int length)
 {
-	return ((__pa(vaddr) & (IDA_2K_BLOCK_SIZE - 1)) + length +
-		(IDA_2K_BLOCK_SIZE - 1)) >> IDA_2K_SIZE_LOG;
+	unsigned int cidaw;
+
+	cidaw = (unsigned long)vaddr & (IDA_2K_BLOCK_SIZE - 1);
+	cidaw += length + IDA_2K_BLOCK_SIZE - 1;
+	cidaw >>= IDA_2K_SIZE_SHIFT;
+	return cidaw;
 }
 
 /*
  * Create the list of idal words for an address/length pair.
  */
-static inline unsigned long *idal_create_words(unsigned long *idaws,
-					       void *vaddr, unsigned int length)
+static inline dma64_t *idal_create_words(dma64_t *idaws, void *vaddr, unsigned int length)
 {
-	unsigned long paddr;
+	dma64_t paddr = virt_to_dma64(vaddr);
 	unsigned int cidaw;
 
-	paddr = __pa(vaddr);
-	cidaw = ((paddr & (IDA_BLOCK_SIZE-1)) + length + 
-		 (IDA_BLOCK_SIZE-1)) >> IDA_SIZE_LOG;
 	*idaws++ = paddr;
-	paddr &= -IDA_BLOCK_SIZE;
+	cidaw = idal_nr_words(vaddr, length);
+	paddr = dma64_and(paddr, -IDA_BLOCK_SIZE);
 	while (--cidaw > 0) {
-		paddr += IDA_BLOCK_SIZE;
+		paddr = dma64_add(paddr, IDA_BLOCK_SIZE);
 		*idaws++ = paddr;
 	}
 	return idaws;
@@ -79,36 +85,33 @@ static inline unsigned long *idal_create_words(unsigned long *idaws,
  * Sets the address of the data in CCW.
  * If necessary it allocates an IDAL and sets the appropriate flags.
  */
-static inline int
-set_normalized_cda(struct ccw1 * ccw, void *vaddr)
+static inline int set_normalized_cda(struct ccw1 *ccw, void *vaddr)
 {
 	unsigned int nridaws;
-	unsigned long *idal;
+	dma64_t *idal;
 
 	if (ccw->flags & CCW_FLAG_IDA)
 		return -EINVAL;
 	nridaws = idal_nr_words(vaddr, ccw->count);
 	if (nridaws > 0) {
-		idal = kmalloc(nridaws * sizeof(unsigned long),
-			       GFP_ATOMIC | GFP_DMA );
-		if (idal == NULL)
+		idal = kcalloc(nridaws, sizeof(*idal), GFP_ATOMIC | GFP_DMA);
+		if (!idal)
 			return -ENOMEM;
 		idal_create_words(idal, vaddr, ccw->count);
 		ccw->flags |= CCW_FLAG_IDA;
 		vaddr = idal;
 	}
-	ccw->cda = (__u32)(unsigned long) vaddr;
+	ccw->cda = virt_to_dma32(vaddr);
 	return 0;
 }
 
 /*
  * Releases any allocated IDAL related to the CCW.
  */
-static inline void
-clear_normalized_cda(struct ccw1 * ccw)
+static inline void clear_normalized_cda(struct ccw1 *ccw)
 {
 	if (ccw->flags & CCW_FLAG_IDA) {
-		kfree((void *)(unsigned long) ccw->cda);
+		kfree(dma32_to_virt(ccw->cda));
 		ccw->flags &= ~CCW_FLAG_IDA;
 	}
 	ccw->cda = 0;
@@ -120,125 +123,138 @@ clear_normalized_cda(struct ccw1 * ccw)
 struct idal_buffer {
 	size_t size;
 	size_t page_order;
-	void *data[];
+	dma64_t data[];
 };
 
 /*
  * Allocate an idal buffer
  */
-static inline struct idal_buffer *
-idal_buffer_alloc(size_t size, int page_order)
+static inline struct idal_buffer *idal_buffer_alloc(size_t size, int page_order)
 {
-	struct idal_buffer *ib;
 	int nr_chunks, nr_ptrs, i;
+	struct idal_buffer *ib;
+	void *vaddr;
 
-	nr_ptrs = (size + IDA_BLOCK_SIZE - 1) >> IDA_SIZE_LOG;
-	nr_chunks = (4096 << page_order) >> IDA_SIZE_LOG;
+	nr_ptrs = (size + IDA_BLOCK_SIZE - 1) >> IDA_SIZE_SHIFT;
+	nr_chunks = (PAGE_SIZE << page_order) >> IDA_SIZE_SHIFT;
 	ib = kmalloc(struct_size(ib, data, nr_ptrs), GFP_DMA | GFP_KERNEL);
-	if (ib == NULL)
+	if (!ib)
 		return ERR_PTR(-ENOMEM);
 	ib->size = size;
 	ib->page_order = page_order;
 	for (i = 0; i < nr_ptrs; i++) {
-		if ((i & (nr_chunks - 1)) != 0) {
-			ib->data[i] = ib->data[i-1] + IDA_BLOCK_SIZE;
-			continue;
-		}
-		ib->data[i] = (void *)
-			__get_free_pages(GFP_KERNEL, page_order);
-		if (ib->data[i] != NULL)
+		if (i & (nr_chunks - 1)) {
+			ib->data[i] = dma64_add(ib->data[i - 1], IDA_BLOCK_SIZE);
 			continue;
-		// Not enough memory
-		while (i >= nr_chunks) {
-			i -= nr_chunks;
-			free_pages((unsigned long) ib->data[i],
-				   ib->page_order);
 		}
-		kfree(ib);
-		return ERR_PTR(-ENOMEM);
+		vaddr = (void *)__get_free_pages(GFP_KERNEL, page_order);
+		if (!vaddr)
+			goto error;
+		ib->data[i] = virt_to_dma64(vaddr);
 	}
 	return ib;
+error:
+	while (i >= nr_chunks) {
+		i -= nr_chunks;
+		vaddr = dma64_to_virt(ib->data[i]);
+		free_pages((unsigned long)vaddr, ib->page_order);
+	}
+	kfree(ib);
+	return ERR_PTR(-ENOMEM);
 }
 
 /*
  * Free an idal buffer.
  */
-static inline void
-idal_buffer_free(struct idal_buffer *ib)
+static inline void idal_buffer_free(struct idal_buffer *ib)
 {
 	int nr_chunks, nr_ptrs, i;
+	void *vaddr;
 
-	nr_ptrs = (ib->size + IDA_BLOCK_SIZE - 1) >> IDA_SIZE_LOG;
-	nr_chunks = (4096 << ib->page_order) >> IDA_SIZE_LOG;
-	for (i = 0; i < nr_ptrs; i += nr_chunks)
-		free_pages((unsigned long) ib->data[i], ib->page_order);
+	nr_ptrs = (ib->size + IDA_BLOCK_SIZE - 1) >> IDA_SIZE_SHIFT;
+	nr_chunks = (PAGE_SIZE << ib->page_order) >> IDA_SIZE_SHIFT;
+	for (i = 0; i < nr_ptrs; i += nr_chunks) {
+		vaddr = dma64_to_virt(ib->data[i]);
+		free_pages((unsigned long)vaddr, ib->page_order);
+	}
 	kfree(ib);
 }
 
 /*
  * Test if a idal list is really needed.
  */
-static inline int
-__idal_buffer_is_needed(struct idal_buffer *ib)
+static inline bool __idal_buffer_is_needed(struct idal_buffer *ib)
 {
-	return ib->size > (4096ul << ib->page_order) ||
-		idal_is_needed(ib->data[0], ib->size);
+	if (ib->size > (PAGE_SIZE << ib->page_order))
+		return true;
+	return idal_is_needed(dma64_to_virt(ib->data[0]), ib->size);
 }
 
 /*
  * Set channel data address to idal buffer.
  */
-static inline void
-idal_buffer_set_cda(struct idal_buffer *ib, struct ccw1 *ccw)
+static inline void idal_buffer_set_cda(struct idal_buffer *ib, struct ccw1 *ccw)
 {
+	void *vaddr;
+
 	if (__idal_buffer_is_needed(ib)) {
-		// setup idals;
-		ccw->cda = (u32)(addr_t) ib->data;
+		/* Setup idals */
+		ccw->cda = virt_to_dma32(ib->data);
 		ccw->flags |= CCW_FLAG_IDA;
-	} else
-		// we do not need idals - use direct addressing
-		ccw->cda = (u32)(addr_t) ib->data[0];
+	} else {
+		/*
+		 * No idals needed - use direct addressing. Convert from
+		 * dma64_t to virt and then to dma32_t only because of type
+		 * checking. The physical address is known to be below 2GB.
+		 */
+		vaddr = dma64_to_virt(ib->data[0]);
+		ccw->cda = virt_to_dma32(vaddr);
+	}
 	ccw->count = ib->size;
 }
 
 /*
  * Copy count bytes from an idal buffer to user memory
  */
-static inline size_t
-idal_buffer_to_user(struct idal_buffer *ib, void __user *to, size_t count)
+static inline size_t idal_buffer_to_user(struct idal_buffer *ib, void __user *to, size_t count)
 {
 	size_t left;
+	void *vaddr;
 	int i;
 
 	BUG_ON(count > ib->size);
 	for (i = 0; count > IDA_BLOCK_SIZE; i++) {
-		left = copy_to_user(to, ib->data[i], IDA_BLOCK_SIZE);
+		vaddr = dma64_to_virt(ib->data[i]);
+		left = copy_to_user(to, vaddr, IDA_BLOCK_SIZE);
 		if (left)
 			return left + count - IDA_BLOCK_SIZE;
-		to = (void __user *) to + IDA_BLOCK_SIZE;
+		to = (void __user *)to + IDA_BLOCK_SIZE;
 		count -= IDA_BLOCK_SIZE;
 	}
-	return copy_to_user(to, ib->data[i], count);
+	vaddr = dma64_to_virt(ib->data[i]);
+	return copy_to_user(to, vaddr, count);
 }
 
 /*
  * Copy count bytes from user memory to an idal buffer
  */
-static inline size_t
-idal_buffer_from_user(struct idal_buffer *ib, const void __user *from, size_t count)
+static inline size_t idal_buffer_from_user(struct idal_buffer *ib, const void __user *from, size_t count)
 {
 	size_t left;
+	void *vaddr;
 	int i;
 
 	BUG_ON(count > ib->size);
 	for (i = 0; count > IDA_BLOCK_SIZE; i++) {
-		left = copy_from_user(ib->data[i], from, IDA_BLOCK_SIZE);
+		vaddr = dma64_to_virt(ib->data[i]);
+		left = copy_from_user(vaddr, from, IDA_BLOCK_SIZE);
 		if (left)
 			return left + count - IDA_BLOCK_SIZE;
-		from = (void __user *) from + IDA_BLOCK_SIZE;
+		from = (void __user *)from + IDA_BLOCK_SIZE;
 		count -= IDA_BLOCK_SIZE;
 	}
-	return copy_from_user(ib->data[i], from, count);
+	vaddr = dma64_to_virt(ib->data[i]);
+	return copy_from_user(vaddr, from, count);
 }
 
 #endif
diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h
index ded9548d11d9..9381879f7ecf 100644
--- a/arch/s390/include/asm/page.h
+++ b/arch/s390/include/asm/page.h
@@ -181,9 +181,35 @@ int arch_make_page_accessible(struct page *page);
 #define __PAGE_OFFSET		0x0UL
 #define PAGE_OFFSET		0x0UL
 
-#define __pa(x)			((unsigned long)(x))
+#define __pa_nodebug(x)		((unsigned long)(x))
+
+#ifdef __DECOMPRESSOR
+
+#define __pa(x)			__pa_nodebug(x)
+#define __pa32(x)		__pa(x)
 #define __va(x)			((void *)(unsigned long)(x))
 
+#else /* __DECOMPRESSOR */
+
+#ifdef CONFIG_DEBUG_VIRTUAL
+
+unsigned long __phys_addr(unsigned long x, bool is_31bit);
+
+#else /* CONFIG_DEBUG_VIRTUAL */
+
+static inline unsigned long __phys_addr(unsigned long x, bool is_31bit)
+{
+	return __pa_nodebug(x);
+}
+
+#endif /* CONFIG_DEBUG_VIRTUAL */
+
+#define __pa(x)			__phys_addr((unsigned long)(x), false)
+#define __pa32(x)		__phys_addr((unsigned long)(x), true)
+#define __va(x)			((void *)(unsigned long)(x))
+
+#endif /* __DECOMPRESSOR */
+
 #define phys_to_pfn(phys)	((phys) >> PAGE_SHIFT)
 #define pfn_to_phys(pfn)	((pfn) << PAGE_SHIFT)
 
@@ -205,7 +231,7 @@ static inline unsigned long virt_to_pfn(const void *kaddr)
 #define virt_to_page(kaddr)	pfn_to_page(virt_to_pfn(kaddr))
 #define page_to_virt(page)	pfn_to_virt(page_to_pfn(page))
 
-#define virt_addr_valid(kaddr)	pfn_valid(virt_to_pfn(kaddr))
+#define virt_addr_valid(kaddr)	pfn_valid(phys_to_pfn(__pa_nodebug(kaddr)))
 
 #define VM_DATA_DEFAULT_FLAGS	VM_DATA_FLAGS_NON_EXEC
 
diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h
index 7cf00cf8fb0b..db9982f0e8cd 100644
--- a/arch/s390/include/asm/processor.h
+++ b/arch/s390/include/asm/processor.h
@@ -14,11 +14,13 @@
 
 #include <linux/bits.h>
 
+#define CIF_SIE			0	/* CPU needs SIE exit cleanup */
 #define CIF_NOHZ_DELAY		2	/* delay HZ disable for a tick */
 #define CIF_ENABLED_WAIT	5	/* in enabled wait state */
 #define CIF_MCCK_GUEST		6	/* machine check happening in guest */
 #define CIF_DEDICATED_CPU	7	/* this CPU is dedicated */
 
+#define _CIF_SIE		BIT(CIF_SIE)
 #define _CIF_NOHZ_DELAY		BIT(CIF_NOHZ_DELAY)
 #define _CIF_ENABLED_WAIT	BIT(CIF_ENABLED_WAIT)
 #define _CIF_MCCK_GUEST		BIT(CIF_MCCK_GUEST)
diff --git a/arch/s390/include/asm/ptrace.h b/arch/s390/include/asm/ptrace.h
index 788bc4467445..2ad9324f6338 100644
--- a/arch/s390/include/asm/ptrace.h
+++ b/arch/s390/include/asm/ptrace.h
@@ -14,13 +14,11 @@
 #define PIF_SYSCALL			0	/* inside a system call */
 #define PIF_EXECVE_PGSTE_RESTART	1	/* restart execve for PGSTE binaries */
 #define PIF_SYSCALL_RET_SET		2	/* return value was set via ptrace */
-#define PIF_GUEST_FAULT			3	/* indicates program check in sie64a */
 #define PIF_FTRACE_FULL_REGS		4	/* all register contents valid (ftrace) */
 
 #define _PIF_SYSCALL			BIT(PIF_SYSCALL)
 #define _PIF_EXECVE_PGSTE_RESTART	BIT(PIF_EXECVE_PGSTE_RESTART)
 #define _PIF_SYSCALL_RET_SET		BIT(PIF_SYSCALL_RET_SET)
-#define _PIF_GUEST_FAULT		BIT(PIF_GUEST_FAULT)
 #define _PIF_FTRACE_FULL_REGS		BIT(PIF_FTRACE_FULL_REGS)
 
 #define PSW32_MASK_PER		_AC(0x40000000, UL)
diff --git a/arch/s390/include/asm/qdio.h b/arch/s390/include/asm/qdio.h
index 2f983e0b95e0..69c4ead0c332 100644
--- a/arch/s390/include/asm/qdio.h
+++ b/arch/s390/include/asm/qdio.h
@@ -9,8 +9,9 @@
 #define __QDIO_H__
 
 #include <linux/interrupt.h>
-#include <asm/cio.h>
+#include <asm/dma-types.h>
 #include <asm/ccwdev.h>
+#include <asm/cio.h>
 
 /* only use 4 queues to save some cachelines */
 #define QDIO_MAX_QUEUES_PER_IRQ		4
@@ -34,9 +35,9 @@
  * @dkey: access key for SLSB
  */
 struct qdesfmt0 {
-	u64 sliba;
-	u64 sla;
-	u64 slsba;
+	dma64_t sliba;
+	dma64_t sla;
+	dma64_t slsba;
 	u32	 : 32;
 	u32 akey : 4;
 	u32 bkey : 4;
@@ -74,7 +75,7 @@ struct qdr {
 	/* private: */
 	u32 res[9];
 	/* public: */
-	u64 qiba;
+	dma64_t qiba;
 	u32	   : 32;
 	u32 qkey   : 4;
 	u32	   : 28;
@@ -146,7 +147,7 @@ struct qaob {
 	u8 flags;
 	u16 cbtbs;
 	u8 sb_count;
-	u64 sba[QDIO_MAX_ELEMENTS_PER_BUFFER];
+	dma64_t sba[QDIO_MAX_ELEMENTS_PER_BUFFER];
 	u16 dcount[QDIO_MAX_ELEMENTS_PER_BUFFER];
 	u64 user0;
 	u64 res4[2];
@@ -208,7 +209,7 @@ struct qdio_buffer_element {
 	u8 scount;
 	u8 sflags;
 	u32 length;
-	u64 addr;
+	dma64_t addr;
 } __attribute__ ((packed, aligned(16)));
 
 /**
@@ -224,7 +225,7 @@ struct qdio_buffer {
  * @sbal: absolute SBAL address
  */
 struct sl_element {
-	u64 sbal;
+	dma64_t sbal;
 } __attribute__ ((packed));
 
 /**
diff --git a/arch/s390/include/asm/scsw.h b/arch/s390/include/asm/scsw.h
index 322bdcd4b616..56003e26cdbf 100644
--- a/arch/s390/include/asm/scsw.h
+++ b/arch/s390/include/asm/scsw.h
@@ -11,6 +11,7 @@
 
 #include <linux/types.h>
 #include <asm/css_chars.h>
+#include <asm/dma-types.h>
 #include <asm/cio.h>
 
 /**
@@ -53,7 +54,7 @@ struct cmd_scsw {
 	__u32 fctl : 3;
 	__u32 actl : 7;
 	__u32 stctl : 5;
-	__u32 cpa;
+	dma32_t cpa;
 	__u32 dstat : 8;
 	__u32 cstat : 8;
 	__u32 count : 16;
@@ -93,7 +94,7 @@ struct tm_scsw {
 	u32 fctl:3;
 	u32 actl:7;
 	u32 stctl:5;
-	u32 tcw;
+	dma32_t tcw;
 	u32 dstat:8;
 	u32 cstat:8;
 	u32 fcxs:8;
@@ -125,7 +126,7 @@ struct eadm_scsw {
 	u32 fctl:3;
 	u32 actl:7;
 	u32 stctl:5;
-	u32 aob;
+	dma32_t aob;
 	u32 dstat:8;
 	u32 cstat:8;
 	u32:16;
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index fc5277eab554..787394978bc0 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -119,33 +119,11 @@ _LPP_OFFSET	= __LC_LPP
 	.endm
 
 #if IS_ENABLED(CONFIG_KVM)
-	/*
-	 * The OUTSIDE macro jumps to the provided label in case the value
-	 * in the provided register is outside of the provided range. The
-	 * macro is useful for checking whether a PSW stored in a register
-	 * pair points inside or outside of a block of instructions.
-	 * @reg: register to check
-	 * @start: start of the range
-	 * @end: end of the range
-	 * @outside_label: jump here if @reg is outside of [@start..@end)
-	 */
-	.macro OUTSIDE reg,start,end,outside_label
-	lgr	%r14,\reg
-	larl	%r13,\start
-	slgr	%r14,%r13
-	clgfrl	%r14,.Lrange_size\@
-	jhe	\outside_label
-	.section .rodata, "a"
-	.balign 4
-.Lrange_size\@:
-	.long	\end - \start
-	.previous
-	.endm
-
-	.macro SIEEXIT
-	lg	%r9,__SF_SIE_CONTROL(%r15)	# get control block pointer
+	.macro SIEEXIT sie_control
+	lg	%r9,\sie_control		# get control block pointer
 	ni	__SIE_PROG0C+3(%r9),0xfe	# no longer in SIE
 	lctlg	%c1,%c1,__LC_KERNEL_ASCE	# load primary asce
+	ni	__LC_CPU_FLAGS+7,255-_CIF_SIE
 	larl	%r9,sie_exit			# skip forward to sie_exit
 	.endm
 #endif
@@ -214,6 +192,7 @@ SYM_FUNC_START(__sie64a)
 	lg	%r14,__LC_GMAP			# get gmap pointer
 	ltgr	%r14,%r14
 	jz	.Lsie_gmap
+	oi	__LC_CPU_FLAGS+7,_CIF_SIE
 	lctlg	%c1,%c1,__GMAP_ASCE(%r14)	# load primary asce
 .Lsie_gmap:
 	lg	%r14,__SF_SIE_CONTROL(%r15)	# get control block pointer
@@ -234,7 +213,7 @@ SYM_FUNC_START(__sie64a)
 	lg	%r14,__SF_SIE_CONTROL(%r15)	# get control block pointer
 	ni	__SIE_PROG0C+3(%r14),0xfe	# no longer in SIE
 	lctlg	%c1,%c1,__LC_KERNEL_ASCE	# load primary asce
-.Lsie_done:
+	ni	__LC_CPU_FLAGS+7,255-_CIF_SIE
 # some program checks are suppressing. C code (e.g. do_protection_exception)
 # will rewind the PSW by the ILC, which is often 4 bytes in case of SIE. There
 # are some corner cases (e.g. runtime instrumentation) where ILC is unpredictable.
@@ -337,20 +316,13 @@ SYM_CODE_START(pgm_check_handler)
 	stpt	__LC_SYS_ENTER_TIMER
 	BPOFF
 	stmg	%r8,%r15,__LC_SAVE_AREA_SYNC
-	lghi	%r10,0
+	lgr	%r10,%r15
 	lmg	%r8,%r9,__LC_PGM_OLD_PSW
 	tmhh	%r8,0x0001		# coming from user space?
 	jno	.Lpgm_skip_asce
 	lctlg	%c1,%c1,__LC_KERNEL_ASCE
 	j	3f			# -> fault in user space
 .Lpgm_skip_asce:
-#if IS_ENABLED(CONFIG_KVM)
-	# cleanup critical section for program checks in __sie64a
-	OUTSIDE	%r9,.Lsie_gmap,.Lsie_done,1f
-	BPENTER	__SF_SIE_FLAGS(%r15),_TIF_ISOLATE_BP_GUEST
-	SIEEXIT
-	lghi	%r10,_PIF_GUEST_FAULT
-#endif
 1:	tmhh	%r8,0x4000		# PER bit set in old PSW ?
 	jnz	2f			# -> enabled, can't be a double fault
 	tm	__LC_PGM_ILC+3,0x80	# check for per exception
@@ -361,13 +333,20 @@ SYM_CODE_START(pgm_check_handler)
 	CHECK_VMAP_STACK __LC_SAVE_AREA_SYNC,4f
 3:	lg	%r15,__LC_KERNEL_STACK
 4:	la	%r11,STACK_FRAME_OVERHEAD(%r15)
-	stg	%r10,__PT_FLAGS(%r11)
+	xc	__PT_FLAGS(8,%r11),__PT_FLAGS(%r11)
 	xc	__SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
 	stmg	%r0,%r7,__PT_R0(%r11)
 	mvc	__PT_R8(64,%r11),__LC_SAVE_AREA_SYNC
 	mvc	__PT_LAST_BREAK(8,%r11),__LC_PGM_LAST_BREAK
-	stmg	%r8,%r9,__PT_PSW(%r11)
-
+	stctg	%c1,%c1,__PT_CR1(%r11)
+#if IS_ENABLED(CONFIG_KVM)
+	lg	%r12,__LC_GMAP
+	clc	__GMAP_ASCE(8,%r12), __PT_CR1(%r11)
+	jne	5f
+	BPENTER	__SF_SIE_FLAGS(%r10),_TIF_ISOLATE_BP_GUEST
+	SIEEXIT __SF_SIE_CONTROL(%r10)
+#endif
+5:	stmg	%r8,%r9,__PT_PSW(%r11)
 	# clear user controlled registers to prevent speculative use
 	xgr	%r0,%r0
 	xgr	%r1,%r1
@@ -416,9 +395,10 @@ SYM_CODE_START(\name)
 	tmhh	%r8,0x0001			# interrupting from user ?
 	jnz	1f
 #if IS_ENABLED(CONFIG_KVM)
-	OUTSIDE	%r9,.Lsie_gmap,.Lsie_done,0f
+	TSTMSK	__LC_CPU_FLAGS,_CIF_SIE
+	jz	0f
 	BPENTER	__SF_SIE_FLAGS(%r15),_TIF_ISOLATE_BP_GUEST
-	SIEEXIT
+	SIEEXIT __SF_SIE_CONTROL(%r15)
 #endif
 0:	CHECK_STACK __LC_SAVE_AREA_ASYNC
 	aghi	%r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
@@ -513,11 +493,20 @@ SYM_CODE_START(mcck_int_handler)
 	TSTMSK	__LC_MCCK_CODE,MCCK_CODE_PSW_IA_VALID
 	jno	.Lmcck_panic
 #if IS_ENABLED(CONFIG_KVM)
-	OUTSIDE	%r9,.Lsie_gmap,.Lsie_done,.Lmcck_user
-	OUTSIDE	%r9,.Lsie_entry,.Lsie_leave,4f
+	TSTMSK	__LC_CPU_FLAGS,_CIF_SIE
+	jz	.Lmcck_user
+	# Need to compare the address instead of a CIF_SIE* flag.
+	# Otherwise there would be a race between setting the flag
+	# and entering SIE (or leaving and clearing the flag). This
+	# would cause machine checks targeted at the guest to be
+	# handled by the host.
+	larl	%r14,.Lsie_entry
+	clgrjl	%r9,%r14, 4f
+	larl	%r14,.Lsie_leave
+	clgrjhe	%r9,%r14, 4f
 	oi	__LC_CPU_FLAGS+7, _CIF_MCCK_GUEST
 4:	BPENTER	__SF_SIE_FLAGS(%r15),_TIF_ISOLATE_BP_GUEST
-	SIEEXIT
+	SIEEXIT __SF_SIE_CONTROL(%r15)
 #endif
 .Lmcck_user:
 	lg	%r15,__LC_MCCK_STACK
diff --git a/arch/s390/kernel/sysinfo.c b/arch/s390/kernel/sysinfo.c
index 1b1be3110cfc..2be30a96696a 100644
--- a/arch/s390/kernel/sysinfo.c
+++ b/arch/s390/kernel/sysinfo.c
@@ -397,7 +397,7 @@ static void service_level_vm_print(struct seq_file *m,
 {
 	char *query_buffer, *str;
 
-	query_buffer = kmalloc(1024, GFP_KERNEL | GFP_DMA);
+	query_buffer = kmalloc(1024, GFP_KERNEL);
 	if (!query_buffer)
 		return;
 	cpcmd("QUERY CPLEVEL", query_buffer, 1024, NULL);
diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c
index e0a88dcaf5cb..24a18e5ef6e8 100644
--- a/arch/s390/kernel/vtime.c
+++ b/arch/s390/kernel/vtime.c
@@ -210,13 +210,13 @@ void vtime_flush(struct task_struct *tsk)
 		virt_timer_expire();
 
 	steal = S390_lowcore.steal_timer;
-	avg_steal = S390_lowcore.avg_steal_timer / 2;
+	avg_steal = S390_lowcore.avg_steal_timer;
 	if ((s64) steal > 0) {
 		S390_lowcore.steal_timer = 0;
 		account_steal_time(cputime_to_nsecs(steal));
 		avg_steal += steal;
 	}
-	S390_lowcore.avg_steal_timer = avg_steal;
+	S390_lowcore.avg_steal_timer = avg_steal / 2;
 }
 
 static u64 vtime_delta(void)
diff --git a/arch/s390/mm/Makefile b/arch/s390/mm/Makefile
index 352ff520fd94..f6c2db7a8669 100644
--- a/arch/s390/mm/Makefile
+++ b/arch/s390/mm/Makefile
@@ -7,6 +7,7 @@ obj-y		:= init.o fault.o extmem.o mmap.o vmem.o maccess.o
 obj-y		+= page-states.o pageattr.o pgtable.o pgalloc.o extable.o
 
 obj-$(CONFIG_CMM)		+= cmm.o
+obj-$(CONFIG_DEBUG_VIRTUAL)	+= physaddr.o
 obj-$(CONFIG_HUGETLB_PAGE)	+= hugetlbpage.o
 obj-$(CONFIG_PTDUMP_CORE)	+= dump_pagetables.o
 obj-$(CONFIG_PGSTE)		+= gmap.o
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index ac4c78546d97..c421dd44ffbe 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -67,13 +67,15 @@ early_initcall(fault_init);
 static enum fault_type get_fault_type(struct pt_regs *regs)
 {
 	union teid teid = { .val = regs->int_parm_long };
+	struct gmap *gmap;
 
 	if (likely(teid.as == PSW_BITS_AS_PRIMARY)) {
 		if (user_mode(regs))
 			return USER_FAULT;
 		if (!IS_ENABLED(CONFIG_PGSTE))
 			return KERNEL_FAULT;
-		if (test_pt_regs_flag(regs, PIF_GUEST_FAULT))
+		gmap = (struct gmap *)S390_lowcore.gmap;
+		if (regs->cr1 == gmap->asce)
 			return GMAP_FAULT;
 		return KERNEL_FAULT;
 	}
diff --git a/arch/s390/mm/physaddr.c b/arch/s390/mm/physaddr.c
new file mode 100644
index 000000000000..59de866c72d9
--- /dev/null
+++ b/arch/s390/mm/physaddr.c
@@ -0,0 +1,15 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/mmdebug.h>
+#include <linux/export.h>
+#include <linux/mm.h>
+#include <asm/page.h>
+
+unsigned long __phys_addr(unsigned long x, bool is_31bit)
+{
+	VIRTUAL_BUG_ON(is_vmalloc_or_module_addr((void *)(x)));
+	x = __pa_nodebug(x);
+	if (is_31bit)
+		VIRTUAL_BUG_ON(x >> 31);
+	return x;
+}
+EXPORT_SYMBOL(__phys_addr);
diff --git a/arch/sh/cchips/hd6446x/hd64461.c b/arch/sh/cchips/hd6446x/hd64461.c
index f3fba967445a..81764882d87d 100644
--- a/arch/sh/cchips/hd6446x/hd64461.c
+++ b/arch/sh/cchips/hd6446x/hd64461.c
@@ -72,7 +72,7 @@ static void hd64461_irq_demux(struct irq_desc *desc)
 	}
 }
 
-int __init setup_hd64461(void)
+static int __init setup_hd64461(void)
 {
 	int irq_base, i;
 
diff --git a/arch/sh/drivers/dma/dma-sysfs.c b/arch/sh/drivers/dma/dma-sysfs.c
index 431bc18f0a41..9f666280d80c 100644
--- a/arch/sh/drivers/dma/dma-sysfs.c
+++ b/arch/sh/drivers/dma/dma-sysfs.c
@@ -15,7 +15,7 @@
 #include <linux/string.h>
 #include <asm/dma.h>
 
-static struct bus_type dma_subsys = {
+static const struct bus_type dma_subsys = {
 	.name = "dma",
 	.dev_name = "dma",
 };
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 7aed87cbf386..39886bab943a 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -472,10 +472,6 @@ config X86_MPPARSE
 	  For old smp systems that do not have proper acpi support. Newer systems
 	  (esp with 64bit cpus) with acpi support, MADT and DSDT will override it
 
-config GOLDFISH
-	def_bool y
-	depends on X86_GOLDFISH
-
 config X86_CPU_RESCTRL
 	bool "x86 CPU resource control support"
 	depends on X86 && (CPU_SUP_INTEL || CPU_SUP_AMD)
diff --git a/arch/x86/boot/compressed/efi_mixed.S b/arch/x86/boot/compressed/efi_mixed.S
index f4e22ef774ab..719e939050cb 100644
--- a/arch/x86/boot/compressed/efi_mixed.S
+++ b/arch/x86/boot/compressed/efi_mixed.S
@@ -49,6 +49,11 @@ SYM_FUNC_START(startup_64_mixed_mode)
 	lea	efi32_boot_args(%rip), %rdx
 	mov	0(%rdx), %edi
 	mov	4(%rdx), %esi
+
+	/* Switch to the firmware's stack */
+	movl	efi32_boot_sp(%rip), %esp
+	andl	$~7, %esp
+
 #ifdef CONFIG_EFI_HANDOVER_PROTOCOL
 	mov	8(%rdx), %edx		// saved bootparams pointer
 	test	%edx, %edx
@@ -254,6 +259,9 @@ SYM_FUNC_START_LOCAL(efi32_entry)
 	/* Store firmware IDT descriptor */
 	sidtl	(efi32_boot_idt - 1b)(%ebx)
 
+	/* Store firmware stack pointer */
+	movl	%esp, (efi32_boot_sp - 1b)(%ebx)
+
 	/* Store boot arguments */
 	leal	(efi32_boot_args - 1b)(%ebx), %ebx
 	movl	%ecx, 0(%ebx)
@@ -318,5 +326,6 @@ SYM_DATA_END(efi32_boot_idt)
 
 SYM_DATA_LOCAL(efi32_boot_cs, .word 0)
 SYM_DATA_LOCAL(efi32_boot_ds, .word 0)
+SYM_DATA_LOCAL(efi32_boot_sp, .long 0)
 SYM_DATA_LOCAL(efi32_boot_args, .long 0, 0, 0)
 SYM_DATA(efi_is64, .byte 1)
diff --git a/arch/x86/configs/tiny.config b/arch/x86/configs/tiny.config
index 66c9e2aab16c..be3ee4294903 100644
--- a/arch/x86/configs/tiny.config
+++ b/arch/x86/configs/tiny.config
@@ -1,5 +1,6 @@
 CONFIG_NOHIGHMEM=y
 # CONFIG_HIGHMEM4G is not set
 # CONFIG_HIGHMEM64G is not set
+# CONFIG_UNWINDER_ORC is not set
 CONFIG_UNWINDER_GUESS=y
 # CONFIG_UNWINDER_FRAME_POINTER is not set
diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile
index 620f6257bbe9..fd63051bbbbb 100644
--- a/arch/x86/entry/vdso/Makefile
+++ b/arch/x86/entry/vdso/Makefile
@@ -9,7 +9,9 @@ include $(srctree)/lib/vdso/Makefile
 # Sanitizer runtimes are unavailable and cannot be linked here.
 KASAN_SANITIZE			:= n
 KMSAN_SANITIZE_vclock_gettime.o := n
+KMSAN_SANITIZE_vdso32/vclock_gettime.o	:= n
 KMSAN_SANITIZE_vgetcpu.o	:= n
+KMSAN_SANITIZE_vdso32/vgetcpu.o	:= n
 
 UBSAN_SANITIZE			:= n
 KCSAN_SANITIZE			:= n
diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
index 8f3a4d16bb79..17a71e92a343 100644
--- a/arch/x86/hyperv/hv_init.c
+++ b/arch/x86/hyperv/hv_init.c
@@ -667,14 +667,14 @@ void hyperv_cleanup(void)
 	hv_hypercall_pg = NULL;
 
 	/* Reset the hypercall page */
-	hypercall_msr.as_uint64 = hv_get_register(HV_X64_MSR_HYPERCALL);
+	hypercall_msr.as_uint64 = hv_get_msr(HV_X64_MSR_HYPERCALL);
 	hypercall_msr.enable = 0;
-	hv_set_register(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
+	hv_set_msr(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
 
 	/* Reset the TSC page */
-	tsc_msr.as_uint64 = hv_get_register(HV_X64_MSR_REFERENCE_TSC);
+	tsc_msr.as_uint64 = hv_get_msr(HV_X64_MSR_REFERENCE_TSC);
 	tsc_msr.enable = 0;
-	hv_set_register(HV_X64_MSR_REFERENCE_TSC, tsc_msr.as_uint64);
+	hv_set_msr(HV_X64_MSR_REFERENCE_TSC, tsc_msr.as_uint64);
 }
 
 void hyperv_report_panic(struct pt_regs *regs, long err, bool in_die)
diff --git a/arch/x86/hyperv/hv_spinlock.c b/arch/x86/hyperv/hv_spinlock.c
index 737d6f7a6155..151e851bef09 100644
--- a/arch/x86/hyperv/hv_spinlock.c
+++ b/arch/x86/hyperv/hv_spinlock.c
@@ -16,7 +16,7 @@
 #include <asm/paravirt.h>
 #include <asm/apic.h>
 
-static bool __initdata hv_pvspin = true;
+static bool hv_pvspin __initdata = true;
 
 static void hv_qlock_kick(int cpu)
 {
@@ -64,6 +64,7 @@ __visible bool hv_vcpu_is_preempted(int vcpu)
 {
 	return false;
 }
+
 PV_CALLEE_SAVE_REGS_THUNK(hv_vcpu_is_preempted);
 
 void __init hv_init_spinlocks(void)
diff --git a/arch/x86/hyperv/hv_vtl.c b/arch/x86/hyperv/hv_vtl.c
index edd2f35b2a5e..5c7de79423b8 100644
--- a/arch/x86/hyperv/hv_vtl.c
+++ b/arch/x86/hyperv/hv_vtl.c
@@ -12,6 +12,7 @@
 #include <asm/i8259.h>
 #include <asm/mshyperv.h>
 #include <asm/realmode.h>
+#include <../kernel/smpboot.h>
 
 extern struct boot_params boot_params;
 static struct real_mode_header hv_vtl_real_mode_header;
@@ -65,7 +66,7 @@ static void hv_vtl_ap_entry(void)
 	((secondary_startup_64_fn)secondary_startup_64)(&boot_params, &boot_params);
 }
 
-static int hv_vtl_bringup_vcpu(u32 target_vp_index, u64 eip_ignored)
+static int hv_vtl_bringup_vcpu(u32 target_vp_index, int cpu, u64 eip_ignored)
 {
 	u64 status;
 	int ret = 0;
@@ -79,7 +80,9 @@ static int hv_vtl_bringup_vcpu(u32 target_vp_index, u64 eip_ignored)
 	struct ldttss_desc *ldt;
 	struct desc_struct *gdt;
 
-	u64 rsp = current->thread.sp;
+	struct task_struct *idle = idle_thread_get(cpu);
+	u64 rsp = (unsigned long)idle->thread.sp;
+
 	u64 rip = (u64)&hv_vtl_ap_entry;
 
 	native_store_gdt(&gdt_ptr);
@@ -206,7 +209,15 @@ static int hv_vtl_apicid_to_vp_id(u32 apic_id)
 
 static int hv_vtl_wakeup_secondary_cpu(u32 apicid, unsigned long start_eip)
 {
-	int vp_id;
+	int vp_id, cpu;
+
+	/* Find the logical CPU for the APIC ID */
+	for_each_present_cpu(cpu) {
+		if (arch_match_cpu_phys_id(cpu, apicid))
+			break;
+	}
+	if (cpu >= nr_cpu_ids)
+		return -EINVAL;
 
 	pr_debug("Bringing up CPU with APIC ID %d in VTL2...\n", apicid);
 	vp_id = hv_vtl_apicid_to_vp_id(apicid);
@@ -220,7 +231,7 @@ static int hv_vtl_wakeup_secondary_cpu(u32 apicid, unsigned long start_eip)
 		return -EINVAL;
 	}
 
-	return hv_vtl_bringup_vcpu(vp_id, start_eip);
+	return hv_vtl_bringup_vcpu(vp_id, cpu, start_eip);
 }
 
 int __init hv_vtl_early_init(void)
diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h
index 2ff26f53cd62..3787d26810c1 100644
--- a/arch/x86/include/asm/hyperv-tlfs.h
+++ b/arch/x86/include/asm/hyperv-tlfs.h
@@ -182,7 +182,7 @@ enum hv_isolation_type {
 #define HV_X64_MSR_HYPERCALL			0x40000001
 
 /* MSR used to provide vcpu index */
-#define HV_REGISTER_VP_INDEX			0x40000002
+#define HV_X64_MSR_VP_INDEX			0x40000002
 
 /* MSR used to reset the guest OS. */
 #define HV_X64_MSR_RESET			0x40000003
@@ -191,10 +191,10 @@ enum hv_isolation_type {
 #define HV_X64_MSR_VP_RUNTIME			0x40000010
 
 /* MSR used to read the per-partition time reference counter */
-#define HV_REGISTER_TIME_REF_COUNT		0x40000020
+#define HV_X64_MSR_TIME_REF_COUNT		0x40000020
 
 /* A partition's reference time stamp counter (TSC) page */
-#define HV_REGISTER_REFERENCE_TSC		0x40000021
+#define HV_X64_MSR_REFERENCE_TSC		0x40000021
 
 /* MSR used to retrieve the TSC frequency */
 #define HV_X64_MSR_TSC_FREQUENCY		0x40000022
@@ -209,61 +209,61 @@ enum hv_isolation_type {
 #define HV_X64_MSR_VP_ASSIST_PAGE		0x40000073
 
 /* Define synthetic interrupt controller model specific registers. */
-#define HV_REGISTER_SCONTROL			0x40000080
-#define HV_REGISTER_SVERSION			0x40000081
-#define HV_REGISTER_SIEFP			0x40000082
-#define HV_REGISTER_SIMP			0x40000083
-#define HV_REGISTER_EOM				0x40000084
-#define HV_REGISTER_SINT0			0x40000090
-#define HV_REGISTER_SINT1			0x40000091
-#define HV_REGISTER_SINT2			0x40000092
-#define HV_REGISTER_SINT3			0x40000093
-#define HV_REGISTER_SINT4			0x40000094
-#define HV_REGISTER_SINT5			0x40000095
-#define HV_REGISTER_SINT6			0x40000096
-#define HV_REGISTER_SINT7			0x40000097
-#define HV_REGISTER_SINT8			0x40000098
-#define HV_REGISTER_SINT9			0x40000099
-#define HV_REGISTER_SINT10			0x4000009A
-#define HV_REGISTER_SINT11			0x4000009B
-#define HV_REGISTER_SINT12			0x4000009C
-#define HV_REGISTER_SINT13			0x4000009D
-#define HV_REGISTER_SINT14			0x4000009E
-#define HV_REGISTER_SINT15			0x4000009F
+#define HV_X64_MSR_SCONTROL			0x40000080
+#define HV_X64_MSR_SVERSION			0x40000081
+#define HV_X64_MSR_SIEFP			0x40000082
+#define HV_X64_MSR_SIMP				0x40000083
+#define HV_X64_MSR_EOM				0x40000084
+#define HV_X64_MSR_SINT0			0x40000090
+#define HV_X64_MSR_SINT1			0x40000091
+#define HV_X64_MSR_SINT2			0x40000092
+#define HV_X64_MSR_SINT3			0x40000093
+#define HV_X64_MSR_SINT4			0x40000094
+#define HV_X64_MSR_SINT5			0x40000095
+#define HV_X64_MSR_SINT6			0x40000096
+#define HV_X64_MSR_SINT7			0x40000097
+#define HV_X64_MSR_SINT8			0x40000098
+#define HV_X64_MSR_SINT9			0x40000099
+#define HV_X64_MSR_SINT10			0x4000009A
+#define HV_X64_MSR_SINT11			0x4000009B
+#define HV_X64_MSR_SINT12			0x4000009C
+#define HV_X64_MSR_SINT13			0x4000009D
+#define HV_X64_MSR_SINT14			0x4000009E
+#define HV_X64_MSR_SINT15			0x4000009F
 
 /*
  * Define synthetic interrupt controller model specific registers for
  * nested hypervisor.
  */
-#define HV_REGISTER_NESTED_SCONTROL            0x40001080
-#define HV_REGISTER_NESTED_SVERSION            0x40001081
-#define HV_REGISTER_NESTED_SIEFP               0x40001082
-#define HV_REGISTER_NESTED_SIMP                0x40001083
-#define HV_REGISTER_NESTED_EOM                 0x40001084
-#define HV_REGISTER_NESTED_SINT0               0x40001090
+#define HV_X64_MSR_NESTED_SCONTROL		0x40001080
+#define HV_X64_MSR_NESTED_SVERSION		0x40001081
+#define HV_X64_MSR_NESTED_SIEFP			0x40001082
+#define HV_X64_MSR_NESTED_SIMP			0x40001083
+#define HV_X64_MSR_NESTED_EOM			0x40001084
+#define HV_X64_MSR_NESTED_SINT0			0x40001090
 
 /*
  * Synthetic Timer MSRs. Four timers per vcpu.
  */
-#define HV_REGISTER_STIMER0_CONFIG		0x400000B0
-#define HV_REGISTER_STIMER0_COUNT		0x400000B1
-#define HV_REGISTER_STIMER1_CONFIG		0x400000B2
-#define HV_REGISTER_STIMER1_COUNT		0x400000B3
-#define HV_REGISTER_STIMER2_CONFIG		0x400000B4
-#define HV_REGISTER_STIMER2_COUNT		0x400000B5
-#define HV_REGISTER_STIMER3_CONFIG		0x400000B6
-#define HV_REGISTER_STIMER3_COUNT		0x400000B7
+#define HV_X64_MSR_STIMER0_CONFIG		0x400000B0
+#define HV_X64_MSR_STIMER0_COUNT		0x400000B1
+#define HV_X64_MSR_STIMER1_CONFIG		0x400000B2
+#define HV_X64_MSR_STIMER1_COUNT		0x400000B3
+#define HV_X64_MSR_STIMER2_CONFIG		0x400000B4
+#define HV_X64_MSR_STIMER2_COUNT		0x400000B5
+#define HV_X64_MSR_STIMER3_CONFIG		0x400000B6
+#define HV_X64_MSR_STIMER3_COUNT		0x400000B7
 
 /* Hyper-V guest idle MSR */
 #define HV_X64_MSR_GUEST_IDLE			0x400000F0
 
 /* Hyper-V guest crash notification MSR's */
-#define HV_REGISTER_CRASH_P0			0x40000100
-#define HV_REGISTER_CRASH_P1			0x40000101
-#define HV_REGISTER_CRASH_P2			0x40000102
-#define HV_REGISTER_CRASH_P3			0x40000103
-#define HV_REGISTER_CRASH_P4			0x40000104
-#define HV_REGISTER_CRASH_CTL			0x40000105
+#define HV_X64_MSR_CRASH_P0			0x40000100
+#define HV_X64_MSR_CRASH_P1			0x40000101
+#define HV_X64_MSR_CRASH_P2			0x40000102
+#define HV_X64_MSR_CRASH_P3			0x40000103
+#define HV_X64_MSR_CRASH_P4			0x40000104
+#define HV_X64_MSR_CRASH_CTL			0x40000105
 
 /* TSC emulation after migration */
 #define HV_X64_MSR_REENLIGHTENMENT_CONTROL	0x40000106
@@ -276,31 +276,38 @@ enum hv_isolation_type {
 /* HV_X64_MSR_TSC_INVARIANT_CONTROL bits */
 #define HV_EXPOSE_INVARIANT_TSC		BIT_ULL(0)
 
-/* Register name aliases for temporary compatibility */
-#define HV_X64_MSR_STIMER0_COUNT	HV_REGISTER_STIMER0_COUNT
-#define HV_X64_MSR_STIMER0_CONFIG	HV_REGISTER_STIMER0_CONFIG
-#define HV_X64_MSR_STIMER1_COUNT	HV_REGISTER_STIMER1_COUNT
-#define HV_X64_MSR_STIMER1_CONFIG	HV_REGISTER_STIMER1_CONFIG
-#define HV_X64_MSR_STIMER2_COUNT	HV_REGISTER_STIMER2_COUNT
-#define HV_X64_MSR_STIMER2_CONFIG	HV_REGISTER_STIMER2_CONFIG
-#define HV_X64_MSR_STIMER3_COUNT	HV_REGISTER_STIMER3_COUNT
-#define HV_X64_MSR_STIMER3_CONFIG	HV_REGISTER_STIMER3_CONFIG
-#define HV_X64_MSR_SCONTROL		HV_REGISTER_SCONTROL
-#define HV_X64_MSR_SVERSION		HV_REGISTER_SVERSION
-#define HV_X64_MSR_SIMP			HV_REGISTER_SIMP
-#define HV_X64_MSR_SIEFP		HV_REGISTER_SIEFP
-#define HV_X64_MSR_VP_INDEX		HV_REGISTER_VP_INDEX
-#define HV_X64_MSR_EOM			HV_REGISTER_EOM
-#define HV_X64_MSR_SINT0		HV_REGISTER_SINT0
-#define HV_X64_MSR_SINT15		HV_REGISTER_SINT15
-#define HV_X64_MSR_CRASH_P0		HV_REGISTER_CRASH_P0
-#define HV_X64_MSR_CRASH_P1		HV_REGISTER_CRASH_P1
-#define HV_X64_MSR_CRASH_P2		HV_REGISTER_CRASH_P2
-#define HV_X64_MSR_CRASH_P3		HV_REGISTER_CRASH_P3
-#define HV_X64_MSR_CRASH_P4		HV_REGISTER_CRASH_P4
-#define HV_X64_MSR_CRASH_CTL		HV_REGISTER_CRASH_CTL
-#define HV_X64_MSR_TIME_REF_COUNT	HV_REGISTER_TIME_REF_COUNT
-#define HV_X64_MSR_REFERENCE_TSC	HV_REGISTER_REFERENCE_TSC
+/*
+ * To support arch-generic code calling hv_set/get_register:
+ * - On x86, HV_MSR_ indicates an MSR accessed via rdmsrl/wrmsrl
+ * - On ARM, HV_MSR_ indicates a VP register accessed via hypercall
+ */
+#define HV_MSR_CRASH_P0		(HV_X64_MSR_CRASH_P0)
+#define HV_MSR_CRASH_P1		(HV_X64_MSR_CRASH_P1)
+#define HV_MSR_CRASH_P2		(HV_X64_MSR_CRASH_P2)
+#define HV_MSR_CRASH_P3		(HV_X64_MSR_CRASH_P3)
+#define HV_MSR_CRASH_P4		(HV_X64_MSR_CRASH_P4)
+#define HV_MSR_CRASH_CTL	(HV_X64_MSR_CRASH_CTL)
+
+#define HV_MSR_VP_INDEX		(HV_X64_MSR_VP_INDEX)
+#define HV_MSR_TIME_REF_COUNT	(HV_X64_MSR_TIME_REF_COUNT)
+#define HV_MSR_REFERENCE_TSC	(HV_X64_MSR_REFERENCE_TSC)
+
+#define HV_MSR_SINT0		(HV_X64_MSR_SINT0)
+#define HV_MSR_SVERSION		(HV_X64_MSR_SVERSION)
+#define HV_MSR_SCONTROL		(HV_X64_MSR_SCONTROL)
+#define HV_MSR_SIEFP		(HV_X64_MSR_SIEFP)
+#define HV_MSR_SIMP		(HV_X64_MSR_SIMP)
+#define HV_MSR_EOM		(HV_X64_MSR_EOM)
+
+#define HV_MSR_NESTED_SCONTROL	(HV_X64_MSR_NESTED_SCONTROL)
+#define HV_MSR_NESTED_SVERSION	(HV_X64_MSR_NESTED_SVERSION)
+#define HV_MSR_NESTED_SIEFP	(HV_X64_MSR_NESTED_SIEFP)
+#define HV_MSR_NESTED_SIMP	(HV_X64_MSR_NESTED_SIMP)
+#define HV_MSR_NESTED_EOM	(HV_X64_MSR_NESTED_EOM)
+#define HV_MSR_NESTED_SINT0	(HV_X64_MSR_NESTED_SINT0)
+
+#define HV_MSR_STIMER0_CONFIG	(HV_X64_MSR_STIMER0_CONFIG)
+#define HV_MSR_STIMER0_COUNT	(HV_X64_MSR_STIMER0_COUNT)
 
 /*
  * Registers are only accessible via HVCALL_GET_VP_REGISTERS hvcall and
diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h
index b65e9c46b922..d0941f4c2724 100644
--- a/arch/x86/include/asm/intel-family.h
+++ b/arch/x86/include/asm/intel-family.h
@@ -127,6 +127,7 @@
 
 #define INTEL_FAM6_ARROWLAKE_H		0xC5
 #define INTEL_FAM6_ARROWLAKE		0xC6
+#define INTEL_FAM6_ARROWLAKE_U		0xB5
 
 #define INTEL_FAM6_LUNARLAKE_M		0xBD
 
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index ce4ce8720d55..390c4d13956d 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -293,24 +293,24 @@ static inline void hv_ivm_msr_write(u64 msr, u64 value) {}
 static inline void hv_ivm_msr_read(u64 msr, u64 *value) {}
 #endif
 
-static inline bool hv_is_synic_reg(unsigned int reg)
+static inline bool hv_is_synic_msr(unsigned int reg)
 {
-	return (reg >= HV_REGISTER_SCONTROL) &&
-	       (reg <= HV_REGISTER_SINT15);
+	return (reg >= HV_X64_MSR_SCONTROL) &&
+	       (reg <= HV_X64_MSR_SINT15);
 }
 
-static inline bool hv_is_sint_reg(unsigned int reg)
+static inline bool hv_is_sint_msr(unsigned int reg)
 {
-	return (reg >= HV_REGISTER_SINT0) &&
-	       (reg <= HV_REGISTER_SINT15);
+	return (reg >= HV_X64_MSR_SINT0) &&
+	       (reg <= HV_X64_MSR_SINT15);
 }
 
-u64 hv_get_register(unsigned int reg);
-void hv_set_register(unsigned int reg, u64 value);
-u64 hv_get_non_nested_register(unsigned int reg);
-void hv_set_non_nested_register(unsigned int reg, u64 value);
+u64 hv_get_msr(unsigned int reg);
+void hv_set_msr(unsigned int reg, u64 value);
+u64 hv_get_non_nested_msr(unsigned int reg);
+void hv_set_non_nested_msr(unsigned int reg, u64 value);
 
-static __always_inline u64 hv_raw_get_register(unsigned int reg)
+static __always_inline u64 hv_raw_get_msr(unsigned int reg)
 {
 	return __rdmsr(reg);
 }
@@ -331,10 +331,10 @@ static inline int hyperv_flush_guest_mapping_range(u64 as,
 {
 	return -1;
 }
-static inline void hv_set_register(unsigned int reg, u64 value) { }
-static inline u64 hv_get_register(unsigned int reg) { return 0; }
-static inline void hv_set_non_nested_register(unsigned int reg, u64 value) { }
-static inline u64 hv_get_non_nested_register(unsigned int reg) { return 0; }
+static inline void hv_set_msr(unsigned int reg, u64 value) { }
+static inline u64 hv_get_msr(unsigned int reg) { return 0; }
+static inline void hv_set_non_nested_msr(unsigned int reg, u64 value) { }
+static inline u64 hv_get_non_nested_msr(unsigned int reg) { return 0; }
 #endif /* CONFIG_HYPERV */
 
 
diff --git a/arch/x86/include/asm/suspend_32.h b/arch/x86/include/asm/suspend_32.h
index a800abb1a992..d8416b3bf832 100644
--- a/arch/x86/include/asm/suspend_32.h
+++ b/arch/x86/include/asm/suspend_32.h
@@ -12,11 +12,6 @@
 
 /* image of the saved processor state */
 struct saved_context {
-	/*
-	 * On x86_32, all segment registers except gs are saved at kernel
-	 * entry in pt_regs.
-	 */
-	u16 gs;
 	unsigned long cr0, cr2, cr3, cr4;
 	u64 misc_enable;
 	struct saved_msrs saved_msrs;
@@ -27,6 +22,11 @@ struct saved_context {
 	unsigned long tr;
 	unsigned long safety;
 	unsigned long return_address;
+	/*
+	 * On x86_32, all segment registers except gs are saved at kernel
+	 * entry in pt_regs.
+	 */
+	u16 gs;
 	bool misc_enable_saved;
 } __attribute__((packed));
 
diff --git a/arch/x86/include/asm/xen/hypervisor.h b/arch/x86/include/asm/xen/hypervisor.h
index a9088250770f..64fbd2dbc5b7 100644
--- a/arch/x86/include/asm/xen/hypervisor.h
+++ b/arch/x86/include/asm/xen/hypervisor.h
@@ -62,6 +62,11 @@ void xen_arch_unregister_cpu(int num);
 #ifdef CONFIG_PVH
 void __init xen_pvh_init(struct boot_params *boot_params);
 void __init mem_map_via_hcall(struct boot_params *boot_params_p);
+#ifdef CONFIG_XEN_PVH
+void __init xen_reserve_extra_memory(struct boot_params *bootp);
+#else
+static inline void xen_reserve_extra_memory(struct boot_params *bootp) { }
+#endif
 #endif
 
 /* Lazy mode for batching updates / context switch */
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index ba8cf5e9ce56..5c1e6d6be267 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -2307,6 +2307,8 @@ void arch_smt_update(void)
 
 void __init arch_cpu_finalize_init(void)
 {
+	struct cpuinfo_x86 *c = this_cpu_ptr(&cpu_info);
+
 	identify_boot_cpu();
 
 	select_idle_routine();
@@ -2345,6 +2347,13 @@ void __init arch_cpu_finalize_init(void)
 	fpu__init_system();
 	fpu__init_cpu();
 
+	/*
+	 * Ensure that access to the per CPU representation has the initial
+	 * boot CPU configuration.
+	 */
+	*c = boot_cpu_data;
+	c->initialized = true;
+
 	alternative_instructions();
 
 	if (IS_ENABLED(CONFIG_X86_64)) {
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index 303fef824167..e0fd57a8ba84 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -45,70 +45,70 @@ bool hyperv_paravisor_present __ro_after_init;
 EXPORT_SYMBOL_GPL(hyperv_paravisor_present);
 
 #if IS_ENABLED(CONFIG_HYPERV)
-static inline unsigned int hv_get_nested_reg(unsigned int reg)
+static inline unsigned int hv_get_nested_msr(unsigned int reg)
 {
-	if (hv_is_sint_reg(reg))
-		return reg - HV_REGISTER_SINT0 + HV_REGISTER_NESTED_SINT0;
+	if (hv_is_sint_msr(reg))
+		return reg - HV_X64_MSR_SINT0 + HV_X64_MSR_NESTED_SINT0;
 
 	switch (reg) {
-	case HV_REGISTER_SIMP:
-		return HV_REGISTER_NESTED_SIMP;
-	case HV_REGISTER_SIEFP:
-		return HV_REGISTER_NESTED_SIEFP;
-	case HV_REGISTER_SVERSION:
-		return HV_REGISTER_NESTED_SVERSION;
-	case HV_REGISTER_SCONTROL:
-		return HV_REGISTER_NESTED_SCONTROL;
-	case HV_REGISTER_EOM:
-		return HV_REGISTER_NESTED_EOM;
+	case HV_X64_MSR_SIMP:
+		return HV_X64_MSR_NESTED_SIMP;
+	case HV_X64_MSR_SIEFP:
+		return HV_X64_MSR_NESTED_SIEFP;
+	case HV_X64_MSR_SVERSION:
+		return HV_X64_MSR_NESTED_SVERSION;
+	case HV_X64_MSR_SCONTROL:
+		return HV_X64_MSR_NESTED_SCONTROL;
+	case HV_X64_MSR_EOM:
+		return HV_X64_MSR_NESTED_EOM;
 	default:
 		return reg;
 	}
 }
 
-u64 hv_get_non_nested_register(unsigned int reg)
+u64 hv_get_non_nested_msr(unsigned int reg)
 {
 	u64 value;
 
-	if (hv_is_synic_reg(reg) && ms_hyperv.paravisor_present)
+	if (hv_is_synic_msr(reg) && ms_hyperv.paravisor_present)
 		hv_ivm_msr_read(reg, &value);
 	else
 		rdmsrl(reg, value);
 	return value;
 }
-EXPORT_SYMBOL_GPL(hv_get_non_nested_register);
+EXPORT_SYMBOL_GPL(hv_get_non_nested_msr);
 
-void hv_set_non_nested_register(unsigned int reg, u64 value)
+void hv_set_non_nested_msr(unsigned int reg, u64 value)
 {
-	if (hv_is_synic_reg(reg) && ms_hyperv.paravisor_present) {
+	if (hv_is_synic_msr(reg) && ms_hyperv.paravisor_present) {
 		hv_ivm_msr_write(reg, value);
 
 		/* Write proxy bit via wrmsl instruction */
-		if (hv_is_sint_reg(reg))
+		if (hv_is_sint_msr(reg))
 			wrmsrl(reg, value | 1 << 20);
 	} else {
 		wrmsrl(reg, value);
 	}
 }
-EXPORT_SYMBOL_GPL(hv_set_non_nested_register);
+EXPORT_SYMBOL_GPL(hv_set_non_nested_msr);
 
-u64 hv_get_register(unsigned int reg)
+u64 hv_get_msr(unsigned int reg)
 {
 	if (hv_nested)
-		reg = hv_get_nested_reg(reg);
+		reg = hv_get_nested_msr(reg);
 
-	return hv_get_non_nested_register(reg);
+	return hv_get_non_nested_msr(reg);
 }
-EXPORT_SYMBOL_GPL(hv_get_register);
+EXPORT_SYMBOL_GPL(hv_get_msr);
 
-void hv_set_register(unsigned int reg, u64 value)
+void hv_set_msr(unsigned int reg, u64 value)
 {
 	if (hv_nested)
-		reg = hv_get_nested_reg(reg);
+		reg = hv_get_nested_msr(reg);
 
-	hv_set_non_nested_register(reg, value);
+	hv_set_non_nested_msr(reg, value);
 }
-EXPORT_SYMBOL_GPL(hv_set_register);
+EXPORT_SYMBOL_GPL(hv_set_msr);
 
 static void (*vmbus_handler)(void);
 static void (*hv_stimer0_handler)(void);
@@ -352,13 +352,24 @@ static void __init reduced_hw_init(void)
 	x86_init.irqs.pre_vector_init	= x86_init_noop;
 }
 
+int hv_get_hypervisor_version(union hv_hypervisor_version_info *info)
+{
+	unsigned int hv_max_functions;
+
+	hv_max_functions = cpuid_eax(HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS);
+	if (hv_max_functions < HYPERV_CPUID_VERSION) {
+		pr_err("%s: Could not detect Hyper-V version\n", __func__);
+		return -ENODEV;
+	}
+
+	cpuid(HYPERV_CPUID_VERSION, &info->eax, &info->ebx, &info->ecx, &info->edx);
+
+	return 0;
+}
+
 static void __init ms_hyperv_init_platform(void)
 {
 	int hv_max_functions_eax;
-	int hv_host_info_eax;
-	int hv_host_info_ebx;
-	int hv_host_info_ecx;
-	int hv_host_info_edx;
 
 #ifdef CONFIG_PARAVIRT
 	pv_info.name = "Hyper-V";
@@ -409,21 +420,6 @@ static void __init ms_hyperv_init_platform(void)
 		pr_info("Hyper-V: running on a nested hypervisor\n");
 	}
 
-	/*
-	 * Extract host information.
-	 */
-	if (hv_max_functions_eax >= HYPERV_CPUID_VERSION) {
-		hv_host_info_eax = cpuid_eax(HYPERV_CPUID_VERSION);
-		hv_host_info_ebx = cpuid_ebx(HYPERV_CPUID_VERSION);
-		hv_host_info_ecx = cpuid_ecx(HYPERV_CPUID_VERSION);
-		hv_host_info_edx = cpuid_edx(HYPERV_CPUID_VERSION);
-
-		pr_info("Hyper-V: Host Build %d.%d.%d.%d-%d-%d\n",
-			hv_host_info_ebx >> 16, hv_host_info_ebx & 0xFFFF,
-			hv_host_info_eax, hv_host_info_edx & 0xFFFFFF,
-			hv_host_info_ecx, hv_host_info_edx >> 24);
-	}
-
 	if (ms_hyperv.features & HV_ACCESS_FREQUENCY_MSRS &&
 	    ms_hyperv.misc_features & HV_FEATURE_FREQUENCY_MSRS_AVAILABLE) {
 		x86_platform.calibrate_tsc = hv_get_tsc_khz;
@@ -456,7 +452,7 @@ static void __init ms_hyperv_init_platform(void)
 				/* To be supported: more work is required.  */
 				ms_hyperv.features &= ~HV_MSR_REFERENCE_TSC_AVAILABLE;
 
-				/* HV_REGISTER_CRASH_CTL is unsupported. */
+				/* HV_MSR_CRASH_CTL is unsupported. */
 				ms_hyperv.misc_features &= ~HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE;
 
 				/* Don't trust Hyper-V's TLB-flushing hypercalls. */
@@ -648,6 +644,7 @@ const __initconst struct hypervisor_x86 x86_hyper_ms_hyperv = {
 	.init.x2apic_available	= ms_hyperv_x2apic_available,
 	.init.msi_ext_dest_id	= ms_hyperv_msi_ext_dest_id,
 	.init.init_platform	= ms_hyperv_init_platform,
+	.init.guest_late_init	= ms_hyperv_late_init,
 #ifdef CONFIG_AMD_MEM_ENCRYPT
 	.runtime.sev_es_hcall_prepare = hv_sev_es_hcall_prepare,
 	.runtime.sev_es_hcall_finish = hv_sev_es_hcall_finish,
diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c
index 3259b1d4fefe..aaca8d235dc2 100644
--- a/arch/x86/kernel/cpu/topology.c
+++ b/arch/x86/kernel/cpu/topology.c
@@ -415,6 +415,17 @@ void __init topology_init_possible_cpus(void)
 	unsigned int total = assigned + disabled;
 	u32 apicid, firstid;
 
+	/*
+	 * If there was no APIC registered, then fake one so that the
+	 * topology bitmap is populated. That ensures that the code below
+	 * is valid and the various query interfaces can be used
+	 * unconditionally. This does not affect the actual APIC code in
+	 * any way because either the local APIC address has not been
+	 * registered or the local APIC was disabled on the command line.
+	 */
+	if (topo_info.boot_cpu_apic_id == BAD_APICID)
+		topology_register_boot_apic(0);
+
 	if (!restrict_to_up()) {
 		if (WARN_ON_ONCE(assigned > nr_cpu_ids)) {
 			disabled += assigned - nr_cpu_ids;
diff --git a/arch/x86/kernel/cpu/topology_common.c b/arch/x86/kernel/cpu/topology_common.c
index a50ae8d63d1c..9a6069e7133c 100644
--- a/arch/x86/kernel/cpu/topology_common.c
+++ b/arch/x86/kernel/cpu/topology_common.c
@@ -140,7 +140,7 @@ static void parse_topology(struct topo_scan *tscan, bool early)
 	}
 }
 
-static void topo_set_ids(struct topo_scan *tscan)
+static void topo_set_ids(struct topo_scan *tscan, bool early)
 {
 	struct cpuinfo_x86 *c = tscan->c;
 	u32 apicid = c->topo.apicid;
@@ -148,8 +148,10 @@ static void topo_set_ids(struct topo_scan *tscan)
 	c->topo.pkg_id = topo_shift_apicid(apicid, TOPO_PKG_DOMAIN);
 	c->topo.die_id = topo_shift_apicid(apicid, TOPO_DIE_DOMAIN);
 
-	c->topo.logical_pkg_id = topology_get_logical_id(apicid, TOPO_PKG_DOMAIN);
-	c->topo.logical_die_id = topology_get_logical_id(apicid, TOPO_DIE_DOMAIN);
+	if (!early) {
+		c->topo.logical_pkg_id = topology_get_logical_id(apicid, TOPO_PKG_DOMAIN);
+		c->topo.logical_die_id = topology_get_logical_id(apicid, TOPO_DIE_DOMAIN);
+	}
 
 	/* Package relative core ID */
 	c->topo.core_id = (apicid & topo_domain_mask(TOPO_PKG_DOMAIN)) >>
@@ -187,7 +189,7 @@ void cpu_parse_topology(struct cpuinfo_x86 *c)
 		       tscan.dom_shifts[dom], x86_topo_system.dom_shifts[dom]);
 	}
 
-	topo_set_ids(&tscan);
+	topo_set_ids(&tscan, false);
 }
 
 void __init cpu_init_topology(struct cpuinfo_x86 *c)
@@ -208,7 +210,7 @@ void __init cpu_init_topology(struct cpuinfo_x86 *c)
 		x86_topo_system.dom_size[dom] = 1U << sft;
 	}
 
-	topo_set_ids(&tscan);
+	topo_set_ids(&tscan, true);
 
 	/*
 	 * AMD systems have Nodes per package which cannot be mapped to
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index b66f540de054..6f1b379e3b38 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -1016,17 +1016,6 @@ void __init e820__reserve_setup_data(void)
 
 		e820__range_update(pa_data, sizeof(*data)+data->len, E820_TYPE_RAM, E820_TYPE_RESERVED_KERN);
 
-		/*
-		 * SETUP_EFI, SETUP_IMA and SETUP_RNG_SEED are supplied by
-		 * kexec and do not need to be reserved.
-		 */
-		if (data->type != SETUP_EFI &&
-		    data->type != SETUP_IMA &&
-		    data->type != SETUP_RNG_SEED)
-			e820__range_update_kexec(pa_data,
-						 sizeof(*data) + data->len,
-						 E820_TYPE_RAM, E820_TYPE_RESERVED_KERN);
-
 		if (data->type == SETUP_INDIRECT) {
 			len += data->len;
 			early_memunmap(data, sizeof(*data));
@@ -1038,12 +1027,9 @@ void __init e820__reserve_setup_data(void)
 
 			indirect = (struct setup_indirect *)data->data;
 
-			if (indirect->type != SETUP_INDIRECT) {
+			if (indirect->type != SETUP_INDIRECT)
 				e820__range_update(indirect->addr, indirect->len,
 						   E820_TYPE_RAM, E820_TYPE_RESERVED_KERN);
-				e820__range_update_kexec(indirect->addr, indirect->len,
-							 E820_TYPE_RAM, E820_TYPE_RESERVED_KERN);
-			}
 		}
 
 		pa_data = pa_next;
@@ -1051,7 +1037,6 @@ void __init e820__reserve_setup_data(void)
 	}
 
 	e820__update_table(e820_table);
-	e820__update_table(e820_table_kexec);
 
 	pr_info("extended physical RAM map:\n");
 	e820__print_table("reserve setup_data");
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 117e74c44e75..33a214b1a4ce 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -178,10 +178,11 @@ void fpu__init_cpu_xstate(void)
 	 * Must happen after CR4 setup and before xsetbv() to allow KVM
 	 * lazy passthrough.  Write independent of the dynamic state static
 	 * key as that does not work on the boot CPU. This also ensures
-	 * that any stale state is wiped out from XFD.
+	 * that any stale state is wiped out from XFD. Reset the per CPU
+	 * xfd cache too.
 	 */
 	if (cpu_feature_enabled(X86_FEATURE_XFD))
-		wrmsrl(MSR_IA32_XFD, init_fpstate.xfd);
+		xfd_set_state(init_fpstate.xfd);
 
 	/*
 	 * XCR_XFEATURE_ENABLED_MASK (aka. XCR0) sets user features
diff --git a/arch/x86/kernel/fpu/xstate.h b/arch/x86/kernel/fpu/xstate.h
index 3518fb26d06b..19ca623ffa2a 100644
--- a/arch/x86/kernel/fpu/xstate.h
+++ b/arch/x86/kernel/fpu/xstate.h
@@ -148,20 +148,26 @@ static inline void xfd_validate_state(struct fpstate *fpstate, u64 mask, bool rs
 #endif
 
 #ifdef CONFIG_X86_64
+static inline void xfd_set_state(u64 xfd)
+{
+	wrmsrl(MSR_IA32_XFD, xfd);
+	__this_cpu_write(xfd_state, xfd);
+}
+
 static inline void xfd_update_state(struct fpstate *fpstate)
 {
 	if (fpu_state_size_dynamic()) {
 		u64 xfd = fpstate->xfd;
 
-		if (__this_cpu_read(xfd_state) != xfd) {
-			wrmsrl(MSR_IA32_XFD, xfd);
-			__this_cpu_write(xfd_state, xfd);
-		}
+		if (__this_cpu_read(xfd_state) != xfd)
+			xfd_set_state(xfd);
 	}
 }
 
 extern int __xfd_enable_feature(u64 which, struct fpu_guest *guest_fpu);
 #else
+static inline void xfd_set_state(u64 xfd) { }
+
 static inline void xfd_update_state(struct fpstate *fpstate) { }
 
 static inline int __xfd_enable_feature(u64 which, struct fpu_guest *guest_fpu) {
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 212e8e06aeba..a817ed0724d1 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -81,6 +81,13 @@ static inline bool check_la57_support(void)
 	if (!(native_read_cr4() & X86_CR4_LA57))
 		return false;
 
+	RIP_REL_REF(__pgtable_l5_enabled)	= 1;
+	RIP_REL_REF(pgdir_shift)		= 48;
+	RIP_REL_REF(ptrs_per_p4d)		= 512;
+	RIP_REL_REF(page_offset_base)		= __PAGE_OFFSET_BASE_L5;
+	RIP_REL_REF(vmalloc_base)		= __VMALLOC_BASE_L5;
+	RIP_REL_REF(vmemmap_base)		= __VMEMMAP_BASE_L5;
+
 	return true;
 }
 
@@ -175,7 +182,7 @@ unsigned long __head __startup_64(unsigned long physaddr,
 		p4d = (p4dval_t *)&RIP_REL_REF(level4_kernel_pgt);
 		p4d[MAX_PTRS_PER_P4D - 1] += load_delta;
 
-		pgd[pgd_index(__START_KERNEL_map)] = (pgdval_t)p4d | _PAGE_TABLE_NOENC;
+		pgd[pgd_index(__START_KERNEL_map)] = (pgdval_t)p4d | _PAGE_TABLE;
 	}
 
 	RIP_REL_REF(level3_kernel_pgt)[PTRS_PER_PUD - 2].pud += load_delta;
@@ -431,15 +438,6 @@ asmlinkage __visible void __init __noreturn x86_64_start_kernel(char * real_mode
 				(__START_KERNEL & PGDIR_MASK)));
 	BUILD_BUG_ON(__fix_to_virt(__end_of_fixed_addresses) <= MODULES_END);
 
-	if (check_la57_support()) {
-		__pgtable_l5_enabled	= 1;
-		pgdir_shift		= 48;
-		ptrs_per_p4d		= 512;
-		page_offset_base	= __PAGE_OFFSET_BASE_L5;
-		vmalloc_base		= __VMALLOC_BASE_L5;
-		vmemmap_base		= __VMEMMAP_BASE_L5;
-	}
-
 	cr4_init_shadow();
 
 	/* Kill off the identity-map trampoline */
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index 091b3ab76a18..d0e49bd7c6f3 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -373,7 +373,16 @@ out:
 kprobe_opcode_t *arch_adjust_kprobe_addr(unsigned long addr, unsigned long offset,
 					 bool *on_func_entry)
 {
-	if (is_endbr(*(u32 *)addr)) {
+	u32 insn;
+
+	/*
+	 * Since 'addr' is not guaranteed to be safe to access, use
+	 * copy_from_kernel_nofault() to read the instruction:
+	 */
+	if (copy_from_kernel_nofault(&insn, (void *)addr, sizeof(u32)))
+		return NULL;
+
+	if (is_endbr(insn)) {
 		*on_func_entry = !offset || offset == 4;
 		if (*on_func_entry)
 			offset = 4;
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index 1ccd30c8246f..e89171b0347a 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -197,12 +197,12 @@ static int __init smp_read_mpc(struct mpc_table *mpc, unsigned early)
 	if (!smp_check_mpc(mpc, oem, str))
 		return 0;
 
-	/* Initialize the lapic mapping */
-	if (!acpi_lapic)
-		register_lapic_address(mpc->lapic);
-
-	if (early)
+	if (early) {
+		/* Initialize the lapic mapping */
+		if (!acpi_lapic)
+			register_lapic_address(mpc->lapic);
 		return 1;
+	}
 
 	/* Now process the configuration blocks. */
 	while (count < mpc->length) {
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 3e1e96efadfe..ef206500ed6f 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -1206,16 +1206,6 @@ void __init i386_reserve_resources(void)
 
 #endif /* CONFIG_X86_32 */
 
-#ifndef CONFIG_SMP
-void __init smp_prepare_boot_cpu(void)
-{
-	struct cpuinfo_x86 *c = &cpu_data(0);
-
-	*c = boot_cpu_data;
-	c->initialized = true;
-}
-#endif
-
 static struct notifier_block kernel_offset_notifier = {
 	.notifier_call = dump_kernel_offset
 };
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index fe355c89f6c1..76bb65045c64 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -313,14 +313,6 @@ static void notrace start_secondary(void *unused)
 	cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
 }
 
-static void __init smp_store_boot_cpu_info(void)
-{
-	struct cpuinfo_x86 *c = &cpu_data(0);
-
-	*c = boot_cpu_data;
-	c->initialized = true;
-}
-
 /*
  * The bootstrap kernel entry code has set these up. Save them for
  * a given CPU
@@ -1039,29 +1031,15 @@ static __init void disable_smp(void)
 	cpumask_set_cpu(0, topology_die_cpumask(0));
 }
 
-static void __init smp_cpu_index_default(void)
-{
-	int i;
-	struct cpuinfo_x86 *c;
-
-	for_each_possible_cpu(i) {
-		c = &cpu_data(i);
-		/* mark all to hotplug */
-		c->cpu_index = nr_cpu_ids;
-	}
-}
-
 void __init smp_prepare_cpus_common(void)
 {
 	unsigned int i;
 
-	smp_cpu_index_default();
-
-	/*
-	 * Setup boot CPU information
-	 */
-	smp_store_boot_cpu_info(); /* Final full version of the data */
-	mb();
+	/* Mark all except the boot CPU as hotpluggable */
+	for_each_possible_cpu(i) {
+		if (i)
+			per_cpu(cpu_info.cpu_index, i) = nr_cpu_ids;
+	}
 
 	for_each_possible_cpu(i) {
 		zalloc_cpumask_var(&per_cpu(cpu_sibling_map, i), GFP_KERNEL);
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index 8c3032a96caf..3aaf7e86a859 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -118,8 +118,8 @@ config KVM_AMD
 	  will be called kvm-amd.
 
 config KVM_AMD_SEV
-	def_bool y
 	bool "AMD Secure Encrypted Virtualization (SEV) support"
+	default y
 	depends on KVM_AMD && X86_64
 	depends on CRYPTO_DEV_SP_PSP && !(KVM_AMD=y && CRYPTO_DEV_CCP_DD=m)
 	help
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index 475b5fa917a6..a88bb14266b6 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -4,7 +4,8 @@ ccflags-y += -I $(srctree)/arch/x86/kvm
 ccflags-$(CONFIG_KVM_WERROR) += -Werror
 
 ifeq ($(CONFIG_FRAME_POINTER),y)
-OBJECT_FILES_NON_STANDARD_vmenter.o := y
+OBJECT_FILES_NON_STANDARD_vmx/vmenter.o := y
+OBJECT_FILES_NON_STANDARD_svm/vmenter.o := y
 endif
 
 include $(srctree)/virt/kvm/Makefile.kvm
diff --git a/arch/x86/platform/pvh/enlighten.c b/arch/x86/platform/pvh/enlighten.c
index 944e0290f2c0..8c2d4b8de25d 100644
--- a/arch/x86/platform/pvh/enlighten.c
+++ b/arch/x86/platform/pvh/enlighten.c
@@ -75,6 +75,9 @@ static void __init init_pvh_bootparams(bool xen_guest)
 	} else
 		xen_raw_printk("Warning: Can fit ISA range into e820\n");
 
+	if (xen_guest)
+		xen_reserve_extra_memory(&pvh_bootparams);
+
 	pvh_bootparams.hdr.cmd_line_ptr =
 		pvh_start_info.cmdline_paddr;
 
diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig
index a65fc2ae15b4..77e788e928cd 100644
--- a/arch/x86/xen/Kconfig
+++ b/arch/x86/xen/Kconfig
@@ -81,7 +81,6 @@ config XEN_PVH
 	bool "Xen PVH guest support"
 	depends on XEN && XEN_PVHVM && ACPI
 	select PVH
-	def_bool n
 	help
 	  Support for running as a Xen PVH guest.
 
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 3c61bb98c10e..a01ca255b0c6 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -6,6 +6,7 @@
 #include <linux/console.h>
 #include <linux/cpu.h>
 #include <linux/kexec.h>
+#include <linux/memblock.h>
 #include <linux/slab.h>
 #include <linux/panic_notifier.h>
 
@@ -350,3 +351,34 @@ void xen_arch_unregister_cpu(int num)
 }
 EXPORT_SYMBOL(xen_arch_unregister_cpu);
 #endif
+
+/* Amount of extra memory space we add to the e820 ranges */
+struct xen_memory_region xen_extra_mem[XEN_EXTRA_MEM_MAX_REGIONS] __initdata;
+
+void __init xen_add_extra_mem(unsigned long start_pfn, unsigned long n_pfns)
+{
+	unsigned int i;
+
+	/*
+	 * No need to check for zero size, should happen rarely and will only
+	 * write a new entry regarded to be unused due to zero size.
+	 */
+	for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) {
+		/* Add new region. */
+		if (xen_extra_mem[i].n_pfns == 0) {
+			xen_extra_mem[i].start_pfn = start_pfn;
+			xen_extra_mem[i].n_pfns = n_pfns;
+			break;
+		}
+		/* Append to existing region. */
+		if (xen_extra_mem[i].start_pfn + xen_extra_mem[i].n_pfns ==
+		    start_pfn) {
+			xen_extra_mem[i].n_pfns += n_pfns;
+			break;
+		}
+	}
+	if (i == XEN_EXTRA_MEM_MAX_REGIONS)
+		printk(KERN_WARNING "Warning: not enough extra memory regions\n");
+
+	memblock_reserve(PFN_PHYS(start_pfn), PFN_PHYS(n_pfns));
+}
diff --git a/arch/x86/xen/enlighten_pvh.c b/arch/x86/xen/enlighten_pvh.c
index 9e9db601bd52..27a2a02ef8fb 100644
--- a/arch/x86/xen/enlighten_pvh.c
+++ b/arch/x86/xen/enlighten_pvh.c
@@ -1,6 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <linux/acpi.h>
 #include <linux/export.h>
+#include <linux/mm.h>
 
 #include <xen/hvc-console.h>
 
@@ -73,3 +74,70 @@ void __init mem_map_via_hcall(struct boot_params *boot_params_p)
 	}
 	boot_params_p->e820_entries = memmap.nr_entries;
 }
+
+/*
+ * Reserve e820 UNUSABLE regions to inflate the memory balloon.
+ *
+ * On PVH dom0 the host memory map is used, RAM regions available to dom0 are
+ * located as the same place as in the native memory map, but since dom0 gets
+ * less memory than the total amount of host RAM the ranges that can't be
+ * populated are converted from RAM -> UNUSABLE.  Use such regions (up to the
+ * ratio signaled in EXTRA_MEM_RATIO) in order to inflate the balloon driver at
+ * boot.  Doing so prevents the guest (even if just temporary) from using holes
+ * in the memory map in order to map grants or foreign addresses, and
+ * hopefully limits the risk of a clash with a device MMIO region.  Ideally the
+ * hypervisor should notify us which memory ranges are suitable for creating
+ * foreign mappings, but that's not yet implemented.
+ */
+void __init xen_reserve_extra_memory(struct boot_params *bootp)
+{
+	unsigned int i, ram_pages = 0, extra_pages;
+
+	for (i = 0; i < bootp->e820_entries; i++) {
+		struct boot_e820_entry *e = &bootp->e820_table[i];
+
+		if (e->type != E820_TYPE_RAM)
+			continue;
+		ram_pages += PFN_DOWN(e->addr + e->size) - PFN_UP(e->addr);
+	}
+
+	/* Max amount of extra memory. */
+	extra_pages = EXTRA_MEM_RATIO * ram_pages;
+
+	/*
+	 * Convert UNUSABLE ranges to RAM and reserve them for foreign mapping
+	 * purposes.
+	 */
+	for (i = 0; i < bootp->e820_entries && extra_pages; i++) {
+		struct boot_e820_entry *e = &bootp->e820_table[i];
+		unsigned long pages;
+
+		if (e->type != E820_TYPE_UNUSABLE)
+			continue;
+
+		pages = min(extra_pages,
+			PFN_DOWN(e->addr + e->size) - PFN_UP(e->addr));
+
+		if (pages != (PFN_DOWN(e->addr + e->size) - PFN_UP(e->addr))) {
+			struct boot_e820_entry *next;
+
+			if (bootp->e820_entries ==
+			    ARRAY_SIZE(bootp->e820_table))
+				/* No space left to split - skip region. */
+				continue;
+
+			/* Split entry. */
+			next = e + 1;
+			memmove(next, e,
+				(bootp->e820_entries - i) * sizeof(*e));
+			bootp->e820_entries++;
+			next->addr = PAGE_ALIGN(e->addr) + PFN_PHYS(pages);
+			e->size = next->addr - e->addr;
+			next->size -= e->size;
+		}
+		e->type = E820_TYPE_RAM;
+		extra_pages -= pages;
+
+		xen_add_extra_mem(PFN_UP(e->addr), pages);
+	}
+}
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index b3e37961065a..380591028cb8 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -38,9 +38,6 @@
 
 #define GB(x) ((uint64_t)(x) * 1024 * 1024 * 1024)
 
-/* Amount of extra memory space we add to the e820 ranges */
-struct xen_memory_region xen_extra_mem[XEN_EXTRA_MEM_MAX_REGIONS] __initdata;
-
 /* Number of pages released from the initial allocation. */
 unsigned long xen_released_pages;
 
@@ -64,18 +61,6 @@ static struct {
 } xen_remap_buf __initdata __aligned(PAGE_SIZE);
 static unsigned long xen_remap_mfn __initdata = INVALID_P2M_ENTRY;
 
-/*
- * The maximum amount of extra memory compared to the base size.  The
- * main scaling factor is the size of struct page.  At extreme ratios
- * of base:extra, all the base memory can be filled with page
- * structures for the extra memory, leaving no space for anything
- * else.
- *
- * 10x seems like a reasonable balance between scaling flexibility and
- * leaving a practically usable system.
- */
-#define EXTRA_MEM_RATIO		(10)
-
 static bool xen_512gb_limit __initdata = IS_ENABLED(CONFIG_XEN_512GB);
 
 static void __init xen_parse_512gb(void)
@@ -96,35 +81,6 @@ static void __init xen_parse_512gb(void)
 	xen_512gb_limit = val;
 }
 
-static void __init xen_add_extra_mem(unsigned long start_pfn,
-				     unsigned long n_pfns)
-{
-	int i;
-
-	/*
-	 * No need to check for zero size, should happen rarely and will only
-	 * write a new entry regarded to be unused due to zero size.
-	 */
-	for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) {
-		/* Add new region. */
-		if (xen_extra_mem[i].n_pfns == 0) {
-			xen_extra_mem[i].start_pfn = start_pfn;
-			xen_extra_mem[i].n_pfns = n_pfns;
-			break;
-		}
-		/* Append to existing region. */
-		if (xen_extra_mem[i].start_pfn + xen_extra_mem[i].n_pfns ==
-		    start_pfn) {
-			xen_extra_mem[i].n_pfns += n_pfns;
-			break;
-		}
-	}
-	if (i == XEN_EXTRA_MEM_MAX_REGIONS)
-		printk(KERN_WARNING "Warning: not enough extra memory regions\n");
-
-	memblock_reserve(PFN_PHYS(start_pfn), PFN_PHYS(n_pfns));
-}
-
 static void __init xen_del_extra_mem(unsigned long start_pfn,
 				     unsigned long n_pfns)
 {
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index a87ab36889e7..79cf93f2c92f 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -163,4 +163,18 @@ void xen_hvm_post_suspend(int suspend_cancelled);
 static inline void xen_hvm_post_suspend(int suspend_cancelled) {}
 #endif
 
+/*
+ * The maximum amount of extra memory compared to the base size.  The
+ * main scaling factor is the size of struct page.  At extreme ratios
+ * of base:extra, all the base memory can be filled with page
+ * structures for the extra memory, leaving no space for anything
+ * else.
+ *
+ * 10x seems like a reasonable balance between scaling flexibility and
+ * leaving a practically usable system.
+ */
+#define EXTRA_MEM_RATIO		(10)
+
+void xen_add_extra_mem(unsigned long start_pfn, unsigned long n_pfns);
+
 #endif /* XEN_OPS_H */
author	Paolo Bonzini <pbonzini@redhat.com>	2024-04-02 12:26:15 -0400
committer	Paolo Bonzini <pbonzini@redhat.com>	2024-04-02 12:26:15 -0400
commit	52b761b48f8e23399fafe3834a173c990357b8de (patch)
tree	06f7fe191b277dde269ecb11ff3e089da6f2aff9 /arch
parent	0d1756482e66f326eb65fe08eed24ce2efabb168 (diff)
parent	d96c66ab9fb3ad8b243669cf6b41e68d0f7f9ecd (diff)