diff options
Diffstat (limited to 'arch/arm64')
119 files changed, 1808 insertions, 1576 deletions
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 0d921d21fd35..de65d2fa0657 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -11,6 +11,7 @@ config ARM64 select ACPI_PPTT if ACPI select ARCH_HAS_DEBUG_WX select ARCH_BINFMT_ELF_STATE + select ARCH_CORRECT_STACKTRACE_ON_KRETPROBE select ARCH_ENABLE_HUGEPAGE_MIGRATION if HUGETLB_PAGE && MIGRATION select ARCH_ENABLE_MEMORY_HOTPLUG select ARCH_ENABLE_MEMORY_HOTREMOVE @@ -133,7 +134,6 @@ config ARM64 select GENERIC_TIME_VSYSCALL select GENERIC_GETTIMEOFDAY select GENERIC_VDSO_TIME_NS - select HANDLE_DOMAIN_IRQ select HARDIRQS_SW_RESEND select HAVE_MOVE_PMD select HAVE_MOVE_PUD @@ -154,7 +154,6 @@ config ARM64 select HAVE_ARCH_KGDB select HAVE_ARCH_MMAP_RND_BITS select HAVE_ARCH_MMAP_RND_COMPAT_BITS if COMPAT - select HAVE_ARCH_PFN_VALID select HAVE_ARCH_PREL32_RELOCATIONS select HAVE_ARCH_RANDOMIZE_KSTACK_OFFSET select HAVE_ARCH_SECCOMP_FILTER @@ -193,6 +192,7 @@ config ARM64 select HAVE_PERF_REGS select HAVE_PERF_USER_STACK_DUMP select HAVE_REGS_AND_STACK_ACCESS_API + select HAVE_POSIX_CPU_TIMERS_TASK_WORK select HAVE_FUNCTION_ARG_ACCESS_API select HAVE_FUTEX_CMPXCHG if FUTEX select MMU_GATHER_RCU_TABLE_FREE @@ -667,6 +667,121 @@ config ARM64_ERRATUM_1508412 If unsure, say Y. +config ARM64_WORKAROUND_TRBE_OVERWRITE_FILL_MODE + bool + +config ARM64_ERRATUM_2119858 + bool "Cortex-A710: 2119858: workaround TRBE overwriting trace data in FILL mode" + default y + depends on COMPILE_TEST # Until the CoreSight TRBE driver changes are in + depends on CORESIGHT_TRBE + select ARM64_WORKAROUND_TRBE_OVERWRITE_FILL_MODE + help + This option adds the workaround for ARM Cortex-A710 erratum 2119858. + + Affected Cortex-A710 cores could overwrite up to 3 cache lines of trace + data at the base of the buffer (pointed to by TRBASER_EL1) in FILL mode in + the event of a WRAP event. + + Work around the issue by always making sure we move the TRBPTR_EL1 by + 256 bytes before enabling the buffer and filling the first 256 bytes of + the buffer with ETM ignore packets upon disabling. + + If unsure, say Y. + +config ARM64_ERRATUM_2139208 + bool "Neoverse-N2: 2139208: workaround TRBE overwriting trace data in FILL mode" + default y + depends on COMPILE_TEST # Until the CoreSight TRBE driver changes are in + depends on CORESIGHT_TRBE + select ARM64_WORKAROUND_TRBE_OVERWRITE_FILL_MODE + help + This option adds the workaround for ARM Neoverse-N2 erratum 2139208. + + Affected Neoverse-N2 cores could overwrite up to 3 cache lines of trace + data at the base of the buffer (pointed to by TRBASER_EL1) in FILL mode in + the event of a WRAP event. + + Work around the issue by always making sure we move the TRBPTR_EL1 by + 256 bytes before enabling the buffer and filling the first 256 bytes of + the buffer with ETM ignore packets upon disabling. + + If unsure, say Y. + +config ARM64_WORKAROUND_TSB_FLUSH_FAILURE + bool + +config ARM64_ERRATUM_2054223 + bool "Cortex-A710: 2054223: workaround TSB instruction failing to flush trace" + default y + select ARM64_WORKAROUND_TSB_FLUSH_FAILURE + help + Enable workaround for ARM Cortex-A710 erratum 2054223 + + Affected cores may fail to flush the trace data on a TSB instruction, when + the PE is in trace prohibited state. This will cause losing a few bytes + of the trace cached. + + Workaround is to issue two TSB consecutively on affected cores. + + If unsure, say Y. + +config ARM64_ERRATUM_2067961 + bool "Neoverse-N2: 2067961: workaround TSB instruction failing to flush trace" + default y + select ARM64_WORKAROUND_TSB_FLUSH_FAILURE + help + Enable workaround for ARM Neoverse-N2 erratum 2067961 + + Affected cores may fail to flush the trace data on a TSB instruction, when + the PE is in trace prohibited state. This will cause losing a few bytes + of the trace cached. + + Workaround is to issue two TSB consecutively on affected cores. + + If unsure, say Y. + +config ARM64_WORKAROUND_TRBE_WRITE_OUT_OF_RANGE + bool + +config ARM64_ERRATUM_2253138 + bool "Neoverse-N2: 2253138: workaround TRBE writing to address out-of-range" + depends on COMPILE_TEST # Until the CoreSight TRBE driver changes are in + depends on CORESIGHT_TRBE + default y + select ARM64_WORKAROUND_TRBE_WRITE_OUT_OF_RANGE + help + This option adds the workaround for ARM Neoverse-N2 erratum 2253138. + + Affected Neoverse-N2 cores might write to an out-of-range address, not reserved + for TRBE. Under some conditions, the TRBE might generate a write to the next + virtually addressed page following the last page of the TRBE address space + (i.e., the TRBLIMITR_EL1.LIMIT), instead of wrapping around to the base. + + Work around this in the driver by always making sure that there is a + page beyond the TRBLIMITR_EL1.LIMIT, within the space allowed for the TRBE. + + If unsure, say Y. + +config ARM64_ERRATUM_2224489 + bool "Cortex-A710: 2224489: workaround TRBE writing to address out-of-range" + depends on COMPILE_TEST # Until the CoreSight TRBE driver changes are in + depends on CORESIGHT_TRBE + default y + select ARM64_WORKAROUND_TRBE_WRITE_OUT_OF_RANGE + help + This option adds the workaround for ARM Cortex-A710 erratum 2224489. + + Affected Cortex-A710 cores might write to an out-of-range address, not reserved + for TRBE. Under some conditions, the TRBE might generate a write to the next + virtually addressed page following the last page of the TRBE address space + (i.e., the TRBLIMITR_EL1.LIMIT), instead of wrapping around to the base. + + Work around this in the driver by always making sure that there is a + page beyond the TRBLIMITR_EL1.LIMIT, within the space allowed for the TRBE. + + If unsure, say Y. + config CAVIUM_ERRATUM_22375 bool "Cavium erratum 22375, 24313" default y @@ -990,6 +1105,15 @@ config SCHED_MC making when dealing with multi-core CPU chips at a cost of slightly increased overhead in some places. If unsure say N here. +config SCHED_CLUSTER + bool "Cluster scheduler support" + help + Cluster scheduler support improves the CPU scheduler's decision + making when dealing with machines that have clusters of CPUs. + Cluster usually means a couple of CPUs which are placed closely + by sharing mid-level caches, last-level cache tags or internal + busses. + config SCHED_SMT bool "SMT scheduler support" help @@ -1136,7 +1260,7 @@ config CRASH_DUMP config TRANS_TABLE def_bool y - depends on HIBERNATION + depends on HIBERNATION || KEXEC_CORE config XEN_DOM0 def_bool y @@ -1265,7 +1389,8 @@ config KUSER_HELPERS config COMPAT_VDSO bool "Enable vDSO for 32-bit applications" - depends on !CPU_BIG_ENDIAN && "$(CROSS_COMPILE_COMPAT)" != "" + depends on !CPU_BIG_ENDIAN + depends on (CC_IS_CLANG && LD_IS_LLD) || "$(CROSS_COMPILE_COMPAT)" != "" select GENERIC_COMPAT_VDSO default y help @@ -1932,8 +2057,6 @@ source "drivers/cpufreq/Kconfig" endmenu -source "drivers/firmware/Kconfig" - source "drivers/acpi/Kconfig" source "arch/arm64/kvm/Kconfig" diff --git a/arch/arm64/Kconfig.platforms b/arch/arm64/Kconfig.platforms index b0ce18d4cc98..96a81967c3bf 100644 --- a/arch/arm64/Kconfig.platforms +++ b/arch/arm64/Kconfig.platforms @@ -44,7 +44,6 @@ config ARCH_BCM2835 select ARM_AMBA select ARM_GIC select ARM_TIMER_SP804 - select BRCMSTB_L2_IRQ help This enables support for the Broadcom BCM2837 and BCM2711 SoC. These SoCs are used in the Raspberry Pi 3 and 4 devices. @@ -82,8 +81,6 @@ config ARCH_BITMAIN config ARCH_BRCMSTB bool "Broadcom Set-Top-Box SoCs" select ARCH_HAS_RESET_CONTROLLER - select BCM7038_L1_IRQ - select BRCMSTB_L2_IRQ select GENERIC_IRQ_CHIP select PINCTRL help @@ -167,7 +164,6 @@ config ARCH_MEDIATEK config ARCH_MESON bool "Amlogic Platforms" select COMMON_CLK - select MESON_IRQ_GPIO help This enables support for the arm64 based Amlogic SoCs such as the s905, S905X/D, S912, A113X/D or S905X/D2 diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h5-nanopi-neo2.dts b/arch/arm64/boot/dts/allwinner/sun50i-h5-nanopi-neo2.dts index 02f8e72f0cad..05486cccee1c 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-h5-nanopi-neo2.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-h5-nanopi-neo2.dts @@ -75,7 +75,7 @@ pinctrl-0 = <&emac_rgmii_pins>; phy-supply = <®_gmac_3v3>; phy-handle = <&ext_rgmii_phy>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; status = "okay"; }; diff --git a/arch/arm64/boot/dts/arm/foundation-v8.dtsi b/arch/arm64/boot/dts/arm/foundation-v8.dtsi index 05ae893d1b2e..fbf13f7c2baf 100644 --- a/arch/arm64/boot/dts/arm/foundation-v8.dtsi +++ b/arch/arm64/boot/dts/arm/foundation-v8.dtsi @@ -115,7 +115,6 @@ bus@8000000 { compatible = "arm,vexpress,v2m-p1", "simple-bus"; - arm,v2m-memory-map = "rs1"; #address-cells = <2>; /* SMB chipselect number and offset */ #size-cells = <1>; diff --git a/arch/arm64/boot/dts/arm/fvp-base-revc.dts b/arch/arm64/boot/dts/arm/fvp-base-revc.dts index b8a21092db4d..269b649934b5 100644 --- a/arch/arm64/boot/dts/arm/fvp-base-revc.dts +++ b/arch/arm64/boot/dts/arm/fvp-base-revc.dts @@ -192,32 +192,9 @@ remote-endpoint = <&clcd_pads>; }; }; - - panel-timing { - clock-frequency = <63500127>; - hactive = <1024>; - hback-porch = <152>; - hfront-porch = <48>; - hsync-len = <104>; - vactive = <768>; - vback-porch = <23>; - vfront-porch = <3>; - vsync-len = <4>; - }; }; bus@8000000 { - compatible = "simple-bus"; - - #address-cells = <2>; - #size-cells = <1>; - ranges = <0 0 0 0x08000000 0x04000000>, - <1 0 0 0x14000000 0x04000000>, - <2 0 0 0x18000000 0x04000000>, - <3 0 0 0x1c000000 0x04000000>, - <4 0 0 0x0c000000 0x04000000>, - <5 0 0 0x10000000 0x04000000>; - #interrupt-cells = <1>; interrupt-map-mask = <0 0 63>; interrupt-map = <0 0 0 &gic 0 0 GIC_SPI 0 IRQ_TYPE_LEVEL_HIGH>, diff --git a/arch/arm64/boot/dts/arm/juno-base.dtsi b/arch/arm64/boot/dts/arm/juno-base.dtsi index 8e7a66943b01..6288e104a089 100644 --- a/arch/arm64/boot/dts/arm/juno-base.dtsi +++ b/arch/arm64/boot/dts/arm/juno-base.dtsi @@ -27,8 +27,6 @@ reg = <0x0 0x2b1f0000 0x0 0x1000>; interrupts = <GIC_SPI 36 IRQ_TYPE_LEVEL_HIGH>, <GIC_SPI 35 IRQ_TYPE_LEVEL_HIGH>; - interrupt-names = "mhu_lpri_rx", - "mhu_hpri_rx"; #mbox-cells = <1>; clocks = <&soc_refclk100mhz>; clock-names = "apb_pclk"; @@ -804,16 +802,6 @@ }; bus@8000000 { - compatible = "simple-bus"; - #address-cells = <2>; - #size-cells = <1>; - ranges = <0 0 0 0x08000000 0x04000000>, - <1 0 0 0x14000000 0x04000000>, - <2 0 0 0x18000000 0x04000000>, - <3 0 0 0x1c000000 0x04000000>, - <4 0 0 0x0c000000 0x04000000>, - <5 0 0 0x10000000 0x04000000>; - #interrupt-cells = <1>; interrupt-map-mask = <0 0 15>; interrupt-map = <0 0 0 &gic 0 GIC_SPI 68 IRQ_TYPE_LEVEL_HIGH>, diff --git a/arch/arm64/boot/dts/arm/juno-motherboard.dtsi b/arch/arm64/boot/dts/arm/juno-motherboard.dtsi index 40d95c58b55e..fefd2b5f0176 100644 --- a/arch/arm64/boot/dts/arm/juno-motherboard.dtsi +++ b/arch/arm64/boot/dts/arm/juno-motherboard.dtsi @@ -92,16 +92,23 @@ }; bus@8000000 { - motherboard-bus { + compatible = "simple-bus"; + #address-cells = <2>; + #size-cells = <1>; + ranges = <0 0x8000000 0 0x8000000 0x18000000>; + + motherboard-bus@8000000 { compatible = "arm,vexpress,v2p-p1", "simple-bus"; #address-cells = <2>; /* SMB chipselect number and offset */ #size-cells = <1>; - #interrupt-cells = <1>; - ranges; - model = "V2M-Juno"; + ranges = <0 0 0 0x08000000 0x04000000>, + <1 0 0 0x14000000 0x04000000>, + <2 0 0 0x18000000 0x04000000>, + <3 0 0 0x1c000000 0x04000000>, + <4 0 0 0x0c000000 0x04000000>, + <5 0 0 0x10000000 0x04000000>; arm,hbi = <0x252>; arm,vexpress,site = <0>; - arm,v2m-memory-map = "rs1"; flash@0 { /* 2 * 32MiB NOR Flash memory mounted on CS0 */ @@ -218,7 +225,7 @@ }; }; - mmci@50000 { + mmc@50000 { compatible = "arm,pl180", "arm,primecell"; reg = <0x050000 0x1000>; interrupts = <5>; @@ -246,7 +253,7 @@ clock-names = "KMIREFCLK", "apb_pclk"; }; - wdt@f0000 { + watchdog@f0000 { compatible = "arm,sp805", "arm,primecell"; reg = <0x0f0000 0x10000>; interrupts = <7>; diff --git a/arch/arm64/boot/dts/arm/rtsm_ve-aemv8a.dts b/arch/arm64/boot/dts/arm/rtsm_ve-aemv8a.dts index 3050f45bade4..258991ad7cc0 100644 --- a/arch/arm64/boot/dts/arm/rtsm_ve-aemv8a.dts +++ b/arch/arm64/boot/dts/arm/rtsm_ve-aemv8a.dts @@ -133,17 +133,6 @@ }; bus@8000000 { - compatible = "simple-bus"; - - #address-cells = <2>; - #size-cells = <1>; - ranges = <0 0 0 0x08000000 0x04000000>, - <1 0 0 0x14000000 0x04000000>, - <2 0 0 0x18000000 0x04000000>, - <3 0 0 0x1c000000 0x04000000>, - <4 0 0 0x0c000000 0x04000000>, - <5 0 0 0x10000000 0x04000000>; - #interrupt-cells = <1>; interrupt-map-mask = <0 0 63>; interrupt-map = <0 0 0 &gic GIC_SPI 0 IRQ_TYPE_LEVEL_HIGH>, diff --git a/arch/arm64/boot/dts/arm/rtsm_ve-motherboard-rs2.dtsi b/arch/arm64/boot/dts/arm/rtsm_ve-motherboard-rs2.dtsi index b917d9d3f1c4..33182d9e5826 100644 --- a/arch/arm64/boot/dts/arm/rtsm_ve-motherboard-rs2.dtsi +++ b/arch/arm64/boot/dts/arm/rtsm_ve-motherboard-rs2.dtsi @@ -6,7 +6,7 @@ */ / { bus@8000000 { - motherboard-bus { + motherboard-bus@8000000 { arm,v2m-memory-map = "rs2"; iofpga-bus@300000000 { diff --git a/arch/arm64/boot/dts/arm/rtsm_ve-motherboard.dtsi b/arch/arm64/boot/dts/arm/rtsm_ve-motherboard.dtsi index 4c4a381d2c75..5f6cab668aa0 100644 --- a/arch/arm64/boot/dts/arm/rtsm_ve-motherboard.dtsi +++ b/arch/arm64/boot/dts/arm/rtsm_ve-motherboard.dtsi @@ -77,13 +77,21 @@ }; bus@8000000 { - motherboard-bus { - arm,v2m-memory-map = "rs1"; + compatible = "simple-bus"; + #address-cells = <2>; + #size-cells = <1>; + ranges = <0 0x8000000 0 0x8000000 0x18000000>; + + motherboard-bus@8000000 { compatible = "arm,vexpress,v2m-p1", "simple-bus"; #address-cells = <2>; /* SMB chipselect number and offset */ #size-cells = <1>; - #interrupt-cells = <1>; - ranges; + ranges = <0 0 0 0x08000000 0x04000000>, + <1 0 0 0x14000000 0x04000000>, + <2 0 0 0x18000000 0x04000000>, + <3 0 0 0x1c000000 0x04000000>, + <4 0 0 0x0c000000 0x04000000>, + <5 0 0 0x10000000 0x04000000>; flash@0 { compatible = "arm,vexpress-flash", "cfi-flash"; @@ -130,7 +138,7 @@ clock-names = "apb_pclk"; }; - mmci@50000 { + mmc@50000 { compatible = "arm,pl180", "arm,primecell"; reg = <0x050000 0x1000>; interrupts = <9>, <10>; @@ -190,7 +198,7 @@ clock-names = "uartclk", "apb_pclk"; }; - wdt@f0000 { + watchdog@f0000 { compatible = "arm,sp805", "arm,primecell"; reg = <0x0f0000 0x1000>; interrupts = <0>; diff --git a/arch/arm64/boot/dts/arm/vexpress-v2f-1xv7-ca53x2.dts b/arch/arm64/boot/dts/arm/vexpress-v2f-1xv7-ca53x2.dts index d859914500a7..5b6d9d8e934d 100644 --- a/arch/arm64/boot/dts/arm/vexpress-v2f-1xv7-ca53x2.dts +++ b/arch/arm64/boot/dts/arm/vexpress-v2f-1xv7-ca53x2.dts @@ -145,61 +145,6 @@ }; smb: bus@8000000 { - compatible = "simple-bus"; - - #address-cells = <2>; - #size-cells = <1>; - ranges = <0 0 0 0x08000000 0x04000000>, - <1 0 0 0x14000000 0x04000000>, - <2 0 0 0x18000000 0x04000000>, - <3 0 0 0x1c000000 0x04000000>, - <4 0 0 0x0c000000 0x04000000>, - <5 0 0 0x10000000 0x04000000>; - - #interrupt-cells = <1>; - interrupt-map-mask = <0 0 63>; - interrupt-map = <0 0 0 &gic GIC_SPI 0 IRQ_TYPE_LEVEL_HIGH>, - <0 0 1 &gic GIC_SPI 1 IRQ_TYPE_LEVEL_HIGH>, - <0 0 2 &gic GIC_SPI 2 IRQ_TYPE_LEVEL_HIGH>, - <0 0 3 &gic GIC_SPI 3 IRQ_TYPE_LEVEL_HIGH>, - <0 0 4 &gic GIC_SPI 4 IRQ_TYPE_LEVEL_HIGH>, - <0 0 5 &gic GIC_SPI 5 IRQ_TYPE_LEVEL_HIGH>, - <0 0 6 &gic GIC_SPI 6 IRQ_TYPE_LEVEL_HIGH>, - <0 0 7 &gic GIC_SPI 7 IRQ_TYPE_LEVEL_HIGH>, - <0 0 8 &gic GIC_SPI 8 IRQ_TYPE_LEVEL_HIGH>, - <0 0 9 &gic GIC_SPI 9 IRQ_TYPE_LEVEL_HIGH>, - <0 0 10 &gic GIC_SPI 10 IRQ_TYPE_LEVEL_HIGH>, - <0 0 11 &gic GIC_SPI 11 IRQ_TYPE_LEVEL_HIGH>, - <0 0 12 &gic GIC_SPI 12 IRQ_TYPE_LEVEL_HIGH>, - <0 0 13 &gic GIC_SPI 13 IRQ_TYPE_LEVEL_HIGH>, - <0 0 14 &gic GIC_SPI 14 IRQ_TYPE_LEVEL_HIGH>, - <0 0 15 &gic GIC_SPI 15 IRQ_TYPE_LEVEL_HIGH>, - <0 0 16 &gic GIC_SPI 16 IRQ_TYPE_LEVEL_HIGH>, - <0 0 17 &gic GIC_SPI 17 IRQ_TYPE_LEVEL_HIGH>, - <0 0 18 &gic GIC_SPI 18 IRQ_TYPE_LEVEL_HIGH>, - <0 0 19 &gic GIC_SPI 19 IRQ_TYPE_LEVEL_HIGH>, - <0 0 20 &gic GIC_SPI 20 IRQ_TYPE_LEVEL_HIGH>, - <0 0 21 &gic GIC_SPI 21 IRQ_TYPE_LEVEL_HIGH>, - <0 0 22 &gic GIC_SPI 22 IRQ_TYPE_LEVEL_HIGH>, - <0 0 23 &gic GIC_SPI 23 IRQ_TYPE_LEVEL_HIGH>, - <0 0 24 &gic GIC_SPI 24 IRQ_TYPE_LEVEL_HIGH>, - <0 0 25 &gic GIC_SPI 25 IRQ_TYPE_LEVEL_HIGH>, - <0 0 26 &gic GIC_SPI 26 IRQ_TYPE_LEVEL_HIGH>, - <0 0 27 &gic GIC_SPI 27 IRQ_TYPE_LEVEL_HIGH>, - <0 0 28 &gic GIC_SPI 28 IRQ_TYPE_LEVEL_HIGH>, - <0 0 29 &gic GIC_SPI 29 IRQ_TYPE_LEVEL_HIGH>, - <0 0 30 &gic GIC_SPI 30 IRQ_TYPE_LEVEL_HIGH>, - <0 0 31 &gic GIC_SPI 31 IRQ_TYPE_LEVEL_HIGH>, - <0 0 32 &gic GIC_SPI 32 IRQ_TYPE_LEVEL_HIGH>, - <0 0 33 &gic GIC_SPI 33 IRQ_TYPE_LEVEL_HIGH>, - <0 0 34 &gic GIC_SPI 34 IRQ_TYPE_LEVEL_HIGH>, - <0 0 35 &gic GIC_SPI 35 IRQ_TYPE_LEVEL_HIGH>, - <0 0 36 &gic GIC_SPI 36 IRQ_TYPE_LEVEL_HIGH>, - <0 0 37 &gic GIC_SPI 37 IRQ_TYPE_LEVEL_HIGH>, - <0 0 38 &gic GIC_SPI 38 IRQ_TYPE_LEVEL_HIGH>, - <0 0 39 &gic GIC_SPI 39 IRQ_TYPE_LEVEL_HIGH>, - <0 0 40 &gic GIC_SPI 40 IRQ_TYPE_LEVEL_HIGH>, - <0 0 41 &gic GIC_SPI 41 IRQ_TYPE_LEVEL_HIGH>, - <0 0 42 &gic GIC_SPI 42 IRQ_TYPE_LEVEL_HIGH>; + ranges = <0x8000000 0 0x8000000 0x18000000>; }; }; diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi index 343ecf0e8973..06b36cc65865 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi +++ b/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi @@ -405,9 +405,9 @@ interrupts = <GIC_SPI 63 IRQ_TYPE_LEVEL_HIGH>; clock-frequency = <0>; /* fixed up by bootloader */ clocks = <&clockgen QORIQ_CLK_HWACCEL 1>; - voltage-ranges = <1800 1800 3300 3300>; + voltage-ranges = <1800 1800>; sdhci,auto-cmd12; - broken-cd; + non-removable; little-endian; bus-width = <4>; status = "disabled"; diff --git a/arch/arm64/boot/dts/freescale/imx8mm-beacon-som.dtsi b/arch/arm64/boot/dts/freescale/imx8mm-beacon-som.dtsi index 988f8ab679ad..40f5e7a3b064 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm-beacon-som.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mm-beacon-som.dtsi @@ -91,7 +91,7 @@ #size-cells = <1>; compatible = "jedec,spi-nor"; spi-max-frequency = <80000000>; - spi-tx-bus-width = <4>; + spi-tx-bus-width = <1>; spi-rx-bus-width = <4>; }; }; diff --git a/arch/arm64/boot/dts/freescale/imx8mm-evk.dts b/arch/arm64/boot/dts/freescale/imx8mm-evk.dts index 4e2820d19244..a2b24d4d4e3e 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm-evk.dts +++ b/arch/arm64/boot/dts/freescale/imx8mm-evk.dts @@ -48,7 +48,7 @@ #size-cells = <1>; compatible = "jedec,spi-nor"; spi-max-frequency = <80000000>; - spi-tx-bus-width = <4>; + spi-tx-bus-width = <1>; spi-rx-bus-width = <4>; }; }; diff --git a/arch/arm64/boot/dts/freescale/imx8mm-kontron-n801x-s.dts b/arch/arm64/boot/dts/freescale/imx8mm-kontron-n801x-s.dts index d17abb515835..e99e7644ff39 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm-kontron-n801x-s.dts +++ b/arch/arm64/boot/dts/freescale/imx8mm-kontron-n801x-s.dts @@ -70,7 +70,9 @@ regulator-name = "rst-usb-eth2"; pinctrl-names = "default"; pinctrl-0 = <&pinctrl_usb_eth2>; - gpio = <&gpio3 2 GPIO_ACTIVE_LOW>; + gpio = <&gpio3 2 GPIO_ACTIVE_HIGH>; + enable-active-high; + regulator-always-on; }; reg_vdd_5v: regulator-5v { @@ -95,7 +97,7 @@ clocks = <&osc_can>; interrupt-parent = <&gpio4>; interrupts = <28 IRQ_TYPE_EDGE_FALLING>; - spi-max-frequency = <100000>; + spi-max-frequency = <10000000>; vdd-supply = <®_vdd_3v3>; xceiver-supply = <®_vdd_5v>; }; @@ -111,7 +113,7 @@ &fec1 { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_enet>; - phy-connection-type = "rgmii"; + phy-connection-type = "rgmii-rxid"; phy-handle = <ðphy>; status = "okay"; diff --git a/arch/arm64/boot/dts/freescale/imx8mm-kontron-n801x-som.dtsi b/arch/arm64/boot/dts/freescale/imx8mm-kontron-n801x-som.dtsi index d0456daefda8..42bbbb3f532b 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm-kontron-n801x-som.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mm-kontron-n801x-som.dtsi @@ -91,10 +91,12 @@ reg_vdd_soc: BUCK1 { regulator-name = "buck1"; regulator-min-microvolt = <800000>; - regulator-max-microvolt = <900000>; + regulator-max-microvolt = <850000>; regulator-boot-on; regulator-always-on; regulator-ramp-delay = <3125>; + nxp,dvs-run-voltage = <850000>; + nxp,dvs-standby-voltage = <800000>; }; reg_vdd_arm: BUCK2 { @@ -102,6 +104,7 @@ regulator-min-microvolt = <850000>; regulator-max-microvolt = <950000>; regulator-boot-on; + regulator-always-on; regulator-ramp-delay = <3125>; nxp,dvs-run-voltage = <950000>; nxp,dvs-standby-voltage = <850000>; @@ -110,7 +113,7 @@ reg_vdd_dram: BUCK3 { regulator-name = "buck3"; regulator-min-microvolt = <850000>; - regulator-max-microvolt = <900000>; + regulator-max-microvolt = <950000>; regulator-boot-on; regulator-always-on; }; @@ -149,7 +152,7 @@ reg_vdd_snvs: LDO2 { regulator-name = "ldo2"; - regulator-min-microvolt = <850000>; + regulator-min-microvolt = <800000>; regulator-max-microvolt = <900000>; regulator-boot-on; regulator-always-on; diff --git a/arch/arm64/boot/dts/freescale/imx8mm-venice-gw7902.dts b/arch/arm64/boot/dts/freescale/imx8mm-venice-gw7902.dts index 05cb60991fb9..d52686f4c059 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm-venice-gw7902.dts +++ b/arch/arm64/boot/dts/freescale/imx8mm-venice-gw7902.dts @@ -647,7 +647,7 @@ pinctrl_hog: hoggrp { fsl,pins = < MX8MM_IOMUXC_NAND_CE0_B_GPIO3_IO1 0x40000159 /* M2_GDIS# */ - MX8MM_IOMUXC_GPIO1_IO12_GPIO1_IO12 0x40000041 /* M2_RST# */ + MX8MM_IOMUXC_GPIO1_IO13_GPIO1_IO13 0x40000041 /* M2_RST# */ MX8MM_IOMUXC_NAND_DATA01_GPIO3_IO7 0x40000119 /* M2_OFF# */ MX8MM_IOMUXC_GPIO1_IO15_GPIO1_IO15 0x40000159 /* M2_WDIS# */ MX8MM_IOMUXC_SAI1_TXD2_GPIO4_IO14 0x40000041 /* AMP GPIO1 */ diff --git a/arch/arm64/boot/dts/freescale/imx8mn-beacon-som.dtsi b/arch/arm64/boot/dts/freescale/imx8mn-beacon-som.dtsi index 54eaf3d6055b..3b2d627a0342 100644 --- a/arch/arm64/boot/dts/freescale/imx8mn-beacon-som.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mn-beacon-som.dtsi @@ -101,7 +101,7 @@ #size-cells = <1>; compatible = "jedec,spi-nor"; spi-max-frequency = <80000000>; - spi-tx-bus-width = <4>; + spi-tx-bus-width = <1>; spi-rx-bus-width = <4>; }; }; diff --git a/arch/arm64/boot/dts/freescale/imx8mn-venice-gw7902.dts b/arch/arm64/boot/dts/freescale/imx8mn-venice-gw7902.dts index e77db4996e58..236f425e1570 100644 --- a/arch/arm64/boot/dts/freescale/imx8mn-venice-gw7902.dts +++ b/arch/arm64/boot/dts/freescale/imx8mn-venice-gw7902.dts @@ -633,7 +633,7 @@ pinctrl_hog: hoggrp { fsl,pins = < MX8MN_IOMUXC_NAND_CE0_B_GPIO3_IO1 0x40000159 /* M2_GDIS# */ - MX8MN_IOMUXC_GPIO1_IO12_GPIO1_IO12 0x40000041 /* M2_RST# */ + MX8MN_IOMUXC_GPIO1_IO13_GPIO1_IO13 0x40000041 /* M2_RST# */ MX8MN_IOMUXC_NAND_DATA01_GPIO3_IO7 0x40000119 /* M2_OFF# */ MX8MN_IOMUXC_GPIO1_IO15_GPIO1_IO15 0x40000159 /* M2_WDIS# */ MX8MN_IOMUXC_SAI2_RXFS_GPIO4_IO21 0x40000041 /* APP GPIO1 */ diff --git a/arch/arm64/boot/dts/freescale/imx8mp-phycore-som.dtsi b/arch/arm64/boot/dts/freescale/imx8mp-phycore-som.dtsi index aa78e0d8c72b..fc178eebf8aa 100644 --- a/arch/arm64/boot/dts/freescale/imx8mp-phycore-som.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mp-phycore-som.dtsi @@ -74,7 +74,7 @@ compatible = "jedec,spi-nor"; reg = <0>; spi-max-frequency = <80000000>; - spi-tx-bus-width = <4>; + spi-tx-bus-width = <1>; spi-rx-bus-width = <4>; }; }; diff --git a/arch/arm64/boot/dts/freescale/imx8mq-evk.dts b/arch/arm64/boot/dts/freescale/imx8mq-evk.dts index 49f9db971f3b..b83df77195ec 100644 --- a/arch/arm64/boot/dts/freescale/imx8mq-evk.dts +++ b/arch/arm64/boot/dts/freescale/imx8mq-evk.dts @@ -337,6 +337,8 @@ #size-cells = <1>; compatible = "micron,n25q256a", "jedec,spi-nor"; spi-max-frequency = <29000000>; + spi-tx-bus-width = <1>; + spi-rx-bus-width = <4>; }; }; diff --git a/arch/arm64/boot/dts/freescale/imx8mq-kontron-pitx-imx8m.dts b/arch/arm64/boot/dts/freescale/imx8mq-kontron-pitx-imx8m.dts index f593e4ff62e1..564746d5000d 100644 --- a/arch/arm64/boot/dts/freescale/imx8mq-kontron-pitx-imx8m.dts +++ b/arch/arm64/boot/dts/freescale/imx8mq-kontron-pitx-imx8m.dts @@ -281,7 +281,7 @@ #address-cells = <1>; #size-cells = <1>; reg = <0>; - spi-tx-bus-width = <4>; + spi-tx-bus-width = <1>; spi-rx-bus-width = <4>; m25p,fast-read; spi-max-frequency = <50000000>; diff --git a/arch/arm64/boot/dts/qcom/pm8150.dtsi b/arch/arm64/boot/dts/qcom/pm8150.dtsi index c566a64b1373..0df76f7b1cc1 100644 --- a/arch/arm64/boot/dts/qcom/pm8150.dtsi +++ b/arch/arm64/boot/dts/qcom/pm8150.dtsi @@ -48,8 +48,10 @@ #size-cells = <0>; pon: power-on@800 { - compatible = "qcom,pm8916-pon"; + compatible = "qcom,pm8998-pon"; reg = <0x0800>; + mode-bootloader = <0x2>; + mode-recovery = <0x1>; pon_pwrkey: pwrkey { compatible = "qcom,pm8941-pwrkey"; diff --git a/arch/arm64/boot/dts/qcom/qrb5165-rb5.dts b/arch/arm64/boot/dts/qcom/qrb5165-rb5.dts index 8ac96f8e79d4..28d5b5528516 100644 --- a/arch/arm64/boot/dts/qcom/qrb5165-rb5.dts +++ b/arch/arm64/boot/dts/qcom/qrb5165-rb5.dts @@ -804,6 +804,16 @@ }; }; +&pon_pwrkey { + status = "okay"; +}; + +&pon_resin { + status = "okay"; + + linux,code = <KEY_VOLUMEDOWN>; +}; + &qupv3_id_0 { status = "okay"; }; diff --git a/arch/arm64/boot/dts/qcom/sc7180-trogdor.dtsi b/arch/arm64/boot/dts/qcom/sc7180-trogdor.dtsi index 0f2b3c00e434..70c88c37de32 100644 --- a/arch/arm64/boot/dts/qcom/sc7180-trogdor.dtsi +++ b/arch/arm64/boot/dts/qcom/sc7180-trogdor.dtsi @@ -273,7 +273,6 @@ "Headphone Jack", "HPOL", "Headphone Jack", "HPOR"; - #sound-dai-cells = <0>; #address-cells = <1>; #size-cells = <0>; @@ -301,11 +300,11 @@ }; }; - dai-link@2 { + dai-link@5 { link-name = "MultiMedia2"; - reg = <2>; + reg = <LPASS_DP_RX>; cpu { - sound-dai = <&lpass_cpu 2>; + sound-dai = <&lpass_cpu LPASS_DP_RX>; }; codec { @@ -782,7 +781,7 @@ hp_i2c: &i2c9 { qcom,playback-sd-lines = <0>; }; - hdmi-primary@0 { + hdmi@5 { reg = <LPASS_DP_RX>; }; }; diff --git a/arch/arm64/boot/dts/qcom/sc7280.dtsi b/arch/arm64/boot/dts/qcom/sc7280.dtsi index 53a21d086178..fd78f16181dd 100644 --- a/arch/arm64/boot/dts/qcom/sc7280.dtsi +++ b/arch/arm64/boot/dts/qcom/sc7280.dtsi @@ -1850,9 +1850,9 @@ cpufreq_hw: cpufreq@18591000 { compatible = "qcom,cpufreq-epss"; - reg = <0 0x18591100 0 0x900>, - <0 0x18592100 0 0x900>, - <0 0x18593100 0 0x900>; + reg = <0 0x18591000 0 0x1000>, + <0 0x18592000 0 0x1000>, + <0 0x18593000 0 0x1000>; clocks = <&rpmhcc RPMH_CXO_CLK>, <&gcc GCC_GPLL0>; clock-names = "xo", "alternate"; #freq-domain-cells = <1>; diff --git a/arch/arm64/boot/dts/qcom/sdm630.dtsi b/arch/arm64/boot/dts/qcom/sdm630.dtsi index 9153e6616ba4..9c7f87e42fcc 100644 --- a/arch/arm64/boot/dts/qcom/sdm630.dtsi +++ b/arch/arm64/boot/dts/qcom/sdm630.dtsi @@ -654,9 +654,20 @@ compatible = "qcom,sdm660-a2noc"; reg = <0x01704000 0xc100>; #interconnect-cells = <1>; - clock-names = "bus", "bus_a"; + clock-names = "bus", + "bus_a", + "ipa", + "ufs_axi", + "aggre2_ufs_axi", + "aggre2_usb3_axi", + "cfg_noc_usb2_axi"; clocks = <&rpmcc RPM_SMD_AGGR2_NOC_CLK>, - <&rpmcc RPM_SMD_AGGR2_NOC_A_CLK>; + <&rpmcc RPM_SMD_AGGR2_NOC_A_CLK>, + <&rpmcc RPM_SMD_IPA_CLK>, + <&gcc GCC_UFS_AXI_CLK>, + <&gcc GCC_AGGRE2_UFS_AXI_CLK>, + <&gcc GCC_AGGRE2_USB3_AXI_CLK>, + <&gcc GCC_CFG_NOC_USB2_AXI_CLK>; }; mnoc: interconnect@1745000 { diff --git a/arch/arm64/boot/dts/qcom/sdm845.dtsi b/arch/arm64/boot/dts/qcom/sdm845.dtsi index 6d7172e6f4c3..b3b911926184 100644 --- a/arch/arm64/boot/dts/qcom/sdm845.dtsi +++ b/arch/arm64/boot/dts/qcom/sdm845.dtsi @@ -128,23 +128,28 @@ no-map; }; - wlan_msa_mem: memory@8c400000 { - reg = <0 0x8c400000 0 0x100000>; + ipa_fw_mem: memory@8c400000 { + reg = <0 0x8c400000 0 0x10000>; no-map; }; - gpu_mem: memory@8c515000 { - reg = <0 0x8c515000 0 0x2000>; + ipa_gsi_mem: memory@8c410000 { + reg = <0 0x8c410000 0 0x5000>; no-map; }; - ipa_fw_mem: memory@8c517000 { - reg = <0 0x8c517000 0 0x5a000>; + gpu_mem: memory@8c415000 { + reg = <0 0x8c415000 0 0x2000>; no-map; }; - adsp_mem: memory@8c600000 { - reg = <0 0x8c600000 0 0x1a00000>; + adsp_mem: memory@8c500000 { + reg = <0 0x8c500000 0 0x1a00000>; + no-map; + }; + + wlan_msa_mem: memory@8df00000 { + reg = <0 0x8df00000 0 0x100000>; no-map; }; diff --git a/arch/arm64/boot/dts/qcom/sdm850-lenovo-yoga-c630.dts b/arch/arm64/boot/dts/qcom/sdm850-lenovo-yoga-c630.dts index 385e5029437d..2ba23aa582a1 100644 --- a/arch/arm64/boot/dts/qcom/sdm850-lenovo-yoga-c630.dts +++ b/arch/arm64/boot/dts/qcom/sdm850-lenovo-yoga-c630.dts @@ -16,6 +16,17 @@ #include "sdm850.dtsi" #include "pm8998.dtsi" +/* + * Update following upstream (sdm845.dtsi) reserved + * memory mappings for firmware loading to succeed + * and enable the IPA device. + */ +/delete-node/ &ipa_fw_mem; +/delete-node/ &ipa_gsi_mem; +/delete-node/ &gpu_mem; +/delete-node/ &adsp_mem; +/delete-node/ &wlan_msa_mem; + / { model = "Lenovo Yoga C630"; compatible = "lenovo,yoga-c630", "qcom,sdm845"; @@ -58,6 +69,29 @@ }; }; + /* Reserved memory changes for IPA */ + reserved-memory { + wlan_msa_mem: memory@8c400000 { + reg = <0 0x8c400000 0 0x100000>; + no-map; + }; + + gpu_mem: memory@8c515000 { + reg = <0 0x8c515000 0 0x2000>; + no-map; + }; + + ipa_fw_mem: memory@8c517000 { + reg = <0 0x8c517000 0 0x5a000>; + no-map; + }; + + adsp_mem: memory@8c600000 { + reg = <0 0x8c600000 0 0x1a00000>; + no-map; + }; + }; + sn65dsi86_refclk: sn65dsi86-refclk { compatible = "fixed-clock"; #clock-cells = <0>; diff --git a/arch/arm64/boot/dts/qcom/sm8250.dtsi b/arch/arm64/boot/dts/qcom/sm8250.dtsi index 8c15d9fed08f..d12e4cbfc852 100644 --- a/arch/arm64/boot/dts/qcom/sm8250.dtsi +++ b/arch/arm64/boot/dts/qcom/sm8250.dtsi @@ -2590,9 +2590,10 @@ power-domains = <&dispcc MDSS_GDSC>; clocks = <&dispcc DISP_CC_MDSS_AHB_CLK>, + <&gcc GCC_DISP_HF_AXI_CLK>, <&gcc GCC_DISP_SF_AXI_CLK>, <&dispcc DISP_CC_MDSS_MDP_CLK>; - clock-names = "iface", "nrt_bus", "core"; + clock-names = "iface", "bus", "nrt_bus", "core"; assigned-clocks = <&dispcc DISP_CC_MDSS_MDP_CLK>; assigned-clock-rates = <460000000>; diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig index 156d96afbbfc..545197bc0501 100644 --- a/arch/arm64/configs/defconfig +++ b/arch/arm64/configs/defconfig @@ -245,7 +245,6 @@ CONFIG_DEVTMPFS_MOUNT=y CONFIG_FW_LOADER_USER_HELPER=y CONFIG_FW_LOADER_USER_HELPER_FALLBACK=y CONFIG_HISILICON_LPC=y -CONFIG_SIMPLE_PM_BUS=y CONFIG_FSL_MC_BUS=y CONFIG_TEGRA_ACONNECT=m CONFIG_GNSS=m diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig index 55f19450091b..addfa413650b 100644 --- a/arch/arm64/crypto/Kconfig +++ b/arch/arm64/crypto/Kconfig @@ -88,16 +88,12 @@ config CRYPTO_AES_ARM64_CE_BLK depends on KERNEL_MODE_NEON select CRYPTO_SKCIPHER select CRYPTO_AES_ARM64_CE - select CRYPTO_AES_ARM64 - select CRYPTO_SIMD config CRYPTO_AES_ARM64_NEON_BLK tristate "AES in ECB/CBC/CTR/XTS modes using NEON instructions" depends on KERNEL_MODE_NEON select CRYPTO_SKCIPHER - select CRYPTO_AES_ARM64 select CRYPTO_LIB_AES - select CRYPTO_SIMD config CRYPTO_CHACHA20_NEON tristate "ChaCha20, XChaCha20, and XChaCha12 stream ciphers using NEON instructions" @@ -122,8 +118,6 @@ config CRYPTO_AES_ARM64_BS depends on KERNEL_MODE_NEON select CRYPTO_SKCIPHER select CRYPTO_AES_ARM64_NEON_BLK - select CRYPTO_AES_ARM64 select CRYPTO_LIB_AES - select CRYPTO_SIMD endif diff --git a/arch/arm64/crypto/aes-ce-ccm-core.S b/arch/arm64/crypto/aes-ce-ccm-core.S index 99a028e298ed..b03f7f71f893 100644 --- a/arch/arm64/crypto/aes-ce-ccm-core.S +++ b/arch/arm64/crypto/aes-ce-ccm-core.S @@ -12,22 +12,21 @@ .arch armv8-a+crypto /* - * void ce_aes_ccm_auth_data(u8 mac[], u8 const in[], u32 abytes, - * u32 *macp, u8 const rk[], u32 rounds); + * u32 ce_aes_ccm_auth_data(u8 mac[], u8 const in[], u32 abytes, + * u32 macp, u8 const rk[], u32 rounds); */ SYM_FUNC_START(ce_aes_ccm_auth_data) - ldr w8, [x3] /* leftover from prev round? */ ld1 {v0.16b}, [x0] /* load mac */ - cbz w8, 1f - sub w8, w8, #16 + cbz w3, 1f + sub w3, w3, #16 eor v1.16b, v1.16b, v1.16b 0: ldrb w7, [x1], #1 /* get 1 byte of input */ subs w2, w2, #1 - add w8, w8, #1 + add w3, w3, #1 ins v1.b[0], w7 ext v1.16b, v1.16b, v1.16b, #1 /* rotate in the input bytes */ beq 8f /* out of input? */ - cbnz w8, 0b + cbnz w3, 0b eor v0.16b, v0.16b, v1.16b 1: ld1 {v3.4s}, [x4] /* load first round key */ prfm pldl1strm, [x1] @@ -62,7 +61,7 @@ SYM_FUNC_START(ce_aes_ccm_auth_data) beq 10f adds w2, w2, #16 beq 10f - mov w8, w2 + mov w3, w2 7: ldrb w7, [x1], #1 umov w6, v0.b[0] eor w6, w6, w7 @@ -71,15 +70,15 @@ SYM_FUNC_START(ce_aes_ccm_auth_data) beq 10f ext v0.16b, v0.16b, v0.16b, #1 /* rotate out the mac bytes */ b 7b -8: cbz w8, 91f - mov w7, w8 - add w8, w8, #16 +8: cbz w3, 91f + mov w7, w3 + add w3, w3, #16 9: ext v1.16b, v1.16b, v1.16b, #1 adds w7, w7, #1 bne 9b 91: eor v0.16b, v0.16b, v1.16b st1 {v0.16b}, [x0] -10: str w8, [x3] +10: mov w0, w3 ret SYM_FUNC_END(ce_aes_ccm_auth_data) @@ -124,6 +123,7 @@ SYM_FUNC_START(ce_aes_ccm_final) SYM_FUNC_END(ce_aes_ccm_final) .macro aes_ccm_do_crypt,enc + cbz x2, 5f ldr x8, [x6, #8] /* load lower ctr */ ld1 {v0.16b}, [x5] /* load mac */ CPU_LE( rev x8, x8 ) /* keep swabbed ctr in reg */ diff --git a/arch/arm64/crypto/aes-ce-ccm-glue.c b/arch/arm64/crypto/aes-ce-ccm-glue.c index f6d19b0dc893..c4f14415f5f0 100644 --- a/arch/arm64/crypto/aes-ce-ccm-glue.c +++ b/arch/arm64/crypto/aes-ce-ccm-glue.c @@ -6,12 +6,10 @@ */ #include <asm/neon.h> -#include <asm/simd.h> #include <asm/unaligned.h> #include <crypto/aes.h> #include <crypto/scatterwalk.h> #include <crypto/internal/aead.h> -#include <crypto/internal/simd.h> #include <crypto/internal/skcipher.h> #include <linux/module.h> @@ -29,8 +27,8 @@ static int num_rounds(struct crypto_aes_ctx *ctx) return 6 + ctx->key_length / 4; } -asmlinkage void ce_aes_ccm_auth_data(u8 mac[], u8 const in[], u32 abytes, - u32 *macp, u32 const rk[], u32 rounds); +asmlinkage u32 ce_aes_ccm_auth_data(u8 mac[], u8 const in[], u32 abytes, + u32 macp, u32 const rk[], u32 rounds); asmlinkage void ce_aes_ccm_encrypt(u8 out[], u8 const in[], u32 cbytes, u32 const rk[], u32 rounds, u8 mac[], @@ -96,41 +94,6 @@ static int ccm_init_mac(struct aead_request *req, u8 maciv[], u32 msglen) return 0; } -static void ccm_update_mac(struct crypto_aes_ctx *key, u8 mac[], u8 const in[], - u32 abytes, u32 *macp) -{ - if (crypto_simd_usable()) { - kernel_neon_begin(); - ce_aes_ccm_auth_data(mac, in, abytes, macp, key->key_enc, - num_rounds(key)); - kernel_neon_end(); - } else { - if (*macp > 0 && *macp < AES_BLOCK_SIZE) { - int added = min(abytes, AES_BLOCK_SIZE - *macp); - - crypto_xor(&mac[*macp], in, added); - - *macp += added; - in += added; - abytes -= added; - } - - while (abytes >= AES_BLOCK_SIZE) { - aes_encrypt(key, mac, mac); - crypto_xor(mac, in, AES_BLOCK_SIZE); - - in += AES_BLOCK_SIZE; - abytes -= AES_BLOCK_SIZE; - } - - if (abytes > 0) { - aes_encrypt(key, mac, mac); - crypto_xor(mac, in, abytes); - *macp = abytes; - } - } -} - static void ccm_calculate_auth_mac(struct aead_request *req, u8 mac[]) { struct crypto_aead *aead = crypto_aead_reqtfm(req); @@ -150,7 +113,8 @@ static void ccm_calculate_auth_mac(struct aead_request *req, u8 mac[]) ltag.len = 6; } - ccm_update_mac(ctx, mac, (u8 *)<ag, ltag.len, &macp); + macp = ce_aes_ccm_auth_data(mac, (u8 *)<ag, ltag.len, macp, + ctx->key_enc, num_rounds(ctx)); scatterwalk_start(&walk, req->src); do { @@ -161,8 +125,16 @@ static void ccm_calculate_auth_mac(struct aead_request *req, u8 mac[]) scatterwalk_start(&walk, sg_next(walk.sg)); n = scatterwalk_clamp(&walk, len); } + n = min_t(u32, n, SZ_4K); /* yield NEON at least every 4k */ p = scatterwalk_map(&walk); - ccm_update_mac(ctx, mac, p, n, &macp); + + macp = ce_aes_ccm_auth_data(mac, p, n, macp, ctx->key_enc, + num_rounds(ctx)); + + if (len / SZ_4K > (len - n) / SZ_4K) { + kernel_neon_end(); + kernel_neon_begin(); + } len -= n; scatterwalk_unmap(p); @@ -171,54 +143,6 @@ static void ccm_calculate_auth_mac(struct aead_request *req, u8 mac[]) } while (len); } -static int ccm_crypt_fallback(struct skcipher_walk *walk, u8 mac[], u8 iv0[], - struct crypto_aes_ctx *ctx, bool enc) -{ - u8 buf[AES_BLOCK_SIZE]; - int err = 0; - - while (walk->nbytes) { - int blocks = walk->nbytes / AES_BLOCK_SIZE; - u32 tail = walk->nbytes % AES_BLOCK_SIZE; - u8 *dst = walk->dst.virt.addr; - u8 *src = walk->src.virt.addr; - u32 nbytes = walk->nbytes; - - if (nbytes == walk->total && tail > 0) { - blocks++; - tail = 0; - } - - do { - u32 bsize = AES_BLOCK_SIZE; - - if (nbytes < AES_BLOCK_SIZE) - bsize = nbytes; - - crypto_inc(walk->iv, AES_BLOCK_SIZE); - aes_encrypt(ctx, buf, walk->iv); - aes_encrypt(ctx, mac, mac); - if (enc) - crypto_xor(mac, src, bsize); - crypto_xor_cpy(dst, src, buf, bsize); - if (!enc) - crypto_xor(mac, dst, bsize); - dst += bsize; - src += bsize; - nbytes -= bsize; - } while (--blocks); - - err = skcipher_walk_done(walk, tail); - } - - if (!err) { - aes_encrypt(ctx, buf, iv0); - aes_encrypt(ctx, mac, mac); - crypto_xor(mac, buf, AES_BLOCK_SIZE); - } - return err; -} - static int ccm_encrypt(struct aead_request *req) { struct crypto_aead *aead = crypto_aead_reqtfm(req); @@ -233,41 +157,41 @@ static int ccm_encrypt(struct aead_request *req) if (err) return err; - if (req->assoclen) - ccm_calculate_auth_mac(req, mac); - /* preserve the original iv for the final round */ memcpy(buf, req->iv, AES_BLOCK_SIZE); err = skcipher_walk_aead_encrypt(&walk, req, false); + if (unlikely(err)) + return err; - if (crypto_simd_usable()) { - while (walk.nbytes) { - u32 tail = walk.nbytes % AES_BLOCK_SIZE; + kernel_neon_begin(); - if (walk.nbytes == walk.total) - tail = 0; + if (req->assoclen) + ccm_calculate_auth_mac(req, mac); - kernel_neon_begin(); - ce_aes_ccm_encrypt(walk.dst.virt.addr, - walk.src.virt.addr, - walk.nbytes - tail, ctx->key_enc, - num_rounds(ctx), mac, walk.iv); - kernel_neon_end(); + do { + u32 tail = walk.nbytes % AES_BLOCK_SIZE; + + if (walk.nbytes == walk.total) + tail = 0; + + ce_aes_ccm_encrypt(walk.dst.virt.addr, walk.src.virt.addr, + walk.nbytes - tail, ctx->key_enc, + num_rounds(ctx), mac, walk.iv); + if (walk.nbytes == walk.total) + ce_aes_ccm_final(mac, buf, ctx->key_enc, num_rounds(ctx)); + + kernel_neon_end(); + + if (walk.nbytes) { err = skcipher_walk_done(&walk, tail); + if (unlikely(err)) + return err; + if (unlikely(walk.nbytes)) + kernel_neon_begin(); } - if (!err) { - kernel_neon_begin(); - ce_aes_ccm_final(mac, buf, ctx->key_enc, - num_rounds(ctx)); - kernel_neon_end(); - } - } else { - err = ccm_crypt_fallback(&walk, mac, buf, ctx, true); - } - if (err) - return err; + } while (walk.nbytes); /* copy authtag to end of dst */ scatterwalk_map_and_copy(mac, req->dst, req->assoclen + req->cryptlen, @@ -291,42 +215,41 @@ static int ccm_decrypt(struct aead_request *req) if (err) return err; - if (req->assoclen) - ccm_calculate_auth_mac(req, mac); - /* preserve the original iv for the final round */ memcpy(buf, req->iv, AES_BLOCK_SIZE); err = skcipher_walk_aead_decrypt(&walk, req, false); + if (unlikely(err)) + return err; - if (crypto_simd_usable()) { - while (walk.nbytes) { - u32 tail = walk.nbytes % AES_BLOCK_SIZE; + kernel_neon_begin(); - if (walk.nbytes == walk.total) - tail = 0; + if (req->assoclen) + ccm_calculate_auth_mac(req, mac); - kernel_neon_begin(); - ce_aes_ccm_decrypt(walk.dst.virt.addr, - walk.src.virt.addr, - walk.nbytes - tail, ctx->key_enc, - num_rounds(ctx), mac, walk.iv); - kernel_neon_end(); + do { + u32 tail = walk.nbytes % AES_BLOCK_SIZE; + + if (walk.nbytes == walk.total) + tail = 0; + + ce_aes_ccm_decrypt(walk.dst.virt.addr, walk.src.virt.addr, + walk.nbytes - tail, ctx->key_enc, + num_rounds(ctx), mac, walk.iv); + + if (walk.nbytes == walk.total) + ce_aes_ccm_final(mac, buf, ctx->key_enc, num_rounds(ctx)); + kernel_neon_end(); + + if (walk.nbytes) { err = skcipher_walk_done(&walk, tail); + if (unlikely(err)) + return err; + if (unlikely(walk.nbytes)) + kernel_neon_begin(); } - if (!err) { - kernel_neon_begin(); - ce_aes_ccm_final(mac, buf, ctx->key_enc, - num_rounds(ctx)); - kernel_neon_end(); - } - } else { - err = ccm_crypt_fallback(&walk, mac, buf, ctx, false); - } - - if (err) - return err; + } while (walk.nbytes); /* compare calculated auth tag with the stored one */ scatterwalk_map_and_copy(buf, req->src, diff --git a/arch/arm64/crypto/aes-glue.c b/arch/arm64/crypto/aes-glue.c index 17e735931a0c..30b7cc6a7079 100644 --- a/arch/arm64/crypto/aes-glue.c +++ b/arch/arm64/crypto/aes-glue.c @@ -444,7 +444,7 @@ static int __maybe_unused essiv_cbc_decrypt(struct skcipher_request *req) return err ?: cbc_decrypt_walk(req, &walk); } -static int ctr_encrypt(struct skcipher_request *req) +static int __maybe_unused ctr_encrypt(struct skcipher_request *req) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm); @@ -485,29 +485,6 @@ static int ctr_encrypt(struct skcipher_request *req) return err; } -static void ctr_encrypt_one(struct crypto_skcipher *tfm, const u8 *src, u8 *dst) -{ - const struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm); - unsigned long flags; - - /* - * Temporarily disable interrupts to avoid races where - * cachelines are evicted when the CPU is interrupted - * to do something else. - */ - local_irq_save(flags); - aes_encrypt(ctx, dst, src); - local_irq_restore(flags); -} - -static int __maybe_unused ctr_encrypt_sync(struct skcipher_request *req) -{ - if (!crypto_simd_usable()) - return crypto_ctr_encrypt_walk(req, ctr_encrypt_one); - - return ctr_encrypt(req); -} - static int __maybe_unused xts_encrypt(struct skcipher_request *req) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); @@ -656,10 +633,9 @@ static int __maybe_unused xts_decrypt(struct skcipher_request *req) static struct skcipher_alg aes_algs[] = { { #if defined(USE_V8_CRYPTO_EXTENSIONS) || !IS_ENABLED(CONFIG_CRYPTO_AES_ARM64_BS) .base = { - .cra_name = "__ecb(aes)", - .cra_driver_name = "__ecb-aes-" MODE, + .cra_name = "ecb(aes)", + .cra_driver_name = "ecb-aes-" MODE, .cra_priority = PRIO, - .cra_flags = CRYPTO_ALG_INTERNAL, .cra_blocksize = AES_BLOCK_SIZE, .cra_ctxsize = sizeof(struct crypto_aes_ctx), .cra_module = THIS_MODULE, @@ -671,10 +647,9 @@ static struct skcipher_alg aes_algs[] = { { .decrypt = ecb_decrypt, }, { .base = { - .cra_name = "__cbc(aes)", - .cra_driver_name = "__cbc-aes-" MODE, + .cra_name = "cbc(aes)", + .cra_driver_name = "cbc-aes-" MODE, .cra_priority = PRIO, - .cra_flags = CRYPTO_ALG_INTERNAL, .cra_blocksize = AES_BLOCK_SIZE, .cra_ctxsize = sizeof(struct crypto_aes_ctx), .cra_module = THIS_MODULE, @@ -687,10 +662,9 @@ static struct skcipher_alg aes_algs[] = { { .decrypt = cbc_decrypt, }, { .base = { - .cra_name = "__ctr(aes)", - .cra_driver_name = "__ctr-aes-" MODE, + .cra_name = "ctr(aes)", + .cra_driver_name = "ctr-aes-" MODE, .cra_priority = PRIO, - .cra_flags = CRYPTO_ALG_INTERNAL, .cra_blocksize = 1, .cra_ctxsize = sizeof(struct crypto_aes_ctx), .cra_module = THIS_MODULE, @@ -704,26 +678,9 @@ static struct skcipher_alg aes_algs[] = { { .decrypt = ctr_encrypt, }, { .base = { - .cra_name = "ctr(aes)", - .cra_driver_name = "ctr-aes-" MODE, - .cra_priority = PRIO - 1, - .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct crypto_aes_ctx), - .cra_module = THIS_MODULE, - }, - .min_keysize = AES_MIN_KEY_SIZE, - .max_keysize = AES_MAX_KEY_SIZE, - .ivsize = AES_BLOCK_SIZE, - .chunksize = AES_BLOCK_SIZE, - .setkey = skcipher_aes_setkey, - .encrypt = ctr_encrypt_sync, - .decrypt = ctr_encrypt_sync, -}, { - .base = { - .cra_name = "__xts(aes)", - .cra_driver_name = "__xts-aes-" MODE, + .cra_name = "xts(aes)", + .cra_driver_name = "xts-aes-" MODE, .cra_priority = PRIO, - .cra_flags = CRYPTO_ALG_INTERNAL, .cra_blocksize = AES_BLOCK_SIZE, .cra_ctxsize = sizeof(struct crypto_aes_xts_ctx), .cra_module = THIS_MODULE, @@ -738,10 +695,9 @@ static struct skcipher_alg aes_algs[] = { { }, { #endif .base = { - .cra_name = "__cts(cbc(aes))", - .cra_driver_name = "__cts-cbc-aes-" MODE, + .cra_name = "cts(cbc(aes))", + .cra_driver_name = "cts-cbc-aes-" MODE, .cra_priority = PRIO, - .cra_flags = CRYPTO_ALG_INTERNAL, .cra_blocksize = AES_BLOCK_SIZE, .cra_ctxsize = sizeof(struct crypto_aes_ctx), .cra_module = THIS_MODULE, @@ -755,10 +711,9 @@ static struct skcipher_alg aes_algs[] = { { .decrypt = cts_cbc_decrypt, }, { .base = { - .cra_name = "__essiv(cbc(aes),sha256)", - .cra_driver_name = "__essiv-cbc-aes-sha256-" MODE, + .cra_name = "essiv(cbc(aes),sha256)", + .cra_driver_name = "essiv-cbc-aes-sha256-" MODE, .cra_priority = PRIO + 1, - .cra_flags = CRYPTO_ALG_INTERNAL, .cra_blocksize = AES_BLOCK_SIZE, .cra_ctxsize = sizeof(struct crypto_aes_essiv_cbc_ctx), .cra_module = THIS_MODULE, @@ -997,28 +952,15 @@ static struct shash_alg mac_algs[] = { { .descsize = sizeof(struct mac_desc_ctx), } }; -static struct simd_skcipher_alg *aes_simd_algs[ARRAY_SIZE(aes_algs)]; - static void aes_exit(void) { - int i; - - for (i = 0; i < ARRAY_SIZE(aes_simd_algs); i++) - if (aes_simd_algs[i]) - simd_skcipher_free(aes_simd_algs[i]); - crypto_unregister_shashes(mac_algs, ARRAY_SIZE(mac_algs)); crypto_unregister_skciphers(aes_algs, ARRAY_SIZE(aes_algs)); } static int __init aes_init(void) { - struct simd_skcipher_alg *simd; - const char *basename; - const char *algname; - const char *drvname; int err; - int i; err = crypto_register_skciphers(aes_algs, ARRAY_SIZE(aes_algs)); if (err) @@ -1028,26 +970,8 @@ static int __init aes_init(void) if (err) goto unregister_ciphers; - for (i = 0; i < ARRAY_SIZE(aes_algs); i++) { - if (!(aes_algs[i].base.cra_flags & CRYPTO_ALG_INTERNAL)) - continue; - - algname = aes_algs[i].base.cra_name + 2; - drvname = aes_algs[i].base.cra_driver_name + 2; - basename = aes_algs[i].base.cra_driver_name; - simd = simd_skcipher_create_compat(algname, drvname, basename); - err = PTR_ERR(simd); - if (IS_ERR(simd)) - goto unregister_simds; - - aes_simd_algs[i] = simd; - } - return 0; -unregister_simds: - aes_exit(); - return err; unregister_ciphers: crypto_unregister_skciphers(aes_algs, ARRAY_SIZE(aes_algs)); return err; diff --git a/arch/arm64/crypto/aes-neonbs-glue.c b/arch/arm64/crypto/aes-neonbs-glue.c index fb507d569922..8df6ad8cb09d 100644 --- a/arch/arm64/crypto/aes-neonbs-glue.c +++ b/arch/arm64/crypto/aes-neonbs-glue.c @@ -63,11 +63,6 @@ struct aesbs_cbc_ctx { u32 enc[AES_MAX_KEYLENGTH_U32]; }; -struct aesbs_ctr_ctx { - struct aesbs_ctx key; /* must be first member */ - struct crypto_aes_ctx fallback; -}; - struct aesbs_xts_ctx { struct aesbs_ctx key; u32 twkey[AES_MAX_KEYLENGTH_U32]; @@ -207,25 +202,6 @@ static int cbc_decrypt(struct skcipher_request *req) return err; } -static int aesbs_ctr_setkey_sync(struct crypto_skcipher *tfm, const u8 *in_key, - unsigned int key_len) -{ - struct aesbs_ctr_ctx *ctx = crypto_skcipher_ctx(tfm); - int err; - - err = aes_expandkey(&ctx->fallback, in_key, key_len); - if (err) - return err; - - ctx->key.rounds = 6 + key_len / 4; - - kernel_neon_begin(); - aesbs_convert_key(ctx->key.rk, ctx->fallback.key_enc, ctx->key.rounds); - kernel_neon_end(); - - return 0; -} - static int ctr_encrypt(struct skcipher_request *req) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); @@ -292,29 +268,6 @@ static int aesbs_xts_setkey(struct crypto_skcipher *tfm, const u8 *in_key, return aesbs_setkey(tfm, in_key, key_len); } -static void ctr_encrypt_one(struct crypto_skcipher *tfm, const u8 *src, u8 *dst) -{ - struct aesbs_ctr_ctx *ctx = crypto_skcipher_ctx(tfm); - unsigned long flags; - - /* - * Temporarily disable interrupts to avoid races where - * cachelines are evicted when the CPU is interrupted - * to do something else. - */ - local_irq_save(flags); - aes_encrypt(&ctx->fallback, dst, src); - local_irq_restore(flags); -} - -static int ctr_encrypt_sync(struct skcipher_request *req) -{ - if (!crypto_simd_usable()) - return crypto_ctr_encrypt_walk(req, ctr_encrypt_one); - - return ctr_encrypt(req); -} - static int __xts_crypt(struct skcipher_request *req, bool encrypt, void (*fn)(u8 out[], u8 const in[], u8 const rk[], int rounds, int blocks, u8 iv[])) @@ -431,13 +384,12 @@ static int xts_decrypt(struct skcipher_request *req) } static struct skcipher_alg aes_algs[] = { { - .base.cra_name = "__ecb(aes)", - .base.cra_driver_name = "__ecb-aes-neonbs", + .base.cra_name = "ecb(aes)", + .base.cra_driver_name = "ecb-aes-neonbs", .base.cra_priority = 250, .base.cra_blocksize = AES_BLOCK_SIZE, .base.cra_ctxsize = sizeof(struct aesbs_ctx), .base.cra_module = THIS_MODULE, - .base.cra_flags = CRYPTO_ALG_INTERNAL, .min_keysize = AES_MIN_KEY_SIZE, .max_keysize = AES_MAX_KEY_SIZE, @@ -446,13 +398,12 @@ static struct skcipher_alg aes_algs[] = { { .encrypt = ecb_encrypt, .decrypt = ecb_decrypt, }, { - .base.cra_name = "__cbc(aes)", - .base.cra_driver_name = "__cbc-aes-neonbs", + .base.cra_name = "cbc(aes)", + .base.cra_driver_name = "cbc-aes-neonbs", .base.cra_priority = 250, .base.cra_blocksize = AES_BLOCK_SIZE, .base.cra_ctxsize = sizeof(struct aesbs_cbc_ctx), .base.cra_module = THIS_MODULE, - .base.cra_flags = CRYPTO_ALG_INTERNAL, .min_keysize = AES_MIN_KEY_SIZE, .max_keysize = AES_MAX_KEY_SIZE, @@ -462,13 +413,12 @@ static struct skcipher_alg aes_algs[] = { { .encrypt = cbc_encrypt, .decrypt = cbc_decrypt, }, { - .base.cra_name = "__ctr(aes)", - .base.cra_driver_name = "__ctr-aes-neonbs", + .base.cra_name = "ctr(aes)", + .base.cra_driver_name = "ctr-aes-neonbs", .base.cra_priority = 250, .base.cra_blocksize = 1, .base.cra_ctxsize = sizeof(struct aesbs_ctx), .base.cra_module = THIS_MODULE, - .base.cra_flags = CRYPTO_ALG_INTERNAL, .min_keysize = AES_MIN_KEY_SIZE, .max_keysize = AES_MAX_KEY_SIZE, @@ -479,29 +429,12 @@ static struct skcipher_alg aes_algs[] = { { .encrypt = ctr_encrypt, .decrypt = ctr_encrypt, }, { - .base.cra_name = "ctr(aes)", - .base.cra_driver_name = "ctr-aes-neonbs", - .base.cra_priority = 250 - 1, - .base.cra_blocksize = 1, - .base.cra_ctxsize = sizeof(struct aesbs_ctr_ctx), - .base.cra_module = THIS_MODULE, - - .min_keysize = AES_MIN_KEY_SIZE, - .max_keysize = AES_MAX_KEY_SIZE, - .chunksize = AES_BLOCK_SIZE, - .walksize = 8 * AES_BLOCK_SIZE, - .ivsize = AES_BLOCK_SIZE, - .setkey = aesbs_ctr_setkey_sync, - .encrypt = ctr_encrypt_sync, - .decrypt = ctr_encrypt_sync, -}, { - .base.cra_name = "__xts(aes)", - .base.cra_driver_name = "__xts-aes-neonbs", + .base.cra_name = "xts(aes)", + .base.cra_driver_name = "xts-aes-neonbs", .base.cra_priority = 250, .base.cra_blocksize = AES_BLOCK_SIZE, .base.cra_ctxsize = sizeof(struct aesbs_xts_ctx), .base.cra_module = THIS_MODULE, - .base.cra_flags = CRYPTO_ALG_INTERNAL, .min_keysize = 2 * AES_MIN_KEY_SIZE, .max_keysize = 2 * AES_MAX_KEY_SIZE, @@ -512,54 +445,17 @@ static struct skcipher_alg aes_algs[] = { { .decrypt = xts_decrypt, } }; -static struct simd_skcipher_alg *aes_simd_algs[ARRAY_SIZE(aes_algs)]; - static void aes_exit(void) { - int i; - - for (i = 0; i < ARRAY_SIZE(aes_simd_algs); i++) - if (aes_simd_algs[i]) - simd_skcipher_free(aes_simd_algs[i]); - crypto_unregister_skciphers(aes_algs, ARRAY_SIZE(aes_algs)); } static int __init aes_init(void) { - struct simd_skcipher_alg *simd; - const char *basename; - const char *algname; - const char *drvname; - int err; - int i; - if (!cpu_have_named_feature(ASIMD)) return -ENODEV; - err = crypto_register_skciphers(aes_algs, ARRAY_SIZE(aes_algs)); - if (err) - return err; - - for (i = 0; i < ARRAY_SIZE(aes_algs); i++) { - if (!(aes_algs[i].base.cra_flags & CRYPTO_ALG_INTERNAL)) - continue; - - algname = aes_algs[i].base.cra_name + 2; - drvname = aes_algs[i].base.cra_driver_name + 2; - basename = aes_algs[i].base.cra_driver_name; - simd = simd_skcipher_create_compat(algname, drvname, basename); - err = PTR_ERR(simd); - if (IS_ERR(simd)) - goto unregister_simds; - - aes_simd_algs[i] = simd; - } - return 0; - -unregister_simds: - aes_exit(); - return err; + return crypto_register_skciphers(aes_algs, ARRAY_SIZE(aes_algs)); } module_init(aes_init); diff --git a/arch/arm64/crypto/ghash-ce-glue.c b/arch/arm64/crypto/ghash-ce-glue.c index 720cd3a58da3..15794fe21a0b 100644 --- a/arch/arm64/crypto/ghash-ce-glue.c +++ b/arch/arm64/crypto/ghash-ce-glue.c @@ -362,84 +362,36 @@ static int gcm_encrypt(struct aead_request *req) err = skcipher_walk_aead_encrypt(&walk, req, false); - if (likely(crypto_simd_usable())) { - do { - const u8 *src = walk.src.virt.addr; - u8 *dst = walk.dst.virt.addr; - int nbytes = walk.nbytes; - - tag = (u8 *)&lengths; - - if (unlikely(nbytes > 0 && nbytes < AES_BLOCK_SIZE)) { - src = dst = memcpy(buf + sizeof(buf) - nbytes, - src, nbytes); - } else if (nbytes < walk.total) { - nbytes &= ~(AES_BLOCK_SIZE - 1); - tag = NULL; - } - - kernel_neon_begin(); - pmull_gcm_encrypt(nbytes, dst, src, ctx->ghash_key.h, - dg, iv, ctx->aes_key.key_enc, nrounds, - tag); - kernel_neon_end(); - - if (unlikely(!nbytes)) - break; - - if (unlikely(nbytes > 0 && nbytes < AES_BLOCK_SIZE)) - memcpy(walk.dst.virt.addr, - buf + sizeof(buf) - nbytes, nbytes); - - err = skcipher_walk_done(&walk, walk.nbytes - nbytes); - } while (walk.nbytes); - } else { - while (walk.nbytes >= AES_BLOCK_SIZE) { - int blocks = walk.nbytes / AES_BLOCK_SIZE; - const u8 *src = walk.src.virt.addr; - u8 *dst = walk.dst.virt.addr; - int remaining = blocks; - - do { - aes_encrypt(&ctx->aes_key, buf, iv); - crypto_xor_cpy(dst, src, buf, AES_BLOCK_SIZE); - crypto_inc(iv, AES_BLOCK_SIZE); - - dst += AES_BLOCK_SIZE; - src += AES_BLOCK_SIZE; - } while (--remaining > 0); - - ghash_do_update(blocks, dg, walk.dst.virt.addr, - &ctx->ghash_key, NULL); - - err = skcipher_walk_done(&walk, - walk.nbytes % AES_BLOCK_SIZE); - } - - /* handle the tail */ - if (walk.nbytes) { - aes_encrypt(&ctx->aes_key, buf, iv); + do { + const u8 *src = walk.src.virt.addr; + u8 *dst = walk.dst.virt.addr; + int nbytes = walk.nbytes; - crypto_xor_cpy(walk.dst.virt.addr, walk.src.virt.addr, - buf, walk.nbytes); + tag = (u8 *)&lengths; - memcpy(buf, walk.dst.virt.addr, walk.nbytes); - memset(buf + walk.nbytes, 0, sizeof(buf) - walk.nbytes); + if (unlikely(nbytes > 0 && nbytes < AES_BLOCK_SIZE)) { + src = dst = memcpy(buf + sizeof(buf) - nbytes, + src, nbytes); + } else if (nbytes < walk.total) { + nbytes &= ~(AES_BLOCK_SIZE - 1); + tag = NULL; } - tag = (u8 *)&lengths; - ghash_do_update(1, dg, tag, &ctx->ghash_key, - walk.nbytes ? buf : NULL); + kernel_neon_begin(); + pmull_gcm_encrypt(nbytes, dst, src, ctx->ghash_key.h, + dg, iv, ctx->aes_key.key_enc, nrounds, + tag); + kernel_neon_end(); - if (walk.nbytes) - err = skcipher_walk_done(&walk, 0); + if (unlikely(!nbytes)) + break; - put_unaligned_be64(dg[1], tag); - put_unaligned_be64(dg[0], tag + 8); - put_unaligned_be32(1, iv + GCM_IV_SIZE); - aes_encrypt(&ctx->aes_key, iv, iv); - crypto_xor(tag, iv, AES_BLOCK_SIZE); - } + if (unlikely(nbytes > 0 && nbytes < AES_BLOCK_SIZE)) + memcpy(walk.dst.virt.addr, + buf + sizeof(buf) - nbytes, nbytes); + + err = skcipher_walk_done(&walk, walk.nbytes - nbytes); + } while (walk.nbytes); if (err) return err; @@ -464,6 +416,7 @@ static int gcm_decrypt(struct aead_request *req) u64 dg[2] = {}; be128 lengths; u8 *tag; + int ret; int err; lengths.a = cpu_to_be64(req->assoclen * 8); @@ -481,101 +434,41 @@ static int gcm_decrypt(struct aead_request *req) err = skcipher_walk_aead_decrypt(&walk, req, false); - if (likely(crypto_simd_usable())) { - int ret; - - do { - const u8 *src = walk.src.virt.addr; - u8 *dst = walk.dst.virt.addr; - int nbytes = walk.nbytes; - - tag = (u8 *)&lengths; - - if (unlikely(nbytes > 0 && nbytes < AES_BLOCK_SIZE)) { - src = dst = memcpy(buf + sizeof(buf) - nbytes, - src, nbytes); - } else if (nbytes < walk.total) { - nbytes &= ~(AES_BLOCK_SIZE - 1); - tag = NULL; - } - - kernel_neon_begin(); - ret = pmull_gcm_decrypt(nbytes, dst, src, - ctx->ghash_key.h, - dg, iv, ctx->aes_key.key_enc, - nrounds, tag, otag, authsize); - kernel_neon_end(); - - if (unlikely(!nbytes)) - break; - - if (unlikely(nbytes > 0 && nbytes < AES_BLOCK_SIZE)) - memcpy(walk.dst.virt.addr, - buf + sizeof(buf) - nbytes, nbytes); - - err = skcipher_walk_done(&walk, walk.nbytes - nbytes); - } while (walk.nbytes); - - if (err) - return err; - if (ret) - return -EBADMSG; - } else { - while (walk.nbytes >= AES_BLOCK_SIZE) { - int blocks = walk.nbytes / AES_BLOCK_SIZE; - const u8 *src = walk.src.virt.addr; - u8 *dst = walk.dst.virt.addr; - - ghash_do_update(blocks, dg, walk.src.virt.addr, - &ctx->ghash_key, NULL); - - do { - aes_encrypt(&ctx->aes_key, buf, iv); - crypto_xor_cpy(dst, src, buf, AES_BLOCK_SIZE); - crypto_inc(iv, AES_BLOCK_SIZE); - - dst += AES_BLOCK_SIZE; - src += AES_BLOCK_SIZE; - } while (--blocks > 0); + do { + const u8 *src = walk.src.virt.addr; + u8 *dst = walk.dst.virt.addr; + int nbytes = walk.nbytes; - err = skcipher_walk_done(&walk, - walk.nbytes % AES_BLOCK_SIZE); - } + tag = (u8 *)&lengths; - /* handle the tail */ - if (walk.nbytes) { - memcpy(buf, walk.src.virt.addr, walk.nbytes); - memset(buf + walk.nbytes, 0, sizeof(buf) - walk.nbytes); + if (unlikely(nbytes > 0 && nbytes < AES_BLOCK_SIZE)) { + src = dst = memcpy(buf + sizeof(buf) - nbytes, + src, nbytes); + } else if (nbytes < walk.total) { + nbytes &= ~(AES_BLOCK_SIZE - 1); + tag = NULL; } - tag = (u8 *)&lengths; - ghash_do_update(1, dg, tag, &ctx->ghash_key, - walk.nbytes ? buf : NULL); - - if (walk.nbytes) { - aes_encrypt(&ctx->aes_key, buf, iv); + kernel_neon_begin(); + ret = pmull_gcm_decrypt(nbytes, dst, src, ctx->ghash_key.h, + dg, iv, ctx->aes_key.key_enc, + nrounds, tag, otag, authsize); + kernel_neon_end(); - crypto_xor_cpy(walk.dst.virt.addr, walk.src.virt.addr, - buf, walk.nbytes); + if (unlikely(!nbytes)) + break; - err = skcipher_walk_done(&walk, 0); - } + if (unlikely(nbytes > 0 && nbytes < AES_BLOCK_SIZE)) + memcpy(walk.dst.virt.addr, + buf + sizeof(buf) - nbytes, nbytes); - if (err) - return err; + err = skcipher_walk_done(&walk, walk.nbytes - nbytes); + } while (walk.nbytes); - put_unaligned_be64(dg[1], tag); - put_unaligned_be64(dg[0], tag + 8); - put_unaligned_be32(1, iv + GCM_IV_SIZE); - aes_encrypt(&ctx->aes_key, iv, iv); - crypto_xor(tag, iv, AES_BLOCK_SIZE); + if (err) + return err; - if (crypto_memneq(tag, otag, authsize)) { - memzero_explicit(tag, AES_BLOCK_SIZE); - return -EBADMSG; - } - } - return 0; + return ret ? -EBADMSG : 0; } static struct aead_alg gcm_aes_alg = { diff --git a/arch/arm64/include/asm/arch_timer.h b/arch/arm64/include/asm/arch_timer.h index 88d20f04c64a..af1fafbe7e1d 100644 --- a/arch/arm64/include/asm/arch_timer.h +++ b/arch/arm64/include/asm/arch_timer.h @@ -32,7 +32,7 @@ ({ \ const struct arch_timer_erratum_workaround *__wa; \ __wa = __this_cpu_read(timer_unstable_counter_workaround); \ - (__wa && __wa->h) ? __wa->h : arch_timer_##h; \ + (__wa && __wa->h) ? ({ isb(); __wa->h;}) : arch_timer_##h; \ }) #else @@ -52,8 +52,6 @@ struct arch_timer_erratum_workaround { enum arch_timer_erratum_match_type match_type; const void *id; const char *desc; - u32 (*read_cntp_tval_el0)(void); - u32 (*read_cntv_tval_el0)(void); u64 (*read_cntpct_el0)(void); u64 (*read_cntvct_el0)(void); int (*set_next_event_phys)(unsigned long, struct clock_event_device *); @@ -64,25 +62,28 @@ struct arch_timer_erratum_workaround { DECLARE_PER_CPU(const struct arch_timer_erratum_workaround *, timer_unstable_counter_workaround); -/* inline sysreg accessors that make erratum_handler() work */ -static inline notrace u32 arch_timer_read_cntp_tval_el0(void) +static inline notrace u64 arch_timer_read_cntpct_el0(void) { - return read_sysreg(cntp_tval_el0); -} + u64 cnt; -static inline notrace u32 arch_timer_read_cntv_tval_el0(void) -{ - return read_sysreg(cntv_tval_el0); -} + asm volatile(ALTERNATIVE("isb\n mrs %0, cntpct_el0", + "nop\n" __mrs_s("%0", SYS_CNTPCTSS_EL0), + ARM64_HAS_ECV) + : "=r" (cnt)); -static inline notrace u64 arch_timer_read_cntpct_el0(void) -{ - return read_sysreg(cntpct_el0); + return cnt; } static inline notrace u64 arch_timer_read_cntvct_el0(void) { - return read_sysreg(cntvct_el0); + u64 cnt; + + asm volatile(ALTERNATIVE("isb\n mrs %0, cntvct_el0", + "nop\n" __mrs_s("%0", SYS_CNTVCTSS_EL0), + ARM64_HAS_ECV) + : "=r" (cnt)); + + return cnt; } #define arch_timer_reg_read_stable(reg) \ @@ -102,51 +103,58 @@ static inline notrace u64 arch_timer_read_cntvct_el0(void) * the code. */ static __always_inline -void arch_timer_reg_write_cp15(int access, enum arch_timer_reg reg, u32 val) +void arch_timer_reg_write_cp15(int access, enum arch_timer_reg reg, u64 val) { if (access == ARCH_TIMER_PHYS_ACCESS) { switch (reg) { case ARCH_TIMER_REG_CTRL: write_sysreg(val, cntp_ctl_el0); + isb(); break; - case ARCH_TIMER_REG_TVAL: - write_sysreg(val, cntp_tval_el0); + case ARCH_TIMER_REG_CVAL: + write_sysreg(val, cntp_cval_el0); break; + default: + BUILD_BUG(); } } else if (access == ARCH_TIMER_VIRT_ACCESS) { switch (reg) { case ARCH_TIMER_REG_CTRL: write_sysreg(val, cntv_ctl_el0); + isb(); break; - case ARCH_TIMER_REG_TVAL: - write_sysreg(val, cntv_tval_el0); + case ARCH_TIMER_REG_CVAL: + write_sysreg(val, cntv_cval_el0); break; + default: + BUILD_BUG(); } + } else { + BUILD_BUG(); } - - isb(); } static __always_inline -u32 arch_timer_reg_read_cp15(int access, enum arch_timer_reg reg) +u64 arch_timer_reg_read_cp15(int access, enum arch_timer_reg reg) { if (access == ARCH_TIMER_PHYS_ACCESS) { switch (reg) { case ARCH_TIMER_REG_CTRL: return read_sysreg(cntp_ctl_el0); - case ARCH_TIMER_REG_TVAL: - return arch_timer_reg_read_stable(cntp_tval_el0); + default: + BUILD_BUG(); } } else if (access == ARCH_TIMER_VIRT_ACCESS) { switch (reg) { case ARCH_TIMER_REG_CTRL: return read_sysreg(cntv_ctl_el0); - case ARCH_TIMER_REG_TVAL: - return arch_timer_reg_read_stable(cntv_tval_el0); + default: + BUILD_BUG(); } } - BUG(); + BUILD_BUG(); + unreachable(); } static inline u32 arch_timer_get_cntfrq(void) @@ -169,7 +177,6 @@ static __always_inline u64 __arch_counter_get_cntpct_stable(void) { u64 cnt; - isb(); cnt = arch_timer_reg_read_stable(cntpct_el0); arch_counter_enforce_ordering(cnt); return cnt; @@ -179,8 +186,10 @@ static __always_inline u64 __arch_counter_get_cntpct(void) { u64 cnt; - isb(); - cnt = read_sysreg(cntpct_el0); + asm volatile(ALTERNATIVE("isb\n mrs %0, cntpct_el0", + "nop\n" __mrs_s("%0", SYS_CNTPCTSS_EL0), + ARM64_HAS_ECV) + : "=r" (cnt)); arch_counter_enforce_ordering(cnt); return cnt; } @@ -189,7 +198,6 @@ static __always_inline u64 __arch_counter_get_cntvct_stable(void) { u64 cnt; - isb(); cnt = arch_timer_reg_read_stable(cntvct_el0); arch_counter_enforce_ordering(cnt); return cnt; @@ -199,8 +207,10 @@ static __always_inline u64 __arch_counter_get_cntvct(void) { u64 cnt; - isb(); - cnt = read_sysreg(cntvct_el0); + asm volatile(ALTERNATIVE("isb\n mrs %0, cntvct_el0", + "nop\n" __mrs_s("%0", SYS_CNTVCTSS_EL0), + ARM64_HAS_ECV) + : "=r" (cnt)); arch_counter_enforce_ordering(cnt); return cnt; } diff --git a/arch/arm64/include/asm/asm-extable.h b/arch/arm64/include/asm/asm-extable.h new file mode 100644 index 000000000000..c39f2437e08e --- /dev/null +++ b/arch/arm64/include/asm/asm-extable.h @@ -0,0 +1,95 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef __ASM_ASM_EXTABLE_H +#define __ASM_ASM_EXTABLE_H + +#define EX_TYPE_NONE 0 +#define EX_TYPE_FIXUP 1 +#define EX_TYPE_BPF 2 +#define EX_TYPE_UACCESS_ERR_ZERO 3 +#define EX_TYPE_LOAD_UNALIGNED_ZEROPAD 4 + +#ifdef __ASSEMBLY__ + +#define __ASM_EXTABLE_RAW(insn, fixup, type, data) \ + .pushsection __ex_table, "a"; \ + .align 2; \ + .long ((insn) - .); \ + .long ((fixup) - .); \ + .short (type); \ + .short (data); \ + .popsection; + +/* + * Create an exception table entry for `insn`, which will branch to `fixup` + * when an unhandled fault is taken. + */ + .macro _asm_extable, insn, fixup + __ASM_EXTABLE_RAW(\insn, \fixup, EX_TYPE_FIXUP, 0) + .endm + +/* + * Create an exception table entry for `insn` if `fixup` is provided. Otherwise + * do nothing. + */ + .macro _cond_extable, insn, fixup + .ifnc \fixup, + _asm_extable \insn, \fixup + .endif + .endm + +#else /* __ASSEMBLY__ */ + +#include <linux/bits.h> +#include <linux/stringify.h> + +#include <asm/gpr-num.h> + +#define __ASM_EXTABLE_RAW(insn, fixup, type, data) \ + ".pushsection __ex_table, \"a\"\n" \ + ".align 2\n" \ + ".long ((" insn ") - .)\n" \ + ".long ((" fixup ") - .)\n" \ + ".short (" type ")\n" \ + ".short (" data ")\n" \ + ".popsection\n" + +#define _ASM_EXTABLE(insn, fixup) \ + __ASM_EXTABLE_RAW(#insn, #fixup, __stringify(EX_TYPE_FIXUP), "0") + +#define EX_DATA_REG_ERR_SHIFT 0 +#define EX_DATA_REG_ERR GENMASK(4, 0) +#define EX_DATA_REG_ZERO_SHIFT 5 +#define EX_DATA_REG_ZERO GENMASK(9, 5) + +#define EX_DATA_REG(reg, gpr) \ + "((.L__gpr_num_" #gpr ") << " __stringify(EX_DATA_REG_##reg##_SHIFT) ")" + +#define _ASM_EXTABLE_UACCESS_ERR_ZERO(insn, fixup, err, zero) \ + __DEFINE_ASM_GPR_NUMS \ + __ASM_EXTABLE_RAW(#insn, #fixup, \ + __stringify(EX_TYPE_UACCESS_ERR_ZERO), \ + "(" \ + EX_DATA_REG(ERR, err) " | " \ + EX_DATA_REG(ZERO, zero) \ + ")") + +#define _ASM_EXTABLE_UACCESS_ERR(insn, fixup, err) \ + _ASM_EXTABLE_UACCESS_ERR_ZERO(insn, fixup, err, wzr) + +#define EX_DATA_REG_DATA_SHIFT 0 +#define EX_DATA_REG_DATA GENMASK(4, 0) +#define EX_DATA_REG_ADDR_SHIFT 5 +#define EX_DATA_REG_ADDR GENMASK(9, 5) + +#define _ASM_EXTABLE_LOAD_UNALIGNED_ZEROPAD(insn, fixup, data, addr) \ + __DEFINE_ASM_GPR_NUMS \ + __ASM_EXTABLE_RAW(#insn, #fixup, \ + __stringify(EX_TYPE_LOAD_UNALIGNED_ZEROPAD), \ + "(" \ + EX_DATA_REG(DATA, data) " | " \ + EX_DATA_REG(ADDR, addr) \ + ")") + +#endif /* __ASSEMBLY__ */ + +#endif /* __ASM_ASM_EXTABLE_H */ diff --git a/arch/arm64/include/asm/asm-uaccess.h b/arch/arm64/include/asm/asm-uaccess.h index ccedf548dac9..0557af834e03 100644 --- a/arch/arm64/include/asm/asm-uaccess.h +++ b/arch/arm64/include/asm/asm-uaccess.h @@ -3,10 +3,11 @@ #define __ASM_ASM_UACCESS_H #include <asm/alternative-macros.h> +#include <asm/asm-extable.h> +#include <asm/assembler.h> #include <asm/kernel-pgtable.h> #include <asm/mmu.h> #include <asm/sysreg.h> -#include <asm/assembler.h> /* * User access enabling/disabling macros. @@ -58,6 +59,10 @@ alternative_else_nop_endif .endm #endif +#define USER(l, x...) \ +9999: x; \ + _asm_extable 9999b, l + /* * Generate the assembly for LDTR/STTR with exception table entries. * This is complicated as there is no post-increment or pair versions of the diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index bfa58409a4d4..136d13f3d6e9 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -14,9 +14,10 @@ #include <asm-generic/export.h> -#include <asm/asm-offsets.h> #include <asm/alternative.h> #include <asm/asm-bug.h> +#include <asm/asm-extable.h> +#include <asm/asm-offsets.h> #include <asm/cpufeature.h> #include <asm/cputype.h> #include <asm/debug-monitors.h> @@ -130,32 +131,6 @@ alternative_endif .endm /* - * Create an exception table entry for `insn`, which will branch to `fixup` - * when an unhandled fault is taken. - */ - .macro _asm_extable, insn, fixup - .pushsection __ex_table, "a" - .align 3 - .long (\insn - .), (\fixup - .) - .popsection - .endm - -/* - * Create an exception table entry for `insn` if `fixup` is provided. Otherwise - * do nothing. - */ - .macro _cond_extable, insn, fixup - .ifnc \fixup, - _asm_extable \insn, \fixup - .endif - .endm - - -#define USER(l, x...) \ -9999: x; \ - _asm_extable 9999b, l - -/* * Register aliases. */ lr .req x30 // link register @@ -405,19 +380,19 @@ alternative_endif /* * Macro to perform a data cache maintenance for the interval - * [start, end) + * [start, end) with dcache line size explicitly provided. * * op: operation passed to dc instruction * domain: domain used in dsb instruciton * start: starting virtual address of the region * end: end virtual address of the region + * linesz: dcache line size * fixup: optional label to branch to on user fault - * Corrupts: start, end, tmp1, tmp2 + * Corrupts: start, end, tmp */ - .macro dcache_by_line_op op, domain, start, end, tmp1, tmp2, fixup - dcache_line_size \tmp1, \tmp2 - sub \tmp2, \tmp1, #1 - bic \start, \start, \tmp2 + .macro dcache_by_myline_op op, domain, start, end, linesz, tmp, fixup + sub \tmp, \linesz, #1 + bic \start, \start, \tmp .Ldcache_op\@: .ifc \op, cvau __dcache_op_workaround_clean_cache \op, \start @@ -436,7 +411,7 @@ alternative_endif .endif .endif .endif - add \start, \start, \tmp1 + add \start, \start, \linesz cmp \start, \end b.lo .Ldcache_op\@ dsb \domain @@ -445,6 +420,22 @@ alternative_endif .endm /* + * Macro to perform a data cache maintenance for the interval + * [start, end) + * + * op: operation passed to dc instruction + * domain: domain used in dsb instruciton + * start: starting virtual address of the region + * end: end virtual address of the region + * fixup: optional label to branch to on user fault + * Corrupts: start, end, tmp1, tmp2 + */ + .macro dcache_by_line_op op, domain, start, end, tmp1, tmp2, fixup + dcache_line_size \tmp1, \tmp2 + dcache_by_myline_op \op, \domain, \start, \end, \tmp1, \tmp2, \fixup + .endm + +/* * Macro to perform an instruction cache maintenance for the interval * [start, end) * @@ -468,6 +459,25 @@ alternative_endif .endm /* + * To prevent the possibility of old and new partial table walks being visible + * in the tlb, switch the ttbr to a zero page when we invalidate the old + * records. D4.7.1 'General TLB maintenance requirements' in ARM DDI 0487A.i + * Even switching to our copied tables will cause a changed output address at + * each stage of the walk. + */ + .macro break_before_make_ttbr_switch zero_page, page_table, tmp, tmp2 + phys_to_ttbr \tmp, \zero_page + msr ttbr1_el1, \tmp + isb + tlbi vmalle1 + dsb nsh + phys_to_ttbr \tmp, \page_table + offset_ttbr1 \tmp, \tmp2 + msr ttbr1_el1, \tmp + isb + .endm + +/* * reset_pmuserenr_el0 - reset PMUSERENR_EL0 if PMUv3 present */ .macro reset_pmuserenr_el0, tmpreg diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h index 451e11e5fd23..1c5a00598458 100644 --- a/arch/arm64/include/asm/barrier.h +++ b/arch/arm64/include/asm/barrier.h @@ -23,7 +23,7 @@ #define dsb(opt) asm volatile("dsb " #opt : : : "memory") #define psb_csync() asm volatile("hint #17" : : : "memory") -#define tsb_csync() asm volatile("hint #18" : : : "memory") +#define __tsb_csync() asm volatile("hint #18" : : : "memory") #define csdb() asm volatile("hint #20" : : : "memory") #ifdef CONFIG_ARM64_PSEUDO_NMI @@ -46,6 +46,20 @@ #define dma_rmb() dmb(oshld) #define dma_wmb() dmb(oshst) + +#define tsb_csync() \ + do { \ + /* \ + * CPUs affected by Arm Erratum 2054223 or 2067961 needs \ + * another TSB to ensure the trace is flushed. The barriers \ + * don't have to be strictly back to back, as long as the \ + * CPU is in trace prohibited state. \ + */ \ + if (cpus_have_final_cap(ARM64_WORKAROUND_TSB_FLUSH_FAILURE)) \ + __tsb_csync(); \ + __tsb_csync(); \ + } while (0) + /* * Generate a mask for array_index__nospec() that is ~0UL when 0 <= idx < sz * and 0 otherwise. diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index 6231e1f0abe7..19b8441aa8f2 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -73,6 +73,8 @@ #define ARM_CPU_PART_CORTEX_A76 0xD0B #define ARM_CPU_PART_NEOVERSE_N1 0xD0C #define ARM_CPU_PART_CORTEX_A77 0xD0D +#define ARM_CPU_PART_CORTEX_A710 0xD47 +#define ARM_CPU_PART_NEOVERSE_N2 0xD49 #define APM_CPU_PART_POTENZA 0x000 @@ -113,6 +115,8 @@ #define MIDR_CORTEX_A76 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A76) #define MIDR_NEOVERSE_N1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_N1) #define MIDR_CORTEX_A77 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A77) +#define MIDR_CORTEX_A710 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A710) +#define MIDR_NEOVERSE_N2 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_N2) #define MIDR_THUNDERX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX) #define MIDR_THUNDERX_81XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_81XX) #define MIDR_THUNDERX_83XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_83XX) diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h index 29f97eb3dad4..a305ce256090 100644 --- a/arch/arm64/include/asm/esr.h +++ b/arch/arm64/include/asm/esr.h @@ -227,6 +227,9 @@ #define ESR_ELx_SYS64_ISS_SYS_CNTVCT (ESR_ELx_SYS64_ISS_SYS_VAL(3, 3, 2, 14, 0) | \ ESR_ELx_SYS64_ISS_DIR_READ) +#define ESR_ELx_SYS64_ISS_SYS_CNTVCTSS (ESR_ELx_SYS64_ISS_SYS_VAL(3, 3, 6, 14, 0) | \ + ESR_ELx_SYS64_ISS_DIR_READ) + #define ESR_ELx_SYS64_ISS_SYS_CNTFRQ (ESR_ELx_SYS64_ISS_SYS_VAL(3, 3, 0, 14, 0) | \ ESR_ELx_SYS64_ISS_DIR_READ) @@ -317,6 +320,9 @@ #define ESR_ELx_CP15_64_ISS_SYS_CNTVCT (ESR_ELx_CP15_64_ISS_SYS_VAL(1, 14) | \ ESR_ELx_CP15_64_ISS_DIR_READ) +#define ESR_ELx_CP15_64_ISS_SYS_CNTVCTSS (ESR_ELx_CP15_64_ISS_SYS_VAL(9, 14) | \ + ESR_ELx_CP15_64_ISS_DIR_READ) + #define ESR_ELx_CP15_32_ISS_SYS_CNTFRQ (ESR_ELx_CP15_32_ISS_SYS_VAL(0, 0, 14, 0) |\ ESR_ELx_CP15_32_ISS_DIR_READ) diff --git a/arch/arm64/include/asm/extable.h b/arch/arm64/include/asm/extable.h index b15eb4a3e6b2..8b300dd28def 100644 --- a/arch/arm64/include/asm/extable.h +++ b/arch/arm64/include/asm/extable.h @@ -18,10 +18,21 @@ struct exception_table_entry { int insn, fixup; + short type, data; }; #define ARCH_HAS_RELATIVE_EXTABLE +#define swap_ex_entry_fixup(a, b, tmp, delta) \ +do { \ + (a)->fixup = (b)->fixup + (delta); \ + (b)->fixup = (tmp).fixup - (delta); \ + (a)->type = (b)->type; \ + (b)->type = (tmp).type; \ + (a)->data = (b)->data; \ + (b)->data = (tmp).data; \ +} while (0) + static inline bool in_bpf_jit(struct pt_regs *regs) { if (!IS_ENABLED(CONFIG_BPF_JIT)) @@ -32,16 +43,16 @@ static inline bool in_bpf_jit(struct pt_regs *regs) } #ifdef CONFIG_BPF_JIT -int arm64_bpf_fixup_exception(const struct exception_table_entry *ex, - struct pt_regs *regs); +bool ex_handler_bpf(const struct exception_table_entry *ex, + struct pt_regs *regs); #else /* !CONFIG_BPF_JIT */ static inline -int arm64_bpf_fixup_exception(const struct exception_table_entry *ex, - struct pt_regs *regs) +bool ex_handler_bpf(const struct exception_table_entry *ex, + struct pt_regs *regs) { - return 0; + return false; } #endif /* !CONFIG_BPF_JIT */ -extern int fixup_exception(struct pt_regs *regs); +bool fixup_exception(struct pt_regs *regs); #endif diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h index 9a62884183e5..dbb4b30a5648 100644 --- a/arch/arm64/include/asm/fpsimd.h +++ b/arch/arm64/include/asm/fpsimd.h @@ -62,15 +62,13 @@ static inline size_t sve_ffr_offset(int vl) static inline void *sve_pffr(struct thread_struct *thread) { - return (char *)thread->sve_state + sve_ffr_offset(thread->sve_vl); + return (char *)thread->sve_state + sve_ffr_offset(thread_get_sve_vl(thread)); } -extern void sve_save_state(void *state, u32 *pfpsr); +extern void sve_save_state(void *state, u32 *pfpsr, int save_ffr); extern void sve_load_state(void const *state, u32 const *pfpsr, - unsigned long vq_minus_1); -extern void sve_flush_live(unsigned long vq_minus_1); -extern void sve_load_from_fpsimd_state(struct user_fpsimd_state const *state, - unsigned long vq_minus_1); + int restore_ffr); +extern void sve_flush_live(bool flush_ffr, unsigned long vq_minus_1); extern unsigned int sve_get_vl(void); extern void sve_set_vq(unsigned long vq_minus_1); @@ -79,10 +77,6 @@ extern void sve_kernel_enable(const struct arm64_cpu_capabilities *__unused); extern u64 read_zcr_features(void); -extern int __ro_after_init sve_max_vl; -extern int __ro_after_init sve_max_virtualisable_vl; -extern __ro_after_init DECLARE_BITMAP(sve_vq_map, SVE_VQ_MAX); - /* * Helpers to translate bit indices in sve_vq_map to VQ values (and * vice versa). This allows find_next_bit() to be used to find the @@ -98,15 +92,29 @@ static inline unsigned int __bit_to_vq(unsigned int bit) return SVE_VQ_MAX - bit; } -/* Ensure vq >= SVE_VQ_MIN && vq <= SVE_VQ_MAX before calling this function */ -static inline bool sve_vq_available(unsigned int vq) -{ - return test_bit(__vq_to_bit(vq), sve_vq_map); -} -#ifdef CONFIG_ARM64_SVE +struct vl_info { + enum vec_type type; + const char *name; /* For display purposes */ -extern size_t sve_state_size(struct task_struct const *task); + /* Minimum supported vector length across all CPUs */ + int min_vl; + + /* Maximum supported vector length across all CPUs */ + int max_vl; + int max_virtualisable_vl; + + /* + * Set of available vector lengths, + * where length vq encoded as bit __vq_to_bit(vq): + */ + DECLARE_BITMAP(vq_map, SVE_VQ_MAX); + + /* Set of vector lengths present on at least one cpu: */ + DECLARE_BITMAP(vq_partial_map, SVE_VQ_MAX); +}; + +#ifdef CONFIG_ARM64_SVE extern void sve_alloc(struct task_struct *task); extern void fpsimd_release_task(struct task_struct *task); @@ -143,11 +151,63 @@ static inline void sve_user_enable(void) * Probing and setup functions. * Calls to these functions must be serialised with one another. */ -extern void __init sve_init_vq_map(void); -extern void sve_update_vq_map(void); -extern int sve_verify_vq_map(void); +enum vec_type; + +extern void __init vec_init_vq_map(enum vec_type type); +extern void vec_update_vq_map(enum vec_type type); +extern int vec_verify_vq_map(enum vec_type type); extern void __init sve_setup(void); +extern __ro_after_init struct vl_info vl_info[ARM64_VEC_MAX]; + +static inline void write_vl(enum vec_type type, u64 val) +{ + u64 tmp; + + switch (type) { +#ifdef CONFIG_ARM64_SVE + case ARM64_VEC_SVE: + tmp = read_sysreg_s(SYS_ZCR_EL1) & ~ZCR_ELx_LEN_MASK; + write_sysreg_s(tmp | val, SYS_ZCR_EL1); + break; +#endif + default: + WARN_ON_ONCE(1); + break; + } +} + +static inline int vec_max_vl(enum vec_type type) +{ + return vl_info[type].max_vl; +} + +static inline int vec_max_virtualisable_vl(enum vec_type type) +{ + return vl_info[type].max_virtualisable_vl; +} + +static inline int sve_max_vl(void) +{ + return vec_max_vl(ARM64_VEC_SVE); +} + +static inline int sve_max_virtualisable_vl(void) +{ + return vec_max_virtualisable_vl(ARM64_VEC_SVE); +} + +/* Ensure vq >= SVE_VQ_MIN && vq <= SVE_VQ_MAX before calling this function */ +static inline bool vq_available(enum vec_type type, unsigned int vq) +{ + return test_bit(__vq_to_bit(vq), vl_info[type].vq_map); +} + +static inline bool sve_vq_available(unsigned int vq) +{ + return vq_available(ARM64_VEC_SVE, vq); +} + #else /* ! CONFIG_ARM64_SVE */ static inline void sve_alloc(struct task_struct *task) { } @@ -155,6 +215,11 @@ static inline void fpsimd_release_task(struct task_struct *task) { } static inline void sve_sync_to_fpsimd(struct task_struct *task) { } static inline void sve_sync_from_fpsimd_zeropad(struct task_struct *task) { } +static inline int sve_max_virtualisable_vl(void) +{ + return 0; +} + static inline int sve_set_current_vl(unsigned long arg) { return -EINVAL; @@ -165,14 +230,21 @@ static inline int sve_get_current_vl(void) return -EINVAL; } +static inline int sve_max_vl(void) +{ + return -EINVAL; +} + +static inline bool sve_vq_available(unsigned int vq) { return false; } + static inline void sve_user_disable(void) { BUILD_BUG(); } static inline void sve_user_enable(void) { BUILD_BUG(); } #define sve_cond_update_zcr_vq(val, reg) do { } while (0) -static inline void sve_init_vq_map(void) { } -static inline void sve_update_vq_map(void) { } -static inline int sve_verify_vq_map(void) { return 0; } +static inline void vec_init_vq_map(enum vec_type t) { } +static inline void vec_update_vq_map(enum vec_type t) { } +static inline int vec_verify_vq_map(enum vec_type t) { return 0; } static inline void sve_setup(void) { } #endif /* ! CONFIG_ARM64_SVE */ diff --git a/arch/arm64/include/asm/fpsimdmacros.h b/arch/arm64/include/asm/fpsimdmacros.h index 00a2c0b69c2b..2509d7dde55a 100644 --- a/arch/arm64/include/asm/fpsimdmacros.h +++ b/arch/arm64/include/asm/fpsimdmacros.h @@ -217,28 +217,36 @@ .macro sve_flush_z _for n, 0, 31, _sve_flush_z \n .endm -.macro sve_flush_p_ffr +.macro sve_flush_p _for n, 0, 15, _sve_pfalse \n +.endm +.macro sve_flush_ffr _sve_wrffr 0 .endm -.macro sve_save nxbase, xpfpsr, nxtmp +.macro sve_save nxbase, xpfpsr, save_ffr, nxtmp _for n, 0, 31, _sve_str_v \n, \nxbase, \n - 34 _for n, 0, 15, _sve_str_p \n, \nxbase, \n - 16 + cbz \save_ffr, 921f _sve_rdffr 0 _sve_str_p 0, \nxbase _sve_ldr_p 0, \nxbase, -16 - + b 922f +921: + str xzr, [x\nxbase] // Zero out FFR +922: mrs x\nxtmp, fpsr str w\nxtmp, [\xpfpsr] mrs x\nxtmp, fpcr str w\nxtmp, [\xpfpsr, #4] .endm -.macro __sve_load nxbase, xpfpsr, nxtmp +.macro sve_load nxbase, xpfpsr, restore_ffr, nxtmp _for n, 0, 31, _sve_ldr_v \n, \nxbase, \n - 34 + cbz \restore_ffr, 921f _sve_ldr_p 0, \nxbase _sve_wrffr 0 +921: _for n, 0, 15, _sve_ldr_p \n, \nxbase, \n - 16 ldr w\nxtmp, [\xpfpsr] @@ -246,8 +254,3 @@ ldr w\nxtmp, [\xpfpsr, #4] msr fpcr, x\nxtmp .endm - -.macro sve_load nxbase, xpfpsr, xvqminus1, nxtmp, xtmp2 - sve_load_vq \xvqminus1, x\nxtmp, \xtmp2 - __sve_load \nxbase, \xpfpsr, \nxtmp -.endm diff --git a/arch/arm64/include/asm/ftrace.h b/arch/arm64/include/asm/ftrace.h index 91fa4baa1a93..347b0cc68f07 100644 --- a/arch/arm64/include/asm/ftrace.h +++ b/arch/arm64/include/asm/ftrace.h @@ -15,7 +15,7 @@ #ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS #define ARCH_SUPPORTS_FTRACE_OPS 1 #else -#define MCOUNT_ADDR ((unsigned long)_mcount) +#define MCOUNT_ADDR ((unsigned long)function_nocfi(_mcount)) #endif /* The BL at the callsite's adjusted rec->ip */ diff --git a/arch/arm64/include/asm/futex.h b/arch/arm64/include/asm/futex.h index 8e41faa37c69..bc06691d2062 100644 --- a/arch/arm64/include/asm/futex.h +++ b/arch/arm64/include/asm/futex.h @@ -25,19 +25,14 @@ do { \ " cbz %w0, 3f\n" \ " sub %w4, %w4, %w0\n" \ " cbnz %w4, 1b\n" \ -" mov %w0, %w7\n" \ +" mov %w0, %w6\n" \ "3:\n" \ " dmb ish\n" \ -" .pushsection .fixup,\"ax\"\n" \ -" .align 2\n" \ -"4: mov %w0, %w6\n" \ -" b 3b\n" \ -" .popsection\n" \ - _ASM_EXTABLE(1b, 4b) \ - _ASM_EXTABLE(2b, 4b) \ + _ASM_EXTABLE_UACCESS_ERR(1b, 3b, %w0) \ + _ASM_EXTABLE_UACCESS_ERR(2b, 3b, %w0) \ : "=&r" (ret), "=&r" (oldval), "+Q" (*uaddr), "=&r" (tmp), \ "+r" (loops) \ - : "r" (oparg), "Ir" (-EFAULT), "Ir" (-EAGAIN) \ + : "r" (oparg), "Ir" (-EAGAIN) \ : "memory"); \ uaccess_disable_privileged(); \ } while (0) @@ -105,18 +100,14 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *_uaddr, " cbz %w3, 3f\n" " sub %w4, %w4, %w3\n" " cbnz %w4, 1b\n" -" mov %w0, %w8\n" +" mov %w0, %w7\n" "3:\n" " dmb ish\n" "4:\n" -" .pushsection .fixup,\"ax\"\n" -"5: mov %w0, %w7\n" -" b 4b\n" -" .popsection\n" - _ASM_EXTABLE(1b, 5b) - _ASM_EXTABLE(2b, 5b) + _ASM_EXTABLE_UACCESS_ERR(1b, 4b, %w0) + _ASM_EXTABLE_UACCESS_ERR(2b, 4b, %w0) : "+r" (ret), "=&r" (val), "+Q" (*uaddr), "=&r" (tmp), "+r" (loops) - : "r" (oldval), "r" (newval), "Ir" (-EFAULT), "Ir" (-EAGAIN) + : "r" (oldval), "r" (newval), "Ir" (-EAGAIN) : "memory"); uaccess_disable_privileged(); diff --git a/arch/arm64/include/asm/gpr-num.h b/arch/arm64/include/asm/gpr-num.h new file mode 100644 index 000000000000..05da4a7c5788 --- /dev/null +++ b/arch/arm64/include/asm/gpr-num.h @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef __ASM_GPR_NUM_H +#define __ASM_GPR_NUM_H + +#ifdef __ASSEMBLY__ + + .irp num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30 + .equ .L__gpr_num_x\num, \num + .equ .L__gpr_num_w\num, \num + .endr + .equ .L__gpr_num_xzr, 31 + .equ .L__gpr_num_wzr, 31 + +#else /* __ASSEMBLY__ */ + +#define __DEFINE_ASM_GPR_NUMS \ +" .irp num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30\n" \ +" .equ .L__gpr_num_x\\num, \\num\n" \ +" .equ .L__gpr_num_w\\num, \\num\n" \ +" .endr\n" \ +" .equ .L__gpr_num_xzr, 31\n" \ +" .equ .L__gpr_num_wzr, 31\n" + +#endif /* __ASSEMBLY__ */ + +#endif /* __ASM_GPR_NUM_H */ diff --git a/arch/arm64/include/asm/hwcap.h b/arch/arm64/include/asm/hwcap.h index 8c129db8232a..b100e0055eab 100644 --- a/arch/arm64/include/asm/hwcap.h +++ b/arch/arm64/include/asm/hwcap.h @@ -105,6 +105,7 @@ #define KERNEL_HWCAP_RNG __khwcap2_feature(RNG) #define KERNEL_HWCAP_BTI __khwcap2_feature(BTI) #define KERNEL_HWCAP_MTE __khwcap2_feature(MTE) +#define KERNEL_HWCAP_ECV __khwcap2_feature(ECV) /* * This yields a mask that user programs can use to figure out what diff --git a/arch/arm64/include/asm/kexec.h b/arch/arm64/include/asm/kexec.h index 00dbcc71aeb2..9839bfc163d7 100644 --- a/arch/arm64/include/asm/kexec.h +++ b/arch/arm64/include/asm/kexec.h @@ -90,12 +90,24 @@ static inline void crash_prepare_suspend(void) {} static inline void crash_post_resume(void) {} #endif +#if defined(CONFIG_KEXEC_CORE) +void cpu_soft_restart(unsigned long el2_switch, unsigned long entry, + unsigned long arg0, unsigned long arg1, + unsigned long arg2); +#endif + #define ARCH_HAS_KIMAGE_ARCH struct kimage_arch { void *dtb; phys_addr_t dtb_mem; phys_addr_t kern_reloc; + phys_addr_t el2_vectors; + phys_addr_t ttbr0; + phys_addr_t ttbr1; + phys_addr_t zero_page; + unsigned long phys_offset; + unsigned long t0sz; }; #ifdef CONFIG_KEXEC_FILE diff --git a/arch/arm64/include/asm/kprobes.h b/arch/arm64/include/asm/kprobes.h index 5d38ff4a4806..05cd82eeca13 100644 --- a/arch/arm64/include/asm/kprobes.h +++ b/arch/arm64/include/asm/kprobes.h @@ -39,7 +39,7 @@ void arch_remove_kprobe(struct kprobe *); int kprobe_fault_handler(struct pt_regs *regs, unsigned int fsr); int kprobe_exceptions_notify(struct notifier_block *self, unsigned long val, void *data); -void kretprobe_trampoline(void); +void __kretprobe_trampoline(void); void __kprobes *trampoline_probe_handler(struct pt_regs *regs); #endif /* CONFIG_KPROBES */ diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index 0167f3702c61..50d5e4de244c 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -271,9 +271,10 @@ extern u64 __kvm_get_mdcr_el2(void); /* * KVM extable for unexpected exceptions. - * In the same format _asm_extable, but output to a different section so that - * it can be mapped to EL2. The KVM version is not sorted. The caller must - * ensure: + * Create a struct kvm_exception_table_entry output to a section that can be + * mapped by EL2. The table is not sorted. + * + * The caller must ensure: * x18 has the hypervisor value to allow any Shadow-Call-Stack instrumented * code to write to it, and that SPSR_EL2 and ELR_EL2 are restored by the fixup. */ diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index f1745a843414..1b9a1e242612 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -243,6 +243,7 @@ static inline const void *__tag_set(const void *addr, u8 tag) #ifdef CONFIG_KASAN_HW_TAGS #define arch_enable_tagging_sync() mte_enable_kernel_sync() #define arch_enable_tagging_async() mte_enable_kernel_async() +#define arch_enable_tagging_asymm() mte_enable_kernel_asymm() #define arch_force_async_tag_fault() mte_check_tfsr_exit() #define arch_get_random_tag() mte_get_random_tag() #define arch_get_mem_tag(addr) mte_get_mem_tag(addr) diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h index f4ba93d4ffeb..6770667b34a3 100644 --- a/arch/arm64/include/asm/mmu_context.h +++ b/arch/arm64/include/asm/mmu_context.h @@ -116,6 +116,30 @@ static inline void cpu_install_idmap(void) } /* + * Load our new page tables. A strict BBM approach requires that we ensure that + * TLBs are free of any entries that may overlap with the global mappings we are + * about to install. + * + * For a real hibernate/resume/kexec cycle TTBR0 currently points to a zero + * page, but TLBs may contain stale ASID-tagged entries (e.g. for EFI runtime + * services), while for a userspace-driven test_resume cycle it points to + * userspace page tables (and we must point it at a zero page ourselves). + * + * We change T0SZ as part of installing the idmap. This is undone by + * cpu_uninstall_idmap() in __cpu_suspend_exit(). + */ +static inline void cpu_install_ttbr0(phys_addr_t ttbr0, unsigned long t0sz) +{ + cpu_set_reserved_ttbr0(); + local_flush_tlb_all(); + __cpu_set_tcr_t0sz(t0sz); + + /* avoid cpu_switch_mm() and its SW-PAN and CNP interactions */ + write_sysreg(ttbr0, ttbr0_el1); + isb(); +} + +/* * Atomically replaces the active TTBR1_EL1 PGD with a new VA-compatible PGD, * avoiding the possibility of conflicting TLB entries being allocated. */ diff --git a/arch/arm64/include/asm/mte-kasan.h b/arch/arm64/include/asm/mte-kasan.h index 22420e1f8c03..478b9bcf69ad 100644 --- a/arch/arm64/include/asm/mte-kasan.h +++ b/arch/arm64/include/asm/mte-kasan.h @@ -130,6 +130,7 @@ static inline void mte_set_mem_tag_range(void *addr, size_t size, u8 tag, void mte_enable_kernel_sync(void); void mte_enable_kernel_async(void); +void mte_enable_kernel_asymm(void); #else /* CONFIG_ARM64_MTE */ @@ -161,6 +162,10 @@ static inline void mte_enable_kernel_async(void) { } +static inline void mte_enable_kernel_asymm(void) +{ +} + #endif /* CONFIG_ARM64_MTE */ #endif /* __ASSEMBLY__ */ diff --git a/arch/arm64/include/asm/mte.h b/arch/arm64/include/asm/mte.h index 02511650cffe..075539f5f1c8 100644 --- a/arch/arm64/include/asm/mte.h +++ b/arch/arm64/include/asm/mte.h @@ -88,11 +88,11 @@ static inline int mte_ptrace_copy_tags(struct task_struct *child, #ifdef CONFIG_KASAN_HW_TAGS /* Whether the MTE asynchronous mode is enabled. */ -DECLARE_STATIC_KEY_FALSE(mte_async_mode); +DECLARE_STATIC_KEY_FALSE(mte_async_or_asymm_mode); -static inline bool system_uses_mte_async_mode(void) +static inline bool system_uses_mte_async_or_asymm_mode(void) { - return static_branch_unlikely(&mte_async_mode); + return static_branch_unlikely(&mte_async_or_asymm_mode); } void mte_check_tfsr_el1(void); @@ -121,7 +121,7 @@ static inline void mte_check_tfsr_exit(void) mte_check_tfsr_el1(); } #else -static inline bool system_uses_mte_async_mode(void) +static inline bool system_uses_mte_async_or_asymm_mode(void) { return false; } diff --git a/arch/arm64/include/asm/page.h b/arch/arm64/include/asm/page.h index f98c91bbd7c1..993a27ea6f54 100644 --- a/arch/arm64/include/asm/page.h +++ b/arch/arm64/include/asm/page.h @@ -41,7 +41,6 @@ void tag_clear_highpage(struct page *to); typedef struct page *pgtable_t; -int pfn_valid(unsigned long pfn); int pfn_is_map_memory(unsigned long pfn); #include <asm/memory.h> diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index dfa76afa0ccf..84fbb52b4224 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -1022,6 +1022,11 @@ static inline pgprot_t arch_filter_pgprot(pgprot_t prot) return PAGE_READONLY_EXEC; } +static inline bool pud_sect_supported(void) +{ + return PAGE_SIZE == SZ_4K; +} + #endif /* !__ASSEMBLY__ */ diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index ee2bdc1b9f5b..6f41b65f9962 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -115,6 +115,11 @@ struct debug_info { #endif }; +enum vec_type { + ARM64_VEC_SVE = 0, + ARM64_VEC_MAX, +}; + struct cpu_context { unsigned long x19; unsigned long x20; @@ -147,8 +152,8 @@ struct thread_struct { unsigned int fpsimd_cpu; void *sve_state; /* SVE registers, if any */ - unsigned int sve_vl; /* SVE vector length */ - unsigned int sve_vl_onexec; /* SVE vl after next exec */ + unsigned int vl[ARM64_VEC_MAX]; /* vector length */ + unsigned int vl_onexec[ARM64_VEC_MAX]; /* vl after next exec */ unsigned long fault_address; /* fault info */ unsigned long fault_code; /* ESR_EL1 value */ struct debug_info debug; /* debugging */ @@ -164,6 +169,46 @@ struct thread_struct { u64 sctlr_user; }; +static inline unsigned int thread_get_vl(struct thread_struct *thread, + enum vec_type type) +{ + return thread->vl[type]; +} + +static inline unsigned int thread_get_sve_vl(struct thread_struct *thread) +{ + return thread_get_vl(thread, ARM64_VEC_SVE); +} + +unsigned int task_get_vl(const struct task_struct *task, enum vec_type type); +void task_set_vl(struct task_struct *task, enum vec_type type, + unsigned long vl); +void task_set_vl_onexec(struct task_struct *task, enum vec_type type, + unsigned long vl); +unsigned int task_get_vl_onexec(const struct task_struct *task, + enum vec_type type); + +static inline unsigned int task_get_sve_vl(const struct task_struct *task) +{ + return task_get_vl(task, ARM64_VEC_SVE); +} + +static inline void task_set_sve_vl(struct task_struct *task, unsigned long vl) +{ + task_set_vl(task, ARM64_VEC_SVE, vl); +} + +static inline unsigned int task_get_sve_vl_onexec(const struct task_struct *task) +{ + return task_get_vl_onexec(task, ARM64_VEC_SVE); +} + +static inline void task_set_sve_vl_onexec(struct task_struct *task, + unsigned long vl) +{ + task_set_vl_onexec(task, ARM64_VEC_SVE, vl); +} + #define SCTLR_USER_MASK \ (SCTLR_ELx_ENIA | SCTLR_ELx_ENIB | SCTLR_ELx_ENDA | SCTLR_ELx_ENDB | \ SCTLR_EL1_TCF0_MASK) @@ -257,7 +302,7 @@ struct task_struct; /* Free all resources held by a thread. */ extern void release_thread(struct task_struct *); -unsigned long get_wchan(struct task_struct *p); +unsigned long __get_wchan(struct task_struct *p); void update_sctlr_el1(u64 sctlr); diff --git a/arch/arm64/include/asm/sections.h b/arch/arm64/include/asm/sections.h index e4ad9db53af1..152cb35bf9df 100644 --- a/arch/arm64/include/asm/sections.h +++ b/arch/arm64/include/asm/sections.h @@ -21,5 +21,6 @@ extern char __exittext_begin[], __exittext_end[]; extern char __irqentry_text_start[], __irqentry_text_end[]; extern char __mmuoff_data_start[], __mmuoff_data_end[]; extern char __entry_tramp_text_start[], __entry_tramp_text_end[]; +extern char __relocate_new_kernel_start[], __relocate_new_kernel_end[]; #endif /* __ASM_SECTIONS_H */ diff --git a/arch/arm64/include/asm/setup.h b/arch/arm64/include/asm/setup.h index d3320618ed14..6437df661700 100644 --- a/arch/arm64/include/asm/setup.h +++ b/arch/arm64/include/asm/setup.h @@ -8,4 +8,10 @@ void *get_early_fdt_ptr(void); void early_fdt_map(u64 dt_phys); +/* + * These two variables are used in the head.S file. + */ +extern phys_addr_t __fdt_pointer __initdata; +extern u64 __cacheline_aligned boot_args[4]; + #endif diff --git a/arch/arm64/include/asm/stacktrace.h b/arch/arm64/include/asm/stacktrace.h index 8aebc00c1718..a4e046ef4568 100644 --- a/arch/arm64/include/asm/stacktrace.h +++ b/arch/arm64/include/asm/stacktrace.h @@ -9,6 +9,7 @@ #include <linux/sched.h> #include <linux/sched/task_stack.h> #include <linux/types.h> +#include <linux/llist.h> #include <asm/memory.h> #include <asm/ptrace.h> @@ -59,6 +60,9 @@ struct stackframe { #ifdef CONFIG_FUNCTION_GRAPH_TRACER int graph; #endif +#ifdef CONFIG_KRETPROBES + struct llist_node *kr_cur; +#endif }; extern int unwind_frame(struct task_struct *tsk, struct stackframe *frame); diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 9412a645a1c0..16b3f1a1d468 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -13,6 +13,8 @@ #include <linux/stringify.h> #include <linux/kasan-tags.h> +#include <asm/gpr-num.h> + /* * ARMv8 ARM reserves the following encoding for system registers: * (Ref: ARMv8 ARM, Section: "System instruction class encoding overview", @@ -507,6 +509,9 @@ #define SYS_CNTFRQ_EL0 sys_reg(3, 3, 14, 0, 0) +#define SYS_CNTPCTSS_EL0 sys_reg(3, 3, 14, 0, 5) +#define SYS_CNTVCTSS_EL0 sys_reg(3, 3, 14, 0, 6) + #define SYS_CNTP_TVAL_EL0 sys_reg(3, 3, 14, 2, 0) #define SYS_CNTP_CTL_EL0 sys_reg(3, 3, 14, 2, 1) #define SYS_CNTP_CVAL_EL0 sys_reg(3, 3, 14, 2, 2) @@ -621,6 +626,7 @@ #define SCTLR_ELx_TCF_NONE (UL(0x0) << SCTLR_ELx_TCF_SHIFT) #define SCTLR_ELx_TCF_SYNC (UL(0x1) << SCTLR_ELx_TCF_SHIFT) #define SCTLR_ELx_TCF_ASYNC (UL(0x2) << SCTLR_ELx_TCF_SHIFT) +#define SCTLR_ELx_TCF_ASYMM (UL(0x3) << SCTLR_ELx_TCF_SHIFT) #define SCTLR_ELx_TCF_MASK (UL(0x3) << SCTLR_ELx_TCF_SHIFT) #define SCTLR_ELx_ENIA_SHIFT 31 @@ -666,6 +672,7 @@ #define SCTLR_EL1_TCF0_NONE (UL(0x0) << SCTLR_EL1_TCF0_SHIFT) #define SCTLR_EL1_TCF0_SYNC (UL(0x1) << SCTLR_EL1_TCF0_SHIFT) #define SCTLR_EL1_TCF0_ASYNC (UL(0x2) << SCTLR_EL1_TCF0_SHIFT) +#define SCTLR_EL1_TCF0_ASYMM (UL(0x3) << SCTLR_EL1_TCF0_SHIFT) #define SCTLR_EL1_TCF0_MASK (UL(0x3) << SCTLR_EL1_TCF0_SHIFT) #define SCTLR_EL1_BT1 (BIT(36)) @@ -807,6 +814,7 @@ #define ID_AA64PFR1_MTE_NI 0x0 #define ID_AA64PFR1_MTE_EL0 0x1 #define ID_AA64PFR1_MTE 0x2 +#define ID_AA64PFR1_MTE_ASYMM 0x3 /* id_aa64zfr0 */ #define ID_AA64ZFR0_F64MM_SHIFT 56 @@ -1195,17 +1203,12 @@ #ifdef __ASSEMBLY__ - .irp num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30 - .equ .L__reg_num_x\num, \num - .endr - .equ .L__reg_num_xzr, 31 - .macro mrs_s, rt, sreg - __emit_inst(0xd5200000|(\sreg)|(.L__reg_num_\rt)) + __emit_inst(0xd5200000|(\sreg)|(.L__gpr_num_\rt)) .endm .macro msr_s, sreg, rt - __emit_inst(0xd5000000|(\sreg)|(.L__reg_num_\rt)) + __emit_inst(0xd5000000|(\sreg)|(.L__gpr_num_\rt)) .endm #else @@ -1214,22 +1217,16 @@ #include <linux/types.h> #include <asm/alternative.h> -#define __DEFINE_MRS_MSR_S_REGNUM \ -" .irp num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30\n" \ -" .equ .L__reg_num_x\\num, \\num\n" \ -" .endr\n" \ -" .equ .L__reg_num_xzr, 31\n" - #define DEFINE_MRS_S \ - __DEFINE_MRS_MSR_S_REGNUM \ + __DEFINE_ASM_GPR_NUMS \ " .macro mrs_s, rt, sreg\n" \ - __emit_inst(0xd5200000|(\\sreg)|(.L__reg_num_\\rt)) \ + __emit_inst(0xd5200000|(\\sreg)|(.L__gpr_num_\\rt)) \ " .endm\n" #define DEFINE_MSR_S \ - __DEFINE_MRS_MSR_S_REGNUM \ + __DEFINE_ASM_GPR_NUMS \ " .macro msr_s, sreg, rt\n" \ - __emit_inst(0xd5000000|(\\sreg)|(.L__reg_num_\\rt)) \ + __emit_inst(0xd5000000|(\\sreg)|(.L__gpr_num_\\rt)) \ " .endm\n" #define UNDEFINE_MRS_S \ diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h index 6623c99f0984..e1317b7c4525 100644 --- a/arch/arm64/include/asm/thread_info.h +++ b/arch/arm64/include/asm/thread_info.h @@ -42,6 +42,7 @@ struct thread_info { void *scs_base; void *scs_sp; #endif + u32 cpu; }; #define thread_saved_pc(tsk) \ @@ -78,7 +79,7 @@ int arch_dup_task_struct(struct task_struct *dst, #define TIF_SINGLESTEP 21 #define TIF_32BIT 22 /* 32bit process */ #define TIF_SVE 23 /* Scalable Vector Extension in use */ -#define TIF_SVE_VL_INHERIT 24 /* Inherit sve_vl_onexec across exec */ +#define TIF_SVE_VL_INHERIT 24 /* Inherit SVE vl_onexec across exec */ #define TIF_SSBD 25 /* Wants SSB mitigation */ #define TIF_TAGGED_ADDR 26 /* Allow tagged user addresses */ diff --git a/arch/arm64/include/asm/trans_pgd.h b/arch/arm64/include/asm/trans_pgd.h index 5d08e5adf3d5..033d400a4ea4 100644 --- a/arch/arm64/include/asm/trans_pgd.h +++ b/arch/arm64/include/asm/trans_pgd.h @@ -1,8 +1,8 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* - * Copyright (c) 2020, Microsoft Corporation. - * Pavel Tatashin <pasha.tatashin@soleen.com> + * Copyright (c) 2021, Microsoft Corporation. + * Pasha Tatashin <pasha.tatashin@soleen.com> */ #ifndef _ASM_TRANS_TABLE_H @@ -15,7 +15,7 @@ /* * trans_alloc_page * - Allocator that should return exactly one zeroed page, if this - * allocator fails, trans_pgd_create_copy() and trans_pgd_map_page() + * allocator fails, trans_pgd_create_copy() and trans_pgd_idmap_page() * return -ENOMEM error. * * trans_alloc_arg @@ -30,10 +30,12 @@ struct trans_pgd_info { int trans_pgd_create_copy(struct trans_pgd_info *info, pgd_t **trans_pgd, unsigned long start, unsigned long end); -int trans_pgd_map_page(struct trans_pgd_info *info, pgd_t *trans_pgd, - void *page, unsigned long dst_addr, pgprot_t pgprot); - int trans_pgd_idmap_page(struct trans_pgd_info *info, phys_addr_t *trans_ttbr0, unsigned long *t0sz, void *page); +int trans_pgd_copy_el2_vectors(struct trans_pgd_info *info, + phys_addr_t *el2_vectors); + +extern char trans_pgd_stub_vectors[]; + #endif /* _ASM_TRANS_TABLE_H */ diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h index 190b494e22ab..6e2e0b7031ab 100644 --- a/arch/arm64/include/asm/uaccess.h +++ b/arch/arm64/include/asm/uaccess.h @@ -18,6 +18,7 @@ #include <linux/kasan-checks.h> #include <linux/string.h> +#include <asm/asm-extable.h> #include <asm/cpufeature.h> #include <asm/mmu.h> #include <asm/mte.h> @@ -70,12 +71,6 @@ static inline unsigned long __range_ok(const void __user *addr, unsigned long si #define access_ok(addr, size) __range_ok(addr, size) -#define _ASM_EXTABLE(from, to) \ - " .pushsection __ex_table, \"a\"\n" \ - " .align 3\n" \ - " .long (" #from " - .), (" #to " - .)\n" \ - " .popsection\n" - /* * User access enabling/disabling. */ @@ -196,13 +191,13 @@ static inline void __uaccess_enable_tco(void) */ static inline void __uaccess_disable_tco_async(void) { - if (system_uses_mte_async_mode()) + if (system_uses_mte_async_or_asymm_mode()) __uaccess_disable_tco(); } static inline void __uaccess_enable_tco_async(void) { - if (system_uses_mte_async_mode()) + if (system_uses_mte_async_or_asymm_mode()) __uaccess_enable_tco(); } @@ -260,15 +255,9 @@ static inline void __user *__uaccess_mask_ptr(const void __user *ptr) asm volatile( \ "1: " load " " reg "1, [%2]\n" \ "2:\n" \ - " .section .fixup, \"ax\"\n" \ - " .align 2\n" \ - "3: mov %w0, %3\n" \ - " mov %1, #0\n" \ - " b 2b\n" \ - " .previous\n" \ - _ASM_EXTABLE(1b, 3b) \ + _ASM_EXTABLE_UACCESS_ERR_ZERO(1b, 2b, %w0, %w1) \ : "+r" (err), "=&r" (x) \ - : "r" (addr), "i" (-EFAULT)) + : "r" (addr)) #define __raw_get_mem(ldr, x, ptr, err) \ do { \ @@ -337,14 +326,9 @@ do { \ asm volatile( \ "1: " store " " reg "1, [%2]\n" \ "2:\n" \ - " .section .fixup,\"ax\"\n" \ - " .align 2\n" \ - "3: mov %w0, %3\n" \ - " b 2b\n" \ - " .previous\n" \ - _ASM_EXTABLE(1b, 3b) \ + _ASM_EXTABLE_UACCESS_ERR(1b, 2b, %w0) \ : "+r" (err) \ - : "r" (x), "r" (addr), "i" (-EFAULT)) + : "r" (x), "r" (addr)) #define __raw_put_mem(str, x, ptr, err) \ do { \ diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h index 3cb206aea3db..6bdb5f5db438 100644 --- a/arch/arm64/include/asm/unistd.h +++ b/arch/arm64/include/asm/unistd.h @@ -38,7 +38,7 @@ #define __ARM_NR_compat_set_tls (__ARM_NR_COMPAT_BASE + 5) #define __ARM_NR_COMPAT_END (__ARM_NR_COMPAT_BASE + 0x800) -#define __NR_compat_syscalls 449 +#define __NR_compat_syscalls 450 #endif #define __ARCH_WANT_SYS_CLONE diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h index 844f6ae58662..41ea1195e44b 100644 --- a/arch/arm64/include/asm/unistd32.h +++ b/arch/arm64/include/asm/unistd32.h @@ -903,6 +903,8 @@ __SYSCALL(__NR_landlock_add_rule, sys_landlock_add_rule) __SYSCALL(__NR_landlock_restrict_self, sys_landlock_restrict_self) #define __NR_process_mrelease 448 __SYSCALL(__NR_process_mrelease, sys_process_mrelease) +#define __NR_futex_waitv 449 +__SYSCALL(__NR_futex_waitv, sys_futex_waitv) /* * Please add new compat syscalls above this comment and update diff --git a/arch/arm64/include/asm/vdso/compat_barrier.h b/arch/arm64/include/asm/vdso/compat_barrier.h index 3fd8fd6d8fc2..3ac35f4a667c 100644 --- a/arch/arm64/include/asm/vdso/compat_barrier.h +++ b/arch/arm64/include/asm/vdso/compat_barrier.h @@ -20,16 +20,9 @@ #define dmb(option) __asm__ __volatile__ ("dmb " #option : : : "memory") -#if __LINUX_ARM_ARCH__ >= 8 && defined(CONFIG_AS_DMB_ISHLD) #define aarch32_smp_mb() dmb(ish) #define aarch32_smp_rmb() dmb(ishld) #define aarch32_smp_wmb() dmb(ishst) -#else -#define aarch32_smp_mb() dmb(ish) -#define aarch32_smp_rmb() aarch32_smp_mb() -#define aarch32_smp_wmb() dmb(ishst) -#endif - #undef smp_mb #undef smp_rmb diff --git a/arch/arm64/include/asm/virt.h b/arch/arm64/include/asm/virt.h index 7379f35ae2c6..3c8af033a997 100644 --- a/arch/arm64/include/asm/virt.h +++ b/arch/arm64/include/asm/virt.h @@ -67,6 +67,8 @@ */ extern u32 __boot_cpu_mode[2]; +#define ARM64_VECTOR_TABLE_LEN SZ_2K + void __hyp_set_vectors(phys_addr_t phys_vector_base); void __hyp_reset_vectors(void); @@ -128,6 +130,11 @@ static __always_inline bool is_protected_kvm_enabled(void) return cpus_have_final_cap(ARM64_KVM_PROTECTED_MODE); } +static inline bool is_hyp_nvhe(void) +{ + return is_hyp_mode_available() && !is_kernel_in_hyp_mode(); +} + #endif /* __ASSEMBLY__ */ #endif /* ! __ASM__VIRT_H */ diff --git a/arch/arm64/include/asm/vmalloc.h b/arch/arm64/include/asm/vmalloc.h index 7a22aeea9bb5..b9185503feae 100644 --- a/arch/arm64/include/asm/vmalloc.h +++ b/arch/arm64/include/asm/vmalloc.h @@ -2,6 +2,7 @@ #define _ASM_ARM64_VMALLOC_H #include <asm/page.h> +#include <asm/pgtable.h> #ifdef CONFIG_HAVE_ARCH_HUGE_VMAP @@ -9,10 +10,9 @@ static inline bool arch_vmap_pud_supported(pgprot_t prot) { /* - * Only 4k granule supports level 1 block mappings. * SW table walks can't handle removal of intermediate entries. */ - return IS_ENABLED(CONFIG_ARM64_4K_PAGES) && + return pud_sect_supported() && !IS_ENABLED(CONFIG_PTDUMP_DEBUGFS); } diff --git a/arch/arm64/include/asm/word-at-a-time.h b/arch/arm64/include/asm/word-at-a-time.h index 2dcb104c645b..1c8e4f2490bf 100644 --- a/arch/arm64/include/asm/word-at-a-time.h +++ b/arch/arm64/include/asm/word-at-a-time.h @@ -53,29 +53,16 @@ static inline unsigned long find_zero(unsigned long mask) */ static inline unsigned long load_unaligned_zeropad(const void *addr) { - unsigned long ret, tmp; + unsigned long ret; __uaccess_enable_tco_async(); /* Load word from unaligned pointer addr */ asm( - "1: ldr %0, %3\n" + "1: ldr %0, %2\n" "2:\n" - " .pushsection .fixup,\"ax\"\n" - " .align 2\n" - "3: bic %1, %2, #0x7\n" - " ldr %0, [%1]\n" - " and %1, %2, #0x7\n" - " lsl %1, %1, #0x3\n" -#ifndef __AARCH64EB__ - " lsr %0, %0, %1\n" -#else - " lsl %0, %0, %1\n" -#endif - " b 2b\n" - " .popsection\n" - _ASM_EXTABLE(1b, 3b) - : "=&r" (ret), "=&r" (tmp) + _ASM_EXTABLE_LOAD_UNALIGNED_ZEROPAD(1b, 2b, %0, %1) + : "=&r" (ret) : "r" (addr), "Q" (*(unsigned long *)addr)); __uaccess_disable_tco_async(); diff --git a/arch/arm64/include/uapi/asm/hwcap.h b/arch/arm64/include/uapi/asm/hwcap.h index b8f41aa234ee..7b23b16f21ce 100644 --- a/arch/arm64/include/uapi/asm/hwcap.h +++ b/arch/arm64/include/uapi/asm/hwcap.h @@ -75,5 +75,6 @@ #define HWCAP2_RNG (1 << 16) #define HWCAP2_BTI (1 << 17) #define HWCAP2_MTE (1 << 18) +#define HWCAP2_ECV (1 << 19) #endif /* _UAPI__ASM_HWCAP_H */ diff --git a/arch/arm64/kernel/armv8_deprecated.c b/arch/arm64/kernel/armv8_deprecated.c index 0e86e8b9cedd..6875a16b09d2 100644 --- a/arch/arm64/kernel/armv8_deprecated.c +++ b/arch/arm64/kernel/armv8_deprecated.c @@ -279,7 +279,7 @@ static void __init register_insn_emulation_sysctl(void) do { \ uaccess_enable_privileged(); \ __asm__ __volatile__( \ - " mov %w3, %w7\n" \ + " mov %w3, %w6\n" \ "0: ldxr"B" %w2, [%4]\n" \ "1: stxr"B" %w0, %w1, [%4]\n" \ " cbz %w0, 2f\n" \ @@ -290,16 +290,10 @@ do { \ "2:\n" \ " mov %w1, %w2\n" \ "3:\n" \ - " .pushsection .fixup,\"ax\"\n" \ - " .align 2\n" \ - "4: mov %w0, %w6\n" \ - " b 3b\n" \ - " .popsection" \ - _ASM_EXTABLE(0b, 4b) \ - _ASM_EXTABLE(1b, 4b) \ + _ASM_EXTABLE_UACCESS_ERR(0b, 3b, %w0) \ + _ASM_EXTABLE_UACCESS_ERR(1b, 3b, %w0) \ : "=&r" (res), "+r" (data), "=&r" (temp), "=&r" (temp2) \ : "r" ((unsigned long)addr), "i" (-EAGAIN), \ - "i" (-EFAULT), \ "i" (__SWP_LL_SC_LOOPS) \ : "memory"); \ uaccess_disable_privileged(); \ diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index 551427ae8cc5..6d0c3afd36b8 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -9,6 +9,7 @@ #include <linux/arm_sdei.h> #include <linux/sched.h> +#include <linux/kexec.h> #include <linux/mm.h> #include <linux/dma-mapping.h> #include <linux/kvm_host.h> @@ -27,8 +28,8 @@ int main(void) { DEFINE(TSK_ACTIVE_MM, offsetof(struct task_struct, active_mm)); - DEFINE(TSK_CPU, offsetof(struct task_struct, cpu)); BLANK(); + DEFINE(TSK_TI_CPU, offsetof(struct task_struct, thread_info.cpu)); DEFINE(TSK_TI_FLAGS, offsetof(struct task_struct, thread_info.flags)); DEFINE(TSK_TI_PREEMPT, offsetof(struct task_struct, thread_info.preempt_count)); #ifdef CONFIG_ARM64_SW_TTBR0_PAN @@ -171,5 +172,15 @@ int main(void) #endif BLANK(); #endif +#ifdef CONFIG_KEXEC_CORE + DEFINE(KIMAGE_ARCH_DTB_MEM, offsetof(struct kimage, arch.dtb_mem)); + DEFINE(KIMAGE_ARCH_EL2_VECTORS, offsetof(struct kimage, arch.el2_vectors)); + DEFINE(KIMAGE_ARCH_ZERO_PAGE, offsetof(struct kimage, arch.zero_page)); + DEFINE(KIMAGE_ARCH_PHYS_OFFSET, offsetof(struct kimage, arch.phys_offset)); + DEFINE(KIMAGE_ARCH_TTBR1, offsetof(struct kimage, arch.ttbr1)); + DEFINE(KIMAGE_HEAD, offsetof(struct kimage, head)); + DEFINE(KIMAGE_START, offsetof(struct kimage, start)); + BLANK(); +#endif return 0; } diff --git a/arch/arm64/kernel/cpu-reset.S b/arch/arm64/kernel/cpu-reset.S index d47ff63a5b66..48a8af97faa9 100644 --- a/arch/arm64/kernel/cpu-reset.S +++ b/arch/arm64/kernel/cpu-reset.S @@ -16,8 +16,7 @@ .pushsection .idmap.text, "awx" /* - * __cpu_soft_restart(el2_switch, entry, arg0, arg1, arg2) - Helper for - * cpu_soft_restart. + * cpu_soft_restart(el2_switch, entry, arg0, arg1, arg2) * * @el2_switch: Flag to indicate a switch to EL2 is needed. * @entry: Location to jump to for soft reset. @@ -29,7 +28,7 @@ * branch to what would be the reset vector. It must be executed with the * flat identity mapping. */ -SYM_CODE_START(__cpu_soft_restart) +SYM_CODE_START(cpu_soft_restart) mov_q x12, INIT_SCTLR_EL1_MMU_OFF pre_disable_mmu_workaround /* @@ -48,6 +47,6 @@ SYM_CODE_START(__cpu_soft_restart) mov x1, x3 // arg1 mov x2, x4 // arg2 br x8 -SYM_CODE_END(__cpu_soft_restart) +SYM_CODE_END(cpu_soft_restart) .popsection diff --git a/arch/arm64/kernel/cpu-reset.h b/arch/arm64/kernel/cpu-reset.h deleted file mode 100644 index 9a7b1262ef17..000000000000 --- a/arch/arm64/kernel/cpu-reset.h +++ /dev/null @@ -1,32 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * CPU reset routines - * - * Copyright (C) 2015 Huawei Futurewei Technologies. - */ - -#ifndef _ARM64_CPU_RESET_H -#define _ARM64_CPU_RESET_H - -#include <asm/virt.h> - -void __cpu_soft_restart(unsigned long el2_switch, unsigned long entry, - unsigned long arg0, unsigned long arg1, unsigned long arg2); - -static inline void __noreturn __nocfi cpu_soft_restart(unsigned long entry, - unsigned long arg0, - unsigned long arg1, - unsigned long arg2) -{ - typeof(__cpu_soft_restart) *restart; - - unsigned long el2_switch = !is_kernel_in_hyp_mode() && - is_hyp_mode_available(); - restart = (void *)__pa_symbol(function_nocfi(__cpu_soft_restart)); - - cpu_install_idmap(); - restart(el2_switch, entry, arg0, arg1, arg2); - unreachable(); -} - -#endif diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index e2c20c036442..9e1c1aef9ebd 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -340,6 +340,42 @@ static const struct midr_range erratum_1463225[] = { }; #endif +#ifdef CONFIG_ARM64_WORKAROUND_TRBE_OVERWRITE_FILL_MODE +static const struct midr_range trbe_overwrite_fill_mode_cpus[] = { +#ifdef CONFIG_ARM64_ERRATUM_2139208 + MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2), +#endif +#ifdef CONFIG_ARM64_ERRATUM_2119858 + MIDR_ALL_VERSIONS(MIDR_CORTEX_A710), +#endif + {}, +}; +#endif /* CONFIG_ARM64_WORKAROUND_TRBE_OVERWRITE_FILL_MODE */ + +#ifdef CONFIG_ARM64_WORKAROUND_TSB_FLUSH_FAILURE +static const struct midr_range tsb_flush_fail_cpus[] = { +#ifdef CONFIG_ARM64_ERRATUM_2067961 + MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2), +#endif +#ifdef CONFIG_ARM64_ERRATUM_2054223 + MIDR_ALL_VERSIONS(MIDR_CORTEX_A710), +#endif + {}, +}; +#endif /* CONFIG_ARM64_WORKAROUND_TSB_FLUSH_FAILURE */ + +#ifdef CONFIG_ARM64_WORKAROUND_TRBE_WRITE_OUT_OF_RANGE +static struct midr_range trbe_write_out_of_range_cpus[] = { +#ifdef CONFIG_ARM64_ERRATUM_2253138 + MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2), +#endif +#ifdef CONFIG_ARM64_ERRATUM_2224489 + MIDR_ALL_VERSIONS(MIDR_CORTEX_A710), +#endif + {}, +}; +#endif /* CONFIG_ARM64_WORKAROUND_TRBE_WRITE_OUT_OF_RANGE */ + const struct arm64_cpu_capabilities arm64_errata[] = { #ifdef CONFIG_ARM64_WORKAROUND_CLEAN_CACHE { @@ -534,6 +570,34 @@ const struct arm64_cpu_capabilities arm64_errata[] = { ERRATA_MIDR_ALL_VERSIONS(MIDR_NVIDIA_CARMEL), }, #endif +#ifdef CONFIG_ARM64_WORKAROUND_TRBE_OVERWRITE_FILL_MODE + { + /* + * The erratum work around is handled within the TRBE + * driver and can be applied per-cpu. So, we can allow + * a late CPU to come online with this erratum. + */ + .desc = "ARM erratum 2119858 or 2139208", + .capability = ARM64_WORKAROUND_TRBE_OVERWRITE_FILL_MODE, + .type = ARM64_CPUCAP_WEAK_LOCAL_CPU_FEATURE, + CAP_MIDR_RANGE_LIST(trbe_overwrite_fill_mode_cpus), + }, +#endif +#ifdef CONFIG_ARM64_WORKAROUND_TSB_FLUSH_FAILURE + { + .desc = "ARM erratum 2067961 or 2054223", + .capability = ARM64_WORKAROUND_TSB_FLUSH_FAILURE, + ERRATA_MIDR_RANGE_LIST(tsb_flush_fail_cpus), + }, +#endif +#ifdef CONFIG_ARM64_WORKAROUND_TRBE_WRITE_OUT_OF_RANGE + { + .desc = "ARM erratum 2253138 or 2224489", + .capability = ARM64_WORKAROUND_TRBE_WRITE_OUT_OF_RANGE, + .type = ARM64_CPUCAP_WEAK_LOCAL_CPU_FEATURE, + CAP_MIDR_RANGE_LIST(trbe_write_out_of_range_cpus), + }, +#endif { } }; diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 6ec7036ef7e1..ecbdff795f5e 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -279,7 +279,7 @@ static const struct arm64_ftr_bits ftr_id_aa64zfr0[] = { }; static const struct arm64_ftr_bits ftr_id_aa64mmfr0[] = { - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_ECV_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_ECV_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_FGT_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_EXS_SHIFT, 4, 0), /* @@ -941,7 +941,7 @@ void __init init_cpu_features(struct cpuinfo_arm64 *info) if (id_aa64pfr0_sve(info->reg_id_aa64pfr0)) { init_cpu_ftr_reg(SYS_ZCR_EL1, info->reg_zcr); - sve_init_vq_map(); + vec_init_vq_map(ARM64_VEC_SVE); } if (id_aa64pfr1_mte(info->reg_id_aa64pfr1)) @@ -1175,7 +1175,7 @@ void update_cpu_features(int cpu, /* Probe vector lengths, unless we already gave up on SVE */ if (id_aa64pfr0_sve(read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1)) && !system_capabilities_finalized()) - sve_update_vq_map(); + vec_update_vq_map(ARM64_VEC_SVE); } /* @@ -1930,6 +1930,16 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .sign = FTR_UNSIGNED, .min_field_value = 1, }, + { + .desc = "Enhanced Counter Virtualization", + .capability = ARM64_HAS_ECV, + .type = ARM64_CPUCAP_SYSTEM_FEATURE, + .matches = has_cpuid_feature, + .sys_reg = SYS_ID_AA64MMFR0_EL1, + .field_pos = ID_AA64MMFR0_ECV_SHIFT, + .sign = FTR_UNSIGNED, + .min_field_value = 1, + }, #ifdef CONFIG_ARM64_PAN { .desc = "Privileged Access Never", @@ -2321,6 +2331,16 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .sign = FTR_UNSIGNED, .cpu_enable = cpu_enable_mte, }, + { + .desc = "Asymmetric MTE Tag Check Fault", + .capability = ARM64_MTE_ASYMM, + .type = ARM64_CPUCAP_BOOT_CPU_FEATURE, + .matches = has_cpuid_feature, + .sys_reg = SYS_ID_AA64PFR1_EL1, + .field_pos = ID_AA64PFR1_MTE_SHIFT, + .min_field_value = ID_AA64PFR1_MTE_ASYMM, + .sign = FTR_UNSIGNED, + }, #endif /* CONFIG_ARM64_MTE */ { .desc = "RCpc load-acquire (LDAPR)", @@ -2451,6 +2471,7 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { #ifdef CONFIG_ARM64_MTE HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_MTE_SHIFT, FTR_UNSIGNED, ID_AA64PFR1_MTE, CAP_HWCAP, KERNEL_HWCAP_MTE), #endif /* CONFIG_ARM64_MTE */ + HWCAP_CAP(SYS_ID_AA64MMFR0_EL1, ID_AA64MMFR0_ECV_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ECV), {}, }; @@ -2739,7 +2760,7 @@ static void verify_sve_features(void) unsigned int safe_len = safe_zcr & ZCR_ELx_LEN_MASK; unsigned int len = zcr & ZCR_ELx_LEN_MASK; - if (len < safe_len || sve_verify_vq_map()) { + if (len < safe_len || vec_verify_vq_map(ARM64_VEC_SVE)) { pr_crit("CPU%d: SVE: vector length support mismatch\n", smp_processor_id()); cpu_die_early(); diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index 87731fea5e41..6e27b759056a 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -94,6 +94,7 @@ static const char *const hwcap_str[] = { [KERNEL_HWCAP_RNG] = "rng", [KERNEL_HWCAP_BTI] = "bti", [KERNEL_HWCAP_MTE] = "mte", + [KERNEL_HWCAP_ECV] = "ecv", }; #ifdef CONFIG_COMPAT diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c index 32f9796c4ffe..f7408edf8571 100644 --- a/arch/arm64/kernel/entry-common.c +++ b/arch/arm64/kernel/entry-common.c @@ -17,6 +17,7 @@ #include <asm/daifflags.h> #include <asm/esr.h> #include <asm/exception.h> +#include <asm/irq_regs.h> #include <asm/kprobes.h> #include <asm/mmu.h> #include <asm/processor.h> @@ -219,22 +220,6 @@ static void noinstr arm64_exit_el1_dbg(struct pt_regs *regs) lockdep_hardirqs_on(CALLER_ADDR0); } -static void noinstr enter_el1_irq_or_nmi(struct pt_regs *regs) -{ - if (IS_ENABLED(CONFIG_ARM64_PSEUDO_NMI) && !interrupts_enabled(regs)) - arm64_enter_nmi(regs); - else - enter_from_kernel_mode(regs); -} - -static void noinstr exit_el1_irq_or_nmi(struct pt_regs *regs) -{ - if (IS_ENABLED(CONFIG_ARM64_PSEUDO_NMI) && !interrupts_enabled(regs)) - arm64_exit_nmi(regs); - else - exit_to_kernel_mode(regs); -} - static void __sched arm64_preempt_schedule_irq(void) { lockdep_assert_irqs_disabled(); @@ -263,10 +248,14 @@ static void __sched arm64_preempt_schedule_irq(void) static void do_interrupt_handler(struct pt_regs *regs, void (*handler)(struct pt_regs *)) { + struct pt_regs *old_regs = set_irq_regs(regs); + if (on_thread_stack()) call_on_irq_stack(regs, handler); else handler(regs); + + set_irq_regs(old_regs); } extern void (*handle_arch_irq)(struct pt_regs *); @@ -432,13 +421,22 @@ asmlinkage void noinstr el1h_64_sync_handler(struct pt_regs *regs) } } -static void noinstr el1_interrupt(struct pt_regs *regs, - void (*handler)(struct pt_regs *)) +static __always_inline void __el1_pnmi(struct pt_regs *regs, + void (*handler)(struct pt_regs *)) { - write_sysreg(DAIF_PROCCTX_NOIRQ, daif); + arm64_enter_nmi(regs); + do_interrupt_handler(regs, handler); + arm64_exit_nmi(regs); +} + +static __always_inline void __el1_irq(struct pt_regs *regs, + void (*handler)(struct pt_regs *)) +{ + enter_from_kernel_mode(regs); - enter_el1_irq_or_nmi(regs); + irq_enter_rcu(); do_interrupt_handler(regs, handler); + irq_exit_rcu(); /* * Note: thread_info::preempt_count includes both thread_info::count @@ -449,7 +447,17 @@ static void noinstr el1_interrupt(struct pt_regs *regs, READ_ONCE(current_thread_info()->preempt_count) == 0) arm64_preempt_schedule_irq(); - exit_el1_irq_or_nmi(regs); + exit_to_kernel_mode(regs); +} +static void noinstr el1_interrupt(struct pt_regs *regs, + void (*handler)(struct pt_regs *)) +{ + write_sysreg(DAIF_PROCCTX_NOIRQ, daif); + + if (IS_ENABLED(CONFIG_ARM64_PSEUDO_NMI) && !interrupts_enabled(regs)) + __el1_pnmi(regs, handler); + else + __el1_irq(regs, handler); } asmlinkage void noinstr el1h_64_irq_handler(struct pt_regs *regs) @@ -667,7 +675,9 @@ static void noinstr el0_interrupt(struct pt_regs *regs, if (regs->pc & BIT(55)) arm64_apply_bp_hardening(); + irq_enter_rcu(); do_interrupt_handler(regs, handler); + irq_exit_rcu(); exit_to_user_mode(regs); } diff --git a/arch/arm64/kernel/entry-fpsimd.S b/arch/arm64/kernel/entry-fpsimd.S index 196e921f61de..dc242e269f9a 100644 --- a/arch/arm64/kernel/entry-fpsimd.S +++ b/arch/arm64/kernel/entry-fpsimd.S @@ -38,9 +38,10 @@ SYM_FUNC_END(fpsimd_load_state) * * x0 - pointer to buffer for state * x1 - pointer to storage for FPSR + * x2 - Save FFR if non-zero */ SYM_FUNC_START(sve_save_state) - sve_save 0, x1, 2 + sve_save 0, x1, x2, 3 ret SYM_FUNC_END(sve_save_state) @@ -49,10 +50,10 @@ SYM_FUNC_END(sve_save_state) * * x0 - pointer to buffer for state * x1 - pointer to storage for FPSR - * x2 - VQ-1 + * x2 - Restore FFR if non-zero */ SYM_FUNC_START(sve_load_state) - sve_load 0, x1, x2, 3, x4 + sve_load 0, x1, x2, 4 ret SYM_FUNC_END(sve_load_state) @@ -67,34 +68,21 @@ SYM_FUNC_START(sve_set_vq) SYM_FUNC_END(sve_set_vq) /* - * Load SVE state from FPSIMD state. - * - * x0 = pointer to struct fpsimd_state - * x1 = VQ - 1 - * - * Each SVE vector will be loaded with the first 128-bits taken from FPSIMD - * and the rest zeroed. All the other SVE registers will be zeroed. - */ -SYM_FUNC_START(sve_load_from_fpsimd_state) - sve_load_vq x1, x2, x3 - fpsimd_restore x0, 8 - sve_flush_p_ffr - ret -SYM_FUNC_END(sve_load_from_fpsimd_state) - -/* * Zero all SVE registers but the first 128-bits of each vector * * VQ must already be configured by caller, any further updates of VQ * will need to ensure that the register state remains valid. * - * x0 = VQ - 1 + * x0 = include FFR? + * x1 = VQ - 1 */ SYM_FUNC_START(sve_flush_live) - cbz x0, 1f // A VQ-1 of 0 is 128 bits so no extra Z state + cbz x1, 1f // A VQ-1 of 0 is 128 bits so no extra Z state sve_flush_z -1: sve_flush_p_ffr - ret +1: sve_flush_p + tbz x0, #0, 2f + sve_flush_ffr +2: ret SYM_FUNC_END(sve_flush_live) #endif /* CONFIG_ARM64_SVE */ diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index bc6d5a970a13..2f69ae43941d 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -168,9 +168,9 @@ alternative_else_nop_endif .macro mte_set_kernel_gcr, tmp, tmp2 #ifdef CONFIG_KASAN_HW_TAGS -alternative_if_not ARM64_MTE +alternative_cb kasan_hw_tags_enable b 1f -alternative_else_nop_endif +alternative_cb_end mov \tmp, KERNEL_GCR_EL1 msr_s SYS_GCR_EL1, \tmp 1: @@ -178,10 +178,10 @@ alternative_else_nop_endif .endm .macro mte_set_user_gcr, tsk, tmp, tmp2 -#ifdef CONFIG_ARM64_MTE -alternative_if_not ARM64_MTE +#ifdef CONFIG_KASAN_HW_TAGS +alternative_cb kasan_hw_tags_enable b 1f -alternative_else_nop_endif +alternative_cb_end ldr \tmp, [\tsk, #THREAD_MTE_CTRL] mte_set_gcr \tmp, \tmp2 diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index ff4962750b3d..fa244c426f61 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -121,40 +121,62 @@ struct fpsimd_last_state_struct { static DEFINE_PER_CPU(struct fpsimd_last_state_struct, fpsimd_last_state); -/* Default VL for tasks that don't set it explicitly: */ -static int __sve_default_vl = -1; +__ro_after_init struct vl_info vl_info[ARM64_VEC_MAX] = { +#ifdef CONFIG_ARM64_SVE + [ARM64_VEC_SVE] = { + .type = ARM64_VEC_SVE, + .name = "SVE", + .min_vl = SVE_VL_MIN, + .max_vl = SVE_VL_MIN, + .max_virtualisable_vl = SVE_VL_MIN, + }, +#endif +}; -static int get_sve_default_vl(void) +static unsigned int vec_vl_inherit_flag(enum vec_type type) { - return READ_ONCE(__sve_default_vl); + switch (type) { + case ARM64_VEC_SVE: + return TIF_SVE_VL_INHERIT; + default: + WARN_ON_ONCE(1); + return 0; + } +} + +struct vl_config { + int __default_vl; /* Default VL for tasks */ +}; + +static struct vl_config vl_config[ARM64_VEC_MAX]; + +static inline int get_default_vl(enum vec_type type) +{ + return READ_ONCE(vl_config[type].__default_vl); } #ifdef CONFIG_ARM64_SVE -static void set_sve_default_vl(int val) +static inline int get_sve_default_vl(void) { - WRITE_ONCE(__sve_default_vl, val); + return get_default_vl(ARM64_VEC_SVE); } -/* Maximum supported vector length across all CPUs (initially poisoned) */ -int __ro_after_init sve_max_vl = SVE_VL_MIN; -int __ro_after_init sve_max_virtualisable_vl = SVE_VL_MIN; +static inline void set_default_vl(enum vec_type type, int val) +{ + WRITE_ONCE(vl_config[type].__default_vl, val); +} -/* - * Set of available vector lengths, - * where length vq encoded as bit __vq_to_bit(vq): - */ -__ro_after_init DECLARE_BITMAP(sve_vq_map, SVE_VQ_MAX); -/* Set of vector lengths present on at least one cpu: */ -static __ro_after_init DECLARE_BITMAP(sve_vq_partial_map, SVE_VQ_MAX); +static inline void set_sve_default_vl(int val) +{ + set_default_vl(ARM64_VEC_SVE, val); +} static void __percpu *efi_sve_state; #else /* ! CONFIG_ARM64_SVE */ /* Dummy declaration for code that will be optimised out: */ -extern __ro_after_init DECLARE_BITMAP(sve_vq_map, SVE_VQ_MAX); -extern __ro_after_init DECLARE_BITMAP(sve_vq_partial_map, SVE_VQ_MAX); extern void __percpu *efi_sve_state; #endif /* ! CONFIG_ARM64_SVE */ @@ -228,6 +250,29 @@ static void sve_free(struct task_struct *task) __sve_free(task); } +unsigned int task_get_vl(const struct task_struct *task, enum vec_type type) +{ + return task->thread.vl[type]; +} + +void task_set_vl(struct task_struct *task, enum vec_type type, + unsigned long vl) +{ + task->thread.vl[type] = vl; +} + +unsigned int task_get_vl_onexec(const struct task_struct *task, + enum vec_type type) +{ + return task->thread.vl_onexec[type]; +} + +void task_set_vl_onexec(struct task_struct *task, enum vec_type type, + unsigned long vl) +{ + task->thread.vl_onexec[type] = vl; +} + /* * TIF_SVE controls whether a task can use SVE without trapping while * in userspace, and also the way a task's FPSIMD/SVE state is stored @@ -287,12 +332,13 @@ static void task_fpsimd_load(void) WARN_ON(!system_supports_fpsimd()); WARN_ON(!have_cpu_fpsimd_context()); - if (IS_ENABLED(CONFIG_ARM64_SVE) && test_thread_flag(TIF_SVE)) + if (IS_ENABLED(CONFIG_ARM64_SVE) && test_thread_flag(TIF_SVE)) { + sve_set_vq(sve_vq_from_vl(task_get_sve_vl(current)) - 1); sve_load_state(sve_pffr(¤t->thread), - ¤t->thread.uw.fpsimd_state.fpsr, - sve_vq_from_vl(current->thread.sve_vl) - 1); - else + ¤t->thread.uw.fpsimd_state.fpsr, true); + } else { fpsimd_load_state(¤t->thread.uw.fpsimd_state); + } } /* @@ -308,24 +354,26 @@ static void fpsimd_save(void) WARN_ON(!system_supports_fpsimd()); WARN_ON(!have_cpu_fpsimd_context()); - if (!test_thread_flag(TIF_FOREIGN_FPSTATE)) { - if (IS_ENABLED(CONFIG_ARM64_SVE) && - test_thread_flag(TIF_SVE)) { - if (WARN_ON(sve_get_vl() != last->sve_vl)) { - /* - * Can't save the user regs, so current would - * re-enter user with corrupt state. - * There's no way to recover, so kill it: - */ - force_signal_inject(SIGKILL, SI_KERNEL, 0, 0); - return; - } - - sve_save_state((char *)last->sve_state + - sve_ffr_offset(last->sve_vl), - &last->st->fpsr); - } else - fpsimd_save_state(last->st); + if (test_thread_flag(TIF_FOREIGN_FPSTATE)) + return; + + if (IS_ENABLED(CONFIG_ARM64_SVE) && + test_thread_flag(TIF_SVE)) { + if (WARN_ON(sve_get_vl() != last->sve_vl)) { + /* + * Can't save the user regs, so current would + * re-enter user with corrupt state. + * There's no way to recover, so kill it: + */ + force_signal_inject(SIGKILL, SI_KERNEL, 0, 0); + return; + } + + sve_save_state((char *)last->sve_state + + sve_ffr_offset(last->sve_vl), + &last->st->fpsr, true); + } else { + fpsimd_save_state(last->st); } } @@ -335,21 +383,23 @@ static void fpsimd_save(void) * If things go wrong there's a bug somewhere, but try to fall back to a * safe choice. */ -static unsigned int find_supported_vector_length(unsigned int vl) +static unsigned int find_supported_vector_length(enum vec_type type, + unsigned int vl) { + struct vl_info *info = &vl_info[type]; int bit; - int max_vl = sve_max_vl; + int max_vl = info->max_vl; if (WARN_ON(!sve_vl_valid(vl))) - vl = SVE_VL_MIN; + vl = info->min_vl; if (WARN_ON(!sve_vl_valid(max_vl))) - max_vl = SVE_VL_MIN; + max_vl = info->min_vl; if (vl > max_vl) vl = max_vl; - bit = find_next_bit(sve_vq_map, SVE_VQ_MAX, + bit = find_next_bit(info->vq_map, SVE_VQ_MAX, __vq_to_bit(sve_vq_from_vl(vl))); return sve_vl_from_vq(__bit_to_vq(bit)); } @@ -359,6 +409,7 @@ static unsigned int find_supported_vector_length(unsigned int vl) static int sve_proc_do_default_vl(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { + struct vl_info *info = &vl_info[ARM64_VEC_SVE]; int ret; int vl = get_sve_default_vl(); struct ctl_table tmp_table = { @@ -372,12 +423,12 @@ static int sve_proc_do_default_vl(struct ctl_table *table, int write, /* Writing -1 has the special meaning "set to max": */ if (vl == -1) - vl = sve_max_vl; + vl = info->max_vl; if (!sve_vl_valid(vl)) return -EINVAL; - set_sve_default_vl(find_supported_vector_length(vl)); + set_sve_default_vl(find_supported_vector_length(ARM64_VEC_SVE, vl)); return 0; } @@ -456,7 +507,7 @@ static void fpsimd_to_sve(struct task_struct *task) if (!system_supports_sve()) return; - vq = sve_vq_from_vl(task->thread.sve_vl); + vq = sve_vq_from_vl(task_get_sve_vl(task)); __fpsimd_to_sve(sst, fst, vq); } @@ -482,7 +533,7 @@ static void sve_to_fpsimd(struct task_struct *task) if (!system_supports_sve()) return; - vq = sve_vq_from_vl(task->thread.sve_vl); + vq = sve_vq_from_vl(task_get_sve_vl(task)); for (i = 0; i < SVE_NUM_ZREGS; ++i) { p = (__uint128_t const *)ZREG(sst, vq, i); fst->vregs[i] = arm64_le128_to_cpu(*p); @@ -495,9 +546,9 @@ static void sve_to_fpsimd(struct task_struct *task) * Return how many bytes of memory are required to store the full SVE * state for task, given task's currently configured vector length. */ -size_t sve_state_size(struct task_struct const *task) +static size_t sve_state_size(struct task_struct const *task) { - return SVE_SIG_REGS_SIZE(sve_vq_from_vl(task->thread.sve_vl)); + return SVE_SIG_REGS_SIZE(sve_vq_from_vl(task_get_sve_vl(task))); } /* @@ -572,7 +623,7 @@ void sve_sync_from_fpsimd_zeropad(struct task_struct *task) if (!test_tsk_thread_flag(task, TIF_SVE)) return; - vq = sve_vq_from_vl(task->thread.sve_vl); + vq = sve_vq_from_vl(task_get_sve_vl(task)); memset(sst, 0, SVE_SIG_REGS_SIZE(vq)); __fpsimd_to_sve(sst, fst, vq); @@ -596,20 +647,20 @@ int sve_set_vector_length(struct task_struct *task, if (vl > SVE_VL_ARCH_MAX) vl = SVE_VL_ARCH_MAX; - vl = find_supported_vector_length(vl); + vl = find_supported_vector_length(ARM64_VEC_SVE, vl); if (flags & (PR_SVE_VL_INHERIT | PR_SVE_SET_VL_ONEXEC)) - task->thread.sve_vl_onexec = vl; + task_set_sve_vl_onexec(task, vl); else /* Reset VL to system default on next exec: */ - task->thread.sve_vl_onexec = 0; + task_set_sve_vl_onexec(task, 0); /* Only actually set the VL if not deferred: */ if (flags & PR_SVE_SET_VL_ONEXEC) goto out; - if (vl == task->thread.sve_vl) + if (vl == task_get_sve_vl(task)) goto out; /* @@ -636,7 +687,7 @@ int sve_set_vector_length(struct task_struct *task, */ sve_free(task); - task->thread.sve_vl = vl; + task_set_sve_vl(task, vl); out: update_tsk_thread_flag(task, TIF_SVE_VL_INHERIT, @@ -656,9 +707,9 @@ static int sve_prctl_status(unsigned long flags) int ret; if (flags & PR_SVE_SET_VL_ONEXEC) - ret = current->thread.sve_vl_onexec; + ret = task_get_sve_vl_onexec(current); else - ret = current->thread.sve_vl; + ret = task_get_sve_vl(current); if (test_thread_flag(TIF_SVE_VL_INHERIT)) ret |= PR_SVE_VL_INHERIT; @@ -694,18 +745,15 @@ int sve_get_current_vl(void) return sve_prctl_status(0); } -static void sve_probe_vqs(DECLARE_BITMAP(map, SVE_VQ_MAX)) +static void vec_probe_vqs(struct vl_info *info, + DECLARE_BITMAP(map, SVE_VQ_MAX)) { unsigned int vq, vl; - unsigned long zcr; bitmap_zero(map, SVE_VQ_MAX); - zcr = ZCR_ELx_LEN_MASK; - zcr = read_sysreg_s(SYS_ZCR_EL1) & ~zcr; - for (vq = SVE_VQ_MAX; vq >= SVE_VQ_MIN; --vq) { - write_sysreg_s(zcr | (vq - 1), SYS_ZCR_EL1); /* self-syncing */ + write_vl(info->type, vq - 1); /* self-syncing */ vl = sve_get_vl(); vq = sve_vq_from_vl(vl); /* skip intervening lengths */ set_bit(__vq_to_bit(vq), map); @@ -716,10 +764,11 @@ static void sve_probe_vqs(DECLARE_BITMAP(map, SVE_VQ_MAX)) * Initialise the set of known supported VQs for the boot CPU. * This is called during kernel boot, before secondary CPUs are brought up. */ -void __init sve_init_vq_map(void) +void __init vec_init_vq_map(enum vec_type type) { - sve_probe_vqs(sve_vq_map); - bitmap_copy(sve_vq_partial_map, sve_vq_map, SVE_VQ_MAX); + struct vl_info *info = &vl_info[type]; + vec_probe_vqs(info, info->vq_map); + bitmap_copy(info->vq_partial_map, info->vq_map, SVE_VQ_MAX); } /* @@ -727,30 +776,33 @@ void __init sve_init_vq_map(void) * those not supported by the current CPU. * This function is called during the bring-up of early secondary CPUs only. */ -void sve_update_vq_map(void) +void vec_update_vq_map(enum vec_type type) { + struct vl_info *info = &vl_info[type]; DECLARE_BITMAP(tmp_map, SVE_VQ_MAX); - sve_probe_vqs(tmp_map); - bitmap_and(sve_vq_map, sve_vq_map, tmp_map, SVE_VQ_MAX); - bitmap_or(sve_vq_partial_map, sve_vq_partial_map, tmp_map, SVE_VQ_MAX); + vec_probe_vqs(info, tmp_map); + bitmap_and(info->vq_map, info->vq_map, tmp_map, SVE_VQ_MAX); + bitmap_or(info->vq_partial_map, info->vq_partial_map, tmp_map, + SVE_VQ_MAX); } /* * Check whether the current CPU supports all VQs in the committed set. * This function is called during the bring-up of late secondary CPUs only. */ -int sve_verify_vq_map(void) +int vec_verify_vq_map(enum vec_type type) { + struct vl_info *info = &vl_info[type]; DECLARE_BITMAP(tmp_map, SVE_VQ_MAX); unsigned long b; - sve_probe_vqs(tmp_map); + vec_probe_vqs(info, tmp_map); bitmap_complement(tmp_map, tmp_map, SVE_VQ_MAX); - if (bitmap_intersects(tmp_map, sve_vq_map, SVE_VQ_MAX)) { - pr_warn("SVE: cpu%d: Required vector length(s) missing\n", - smp_processor_id()); + if (bitmap_intersects(tmp_map, info->vq_map, SVE_VQ_MAX)) { + pr_warn("%s: cpu%d: Required vector length(s) missing\n", + info->name, smp_processor_id()); return -EINVAL; } @@ -766,7 +818,7 @@ int sve_verify_vq_map(void) /* Recover the set of supported VQs: */ bitmap_complement(tmp_map, tmp_map, SVE_VQ_MAX); /* Find VQs supported that are not globally supported: */ - bitmap_andnot(tmp_map, tmp_map, sve_vq_map, SVE_VQ_MAX); + bitmap_andnot(tmp_map, tmp_map, info->vq_map, SVE_VQ_MAX); /* Find the lowest such VQ, if any: */ b = find_last_bit(tmp_map, SVE_VQ_MAX); @@ -777,9 +829,9 @@ int sve_verify_vq_map(void) * Mismatches above sve_max_virtualisable_vl are fine, since * no guest is allowed to configure ZCR_EL2.LEN to exceed this: */ - if (sve_vl_from_vq(__bit_to_vq(b)) <= sve_max_virtualisable_vl) { - pr_warn("SVE: cpu%d: Unsupported vector length(s) present\n", - smp_processor_id()); + if (sve_vl_from_vq(__bit_to_vq(b)) <= info->max_virtualisable_vl) { + pr_warn("%s: cpu%d: Unsupported vector length(s) present\n", + info->name, smp_processor_id()); return -EINVAL; } @@ -788,6 +840,8 @@ int sve_verify_vq_map(void) static void __init sve_efi_setup(void) { + struct vl_info *info = &vl_info[ARM64_VEC_SVE]; + if (!IS_ENABLED(CONFIG_EFI)) return; @@ -796,11 +850,11 @@ static void __init sve_efi_setup(void) * This is evidence of a crippled system and we are returning void, * so no attempt is made to handle this situation here. */ - if (!sve_vl_valid(sve_max_vl)) + if (!sve_vl_valid(info->max_vl)) goto fail; efi_sve_state = __alloc_percpu( - SVE_SIG_REGS_SIZE(sve_vq_from_vl(sve_max_vl)), SVE_VQ_BYTES); + SVE_SIG_REGS_SIZE(sve_vq_from_vl(info->max_vl)), SVE_VQ_BYTES); if (!efi_sve_state) goto fail; @@ -849,6 +903,7 @@ u64 read_zcr_features(void) void __init sve_setup(void) { + struct vl_info *info = &vl_info[ARM64_VEC_SVE]; u64 zcr; DECLARE_BITMAP(tmp_map, SVE_VQ_MAX); unsigned long b; @@ -861,49 +916,52 @@ void __init sve_setup(void) * so sve_vq_map must have at least SVE_VQ_MIN set. * If something went wrong, at least try to patch it up: */ - if (WARN_ON(!test_bit(__vq_to_bit(SVE_VQ_MIN), sve_vq_map))) - set_bit(__vq_to_bit(SVE_VQ_MIN), sve_vq_map); + if (WARN_ON(!test_bit(__vq_to_bit(SVE_VQ_MIN), info->vq_map))) + set_bit(__vq_to_bit(SVE_VQ_MIN), info->vq_map); zcr = read_sanitised_ftr_reg(SYS_ZCR_EL1); - sve_max_vl = sve_vl_from_vq((zcr & ZCR_ELx_LEN_MASK) + 1); + info->max_vl = sve_vl_from_vq((zcr & ZCR_ELx_LEN_MASK) + 1); /* * Sanity-check that the max VL we determined through CPU features * corresponds properly to sve_vq_map. If not, do our best: */ - if (WARN_ON(sve_max_vl != find_supported_vector_length(sve_max_vl))) - sve_max_vl = find_supported_vector_length(sve_max_vl); + if (WARN_ON(info->max_vl != find_supported_vector_length(ARM64_VEC_SVE, + info->max_vl))) + info->max_vl = find_supported_vector_length(ARM64_VEC_SVE, + info->max_vl); /* * For the default VL, pick the maximum supported value <= 64. * VL == 64 is guaranteed not to grow the signal frame. */ - set_sve_default_vl(find_supported_vector_length(64)); + set_sve_default_vl(find_supported_vector_length(ARM64_VEC_SVE, 64)); - bitmap_andnot(tmp_map, sve_vq_partial_map, sve_vq_map, + bitmap_andnot(tmp_map, info->vq_partial_map, info->vq_map, SVE_VQ_MAX); b = find_last_bit(tmp_map, SVE_VQ_MAX); if (b >= SVE_VQ_MAX) /* No non-virtualisable VLs found */ - sve_max_virtualisable_vl = SVE_VQ_MAX; + info->max_virtualisable_vl = SVE_VQ_MAX; else if (WARN_ON(b == SVE_VQ_MAX - 1)) /* No virtualisable VLs? This is architecturally forbidden. */ - sve_max_virtualisable_vl = SVE_VQ_MIN; + info->max_virtualisable_vl = SVE_VQ_MIN; else /* b + 1 < SVE_VQ_MAX */ - sve_max_virtualisable_vl = sve_vl_from_vq(__bit_to_vq(b + 1)); + info->max_virtualisable_vl = sve_vl_from_vq(__bit_to_vq(b + 1)); - if (sve_max_virtualisable_vl > sve_max_vl) - sve_max_virtualisable_vl = sve_max_vl; + if (info->max_virtualisable_vl > info->max_vl) + info->max_virtualisable_vl = info->max_vl; - pr_info("SVE: maximum available vector length %u bytes per vector\n", - sve_max_vl); - pr_info("SVE: default vector length %u bytes per vector\n", - get_sve_default_vl()); + pr_info("%s: maximum available vector length %u bytes per vector\n", + info->name, info->max_vl); + pr_info("%s: default vector length %u bytes per vector\n", + info->name, get_sve_default_vl()); /* KVM decides whether to support mismatched systems. Just warn here: */ - if (sve_max_virtualisable_vl < sve_max_vl) - pr_warn("SVE: unvirtualisable vector lengths present\n"); + if (sve_max_virtualisable_vl() < sve_max_vl()) + pr_warn("%s: unvirtualisable vector lengths present\n", + info->name); sve_efi_setup(); } @@ -958,9 +1016,9 @@ void do_sve_acc(unsigned int esr, struct pt_regs *regs) */ if (!test_thread_flag(TIF_FOREIGN_FPSTATE)) { unsigned long vq_minus_one = - sve_vq_from_vl(current->thread.sve_vl) - 1; + sve_vq_from_vl(task_get_sve_vl(current)) - 1; sve_set_vq(vq_minus_one); - sve_flush_live(vq_minus_one); + sve_flush_live(true, vq_minus_one); fpsimd_bind_task_to_cpu(); } else { fpsimd_to_sve(current); @@ -1030,10 +1088,43 @@ void fpsimd_thread_switch(struct task_struct *next) __put_cpu_fpsimd_context(); } -void fpsimd_flush_thread(void) +static void fpsimd_flush_thread_vl(enum vec_type type) { int vl, supported_vl; + /* + * Reset the task vector length as required. This is where we + * ensure that all user tasks have a valid vector length + * configured: no kernel task can become a user task without + * an exec and hence a call to this function. By the time the + * first call to this function is made, all early hardware + * probing is complete, so __sve_default_vl should be valid. + * If a bug causes this to go wrong, we make some noise and + * try to fudge thread.sve_vl to a safe value here. + */ + vl = task_get_vl_onexec(current, type); + if (!vl) + vl = get_default_vl(type); + + if (WARN_ON(!sve_vl_valid(vl))) + vl = SVE_VL_MIN; + + supported_vl = find_supported_vector_length(type, vl); + if (WARN_ON(supported_vl != vl)) + vl = supported_vl; + + task_set_vl(current, type, vl); + + /* + * If the task is not set to inherit, ensure that the vector + * length will be reset by a subsequent exec: + */ + if (!test_thread_flag(vec_vl_inherit_flag(type))) + task_set_vl_onexec(current, type, 0); +} + +void fpsimd_flush_thread(void) +{ if (!system_supports_fpsimd()) return; @@ -1046,36 +1137,7 @@ void fpsimd_flush_thread(void) if (system_supports_sve()) { clear_thread_flag(TIF_SVE); sve_free(current); - - /* - * Reset the task vector length as required. - * This is where we ensure that all user tasks have a valid - * vector length configured: no kernel task can become a user - * task without an exec and hence a call to this function. - * By the time the first call to this function is made, all - * early hardware probing is complete, so __sve_default_vl - * should be valid. - * If a bug causes this to go wrong, we make some noise and - * try to fudge thread.sve_vl to a safe value here. - */ - vl = current->thread.sve_vl_onexec ? - current->thread.sve_vl_onexec : get_sve_default_vl(); - - if (WARN_ON(!sve_vl_valid(vl))) - vl = SVE_VL_MIN; - - supported_vl = find_supported_vector_length(vl); - if (WARN_ON(supported_vl != vl)) - vl = supported_vl; - - current->thread.sve_vl = vl; - - /* - * If the task is not set to inherit, ensure that the vector - * length will be reset by a subsequent exec: - */ - if (!test_thread_flag(TIF_SVE_VL_INHERIT)) - current->thread.sve_vl_onexec = 0; + fpsimd_flush_thread_vl(ARM64_VEC_SVE); } put_cpu_fpsimd_context(); @@ -1120,7 +1182,7 @@ static void fpsimd_bind_task_to_cpu(void) WARN_ON(!system_supports_fpsimd()); last->st = ¤t->thread.uw.fpsimd_state; last->sve_state = current->thread.sve_state; - last->sve_vl = current->thread.sve_vl; + last->sve_vl = task_get_sve_vl(current); current->thread.fpsimd_cpu = smp_processor_id(); if (system_supports_sve()) { @@ -1353,8 +1415,9 @@ void __efi_fpsimd_begin(void) __this_cpu_write(efi_sve_state_used, true); - sve_save_state(sve_state + sve_ffr_offset(sve_max_vl), - &this_cpu_ptr(&efi_fpsimd_state)->fpsr); + sve_save_state(sve_state + sve_ffr_offset(sve_max_vl()), + &this_cpu_ptr(&efi_fpsimd_state)->fpsr, + true); } else { fpsimd_save_state(this_cpu_ptr(&efi_fpsimd_state)); } @@ -1378,9 +1441,10 @@ void __efi_fpsimd_end(void) likely(__this_cpu_read(efi_sve_state_used))) { char const *sve_state = this_cpu_ptr(efi_sve_state); - sve_load_state(sve_state + sve_ffr_offset(sve_max_vl), + sve_set_vq(sve_vq_from_vl(sve_get_vl()) - 1); + sve_load_state(sve_state + sve_ffr_offset(sve_max_vl()), &this_cpu_ptr(&efi_fpsimd_state)->fpsr, - sve_vq_from_vl(sve_get_vl()) - 1); + true); __this_cpu_write(efi_sve_state_used, false); } else { diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c index 7f467bd9db7a..fc62dfe73f93 100644 --- a/arch/arm64/kernel/ftrace.c +++ b/arch/arm64/kernel/ftrace.c @@ -236,11 +236,6 @@ void arch_ftrace_update_code(int command) command |= FTRACE_MAY_SLEEP; ftrace_modify_all_code(command); } - -int __init ftrace_dyn_arch_init(void) -{ - return 0; -} #endif /* CONFIG_DYNAMIC_FTRACE */ #ifdef CONFIG_FUNCTION_GRAPH_TRACER diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 17962452e31d..6a98f1a38c29 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -412,7 +412,7 @@ SYM_FUNC_END(__create_page_tables) scs_load \tsk adr_l \tmp1, __per_cpu_offset - ldr w\tmp2, [\tsk, #TSK_CPU] + ldr w\tmp2, [\tsk, #TSK_TI_CPU] ldr \tmp1, [\tmp1, \tmp2, lsl #3] set_this_cpu_offset \tmp1 .endm diff --git a/arch/arm64/kernel/hibernate-asm.S b/arch/arm64/kernel/hibernate-asm.S index 81c0186a5e32..0e1d9c3c6a93 100644 --- a/arch/arm64/kernel/hibernate-asm.S +++ b/arch/arm64/kernel/hibernate-asm.S @@ -16,26 +16,6 @@ #include <asm/virt.h> /* - * To prevent the possibility of old and new partial table walks being visible - * in the tlb, switch the ttbr to a zero page when we invalidate the old - * records. D4.7.1 'General TLB maintenance requirements' in ARM DDI 0487A.i - * Even switching to our copied tables will cause a changed output address at - * each stage of the walk. - */ -.macro break_before_make_ttbr_switch zero_page, page_table, tmp, tmp2 - phys_to_ttbr \tmp, \zero_page - msr ttbr1_el1, \tmp - isb - tlbi vmalle1 - dsb nsh - phys_to_ttbr \tmp, \page_table - offset_ttbr1 \tmp, \tmp2 - msr ttbr1_el1, \tmp - isb -.endm - - -/* * Resume from hibernate * * Loads temporary page tables then restores the memory image. @@ -112,56 +92,4 @@ alternative_insn "dc cvau, x4", "dc civac, x4", ARM64_WORKAROUND_CLEAN_CACHE hvc #0 3: ret SYM_CODE_END(swsusp_arch_suspend_exit) - -/* - * Restore the hyp stub. - * This must be done before the hibernate page is unmapped by _cpu_resume(), - * but happens before any of the hyp-stub's code is cleaned to PoC. - * - * x24: The physical address of __hyp_stub_vectors - */ -SYM_CODE_START_LOCAL(el1_sync) - msr vbar_el2, x24 - eret -SYM_CODE_END(el1_sync) - -.macro invalid_vector label -SYM_CODE_START_LOCAL(\label) - b \label -SYM_CODE_END(\label) -.endm - - invalid_vector el2_sync_invalid - invalid_vector el2_irq_invalid - invalid_vector el2_fiq_invalid - invalid_vector el2_error_invalid - invalid_vector el1_sync_invalid - invalid_vector el1_irq_invalid - invalid_vector el1_fiq_invalid - invalid_vector el1_error_invalid - -/* el2 vectors - switch el2 here while we restore the memory image. */ - .align 11 -SYM_CODE_START(hibernate_el2_vectors) - ventry el2_sync_invalid // Synchronous EL2t - ventry el2_irq_invalid // IRQ EL2t - ventry el2_fiq_invalid // FIQ EL2t - ventry el2_error_invalid // Error EL2t - - ventry el2_sync_invalid // Synchronous EL2h - ventry el2_irq_invalid // IRQ EL2h - ventry el2_fiq_invalid // FIQ EL2h - ventry el2_error_invalid // Error EL2h - - ventry el1_sync // Synchronous 64-bit EL1 - ventry el1_irq_invalid // IRQ 64-bit EL1 - ventry el1_fiq_invalid // FIQ 64-bit EL1 - ventry el1_error_invalid // Error 64-bit EL1 - - ventry el1_sync_invalid // Synchronous 32-bit EL1 - ventry el1_irq_invalid // IRQ 32-bit EL1 - ventry el1_fiq_invalid // FIQ 32-bit EL1 - ventry el1_error_invalid // Error 32-bit EL1 -SYM_CODE_END(hibernate_el2_vectors) - .popsection diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c index 46a0b4d6e251..2758f75d6809 100644 --- a/arch/arm64/kernel/hibernate.c +++ b/arch/arm64/kernel/hibernate.c @@ -49,10 +49,7 @@ extern int in_suspend; /* Do we need to reset el2? */ -#define el2_reset_needed() (is_hyp_mode_available() && !is_kernel_in_hyp_mode()) - -/* temporary el2 vectors in the __hibernate_exit_text section. */ -extern char hibernate_el2_vectors[]; +#define el2_reset_needed() (is_hyp_nvhe()) /* hyp-stub vectors, used to restore el2 during resume from hibernate. */ extern char __hyp_stub_vectors[]; @@ -215,26 +212,7 @@ static int create_safe_exec_page(void *src_start, size_t length, if (rc) return rc; - /* - * Load our new page tables. A strict BBM approach requires that we - * ensure that TLBs are free of any entries that may overlap with the - * global mappings we are about to install. - * - * For a real hibernate/resume cycle TTBR0 currently points to a zero - * page, but TLBs may contain stale ASID-tagged entries (e.g. for EFI - * runtime services), while for a userspace-driven test_resume cycle it - * points to userspace page tables (and we must point it at a zero page - * ourselves). - * - * We change T0SZ as part of installing the idmap. This is undone by - * cpu_uninstall_idmap() in __cpu_suspend_exit(). - */ - cpu_set_reserved_ttbr0(); - local_flush_tlb_all(); - __cpu_set_tcr_t0sz(t0sz); - write_sysreg(trans_ttbr0, ttbr0_el1); - isb(); - + cpu_install_ttbr0(trans_ttbr0, t0sz); *phys_dst_addr = virt_to_phys(page); return 0; @@ -434,6 +412,7 @@ int swsusp_arch_resume(void) void *zero_page; size_t exit_size; pgd_t *tmp_pg_dir; + phys_addr_t el2_vectors; void __noreturn (*hibernate_exit)(phys_addr_t, phys_addr_t, void *, void *, phys_addr_t, phys_addr_t); struct trans_pgd_info trans_info = { @@ -461,6 +440,14 @@ int swsusp_arch_resume(void) return -ENOMEM; } + if (el2_reset_needed()) { + rc = trans_pgd_copy_el2_vectors(&trans_info, &el2_vectors); + if (rc) { + pr_err("Failed to setup el2 vectors\n"); + return rc; + } + } + exit_size = __hibernate_exit_text_end - __hibernate_exit_text_start; /* * Copy swsusp_arch_suspend_exit() to a safe page. This will generate @@ -474,25 +461,13 @@ int swsusp_arch_resume(void) } /* - * The hibernate exit text contains a set of el2 vectors, that will - * be executed at el2 with the mmu off in order to reload hyp-stub. - */ - dcache_clean_inval_poc((unsigned long)hibernate_exit, - (unsigned long)hibernate_exit + exit_size); - - /* * KASLR will cause the el2 vectors to be in a different location in * the resumed kernel. Load hibernate's temporary copy into el2. * * We can skip this step if we booted at EL1, or are running with VHE. */ - if (el2_reset_needed()) { - phys_addr_t el2_vectors = (phys_addr_t)hibernate_exit; - el2_vectors += hibernate_el2_vectors - - __hibernate_exit_text_start; /* offset */ - + if (el2_reset_needed()) __hyp_set_vectors(el2_vectors); - } hibernate_exit(virt_to_phys(tmp_pg_dir), resume_hdr.ttbr1_el1, resume_hdr.reenter_kernel, restore_pblist, diff --git a/arch/arm64/kernel/machine_kexec.c b/arch/arm64/kernel/machine_kexec.c index 213d56c14f60..1038494135c8 100644 --- a/arch/arm64/kernel/machine_kexec.c +++ b/arch/arm64/kernel/machine_kexec.c @@ -21,12 +21,8 @@ #include <asm/mmu.h> #include <asm/mmu_context.h> #include <asm/page.h> - -#include "cpu-reset.h" - -/* Global variables for the arm64_relocate_new_kernel routine. */ -extern const unsigned char arm64_relocate_new_kernel[]; -extern const unsigned long arm64_relocate_new_kernel_size; +#include <asm/sections.h> +#include <asm/trans_pgd.h> /** * kexec_image_info - For debugging output. @@ -43,7 +39,9 @@ static void _kexec_image_info(const char *func, int line, pr_debug(" start: %lx\n", kimage->start); pr_debug(" head: %lx\n", kimage->head); pr_debug(" nr_segments: %lu\n", kimage->nr_segments); + pr_debug(" dtb_mem: %pa\n", &kimage->arch.dtb_mem); pr_debug(" kern_reloc: %pa\n", &kimage->arch.kern_reloc); + pr_debug(" el2_vectors: %pa\n", &kimage->arch.el2_vectors); for (i = 0; i < kimage->nr_segments; i++) { pr_debug(" segment[%lu]: %016lx - %016lx, 0x%lx bytes, %lu pages\n", @@ -60,29 +58,6 @@ void machine_kexec_cleanup(struct kimage *kimage) /* Empty routine needed to avoid build errors. */ } -int machine_kexec_post_load(struct kimage *kimage) -{ - void *reloc_code = page_to_virt(kimage->control_code_page); - - memcpy(reloc_code, arm64_relocate_new_kernel, - arm64_relocate_new_kernel_size); - kimage->arch.kern_reloc = __pa(reloc_code); - kexec_image_info(kimage); - - /* - * For execution with the MMU off, reloc_code needs to be cleaned to the - * PoC and invalidated from the I-cache. - */ - dcache_clean_inval_poc((unsigned long)reloc_code, - (unsigned long)reloc_code + - arm64_relocate_new_kernel_size); - icache_inval_pou((uintptr_t)reloc_code, - (uintptr_t)reloc_code + - arm64_relocate_new_kernel_size); - - return 0; -} - /** * machine_kexec_prepare - Prepare for a kexec reboot. * @@ -101,45 +76,6 @@ int machine_kexec_prepare(struct kimage *kimage) } /** - * kexec_list_flush - Helper to flush the kimage list and source pages to PoC. - */ -static void kexec_list_flush(struct kimage *kimage) -{ - kimage_entry_t *entry; - - for (entry = &kimage->head; ; entry++) { - unsigned int flag; - unsigned long addr; - - /* flush the list entries. */ - dcache_clean_inval_poc((unsigned long)entry, - (unsigned long)entry + - sizeof(kimage_entry_t)); - - flag = *entry & IND_FLAGS; - if (flag == IND_DONE) - break; - - addr = (unsigned long)phys_to_virt(*entry & PAGE_MASK); - - switch (flag) { - case IND_INDIRECTION: - /* Set entry point just before the new list page. */ - entry = (kimage_entry_t *)addr - 1; - break; - case IND_SOURCE: - /* flush the source pages. */ - dcache_clean_inval_poc(addr, addr + PAGE_SIZE); - break; - case IND_DESTINATION: - break; - default: - BUG(); - } - } -} - -/** * kexec_segment_flush - Helper to flush the kimage segments to PoC. */ static void kexec_segment_flush(const struct kimage *kimage) @@ -163,6 +99,75 @@ static void kexec_segment_flush(const struct kimage *kimage) } } +/* Allocates pages for kexec page table */ +static void *kexec_page_alloc(void *arg) +{ + struct kimage *kimage = (struct kimage *)arg; + struct page *page = kimage_alloc_control_pages(kimage, 0); + + if (!page) + return NULL; + + memset(page_address(page), 0, PAGE_SIZE); + + return page_address(page); +} + +int machine_kexec_post_load(struct kimage *kimage) +{ + int rc; + pgd_t *trans_pgd; + void *reloc_code = page_to_virt(kimage->control_code_page); + long reloc_size; + struct trans_pgd_info info = { + .trans_alloc_page = kexec_page_alloc, + .trans_alloc_arg = kimage, + }; + + /* If in place, relocation is not used, only flush next kernel */ + if (kimage->head & IND_DONE) { + kexec_segment_flush(kimage); + kexec_image_info(kimage); + return 0; + } + + kimage->arch.el2_vectors = 0; + if (is_hyp_nvhe()) { + rc = trans_pgd_copy_el2_vectors(&info, + &kimage->arch.el2_vectors); + if (rc) + return rc; + } + + /* Create a copy of the linear map */ + trans_pgd = kexec_page_alloc(kimage); + if (!trans_pgd) + return -ENOMEM; + rc = trans_pgd_create_copy(&info, &trans_pgd, PAGE_OFFSET, PAGE_END); + if (rc) + return rc; + kimage->arch.ttbr1 = __pa(trans_pgd); + kimage->arch.zero_page = __pa(empty_zero_page); + + reloc_size = __relocate_new_kernel_end - __relocate_new_kernel_start; + memcpy(reloc_code, __relocate_new_kernel_start, reloc_size); + kimage->arch.kern_reloc = __pa(reloc_code); + rc = trans_pgd_idmap_page(&info, &kimage->arch.ttbr0, + &kimage->arch.t0sz, reloc_code); + if (rc) + return rc; + kimage->arch.phys_offset = virt_to_phys(kimage) - (long)kimage; + + /* Flush the reloc_code in preparation for its execution. */ + dcache_clean_inval_poc((unsigned long)reloc_code, + (unsigned long)reloc_code + reloc_size); + icache_inval_pou((uintptr_t)reloc_code, + (uintptr_t)reloc_code + reloc_size); + kexec_image_info(kimage); + + return 0; +} + /** * machine_kexec - Do the kexec reboot. * @@ -180,31 +185,35 @@ void machine_kexec(struct kimage *kimage) WARN(in_kexec_crash && (stuck_cpus || smp_crash_stop_failed()), "Some CPUs may be stale, kdump will be unreliable.\n"); - /* Flush the kimage list and its buffers. */ - kexec_list_flush(kimage); - - /* Flush the new image if already in place. */ - if ((kimage != kexec_crash_image) && (kimage->head & IND_DONE)) - kexec_segment_flush(kimage); - pr_info("Bye!\n"); local_daif_mask(); /* - * cpu_soft_restart will shutdown the MMU, disable data caches, then - * transfer control to the kern_reloc which contains a copy of - * the arm64_relocate_new_kernel routine. arm64_relocate_new_kernel - * uses physical addressing to relocate the new image to its final - * position and transfers control to the image entry point when the - * relocation is complete. + * Both restart and kernel_reloc will shutdown the MMU, disable data + * caches. However, restart will start new kernel or purgatory directly, + * kernel_reloc contains the body of arm64_relocate_new_kernel * In kexec case, kimage->start points to purgatory assuming that * kernel entry and dtb address are embedded in purgatory by * userspace (kexec-tools). * In kexec_file case, the kernel starts directly without purgatory. */ - cpu_soft_restart(kimage->arch.kern_reloc, kimage->head, kimage->start, - kimage->arch.dtb_mem); + if (kimage->head & IND_DONE) { + typeof(cpu_soft_restart) *restart; + + cpu_install_idmap(); + restart = (void *)__pa_symbol(function_nocfi(cpu_soft_restart)); + restart(is_hyp_nvhe(), kimage->start, kimage->arch.dtb_mem, + 0, 0); + } else { + void (*kernel_reloc)(struct kimage *kimage); + + if (is_hyp_nvhe()) + __hyp_set_vectors(kimage->arch.el2_vectors); + cpu_install_ttbr0(kimage->arch.ttbr0, kimage->arch.t0sz); + kernel_reloc = (void *)kimage->arch.kern_reloc; + kernel_reloc(kimage); + } BUG(); /* Should never get here. */ } @@ -261,8 +270,6 @@ void arch_kexec_protect_crashkres(void) { int i; - kexec_segment_flush(kexec_crash_image); - for (i = 0; i < kexec_crash_image->nr_segments; i++) set_memory_valid( __phys_to_virt(kexec_crash_image->segment[i].mem), diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c index e5e801bc5312..f418ebc65f95 100644 --- a/arch/arm64/kernel/mte.c +++ b/arch/arm64/kernel/mte.c @@ -26,9 +26,12 @@ static DEFINE_PER_CPU_READ_MOSTLY(u64, mte_tcf_preferred); #ifdef CONFIG_KASAN_HW_TAGS -/* Whether the MTE asynchronous mode is enabled. */ -DEFINE_STATIC_KEY_FALSE(mte_async_mode); -EXPORT_SYMBOL_GPL(mte_async_mode); +/* + * The asynchronous and asymmetric MTE modes have the same behavior for + * store operations. This flag is set when either of these modes is enabled. + */ +DEFINE_STATIC_KEY_FALSE(mte_async_or_asymm_mode); +EXPORT_SYMBOL_GPL(mte_async_or_asymm_mode); #endif static void mte_sync_page_tags(struct page *page, pte_t old_pte, @@ -116,7 +119,7 @@ void mte_enable_kernel_sync(void) * Make sure we enter this function when no PE has set * async mode previously. */ - WARN_ONCE(system_uses_mte_async_mode(), + WARN_ONCE(system_uses_mte_async_or_asymm_mode(), "MTE async mode enabled system wide!"); __mte_enable_kernel("synchronous", SCTLR_ELx_TCF_SYNC); @@ -134,8 +137,34 @@ void mte_enable_kernel_async(void) * mode in between sync and async, this strategy needs * to be reviewed. */ - if (!system_uses_mte_async_mode()) - static_branch_enable(&mte_async_mode); + if (!system_uses_mte_async_or_asymm_mode()) + static_branch_enable(&mte_async_or_asymm_mode); +} + +void mte_enable_kernel_asymm(void) +{ + if (cpus_have_cap(ARM64_MTE_ASYMM)) { + __mte_enable_kernel("asymmetric", SCTLR_ELx_TCF_ASYMM); + + /* + * MTE asymm mode behaves as async mode for store + * operations. The mode is set system wide by the + * first PE that executes this function. + * + * Note: If in future KASAN acquires a runtime switching + * mode in between sync and async, this strategy needs + * to be reviewed. + */ + if (!system_uses_mte_async_or_asymm_mode()) + static_branch_enable(&mte_async_or_asymm_mode); + } else { + /* + * If the CPU does not support MTE asymmetric mode the + * kernel falls back on synchronous mode which is the + * default for kasan=on. + */ + mte_enable_kernel_sync(); + } } #endif @@ -179,6 +208,30 @@ static void mte_update_sctlr_user(struct task_struct *task) task->thread.sctlr_user = sctlr; } +static void mte_update_gcr_excl(struct task_struct *task) +{ + /* + * SYS_GCR_EL1 will be set to current->thread.mte_ctrl value by + * mte_set_user_gcr() in kernel_exit, but only if KASAN is enabled. + */ + if (kasan_hw_tags_enabled()) + return; + + write_sysreg_s( + ((task->thread.mte_ctrl >> MTE_CTRL_GCR_USER_EXCL_SHIFT) & + SYS_GCR_EL1_EXCL_MASK) | SYS_GCR_EL1_RRND, + SYS_GCR_EL1); +} + +void __init kasan_hw_tags_enable(struct alt_instr *alt, __le32 *origptr, + __le32 *updptr, int nr_inst) +{ + BUG_ON(nr_inst != 1); /* Branch -> NOP */ + + if (kasan_hw_tags_enabled()) + *updptr = cpu_to_le32(aarch64_insn_gen_nop()); +} + void mte_thread_init_user(void) { if (!system_supports_mte()) @@ -198,6 +251,7 @@ void mte_thread_switch(struct task_struct *next) return; mte_update_sctlr_user(next); + mte_update_gcr_excl(next); /* * Check if an async tag exception occurred at EL1. @@ -243,6 +297,7 @@ long set_mte_ctrl(struct task_struct *task, unsigned long arg) if (task == current) { preempt_disable(); mte_update_sctlr_user(task); + mte_update_gcr_excl(task); update_sctlr_el1(task->thread.sctlr_user); preempt_enable(); } diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c index 6dbcc89f6662..d9dfa82c1f18 100644 --- a/arch/arm64/kernel/probes/kprobes.c +++ b/arch/arm64/kernel/probes/kprobes.c @@ -7,6 +7,9 @@ * Copyright (C) 2013 Linaro Limited. * Author: Sandeepa Prabhu <sandeepa.prabhu@linaro.org> */ + +#define pr_fmt(fmt) "kprobes: " fmt + #include <linux/extable.h> #include <linux/kasan.h> #include <linux/kernel.h> @@ -218,7 +221,7 @@ static int __kprobes reenter_kprobe(struct kprobe *p, break; case KPROBE_HIT_SS: case KPROBE_REENTER: - pr_warn("Unrecoverable kprobe detected.\n"); + pr_warn("Failed to recover from reentered kprobes.\n"); dump_kprobe(p); BUG(); break; @@ -398,18 +401,17 @@ int __init arch_populate_kprobe_blacklist(void) void __kprobes __used *trampoline_probe_handler(struct pt_regs *regs) { - return (void *)kretprobe_trampoline_handler(regs, &kretprobe_trampoline, - (void *)kernel_stack_pointer(regs)); + return (void *)kretprobe_trampoline_handler(regs, (void *)regs->regs[29]); } void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs) { ri->ret_addr = (kprobe_opcode_t *)regs->regs[30]; - ri->fp = (void *)kernel_stack_pointer(regs); + ri->fp = (void *)regs->regs[29]; /* replace return addr (x30) with trampoline */ - regs->regs[30] = (long)&kretprobe_trampoline; + regs->regs[30] = (long)&__kretprobe_trampoline; } int __kprobes arch_trampoline_kprobe(struct kprobe *p) diff --git a/arch/arm64/kernel/probes/kprobes_trampoline.S b/arch/arm64/kernel/probes/kprobes_trampoline.S index 288a84e253cc..9a6499bed58b 100644 --- a/arch/arm64/kernel/probes/kprobes_trampoline.S +++ b/arch/arm64/kernel/probes/kprobes_trampoline.S @@ -61,11 +61,14 @@ ldp x28, x29, [sp, #S_X28] .endm -SYM_CODE_START(kretprobe_trampoline) +SYM_CODE_START(__kretprobe_trampoline) sub sp, sp, #PT_REGS_SIZE save_all_base_regs + /* Setup a frame pointer. */ + add x29, sp, #S_FP + mov x0, sp bl trampoline_probe_handler /* @@ -74,9 +77,10 @@ SYM_CODE_START(kretprobe_trampoline) */ mov lr, x0 + /* The frame pointer (x29) is restored with other registers. */ restore_all_base_regs add sp, sp, #PT_REGS_SIZE ret -SYM_CODE_END(kretprobe_trampoline) +SYM_CODE_END(__kretprobe_trampoline) diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 40adb8cdbf5a..aacf2f5559a8 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -528,13 +528,11 @@ __notrace_funcgraph struct task_struct *__switch_to(struct task_struct *prev, return last; } -unsigned long get_wchan(struct task_struct *p) +unsigned long __get_wchan(struct task_struct *p) { struct stackframe frame; unsigned long stack_page, ret = 0; int count = 0; - if (!p || p == current || task_is_running(p)) - return 0; stack_page = (unsigned long)try_get_task_stack(p); if (!stack_page) diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index e26196a33cf4..88a9034fb9b5 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -725,10 +725,10 @@ static void sve_init_header_from_task(struct user_sve_header *header, if (test_tsk_thread_flag(target, TIF_SVE_VL_INHERIT)) header->flags |= SVE_PT_VL_INHERIT; - header->vl = target->thread.sve_vl; + header->vl = task_get_sve_vl(target); vq = sve_vq_from_vl(header->vl); - header->max_vl = sve_max_vl; + header->max_vl = sve_max_vl(); header->size = SVE_PT_SIZE(vq, header->flags); header->max_size = SVE_PT_SIZE(sve_vq_from_vl(header->max_vl), SVE_PT_REGS_SVE); @@ -820,7 +820,7 @@ static int sve_set(struct task_struct *target, goto out; /* Actual VL set may be less than the user asked for: */ - vq = sve_vq_from_vl(target->thread.sve_vl); + vq = sve_vq_from_vl(task_get_sve_vl(target)); /* Registers: FPSIMD-only case */ diff --git a/arch/arm64/kernel/relocate_kernel.S b/arch/arm64/kernel/relocate_kernel.S index b78ea5de97a4..f0a3df9e18a3 100644 --- a/arch/arm64/kernel/relocate_kernel.S +++ b/arch/arm64/kernel/relocate_kernel.S @@ -4,6 +4,8 @@ * * Copyright (C) Linaro. * Copyright (C) Huawei Futurewei Technologies. + * Copyright (C) 2021, Microsoft Corporation. + * Pasha Tatashin <pasha.tatashin@soleen.com> */ #include <linux/kexec.h> @@ -13,7 +15,16 @@ #include <asm/kexec.h> #include <asm/page.h> #include <asm/sysreg.h> +#include <asm/virt.h> +.macro turn_off_mmu tmp1, tmp2 + mov_q \tmp1, INIT_SCTLR_EL1_MMU_OFF + pre_disable_mmu_workaround + msr sctlr_el1, \tmp1 + isb +.endm + +.section ".kexec_relocate.text", "ax" /* * arm64_relocate_new_kernel - Put a 2nd stage image in place and boot it. * @@ -27,33 +38,24 @@ */ SYM_CODE_START(arm64_relocate_new_kernel) /* Setup the list loop variables. */ - mov x18, x2 /* x18 = dtb address */ - mov x17, x1 /* x17 = kimage_start */ - mov x16, x0 /* x16 = kimage_head */ - mov x14, xzr /* x14 = entry ptr */ - mov x13, xzr /* x13 = copy dest */ - /* Check if the new image needs relocation. */ - tbnz x16, IND_DONE_BIT, .Ldone + ldr x18, [x0, #KIMAGE_ARCH_ZERO_PAGE] /* x18 = zero page for BBM */ + ldr x17, [x0, #KIMAGE_ARCH_TTBR1] /* x17 = linear map copy */ + ldr x16, [x0, #KIMAGE_HEAD] /* x16 = kimage_head */ + ldr x22, [x0, #KIMAGE_ARCH_PHYS_OFFSET] /* x22 phys_offset */ raw_dcache_line_size x15, x1 /* x15 = dcache line size */ + break_before_make_ttbr_switch x18, x17, x1, x2 /* set linear map */ .Lloop: and x12, x16, PAGE_MASK /* x12 = addr */ - + sub x12, x12, x22 /* Convert x12 to virt */ /* Test the entry flags. */ .Ltest_source: tbz x16, IND_SOURCE_BIT, .Ltest_indirection /* Invalidate dest page to PoC. */ - mov x2, x13 - add x20, x2, #PAGE_SIZE - sub x1, x15, #1 - bic x2, x2, x1 -2: dc ivac, x2 - add x2, x2, x15 - cmp x2, x20 - b.lo 2b - dsb sy - + mov x19, x13 copy_page x13, x12, x1, x2, x3, x4, x5, x6, x7, x8 + add x1, x19, #PAGE_SIZE + dcache_by_myline_op civac, sy, x19, x1, x15, x20 b .Lnext .Ltest_indirection: tbz x16, IND_INDIRECTION_BIT, .Ltest_destination @@ -65,31 +67,26 @@ SYM_CODE_START(arm64_relocate_new_kernel) .Lnext: ldr x16, [x14], #8 /* entry = *ptr++ */ tbz x16, IND_DONE_BIT, .Lloop /* while (!(entry & DONE)) */ -.Ldone: /* wait for writes from copy_page to finish */ dsb nsh ic iallu dsb nsh isb + ldr x4, [x0, #KIMAGE_START] /* relocation start */ + ldr x1, [x0, #KIMAGE_ARCH_EL2_VECTORS] /* relocation start */ + ldr x0, [x0, #KIMAGE_ARCH_DTB_MEM] /* dtb address */ + turn_off_mmu x12, x13 /* Start new image. */ - mov x0, x18 - mov x1, xzr + cbz x1, .Lel1 + mov x1, x4 /* relocation start */ + mov x2, x0 /* dtb address */ + mov x3, xzr + mov x4, xzr + mov x0, #HVC_SOFT_RESTART + hvc #0 /* Jumps from el2 */ +.Lel1: mov x2, xzr mov x3, xzr - br x17 - + br x4 /* Jumps from el1 */ SYM_CODE_END(arm64_relocate_new_kernel) - -.align 3 /* To keep the 64-bit values below naturally aligned. */ - -.Lcopy_end: -.org KEXEC_CONTROL_PAGE_SIZE - -/* - * arm64_relocate_new_kernel_size - Number of bytes to copy to the - * control_code_page. - */ -.globl arm64_relocate_new_kernel_size -arm64_relocate_new_kernel_size: - .quad .Lcopy_end - arm64_relocate_new_kernel diff --git a/arch/arm64/kernel/sdei.c b/arch/arm64/kernel/sdei.c index 47f77d1234cb..d20620a1c51a 100644 --- a/arch/arm64/kernel/sdei.c +++ b/arch/arm64/kernel/sdei.c @@ -202,7 +202,7 @@ unsigned long sdei_arch_get_entry_point(int conduit) * dropped to EL1 because we don't support VHE, then we can't support * SDEI. */ - if (is_hyp_mode_available() && !is_kernel_in_hyp_mode()) { + if (is_hyp_nvhe()) { pr_err("Not supported on this hardware/boot configuration\n"); goto out_err; } diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index c287b9407f28..8f6372b44b65 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -227,7 +227,7 @@ static int preserve_sve_context(struct sve_context __user *ctx) { int err = 0; u16 reserved[ARRAY_SIZE(ctx->__reserved)]; - unsigned int vl = current->thread.sve_vl; + unsigned int vl = task_get_sve_vl(current); unsigned int vq = 0; if (test_thread_flag(TIF_SVE)) @@ -266,7 +266,7 @@ static int restore_sve_fpsimd_context(struct user_ctxs *user) if (__copy_from_user(&sve, user->sve, sizeof(sve))) return -EFAULT; - if (sve.vl != current->thread.sve_vl) + if (sve.vl != task_get_sve_vl(current)) return -EINVAL; if (sve.head.size <= sizeof(*user->sve)) { @@ -594,10 +594,10 @@ static int setup_sigframe_layout(struct rt_sigframe_user_layout *user, unsigned int vq = 0; if (add_all || test_thread_flag(TIF_SVE)) { - int vl = sve_max_vl; + int vl = sve_max_vl(); if (!add_all) - vl = current->thread.sve_vl; + vl = task_get_sve_vl(current); vq = sve_vq_from_vl(vl); } diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c index 8982a2b78acf..c30624fff6ac 100644 --- a/arch/arm64/kernel/stacktrace.c +++ b/arch/arm64/kernel/stacktrace.c @@ -41,6 +41,9 @@ void start_backtrace(struct stackframe *frame, unsigned long fp, #ifdef CONFIG_FUNCTION_GRAPH_TRACER frame->graph = 0; #endif +#ifdef CONFIG_KRETPROBES + frame->kr_cur = NULL; +#endif /* * Prime the first unwind. @@ -129,6 +132,10 @@ int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame) frame->pc = ret_stack->ret; } #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ +#ifdef CONFIG_KRETPROBES + if (is_kretprobe_trampoline(frame->pc)) + frame->pc = kretprobe_find_ret_addr(tsk, (void *)frame->fp, &frame->kr_cur); +#endif frame->pc = ptrauth_strip_insn_pac(frame->pc); diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c index 4dd14a6620c1..9ab78ad826e2 100644 --- a/arch/arm64/kernel/topology.c +++ b/arch/arm64/kernel/topology.c @@ -103,6 +103,8 @@ int __init parse_acpi_topology(void) cpu_topology[cpu].thread_id = -1; cpu_topology[cpu].core_id = topology_id; } + topology_id = find_acpi_cpu_topology_cluster(cpu); + cpu_topology[cpu].cluster_id = topology_id; topology_id = find_acpi_cpu_topology_package(cpu); cpu_topology[cpu].package_id = topology_id; diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index b03e383d944a..7b21213a570f 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -400,11 +400,11 @@ static int call_undef_hook(struct pt_regs *regs) unsigned long flags; u32 instr; int (*fn)(struct pt_regs *regs, u32 instr) = NULL; - void __user *pc = (void __user *)instruction_pointer(regs); + unsigned long pc = instruction_pointer(regs); if (!user_mode(regs)) { __le32 instr_le; - if (get_kernel_nofault(instr_le, (__force __le32 *)pc)) + if (get_kernel_nofault(instr_le, (__le32 *)pc)) goto exit; instr = le32_to_cpu(instr_le); } else if (compat_thumb_mode(regs)) { @@ -527,14 +527,9 @@ NOKPROBE_SYMBOL(do_ptrauth_fault); "1: " insn ", %1\n" \ " mov %w0, #0\n" \ "2:\n" \ - " .pushsection .fixup,\"ax\"\n" \ - " .align 2\n" \ - "3: mov %w0, %w2\n" \ - " b 2b\n" \ - " .popsection\n" \ - _ASM_EXTABLE(1b, 3b) \ + _ASM_EXTABLE_UACCESS_ERR(1b, 2b, %w0) \ : "=r" (res) \ - : "r" (address), "i" (-EFAULT)); \ + : "r" (address)); \ uaccess_ttbr0_disable(); \ } @@ -654,6 +649,12 @@ static const struct sys64_hook sys64_hooks[] = { .handler = cntvct_read_handler, }, { + /* Trap read access to CNTVCTSS_EL0 */ + .esr_mask = ESR_ELx_SYS64_ISS_SYS_OP_MASK, + .esr_val = ESR_ELx_SYS64_ISS_SYS_CNTVCTSS, + .handler = cntvct_read_handler, + }, + { /* Trap read access to CNTFRQ_EL0 */ .esr_mask = ESR_ELx_SYS64_ISS_SYS_OP_MASK, .esr_val = ESR_ELx_SYS64_ISS_SYS_CNTFRQ, @@ -729,6 +730,11 @@ static const struct sys64_hook cp15_64_hooks[] = { .esr_val = ESR_ELx_CP15_64_ISS_SYS_CNTVCT, .handler = compat_cntvct_read_handler, }, + { + .esr_mask = ESR_ELx_CP15_64_ISS_SYS_MASK, + .esr_val = ESR_ELx_CP15_64_ISS_SYS_CNTVCTSS, + .handler = compat_cntvct_read_handler, + }, {}, }; diff --git a/arch/arm64/kernel/vdso32/Makefile b/arch/arm64/kernel/vdso32/Makefile index 3dba0c4f8f42..c8fec493a450 100644 --- a/arch/arm64/kernel/vdso32/Makefile +++ b/arch/arm64/kernel/vdso32/Makefile @@ -10,18 +10,15 @@ include $(srctree)/lib/vdso/Makefile # Same as cc-*option, but using CC_COMPAT instead of CC ifeq ($(CONFIG_CC_IS_CLANG), y) -CC_COMPAT_CLANG_FLAGS := --target=$(notdir $(CROSS_COMPILE_COMPAT:%-=%)) - CC_COMPAT ?= $(CC) -CC_COMPAT += $(CC_COMPAT_CLANG_FLAGS) - -ifneq ($(LLVM),) -LD_COMPAT ?= $(LD) +CC_COMPAT += --target=arm-linux-gnueabi else -LD_COMPAT ?= $(CROSS_COMPILE_COMPAT)ld +CC_COMPAT ?= $(CROSS_COMPILE_COMPAT)gcc endif + +ifeq ($(CONFIG_LD_IS_LLD), y) +LD_COMPAT ?= $(LD) else -CC_COMPAT ?= $(CROSS_COMPILE_COMPAT)gcc LD_COMPAT ?= $(CROSS_COMPILE_COMPAT)ld endif @@ -29,8 +26,6 @@ cc32-option = $(call try-run,\ $(CC_COMPAT) $(1) -c -x c /dev/null -o "$$TMP",$(1),$(2)) cc32-disable-warning = $(call try-run,\ $(CC_COMPAT) -W$(strip $(1)) -c -x c /dev/null -o "$$TMP",-Wno-$(strip $(1))) -cc32-as-instr = $(call try-run,\ - printf "%b\n" "$(1)" | $(CC_COMPAT) $(VDSO_AFLAGS) -c -x assembler -o "$$TMP" -,$(2),$(3)) # We cannot use the global flags to compile the vDSO files, the main reason # being that the 32-bit compiler may be older than the main (64-bit) compiler @@ -40,16 +35,13 @@ cc32-as-instr = $(call try-run,\ # As a result we set our own flags here. # KBUILD_CPPFLAGS and NOSTDINC_FLAGS from top-level Makefile -VDSO_CPPFLAGS := -DBUILD_VDSO -D__KERNEL__ -nostdinc -isystem $(shell $(CC_COMPAT) -print-file-name=include) +VDSO_CPPFLAGS := -DBUILD_VDSO -D__KERNEL__ -nostdinc +VDSO_CPPFLAGS += -isystem $(shell $(CC_COMPAT) -print-file-name=include 2>/dev/null) VDSO_CPPFLAGS += $(LINUXINCLUDE) # Common C and assembly flags # From top-level Makefile VDSO_CAFLAGS := $(VDSO_CPPFLAGS) -ifneq ($(shell $(CC_COMPAT) --version 2>&1 | head -n 1 | grep clang),) -VDSO_CAFLAGS += --target=$(notdir $(CROSS_COMPILE_COMPAT:%-=%)) -endif - VDSO_CAFLAGS += $(call cc32-option,-fno-PIE) ifdef CONFIG_DEBUG_INFO VDSO_CAFLAGS += -g @@ -67,13 +59,7 @@ endif # From arm vDSO Makefile VDSO_CAFLAGS += -fPIC -fno-builtin -fno-stack-protector VDSO_CAFLAGS += -DDISABLE_BRANCH_PROFILING - - -# Try to compile for ARMv8. If the compiler is too old and doesn't support it, -# fall back to v7. There is no easy way to check for what architecture the code -# is being compiled, so define a macro specifying that (see arch/arm/Makefile). -VDSO_CAFLAGS += $(call cc32-option,-march=armv8-a -D__LINUX_ARM_ARCH__=8,\ - -march=armv7-a -D__LINUX_ARM_ARCH__=7) +VDSO_CAFLAGS += -march=armv8-a VDSO_CFLAGS := $(VDSO_CAFLAGS) VDSO_CFLAGS += -DENABLE_COMPAT_VDSO=1 @@ -113,12 +99,6 @@ endif VDSO_AFLAGS := $(VDSO_CAFLAGS) VDSO_AFLAGS += -D__ASSEMBLY__ -# Check for binutils support for dmb ishld -dmbinstr := $(call cc32-as-instr,dmb ishld,-DCONFIG_AS_DMB_ISHLD=1) - -VDSO_CFLAGS += $(dmbinstr) -VDSO_AFLAGS += $(dmbinstr) - # From arm vDSO Makefile VDSO_LDFLAGS += -Bsymbolic --no-undefined -soname=linux-vdso.so.1 VDSO_LDFLAGS += -z max-page-size=4096 -z common-page-size=4096 diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index f6b1a88245db..50bab186c49b 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -57,12 +57,13 @@ #define SBSS_ALIGN 0 #endif -#define RO_EXCEPTION_TABLE_ALIGN 8 +#define RO_EXCEPTION_TABLE_ALIGN 4 #define RUNTIME_DISCARD_EXIT #include <asm-generic/vmlinux.lds.h> #include <asm/cache.h> #include <asm/kernel-pgtable.h> +#include <asm/kexec.h> #include <asm/memory.h> #include <asm/page.h> @@ -100,6 +101,16 @@ jiffies = jiffies_64; #define HIBERNATE_TEXT #endif +#ifdef CONFIG_KEXEC_CORE +#define KEXEC_TEXT \ + . = ALIGN(SZ_4K); \ + __relocate_new_kernel_start = .; \ + *(.kexec_relocate.text) \ + __relocate_new_kernel_end = .; +#else +#define KEXEC_TEXT +#endif + #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 #define TRAMP_TEXT \ . = ALIGN(PAGE_SIZE); \ @@ -160,8 +171,8 @@ SECTIONS HYPERVISOR_TEXT IDMAP_TEXT HIBERNATE_TEXT + KEXEC_TEXT TRAMP_TEXT - *(.fixup) *(.gnu.warning) . = ALIGN(16); *(.got) /* Global offset table */ @@ -348,3 +359,10 @@ ASSERT(swapper_pg_dir - reserved_pg_dir == RESERVED_SWAPPER_OFFSET, ASSERT(swapper_pg_dir - tramp_pg_dir == TRAMP_SWAPPER_OFFSET, "TRAMP_SWAPPER_OFFSET is wrong!") #endif + +#ifdef CONFIG_KEXEC_CORE +/* kexec relocation code should fit into one KEXEC_CONTROL_PAGE_SIZE */ +ASSERT(__relocate_new_kernel_end - (__relocate_new_kernel_start & ~(SZ_4K - 1)) + <= SZ_4K, "kexec relocation code is too big or misaligned") +ASSERT(KEXEC_CONTROL_PAGE_SIZE >= SZ_4K, "KEXEC_CONTROL_PAGE_SIZE is broken") +#endif diff --git a/arch/arm64/kvm/hyp/fpsimd.S b/arch/arm64/kvm/hyp/fpsimd.S index 3c635929771a..e950875e31ce 100644 --- a/arch/arm64/kvm/hyp/fpsimd.S +++ b/arch/arm64/kvm/hyp/fpsimd.S @@ -21,11 +21,13 @@ SYM_FUNC_START(__fpsimd_restore_state) SYM_FUNC_END(__fpsimd_restore_state) SYM_FUNC_START(__sve_restore_state) - __sve_load 0, x1, 2 + mov x2, #1 + sve_load 0, x1, x2, 3 ret SYM_FUNC_END(__sve_restore_state) SYM_FUNC_START(__sve_save_state) - sve_save 0, x1, 2 + mov x2, #1 + sve_save 0, x1, x2, 3 ret SYM_FUNC_END(__sve_save_state) diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index c6e98c7e918b..7a0af1d39303 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -31,8 +31,12 @@ #include <asm/processor.h> #include <asm/thread_info.h> -extern struct exception_table_entry __start___kvm_ex_table; -extern struct exception_table_entry __stop___kvm_ex_table; +struct kvm_exception_table_entry { + int insn, fixup; +}; + +extern struct kvm_exception_table_entry __start___kvm_ex_table; +extern struct kvm_exception_table_entry __stop___kvm_ex_table; /* Check whether the FP regs were dirtied while in the host-side run loop: */ static inline bool update_fp_enabled(struct kvm_vcpu *vcpu) @@ -469,7 +473,7 @@ static inline void __kvm_unexpected_el2_exception(void) { extern char __guest_exit_panic[]; unsigned long addr, fixup; - struct exception_table_entry *entry, *end; + struct kvm_exception_table_entry *entry, *end; unsigned long elr_el2 = read_sysreg(elr_el2); entry = &__start___kvm_ex_table; diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c index acad22ebac12..426bd7fbc3fd 100644 --- a/arch/arm64/kvm/reset.c +++ b/arch/arm64/kvm/reset.c @@ -46,7 +46,7 @@ unsigned int kvm_sve_max_vl; int kvm_arm_init_sve(void) { if (system_supports_sve()) { - kvm_sve_max_vl = sve_max_virtualisable_vl; + kvm_sve_max_vl = sve_max_virtualisable_vl(); /* * The get_sve_reg()/set_sve_reg() ioctl interface will need @@ -61,7 +61,7 @@ int kvm_arm_init_sve(void) * Don't even try to make use of vector lengths that * aren't available on all CPUs, for now: */ - if (kvm_sve_max_vl < sve_max_vl) + if (kvm_sve_max_vl < sve_max_vl()) pr_warn("KVM: SVE vector length for guests limited to %u bytes\n", kvm_sve_max_vl); } @@ -102,7 +102,7 @@ static int kvm_vcpu_finalize_sve(struct kvm_vcpu *vcpu) * kvm_arm_init_arch_resources(), kvm_vcpu_enable_sve() and * set_sve_vls(). Double-check here just to be sure: */ - if (WARN_ON(!sve_vl_valid(vl) || vl > sve_max_virtualisable_vl || + if (WARN_ON(!sve_vl_valid(vl) || vl > sve_max_virtualisable_vl() || vl > SVE_VL_ARCH_MAX)) return -EIO; diff --git a/arch/arm64/lib/clear_user.S b/arch/arm64/lib/clear_user.S index a7efb2ad2a1c..a5a5f5b97b17 100644 --- a/arch/arm64/lib/clear_user.S +++ b/arch/arm64/lib/clear_user.S @@ -4,7 +4,7 @@ */ #include <linux/linkage.h> -#include <asm/assembler.h> +#include <asm/asm-uaccess.h> .text @@ -45,13 +45,11 @@ USER(9f, sttrh wzr, [x0]) USER(7f, sttrb wzr, [x2, #-1]) 5: mov x0, #0 ret -SYM_FUNC_END(__arch_clear_user) -EXPORT_SYMBOL(__arch_clear_user) - .section .fixup,"ax" - .align 2 + // Exception fixups 7: sub x0, x2, #5 // Adjust for faulting on the final byte... 8: add x0, x0, #4 // ...or the second word of the 4-7 byte case 9: sub x0, x2, x0 ret - .previous +SYM_FUNC_END(__arch_clear_user) +EXPORT_SYMBOL(__arch_clear_user) diff --git a/arch/arm64/lib/copy_from_user.S b/arch/arm64/lib/copy_from_user.S index 2cf999e41d30..34e317907524 100644 --- a/arch/arm64/lib/copy_from_user.S +++ b/arch/arm64/lib/copy_from_user.S @@ -60,11 +60,8 @@ SYM_FUNC_START(__arch_copy_from_user) #include "copy_template.S" mov x0, #0 // Nothing to copy ret -SYM_FUNC_END(__arch_copy_from_user) -EXPORT_SYMBOL(__arch_copy_from_user) - .section .fixup,"ax" - .align 2 + // Exception fixups 9997: cmp dst, dstin b.ne 9998f // Before being absolutely sure we couldn't copy anything, try harder @@ -72,4 +69,5 @@ USER(9998f, ldtrb tmp1w, [srcin]) strb tmp1w, [dst], #1 9998: sub x0, end, dst // bytes not copied ret - .previous +SYM_FUNC_END(__arch_copy_from_user) +EXPORT_SYMBOL(__arch_copy_from_user) diff --git a/arch/arm64/lib/copy_to_user.S b/arch/arm64/lib/copy_to_user.S index 9f380eecf653..802231772608 100644 --- a/arch/arm64/lib/copy_to_user.S +++ b/arch/arm64/lib/copy_to_user.S @@ -59,11 +59,8 @@ SYM_FUNC_START(__arch_copy_to_user) #include "copy_template.S" mov x0, #0 ret -SYM_FUNC_END(__arch_copy_to_user) -EXPORT_SYMBOL(__arch_copy_to_user) - .section .fixup,"ax" - .align 2 + // Exception fixups 9997: cmp dst, dstin b.ne 9998f // Before being absolutely sure we couldn't copy anything, try harder @@ -72,4 +69,5 @@ USER(9998f, sttrb tmp1w, [dst]) add dst, dst, #1 9998: sub x0, end, dst // bytes not copied ret - .previous +SYM_FUNC_END(__arch_copy_to_user) +EXPORT_SYMBOL(__arch_copy_to_user) diff --git a/arch/arm64/mm/Makefile b/arch/arm64/mm/Makefile index f188c9092696..ff1e800ba7a1 100644 --- a/arch/arm64/mm/Makefile +++ b/arch/arm64/mm/Makefile @@ -7,6 +7,7 @@ obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o obj-$(CONFIG_PTDUMP_CORE) += ptdump.o obj-$(CONFIG_PTDUMP_DEBUGFS) += ptdump_debugfs.o obj-$(CONFIG_TRANS_TABLE) += trans_pgd.o +obj-$(CONFIG_TRANS_TABLE) += trans_pgd-asm.o obj-$(CONFIG_DEBUG_VIRTUAL) += physaddr.o obj-$(CONFIG_ARM64_MTE) += mteswap.o KASAN_SANITIZE_physaddr.o += n diff --git a/arch/arm64/mm/extable.c b/arch/arm64/mm/extable.c index aa0060178343..c3d53811a15e 100644 --- a/arch/arm64/mm/extable.c +++ b/arch/arm64/mm/extable.c @@ -3,20 +3,87 @@ * Based on arch/arm/mm/extable.c */ +#include <linux/bitfield.h> #include <linux/extable.h> #include <linux/uaccess.h> -int fixup_exception(struct pt_regs *regs) +#include <asm/asm-extable.h> +#include <asm/ptrace.h> + +typedef bool (*ex_handler_t)(const struct exception_table_entry *, + struct pt_regs *); + +static inline unsigned long +get_ex_fixup(const struct exception_table_entry *ex) +{ + return ((unsigned long)&ex->fixup + ex->fixup); +} + +static bool ex_handler_fixup(const struct exception_table_entry *ex, + struct pt_regs *regs) +{ + regs->pc = get_ex_fixup(ex); + return true; +} + +static bool ex_handler_uaccess_err_zero(const struct exception_table_entry *ex, + struct pt_regs *regs) +{ + int reg_err = FIELD_GET(EX_DATA_REG_ERR, ex->data); + int reg_zero = FIELD_GET(EX_DATA_REG_ZERO, ex->data); + + pt_regs_write_reg(regs, reg_err, -EFAULT); + pt_regs_write_reg(regs, reg_zero, 0); + + regs->pc = get_ex_fixup(ex); + return true; +} + +static bool +ex_handler_load_unaligned_zeropad(const struct exception_table_entry *ex, + struct pt_regs *regs) +{ + int reg_data = FIELD_GET(EX_DATA_REG_DATA, ex->type); + int reg_addr = FIELD_GET(EX_DATA_REG_ADDR, ex->type); + unsigned long data, addr, offset; + + addr = pt_regs_read_reg(regs, reg_addr); + + offset = addr & 0x7UL; + addr &= ~0x7UL; + + data = *(unsigned long*)addr; + +#ifndef __AARCH64EB__ + data >>= 8 * offset; +#else + data <<= 8 * offset; +#endif + + pt_regs_write_reg(regs, reg_data, data); + + regs->pc = get_ex_fixup(ex); + return true; +} + +bool fixup_exception(struct pt_regs *regs) { - const struct exception_table_entry *fixup; + const struct exception_table_entry *ex; - fixup = search_exception_tables(instruction_pointer(regs)); - if (!fixup) - return 0; + ex = search_exception_tables(instruction_pointer(regs)); + if (!ex) + return false; - if (in_bpf_jit(regs)) - return arm64_bpf_fixup_exception(fixup, regs); + switch (ex->type) { + case EX_TYPE_FIXUP: + return ex_handler_fixup(ex, regs); + case EX_TYPE_BPF: + return ex_handler_bpf(ex, regs); + case EX_TYPE_UACCESS_ERR_ZERO: + return ex_handler_uaccess_err_zero(ex, regs); + case EX_TYPE_LOAD_UNALIGNED_ZEROPAD: + return ex_handler_load_unaligned_zeropad(ex, regs); + } - regs->pc = (unsigned long)&fixup->fixup + fixup->fixup; - return 1; + BUG(); } diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c index 23505fc35324..ffb9c229610a 100644 --- a/arch/arm64/mm/hugetlbpage.c +++ b/arch/arm64/mm/hugetlbpage.c @@ -40,11 +40,11 @@ void __init arm64_hugetlb_cma_reserve(void) { int order; -#ifdef CONFIG_ARM64_4K_PAGES - order = PUD_SHIFT - PAGE_SHIFT; -#else - order = CONT_PMD_SHIFT + PMD_SHIFT - PAGE_SHIFT; -#endif + if (pud_sect_supported()) + order = PUD_SHIFT - PAGE_SHIFT; + else + order = CONT_PMD_SHIFT - PAGE_SHIFT; + /* * HugeTLB CMA reservation is required for gigantic * huge pages which could not be allocated via the @@ -62,8 +62,9 @@ bool arch_hugetlb_migration_supported(struct hstate *h) size_t pagesize = huge_page_size(h); switch (pagesize) { -#ifdef CONFIG_ARM64_4K_PAGES +#ifndef __PAGETABLE_PMD_FOLDED case PUD_SIZE: + return pud_sect_supported(); #endif case PMD_SIZE: case CONT_PMD_SIZE: @@ -126,8 +127,11 @@ static inline int num_contig_ptes(unsigned long size, size_t *pgsize) *pgsize = size; switch (size) { -#ifdef CONFIG_ARM64_4K_PAGES +#ifndef __PAGETABLE_PMD_FOLDED case PUD_SIZE: + if (pud_sect_supported()) + contig_ptes = 1; + break; #endif case PMD_SIZE: contig_ptes = 1; @@ -489,9 +493,9 @@ void huge_ptep_clear_flush(struct vm_area_struct *vma, static int __init hugetlbpage_init(void) { -#ifdef CONFIG_ARM64_4K_PAGES - hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT); -#endif + if (pud_sect_supported()) + hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT); + hugetlb_add_hstate(CONT_PMD_SHIFT - PAGE_SHIFT); hugetlb_add_hstate(PMD_SHIFT - PAGE_SHIFT); hugetlb_add_hstate(CONT_PTE_SHIFT - PAGE_SHIFT); @@ -503,8 +507,9 @@ arch_initcall(hugetlbpage_init); bool __init arch_hugetlb_valid_size(unsigned long size) { switch (size) { -#ifdef CONFIG_ARM64_4K_PAGES +#ifndef __PAGETABLE_PMD_FOLDED case PUD_SIZE: + return pud_sect_supported(); #endif case CONT_PMD_SIZE: case PMD_SIZE: diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 37a81754d9b6..a8834434af99 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -160,43 +160,6 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max) free_area_init(max_zone_pfns); } -int pfn_valid(unsigned long pfn) -{ - phys_addr_t addr = PFN_PHYS(pfn); - struct mem_section *ms; - - /* - * Ensure the upper PAGE_SHIFT bits are clear in the - * pfn. Else it might lead to false positives when - * some of the upper bits are set, but the lower bits - * match a valid pfn. - */ - if (PHYS_PFN(addr) != pfn) - return 0; - - if (pfn_to_section_nr(pfn) >= NR_MEM_SECTIONS) - return 0; - - ms = __pfn_to_section(pfn); - if (!valid_section(ms)) - return 0; - - /* - * ZONE_DEVICE memory does not have the memblock entries. - * memblock_is_map_memory() check for ZONE_DEVICE based - * addresses will always fail. Even the normal hotplugged - * memory will never have MEMBLOCK_NOMAP flag set in their - * memblock entries. Skip memblock search for all non early - * memory sections covering all of hotplug memory including - * both normal and ZONE_DEVICE based. - */ - if (!early_section(ms)) - return pfn_section_valid(ms, pfn); - - return memblock_is_memory(addr); -} -EXPORT_SYMBOL(pfn_valid); - int pfn_is_map_memory(unsigned long pfn) { phys_addr_t addr = PFN_PHYS(pfn); @@ -416,8 +379,6 @@ void __init mem_init(void) else if (!xen_swiotlb_detect()) swiotlb_force = SWIOTLB_NO_FORCE; - set_max_mapnr(max_pfn - PHYS_PFN_OFFSET); - /* this will put all unused low memory onto the freelists */ memblock_free_all(); diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index cfd9deb347c3..fd85b51b9d50 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -1499,6 +1499,11 @@ int arch_add_memory(int nid, u64 start, u64 size, if (ret) __remove_pgd_mapping(swapper_pg_dir, __phys_to_virt(start), size); + else { + max_pfn = PFN_UP(start + size); + max_low_pfn = max_pfn; + } + return ret; } diff --git a/arch/arm64/mm/trans_pgd-asm.S b/arch/arm64/mm/trans_pgd-asm.S new file mode 100644 index 000000000000..021c31573bcb --- /dev/null +++ b/arch/arm64/mm/trans_pgd-asm.S @@ -0,0 +1,65 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +/* + * Copyright (c) 2021, Microsoft Corporation. + * Pasha Tatashin <pasha.tatashin@soleen.com> + */ + +#include <linux/linkage.h> +#include <asm/assembler.h> +#include <asm/kvm_asm.h> + +.macro invalid_vector label +SYM_CODE_START_LOCAL(\label) + .align 7 + b \label +SYM_CODE_END(\label) +.endm + +.macro el1_sync_vector +SYM_CODE_START_LOCAL(el1_sync) + .align 7 + cmp x0, #HVC_SET_VECTORS /* Called from hibernate */ + b.ne 1f + msr vbar_el2, x1 + mov x0, xzr + eret +1: cmp x0, #HVC_SOFT_RESTART /* Called from kexec */ + b.ne 2f + mov x0, x2 + mov x2, x4 + mov x4, x1 + mov x1, x3 + br x4 +2: /* Unexpected argument, set an error */ + mov_q x0, HVC_STUB_ERR + eret +SYM_CODE_END(el1_sync) +.endm + +SYM_CODE_START(trans_pgd_stub_vectors) + invalid_vector hyp_stub_el2t_sync_invalid // Synchronous EL2t + invalid_vector hyp_stub_el2t_irq_invalid // IRQ EL2t + invalid_vector hyp_stub_el2t_fiq_invalid // FIQ EL2t + invalid_vector hyp_stub_el2t_error_invalid // Error EL2t + + invalid_vector hyp_stub_el2h_sync_invalid // Synchronous EL2h + invalid_vector hyp_stub_el2h_irq_invalid // IRQ EL2h + invalid_vector hyp_stub_el2h_fiq_invalid // FIQ EL2h + invalid_vector hyp_stub_el2h_error_invalid // Error EL2h + + el1_sync_vector // Synchronous 64-bit EL1 + invalid_vector hyp_stub_el1_irq_invalid // IRQ 64-bit EL1 + invalid_vector hyp_stub_el1_fiq_invalid // FIQ 64-bit EL1 + invalid_vector hyp_stub_el1_error_invalid // Error 64-bit EL1 + + invalid_vector hyp_stub_32b_el1_sync_invalid // Synchronous 32-bit EL1 + invalid_vector hyp_stub_32b_el1_irq_invalid // IRQ 32-bit EL1 + invalid_vector hyp_stub_32b_el1_fiq_invalid // FIQ 32-bit EL1 + invalid_vector hyp_stub_32b_el1_error_invalid // Error 32-bit EL1 + .align 11 +SYM_INNER_LABEL(__trans_pgd_stub_vectors_end, SYM_L_LOCAL) +SYM_CODE_END(trans_pgd_stub_vectors) + +# Check the trans_pgd_stub_vectors didn't overflow +.org . - (__trans_pgd_stub_vectors_end - trans_pgd_stub_vectors) + SZ_2K diff --git a/arch/arm64/mm/trans_pgd.c b/arch/arm64/mm/trans_pgd.c index 527f0a39c3da..d7da8ca40d2e 100644 --- a/arch/arm64/mm/trans_pgd.c +++ b/arch/arm64/mm/trans_pgd.c @@ -5,8 +5,8 @@ * * This file derived from: arch/arm64/kernel/hibernate.c * - * Copyright (c) 2020, Microsoft Corporation. - * Pavel Tatashin <pasha.tatashin@soleen.com> + * Copyright (c) 2021, Microsoft Corporation. + * Pasha Tatashin <pasha.tatashin@soleen.com> * */ @@ -218,63 +218,6 @@ int trans_pgd_create_copy(struct trans_pgd_info *info, pgd_t **dst_pgdp, } /* - * Add map entry to trans_pgd for a base-size page at PTE level. - * info: contains allocator and its argument - * trans_pgd: page table in which new map is added. - * page: page to be mapped. - * dst_addr: new VA address for the page - * pgprot: protection for the page. - * - * Returns 0 on success, and -ENOMEM on failure. - */ -int trans_pgd_map_page(struct trans_pgd_info *info, pgd_t *trans_pgd, - void *page, unsigned long dst_addr, pgprot_t pgprot) -{ - pgd_t *pgdp; - p4d_t *p4dp; - pud_t *pudp; - pmd_t *pmdp; - pte_t *ptep; - - pgdp = pgd_offset_pgd(trans_pgd, dst_addr); - if (pgd_none(READ_ONCE(*pgdp))) { - p4dp = trans_alloc(info); - if (!pgdp) - return -ENOMEM; - pgd_populate(NULL, pgdp, p4dp); - } - - p4dp = p4d_offset(pgdp, dst_addr); - if (p4d_none(READ_ONCE(*p4dp))) { - pudp = trans_alloc(info); - if (!pudp) - return -ENOMEM; - p4d_populate(NULL, p4dp, pudp); - } - - pudp = pud_offset(p4dp, dst_addr); - if (pud_none(READ_ONCE(*pudp))) { - pmdp = trans_alloc(info); - if (!pmdp) - return -ENOMEM; - pud_populate(NULL, pudp, pmdp); - } - - pmdp = pmd_offset(pudp, dst_addr); - if (pmd_none(READ_ONCE(*pmdp))) { - ptep = trans_alloc(info); - if (!ptep) - return -ENOMEM; - pmd_populate_kernel(NULL, pmdp, ptep); - } - - ptep = pte_offset_kernel(pmdp, dst_addr); - set_pte(ptep, pfn_pte(virt_to_pfn(page), pgprot)); - - return 0; -} - -/* * The page we want to idmap may be outside the range covered by VA_BITS that * can be built using the kernel's p?d_populate() helpers. As a one off, for a * single page, we build these page tables bottom up and just assume that will @@ -322,3 +265,26 @@ int trans_pgd_idmap_page(struct trans_pgd_info *info, phys_addr_t *trans_ttbr0, return 0; } + +/* + * Create a copy of the vector table so we can call HVC_SET_VECTORS or + * HVC_SOFT_RESTART from contexts where the table may be overwritten. + */ +int trans_pgd_copy_el2_vectors(struct trans_pgd_info *info, + phys_addr_t *el2_vectors) +{ + void *hyp_stub = trans_alloc(info); + + if (!hyp_stub) + return -ENOMEM; + *el2_vectors = virt_to_phys(hyp_stub); + memcpy(hyp_stub, &trans_pgd_stub_vectors, ARM64_VECTOR_TABLE_LEN); + caches_clean_inval_pou((unsigned long)hyp_stub, + (unsigned long)hyp_stub + + ARM64_VECTOR_TABLE_LEN); + dcache_clean_inval_poc((unsigned long)hyp_stub, + (unsigned long)hyp_stub + + ARM64_VECTOR_TABLE_LEN); + + return 0; +} diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index 41c23f474ea6..3a8a7140a9bf 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -13,6 +13,7 @@ #include <linux/printk.h> #include <linux/slab.h> +#include <asm/asm-extable.h> #include <asm/byteorder.h> #include <asm/cacheflush.h> #include <asm/debug-monitors.h> @@ -358,15 +359,15 @@ static void build_epilogue(struct jit_ctx *ctx) #define BPF_FIXUP_OFFSET_MASK GENMASK(26, 0) #define BPF_FIXUP_REG_MASK GENMASK(31, 27) -int arm64_bpf_fixup_exception(const struct exception_table_entry *ex, - struct pt_regs *regs) +bool ex_handler_bpf(const struct exception_table_entry *ex, + struct pt_regs *regs) { off_t offset = FIELD_GET(BPF_FIXUP_OFFSET_MASK, ex->fixup); int dst_reg = FIELD_GET(BPF_FIXUP_REG_MASK, ex->fixup); regs->regs[dst_reg] = 0; regs->pc = (unsigned long)&ex->fixup - offset; - return 1; + return true; } /* For accesses to BTF pointers, add an entry to the exception table */ @@ -412,6 +413,8 @@ static int add_exception_handler(const struct bpf_insn *insn, ex->fixup = FIELD_PREP(BPF_FIXUP_OFFSET_MASK, offset) | FIELD_PREP(BPF_FIXUP_REG_MASK, dst_reg); + ex->type = EX_TYPE_BPF; + ctx->exentry_idx++; return 0; } @@ -1136,6 +1139,11 @@ out: return prog; } +u64 bpf_jit_alloc_exec_limit(void) +{ + return BPF_JIT_REGION_SIZE; +} + void *bpf_jit_alloc_exec(unsigned long size) { return __vmalloc_node_range(size, PAGE_SIZE, BPF_JIT_REGION_START, diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps index 49305c2e6dfd..870c39537dd0 100644 --- a/arch/arm64/tools/cpucaps +++ b/arch/arm64/tools/cpucaps @@ -18,6 +18,7 @@ HAS_CRC32 HAS_DCPODP HAS_DCPOP HAS_E0PD +HAS_ECV HAS_EPAN HAS_GENERIC_AUTH HAS_GENERIC_AUTH_ARCH @@ -39,6 +40,7 @@ HW_DBM KVM_PROTECTED_MODE MISMATCHED_CACHE_TYPE MTE +MTE_ASYMM SPECTRE_V2 SPECTRE_V3A SPECTRE_V4 @@ -53,6 +55,9 @@ WORKAROUND_1418040 WORKAROUND_1463225 WORKAROUND_1508412 WORKAROUND_1542419 +WORKAROUND_TRBE_OVERWRITE_FILL_MODE +WORKAROUND_TSB_FLUSH_FAILURE +WORKAROUND_TRBE_WRITE_OUT_OF_RANGE WORKAROUND_CAVIUM_23154 WORKAROUND_CAVIUM_27456 WORKAROUND_CAVIUM_30115 |