diff options
Diffstat (limited to 'arch')
193 files changed, 2206 insertions, 1057 deletions
diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index 6d5eb8267e42..a045f3086047 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -9,6 +9,7 @@ config ARC def_bool y select ARC_TIMERS + select ARCH_HAS_PTE_SPECIAL select ARCH_HAS_SYNC_DMA_FOR_CPU select ARCH_HAS_SYNC_DMA_FOR_DEVICE select ARCH_HAS_SG_CHAIN @@ -28,8 +29,12 @@ config ARC select GENERIC_SMP_IDLE_THREAD select HAVE_ARCH_KGDB select HAVE_ARCH_TRACEHOOK + select HAVE_DEBUG_STACKOVERFLOW select HAVE_FUTEX_CMPXCHG if FUTEX + select HAVE_GENERIC_DMA_COHERENT select HAVE_IOREMAP_PROT + select HAVE_KERNEL_GZIP + select HAVE_KERNEL_LZMA select HAVE_KPROBES select HAVE_KRETPROBES select HAVE_MEMBLOCK @@ -44,11 +49,6 @@ config ARC select OF_EARLY_FLATTREE select OF_RESERVED_MEM select PERF_USE_VMALLOC if ARC_CACHE_VIPT_ALIASING - select HAVE_DEBUG_STACKOVERFLOW - select HAVE_GENERIC_DMA_COHERENT - select HAVE_KERNEL_GZIP - select HAVE_KERNEL_LZMA - select ARCH_HAS_PTE_SPECIAL config ARCH_HAS_CACHE_LINE_SIZE def_bool y @@ -149,7 +149,7 @@ config ARC_CPU_770 Support for ARC770 core introduced with Rel 4.10 (Summer 2011) This core has a bunch of cool new features: -MMU-v3: Variable Page Sz (4k, 8k, 16k), bigger J-TLB (128x4) - Shared Address Spaces (for sharing TLB entires in MMU) + Shared Address Spaces (for sharing TLB entries in MMU) -Caches: New Prog Model, Region Flush -Insns: endian swap, load-locked/store-conditional, time-stamp-ctr diff --git a/arch/arc/Makefile b/arch/arc/Makefile index fb026196aaab..644815c0516e 100644 --- a/arch/arc/Makefile +++ b/arch/arc/Makefile @@ -6,33 +6,11 @@ # published by the Free Software Foundation. # -ifeq ($(CROSS_COMPILE),) -ifndef CONFIG_CPU_BIG_ENDIAN -CROSS_COMPILE := arc-linux- -else -CROSS_COMPILE := arceb-linux- -endif -endif - KBUILD_DEFCONFIG := nsim_700_defconfig cflags-y += -fno-common -pipe -fno-builtin -mmedium-calls -D__linux__ cflags-$(CONFIG_ISA_ARCOMPACT) += -mA7 -cflags-$(CONFIG_ISA_ARCV2) += -mcpu=archs - -is_700 = $(shell $(CC) -dM -E - < /dev/null | grep -q "ARC700" && echo 1 || echo 0) - -ifdef CONFIG_ISA_ARCOMPACT -ifeq ($(is_700), 0) - $(error Toolchain not configured for ARCompact builds) -endif -endif - -ifdef CONFIG_ISA_ARCV2 -ifeq ($(is_700), 1) - $(error Toolchain not configured for ARCv2 builds) -endif -endif +cflags-$(CONFIG_ISA_ARCV2) += -mcpu=hs38 ifdef CONFIG_ARC_CURR_IN_REG # For a global register defintion, make sure it gets passed to every file @@ -43,10 +21,7 @@ ifdef CONFIG_ARC_CURR_IN_REG LINUXINCLUDE += -include ${src}/arch/arc/include/asm/current.h endif -upto_gcc44 := $(call cc-ifversion, -le, 0404, y) -atleast_gcc44 := $(call cc-ifversion, -ge, 0404, y) - -cflags-$(atleast_gcc44) += -fsection-anchors +cflags-y += -fsection-anchors cflags-$(CONFIG_ARC_HAS_LLSC) += -mlock cflags-$(CONFIG_ARC_HAS_SWAPE) += -mswape @@ -82,12 +57,7 @@ cflags-$(disable_small_data) += -mno-sdata -fcall-used-gp cflags-$(CONFIG_CPU_BIG_ENDIAN) += -mbig-endian ldflags-$(CONFIG_CPU_BIG_ENDIAN) += -EB -# STAR 9000518362: (fixed with binutils shipping with gcc 4.8) -# arc-linux-uclibc-ld (buildroot) or arceb-elf32-ld (EZChip) don't accept -# --build-id w/o "-marclinux". Default arc-elf32-ld is OK -ldflags-$(upto_gcc44) += -marclinux - -LIBGCC := $(shell $(CC) $(cflags-y) --print-libgcc-file-name) +LIBGCC = $(shell $(CC) $(cflags-y) --print-libgcc-file-name) # Modules with short calls might break for calls into builtin-kernel KBUILD_CFLAGS_MODULE += -mlong-calls -mno-millicode diff --git a/arch/arc/boot/dts/axc003.dtsi b/arch/arc/boot/dts/axc003.dtsi index dc91c663bcc0..d75d65ddf8e3 100644 --- a/arch/arc/boot/dts/axc003.dtsi +++ b/arch/arc/boot/dts/axc003.dtsi @@ -94,6 +94,32 @@ }; /* + * Mark DMA peripherals connected via IOC port as dma-coherent. We do + * it via overlay because peripherals defined in axs10x_mb.dtsi are + * used for both AXS101 and AXS103 boards and only AXS103 has IOC (so + * only AXS103 board has HW-coherent DMA peripherals) + * We don't need to mark pgu@17000 as dma-coherent because it uses + * external DMA buffer located outside of IOC aperture. + */ + axs10x_mb { + ethernet@0x18000 { + dma-coherent; + }; + + ehci@0x40000 { + dma-coherent; + }; + + ohci@0x60000 { + dma-coherent; + }; + + mmc@0x15000 { + dma-coherent; + }; + }; + + /* * The DW APB ICTL intc on MB is connected to CPU intc via a * DT "invisible" DW APB GPIO block, configured to simply pass thru * interrupts - setup accordinly in platform init (plat-axs10x/ax10x.c) diff --git a/arch/arc/boot/dts/axc003_idu.dtsi b/arch/arc/boot/dts/axc003_idu.dtsi index 69ff4895f2ba..a05bb737ea63 100644 --- a/arch/arc/boot/dts/axc003_idu.dtsi +++ b/arch/arc/boot/dts/axc003_idu.dtsi @@ -101,6 +101,32 @@ }; /* + * Mark DMA peripherals connected via IOC port as dma-coherent. We do + * it via overlay because peripherals defined in axs10x_mb.dtsi are + * used for both AXS101 and AXS103 boards and only AXS103 has IOC (so + * only AXS103 board has HW-coherent DMA peripherals) + * We don't need to mark pgu@17000 as dma-coherent because it uses + * external DMA buffer located outside of IOC aperture. + */ + axs10x_mb { + ethernet@0x18000 { + dma-coherent; + }; + + ehci@0x40000 { + dma-coherent; + }; + + ohci@0x60000 { + dma-coherent; + }; + + mmc@0x15000 { + dma-coherent; + }; + }; + + /* * This INTC is actually connected to DW APB GPIO * which acts as a wire between MB INTC and CPU INTC. * GPIO INTC is configured in platform init code diff --git a/arch/arc/boot/dts/axs10x_mb.dtsi b/arch/arc/boot/dts/axs10x_mb.dtsi index 47b74fbc403c..37bafd44e36d 100644 --- a/arch/arc/boot/dts/axs10x_mb.dtsi +++ b/arch/arc/boot/dts/axs10x_mb.dtsi @@ -9,6 +9,10 @@ */ / { + aliases { + ethernet = &gmac; + }; + axs10x_mb { compatible = "simple-bus"; #address-cells = <1>; @@ -68,7 +72,7 @@ }; }; - ethernet@0x18000 { + gmac: ethernet@0x18000 { #interrupt-cells = <1>; compatible = "snps,dwmac"; reg = < 0x18000 0x2000 >; @@ -81,6 +85,7 @@ max-speed = <100>; resets = <&creg_rst 5>; reset-names = "stmmaceth"; + mac-address = [00 00 00 00 00 00]; /* Filled in by U-Boot */ }; ehci@0x40000 { diff --git a/arch/arc/boot/dts/hsdk.dts b/arch/arc/boot/dts/hsdk.dts index 006aa3de5348..ef149f59929a 100644 --- a/arch/arc/boot/dts/hsdk.dts +++ b/arch/arc/boot/dts/hsdk.dts @@ -25,6 +25,10 @@ bootargs = "earlycon=uart8250,mmio32,0xf0005000,115200n8 console=ttyS0,115200n8 debug print-fatal-signals=1"; }; + aliases { + ethernet = &gmac; + }; + cpus { #address-cells = <1>; #size-cells = <0>; @@ -163,7 +167,7 @@ #clock-cells = <0>; }; - ethernet@8000 { + gmac: ethernet@8000 { #interrupt-cells = <1>; compatible = "snps,dwmac"; reg = <0x8000 0x2000>; @@ -176,6 +180,8 @@ phy-handle = <&phy0>; resets = <&cgu_rst HSDK_ETH_RESET>; reset-names = "stmmaceth"; + mac-address = [00 00 00 00 00 00]; /* Filled in by U-Boot */ + dma-coherent; mdio { #address-cells = <1>; @@ -194,12 +200,14 @@ compatible = "snps,hsdk-v1.0-ohci", "generic-ohci"; reg = <0x60000 0x100>; interrupts = <15>; + dma-coherent; }; ehci@40000 { compatible = "snps,hsdk-v1.0-ehci", "generic-ehci"; reg = <0x40000 0x100>; interrupts = <15>; + dma-coherent; }; mmc@a000 { @@ -212,6 +220,7 @@ clock-names = "biu", "ciu"; interrupts = <12>; bus-width = <4>; + dma-coherent; }; }; diff --git a/arch/arc/configs/axs101_defconfig b/arch/arc/configs/axs101_defconfig index a635ea972304..41bc08be6a3b 100644 --- a/arch/arc/configs/axs101_defconfig +++ b/arch/arc/configs/axs101_defconfig @@ -1,5 +1,3 @@ -CONFIG_DEFAULT_HOSTNAME="ARCLinux" -# CONFIG_SWAP is not set CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y # CONFIG_CROSS_MEMORY_ATTACH is not set @@ -63,7 +61,6 @@ CONFIG_MOUSE_PS2_TOUCHKIT=y CONFIG_MOUSE_SERIAL=y CONFIG_MOUSE_SYNAPTICS_USB=y # CONFIG_LEGACY_PTYS is not set -# CONFIG_DEVKMEM is not set CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y CONFIG_SERIAL_8250_DW=y diff --git a/arch/arc/configs/axs103_defconfig b/arch/arc/configs/axs103_defconfig index aa507e423075..1e1c4a8011b5 100644 --- a/arch/arc/configs/axs103_defconfig +++ b/arch/arc/configs/axs103_defconfig @@ -1,5 +1,3 @@ -CONFIG_DEFAULT_HOSTNAME="ARCLinux" -# CONFIG_SWAP is not set CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y # CONFIG_CROSS_MEMORY_ATTACH is not set @@ -64,7 +62,6 @@ CONFIG_MOUSE_PS2_TOUCHKIT=y CONFIG_MOUSE_SERIAL=y CONFIG_MOUSE_SYNAPTICS_USB=y # CONFIG_LEGACY_PTYS is not set -# CONFIG_DEVKMEM is not set CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y CONFIG_SERIAL_8250_DW=y diff --git a/arch/arc/configs/axs103_smp_defconfig b/arch/arc/configs/axs103_smp_defconfig index eba07f468654..6b0c0cfd5c30 100644 --- a/arch/arc/configs/axs103_smp_defconfig +++ b/arch/arc/configs/axs103_smp_defconfig @@ -1,5 +1,3 @@ -CONFIG_DEFAULT_HOSTNAME="ARCLinux" -# CONFIG_SWAP is not set CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y # CONFIG_CROSS_MEMORY_ATTACH is not set @@ -65,7 +63,6 @@ CONFIG_MOUSE_PS2_TOUCHKIT=y CONFIG_MOUSE_SERIAL=y CONFIG_MOUSE_SYNAPTICS_USB=y # CONFIG_LEGACY_PTYS is not set -# CONFIG_DEVKMEM is not set CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y CONFIG_SERIAL_8250_DW=y diff --git a/arch/arc/configs/haps_hs_defconfig b/arch/arc/configs/haps_hs_defconfig index 098b19fbaa51..240dd2cd5148 100644 --- a/arch/arc/configs/haps_hs_defconfig +++ b/arch/arc/configs/haps_hs_defconfig @@ -1,4 +1,3 @@ -CONFIG_DEFAULT_HOSTNAME="ARCLinux" # CONFIG_SWAP is not set CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y @@ -57,7 +56,6 @@ CONFIG_MOUSE_PS2_TOUCHKIT=y # CONFIG_SERIO_SERPORT is not set CONFIG_SERIO_ARC_PS2=y # CONFIG_LEGACY_PTYS is not set -# CONFIG_DEVKMEM is not set CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y CONFIG_SERIAL_8250_NR_UARTS=1 diff --git a/arch/arc/configs/haps_hs_smp_defconfig b/arch/arc/configs/haps_hs_smp_defconfig index 0104c404d897..14ae7e5acc7c 100644 --- a/arch/arc/configs/haps_hs_smp_defconfig +++ b/arch/arc/configs/haps_hs_smp_defconfig @@ -1,4 +1,3 @@ -CONFIG_DEFAULT_HOSTNAME="ARCLinux" # CONFIG_SWAP is not set CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y @@ -60,7 +59,6 @@ CONFIG_MOUSE_PS2_TOUCHKIT=y # CONFIG_SERIO_SERPORT is not set CONFIG_SERIO_ARC_PS2=y # CONFIG_LEGACY_PTYS is not set -# CONFIG_DEVKMEM is not set CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y CONFIG_SERIAL_8250_NR_UARTS=1 diff --git a/arch/arc/configs/hsdk_defconfig b/arch/arc/configs/hsdk_defconfig index 6491be0ddbc9..1dec2b4bc5e6 100644 --- a/arch/arc/configs/hsdk_defconfig +++ b/arch/arc/configs/hsdk_defconfig @@ -1,4 +1,3 @@ -CONFIG_DEFAULT_HOSTNAME="ARCLinux" CONFIG_SYSVIPC=y # CONFIG_CROSS_MEMORY_ATTACH is not set CONFIG_NO_HZ_IDLE=y diff --git a/arch/arc/configs/nps_defconfig b/arch/arc/configs/nps_defconfig index 7c9c706ae7f6..31ba224bbfb4 100644 --- a/arch/arc/configs/nps_defconfig +++ b/arch/arc/configs/nps_defconfig @@ -59,7 +59,6 @@ CONFIG_NETCONSOLE=y # CONFIG_INPUT_MOUSE is not set # CONFIG_SERIO is not set # CONFIG_LEGACY_PTYS is not set -# CONFIG_DEVKMEM is not set CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y CONFIG_SERIAL_8250_NR_UARTS=1 diff --git a/arch/arc/configs/nsim_700_defconfig b/arch/arc/configs/nsim_700_defconfig index 99e05cf63fca..8e0b8b134cd9 100644 --- a/arch/arc/configs/nsim_700_defconfig +++ b/arch/arc/configs/nsim_700_defconfig @@ -1,5 +1,4 @@ # CONFIG_LOCALVERSION_AUTO is not set -CONFIG_DEFAULT_HOSTNAME="ARCLinux" # CONFIG_SWAP is not set CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y @@ -44,7 +43,6 @@ CONFIG_LXT_PHY=y # CONFIG_INPUT_MOUSE is not set # CONFIG_SERIO is not set # CONFIG_LEGACY_PTYS is not set -# CONFIG_DEVKMEM is not set CONFIG_SERIAL_ARC=y CONFIG_SERIAL_ARC_CONSOLE=y # CONFIG_HW_RANDOM is not set diff --git a/arch/arc/configs/nsim_hs_defconfig b/arch/arc/configs/nsim_hs_defconfig index 0dc4f9b737e7..739b90e5e893 100644 --- a/arch/arc/configs/nsim_hs_defconfig +++ b/arch/arc/configs/nsim_hs_defconfig @@ -1,5 +1,4 @@ # CONFIG_LOCALVERSION_AUTO is not set -CONFIG_DEFAULT_HOSTNAME="ARCLinux" # CONFIG_SWAP is not set CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y @@ -45,7 +44,6 @@ CONFIG_DEVTMPFS=y # CONFIG_INPUT_MOUSE is not set # CONFIG_SERIO is not set # CONFIG_LEGACY_PTYS is not set -# CONFIG_DEVKMEM is not set CONFIG_SERIAL_ARC=y CONFIG_SERIAL_ARC_CONSOLE=y # CONFIG_HW_RANDOM is not set diff --git a/arch/arc/configs/nsim_hs_smp_defconfig b/arch/arc/configs/nsim_hs_smp_defconfig index be3c30a15e54..b5895bdf3a93 100644 --- a/arch/arc/configs/nsim_hs_smp_defconfig +++ b/arch/arc/configs/nsim_hs_smp_defconfig @@ -1,5 +1,4 @@ # CONFIG_LOCALVERSION_AUTO is not set -CONFIG_DEFAULT_HOSTNAME="ARCLinux" # CONFIG_SWAP is not set # CONFIG_CROSS_MEMORY_ATTACH is not set CONFIG_HIGH_RES_TIMERS=y @@ -44,7 +43,6 @@ CONFIG_DEVTMPFS=y # CONFIG_INPUT_MOUSE is not set # CONFIG_SERIO is not set # CONFIG_LEGACY_PTYS is not set -# CONFIG_DEVKMEM is not set CONFIG_SERIAL_ARC=y CONFIG_SERIAL_ARC_CONSOLE=y # CONFIG_HW_RANDOM is not set diff --git a/arch/arc/configs/nsimosci_defconfig b/arch/arc/configs/nsimosci_defconfig index 3a74b9b21772..f14eeff7d308 100644 --- a/arch/arc/configs/nsimosci_defconfig +++ b/arch/arc/configs/nsimosci_defconfig @@ -1,5 +1,4 @@ # CONFIG_LOCALVERSION_AUTO is not set -CONFIG_DEFAULT_HOSTNAME="ARCLinux" # CONFIG_SWAP is not set CONFIG_SYSVIPC=y # CONFIG_CROSS_MEMORY_ATTACH is not set @@ -48,7 +47,6 @@ CONFIG_MOUSE_PS2_TOUCHKIT=y # CONFIG_SERIO_SERPORT is not set CONFIG_SERIO_ARC_PS2=y # CONFIG_LEGACY_PTYS is not set -# CONFIG_DEVKMEM is not set CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y CONFIG_SERIAL_8250_NR_UARTS=1 diff --git a/arch/arc/configs/nsimosci_hs_defconfig b/arch/arc/configs/nsimosci_hs_defconfig index ea2834b4dc1d..025298a48305 100644 --- a/arch/arc/configs/nsimosci_hs_defconfig +++ b/arch/arc/configs/nsimosci_hs_defconfig @@ -1,5 +1,4 @@ # CONFIG_LOCALVERSION_AUTO is not set -CONFIG_DEFAULT_HOSTNAME="ARCLinux" # CONFIG_SWAP is not set CONFIG_SYSVIPC=y # CONFIG_CROSS_MEMORY_ATTACH is not set @@ -47,7 +46,6 @@ CONFIG_MOUSE_PS2_TOUCHKIT=y # CONFIG_SERIO_SERPORT is not set CONFIG_SERIO_ARC_PS2=y # CONFIG_LEGACY_PTYS is not set -# CONFIG_DEVKMEM is not set CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y CONFIG_SERIAL_8250_NR_UARTS=1 diff --git a/arch/arc/configs/nsimosci_hs_smp_defconfig b/arch/arc/configs/nsimosci_hs_smp_defconfig index 80a5a1b4924b..df7b77b13b82 100644 --- a/arch/arc/configs/nsimosci_hs_smp_defconfig +++ b/arch/arc/configs/nsimosci_hs_smp_defconfig @@ -1,4 +1,3 @@ -CONFIG_DEFAULT_HOSTNAME="ARCLinux" # CONFIG_SWAP is not set CONFIG_SYSVIPC=y # CONFIG_CROSS_MEMORY_ATTACH is not set @@ -58,7 +57,6 @@ CONFIG_MOUSE_PS2_TOUCHKIT=y # CONFIG_SERIO_SERPORT is not set CONFIG_SERIO_ARC_PS2=y # CONFIG_LEGACY_PTYS is not set -# CONFIG_DEVKMEM is not set CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y CONFIG_SERIAL_8250_NR_UARTS=1 diff --git a/arch/arc/configs/tb10x_defconfig b/arch/arc/configs/tb10x_defconfig index 2cc87f909747..a7f65313f84a 100644 --- a/arch/arc/configs/tb10x_defconfig +++ b/arch/arc/configs/tb10x_defconfig @@ -57,7 +57,6 @@ CONFIG_STMMAC_ETH=y # CONFIG_SERIO is not set # CONFIG_VT is not set # CONFIG_LEGACY_PTYS is not set -# CONFIG_DEVKMEM is not set CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y CONFIG_SERIAL_8250_NR_UARTS=1 diff --git a/arch/arc/configs/vdk_hs38_defconfig b/arch/arc/configs/vdk_hs38_defconfig index f629493929ea..db47c3541f15 100644 --- a/arch/arc/configs/vdk_hs38_defconfig +++ b/arch/arc/configs/vdk_hs38_defconfig @@ -1,5 +1,4 @@ # CONFIG_LOCALVERSION_AUTO is not set -CONFIG_DEFAULT_HOSTNAME="ARCLinux" # CONFIG_CROSS_MEMORY_ATTACH is not set CONFIG_HIGH_RES_TIMERS=y CONFIG_IKCONFIG=y @@ -53,7 +52,6 @@ CONFIG_NATIONAL_PHY=y CONFIG_MOUSE_PS2_TOUCHKIT=y CONFIG_SERIO_ARC_PS2=y # CONFIG_LEGACY_PTYS is not set -# CONFIG_DEVKMEM is not set CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y CONFIG_SERIAL_8250_DW=y diff --git a/arch/arc/configs/vdk_hs38_smp_defconfig b/arch/arc/configs/vdk_hs38_smp_defconfig index 21f0ca26a05d..a8ac5e917d9a 100644 --- a/arch/arc/configs/vdk_hs38_smp_defconfig +++ b/arch/arc/configs/vdk_hs38_smp_defconfig @@ -1,5 +1,4 @@ # CONFIG_LOCALVERSION_AUTO is not set -CONFIG_DEFAULT_HOSTNAME="ARCLinux" # CONFIG_CROSS_MEMORY_ATTACH is not set CONFIG_HIGH_RES_TIMERS=y CONFIG_IKCONFIG=y diff --git a/arch/arc/include/asm/atomic.h b/arch/arc/include/asm/atomic.h index 4e0072730241..158af079838d 100644 --- a/arch/arc/include/asm/atomic.h +++ b/arch/arc/include/asm/atomic.h @@ -84,7 +84,7 @@ static inline int atomic_fetch_##op(int i, atomic_t *v) \ "1: llock %[orig], [%[ctr]] \n" \ " " #asm_op " %[val], %[orig], %[i] \n" \ " scond %[val], [%[ctr]] \n" \ - " \n" \ + " bnz 1b \n" \ : [val] "=&r" (val), \ [orig] "=&r" (orig) \ : [ctr] "r" (&v->counter), \ diff --git a/arch/arc/include/asm/dma-mapping.h b/arch/arc/include/asm/dma-mapping.h new file mode 100644 index 000000000000..c946c0a83e76 --- /dev/null +++ b/arch/arc/include/asm/dma-mapping.h @@ -0,0 +1,13 @@ +// SPDX-License-Identifier: GPL-2.0 +// (C) 2018 Synopsys, Inc. (www.synopsys.com) + +#ifndef ASM_ARC_DMA_MAPPING_H +#define ASM_ARC_DMA_MAPPING_H + +#include <asm-generic/dma-mapping.h> + +void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, + const struct iommu_ops *iommu, bool coherent); +#define arch_setup_dma_ops arch_setup_dma_ops + +#endif diff --git a/arch/arc/kernel/process.c b/arch/arc/kernel/process.c index 4674541eba3f..8ce6e7235915 100644 --- a/arch/arc/kernel/process.c +++ b/arch/arc/kernel/process.c @@ -241,6 +241,26 @@ int copy_thread(unsigned long clone_flags, task_thread_info(current)->thr_ptr; } + + /* + * setup usermode thread pointer #1: + * when child is picked by scheduler, __switch_to() uses @c_callee to + * populate usermode callee regs: this works (despite being in a kernel + * function) since special return path for child @ret_from_fork() + * ensures those regs are not clobbered all the way to RTIE to usermode + */ + c_callee->r25 = task_thread_info(p)->thr_ptr; + +#ifdef CONFIG_ARC_CURR_IN_REG + /* + * setup usermode thread pointer #2: + * however for this special use of r25 in kernel, __switch_to() sets + * r25 for kernel needs and only in the final return path is usermode + * r25 setup, from pt_regs->user_r25. So set that up as well + */ + c_regs->user_r25 = c_callee->r25; +#endif + return 0; } diff --git a/arch/arc/kernel/troubleshoot.c b/arch/arc/kernel/troubleshoot.c index 783b20354f8b..e8d9fb452346 100644 --- a/arch/arc/kernel/troubleshoot.c +++ b/arch/arc/kernel/troubleshoot.c @@ -83,9 +83,6 @@ done: static void show_faulting_vma(unsigned long address, char *buf) { struct vm_area_struct *vma; - struct inode *inode; - unsigned long ino = 0; - dev_t dev = 0; char *nm = buf; struct mm_struct *active_mm = current->active_mm; @@ -99,12 +96,10 @@ static void show_faulting_vma(unsigned long address, char *buf) * if the container VMA is not found */ if (vma && (vma->vm_start <= address)) { - struct file *file = vma->vm_file; - if (file) { - nm = file_path(file, buf, PAGE_SIZE - 1); - inode = file_inode(vma->vm_file); - dev = inode->i_sb->s_dev; - ino = inode->i_ino; + if (vma->vm_file) { + nm = file_path(vma->vm_file, buf, PAGE_SIZE - 1); + if (IS_ERR(nm)) + nm = "?"; } pr_info(" @off 0x%lx in [%s]\n" " VMA: 0x%08lx to 0x%08lx\n", diff --git a/arch/arc/mm/cache.c b/arch/arc/mm/cache.c index 25c631942500..f2701c13a66b 100644 --- a/arch/arc/mm/cache.c +++ b/arch/arc/mm/cache.c @@ -65,7 +65,7 @@ char *arc_cache_mumbojumbo(int c, char *buf, int len) n += scnprintf(buf + n, len - n, "Peripherals\t: %#lx%s%s\n", perip_base, - IS_AVAIL3(ioc_exists, ioc_enable, ", IO-Coherency ")); + IS_AVAIL3(ioc_exists, ioc_enable, ", IO-Coherency (per-device) ")); return buf; } @@ -897,15 +897,6 @@ static void __dma_cache_wback_slc(phys_addr_t start, unsigned long sz) } /* - * DMA ops for systems with IOC - * IOC hardware snoops all DMA traffic keeping the caches consistent with - * memory - eliding need for any explicit cache maintenance of DMA buffers - */ -static void __dma_cache_wback_inv_ioc(phys_addr_t start, unsigned long sz) {} -static void __dma_cache_inv_ioc(phys_addr_t start, unsigned long sz) {} -static void __dma_cache_wback_ioc(phys_addr_t start, unsigned long sz) {} - -/* * Exported DMA API */ void dma_cache_wback_inv(phys_addr_t start, unsigned long sz) @@ -1153,6 +1144,19 @@ noinline void __init arc_ioc_setup(void) { unsigned int ioc_base, mem_sz; + /* + * As for today we don't support both IOC and ZONE_HIGHMEM enabled + * simultaneously. This happens because as of today IOC aperture covers + * only ZONE_NORMAL (low mem) and any dma transactions outside this + * region won't be HW coherent. + * If we want to use both IOC and ZONE_HIGHMEM we can use + * bounce_buffer to handle dma transactions to HIGHMEM. + * Also it is possible to modify dma_direct cache ops or increase IOC + * aperture size if we are planning to use HIGHMEM without PAE. + */ + if (IS_ENABLED(CONFIG_HIGHMEM)) + panic("IOC and HIGHMEM can't be used simultaneously"); + /* Flush + invalidate + disable L1 dcache */ __dc_disable(); @@ -1264,11 +1268,7 @@ void __init arc_cache_init_master(void) if (is_isa_arcv2() && ioc_enable) arc_ioc_setup(); - if (is_isa_arcv2() && ioc_enable) { - __dma_cache_wback_inv = __dma_cache_wback_inv_ioc; - __dma_cache_inv = __dma_cache_inv_ioc; - __dma_cache_wback = __dma_cache_wback_ioc; - } else if (is_isa_arcv2() && l2_line_sz && slc_enable) { + if (is_isa_arcv2() && l2_line_sz && slc_enable) { __dma_cache_wback_inv = __dma_cache_wback_inv_slc; __dma_cache_inv = __dma_cache_inv_slc; __dma_cache_wback = __dma_cache_wback_slc; @@ -1277,6 +1277,12 @@ void __init arc_cache_init_master(void) __dma_cache_inv = __dma_cache_inv_l1; __dma_cache_wback = __dma_cache_wback_l1; } + /* + * In case of IOC (say IOC+SLC case), pointers above could still be set + * but end up not being relevant as the first function in chain is not + * called at all for @dma_direct_ops + * arch_sync_dma_for_cpu() -> dma_cache_*() -> __dma_cache_*() + */ } void __ref arc_cache_init(void) diff --git a/arch/arc/mm/dma.c b/arch/arc/mm/dma.c index ec47e6079f5d..c75d5c3470e3 100644 --- a/arch/arc/mm/dma.c +++ b/arch/arc/mm/dma.c @@ -6,20 +6,17 @@ * published by the Free Software Foundation. */ -/* - * DMA Coherent API Notes - * - * I/O is inherently non-coherent on ARC. So a coherent DMA buffer is - * implemented by accessing it using a kernel virtual address, with - * Cache bit off in the TLB entry. - * - * The default DMA address == Phy address which is 0x8000_0000 based. - */ - #include <linux/dma-noncoherent.h> #include <asm/cache.h> #include <asm/cacheflush.h> +/* + * ARCH specific callbacks for generic noncoherent DMA ops (dma/noncoherent.c) + * - hardware IOC not available (or "dma-coherent" not set for device in DT) + * - But still handle both coherent and non-coherent requests from caller + * + * For DMA coherent hardware (IOC) generic code suffices + */ void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs) { @@ -27,42 +24,29 @@ void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, struct page *page; phys_addr_t paddr; void *kvaddr; - int need_coh = 1, need_kvaddr = 0; - - page = alloc_pages(gfp, order); - if (!page) - return NULL; + bool need_coh = !(attrs & DMA_ATTR_NON_CONSISTENT); /* - * IOC relies on all data (even coherent DMA data) being in cache - * Thus allocate normal cached memory - * - * The gains with IOC are two pronged: - * -For streaming data, elides need for cache maintenance, saving - * cycles in flush code, and bus bandwidth as all the lines of a - * buffer need to be flushed out to memory - * -For coherent data, Read/Write to buffers terminate early in cache - * (vs. always going to memory - thus are faster) + * __GFP_HIGHMEM flag is cleared by upper layer functions + * (in include/linux/dma-mapping.h) so we should never get a + * __GFP_HIGHMEM here. */ - if ((is_isa_arcv2() && ioc_enable) || - (attrs & DMA_ATTR_NON_CONSISTENT)) - need_coh = 0; + BUG_ON(gfp & __GFP_HIGHMEM); - /* - * - A coherent buffer needs MMU mapping to enforce non-cachability - * - A highmem page needs a virtual handle (hence MMU mapping) - * independent of cachability - */ - if (PageHighMem(page) || need_coh) - need_kvaddr = 1; + page = alloc_pages(gfp, order); + if (!page) + return NULL; /* This is linear addr (0x8000_0000 based) */ paddr = page_to_phys(page); *dma_handle = paddr; - /* This is kernel Virtual address (0x7000_0000 based) */ - if (need_kvaddr) { + /* + * A coherent buffer needs MMU mapping to enforce non-cachability. + * kvaddr is kernel Virtual address (0x7000_0000 based). + */ + if (need_coh) { kvaddr = ioremap_nocache(paddr, size); if (kvaddr == NULL) { __free_pages(page, order); @@ -93,12 +77,8 @@ void arch_dma_free(struct device *dev, size_t size, void *vaddr, { phys_addr_t paddr = dma_handle; struct page *page = virt_to_page(paddr); - int is_non_coh = 1; - - is_non_coh = (attrs & DMA_ATTR_NON_CONSISTENT) || - (is_isa_arcv2() && ioc_enable); - if (PageHighMem(page) || !is_non_coh) + if (!(attrs & DMA_ATTR_NON_CONSISTENT)) iounmap((void __force __iomem *)vaddr); __free_pages(page, get_order(size)); @@ -185,3 +165,23 @@ void arch_sync_dma_for_cpu(struct device *dev, phys_addr_t paddr, break; } } + +/* + * Plug in coherent or noncoherent dma ops + */ +void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, + const struct iommu_ops *iommu, bool coherent) +{ + /* + * IOC hardware snoops all DMA traffic keeping the caches consistent + * with memory - eliding need for any explicit cache maintenance of + * DMA buffers - so we can use dma_direct cache ops. + */ + if (is_isa_arcv2() && ioc_enable && coherent) { + set_dma_ops(dev, &dma_direct_ops); + dev_info(dev, "use dma_direct_ops cache ops\n"); + } else { + set_dma_ops(dev, &dma_noncoherent_ops); + dev_info(dev, "use dma_noncoherent_ops cache ops\n"); + } +} diff --git a/arch/arm/boot/dts/at91-sama5d2_ptc_ek.dts b/arch/arm/boot/dts/at91-sama5d2_ptc_ek.dts index b10dccd0958f..3b1baa8605a7 100644 --- a/arch/arm/boot/dts/at91-sama5d2_ptc_ek.dts +++ b/arch/arm/boot/dts/at91-sama5d2_ptc_ek.dts @@ -11,6 +11,7 @@ #include "sama5d2-pinfunc.h" #include <dt-bindings/mfd/atmel-flexcom.h> #include <dt-bindings/gpio/gpio.h> +#include <dt-bindings/pinctrl/at91.h> / { model = "Atmel SAMA5D2 PTC EK"; @@ -299,6 +300,7 @@ <PIN_PA30__NWE_NANDWE>, <PIN_PB2__NRD_NANDOE>; bias-pull-up; + atmel,drive-strength = <ATMEL_PIO_DRVSTR_ME>; }; ale_cle_rdy_cs { diff --git a/arch/arm/boot/dts/bcm63138.dtsi b/arch/arm/boot/dts/bcm63138.dtsi index 43ee992ccdcf..6df61518776f 100644 --- a/arch/arm/boot/dts/bcm63138.dtsi +++ b/arch/arm/boot/dts/bcm63138.dtsi @@ -106,21 +106,23 @@ global_timer: timer@1e200 { compatible = "arm,cortex-a9-global-timer"; reg = <0x1e200 0x20>; - interrupts = <GIC_PPI 11 IRQ_TYPE_LEVEL_HIGH>; + interrupts = <GIC_PPI 11 IRQ_TYPE_EDGE_RISING>; clocks = <&axi_clk>; }; local_timer: local-timer@1e600 { compatible = "arm,cortex-a9-twd-timer"; reg = <0x1e600 0x20>; - interrupts = <GIC_PPI 13 IRQ_TYPE_LEVEL_HIGH>; + interrupts = <GIC_PPI 13 (GIC_CPU_MASK_SIMPLE(2) | + IRQ_TYPE_EDGE_RISING)>; clocks = <&axi_clk>; }; twd_watchdog: watchdog@1e620 { compatible = "arm,cortex-a9-twd-wdt"; reg = <0x1e620 0x20>; - interrupts = <GIC_PPI 14 IRQ_TYPE_LEVEL_HIGH>; + interrupts = <GIC_PPI 14 (GIC_CPU_MASK_SIMPLE(2) | + IRQ_TYPE_LEVEL_HIGH)>; }; armpll: armpll { @@ -158,7 +160,7 @@ serial0: serial@600 { compatible = "brcm,bcm6345-uart"; reg = <0x600 0x1b>; - interrupts = <GIC_SPI 32 0>; + interrupts = <GIC_SPI 32 IRQ_TYPE_LEVEL_HIGH>; clocks = <&periph_clk>; clock-names = "periph"; status = "disabled"; @@ -167,7 +169,7 @@ serial1: serial@620 { compatible = "brcm,bcm6345-uart"; reg = <0x620 0x1b>; - interrupts = <GIC_SPI 33 0>; + interrupts = <GIC_SPI 33 IRQ_TYPE_LEVEL_HIGH>; clocks = <&periph_clk>; clock-names = "periph"; status = "disabled"; @@ -180,7 +182,7 @@ reg = <0x2000 0x600>, <0xf0 0x10>; reg-names = "nand", "nand-int-base"; status = "disabled"; - interrupts = <GIC_SPI 38 0>; + interrupts = <GIC_SPI 38 IRQ_TYPE_LEVEL_HIGH>; interrupt-names = "nand"; }; diff --git a/arch/arm/boot/dts/imx53-qsb-common.dtsi b/arch/arm/boot/dts/imx53-qsb-common.dtsi index 7423d462d1e4..50dde84b72ed 100644 --- a/arch/arm/boot/dts/imx53-qsb-common.dtsi +++ b/arch/arm/boot/dts/imx53-qsb-common.dtsi @@ -123,6 +123,17 @@ }; }; +&cpu0 { + /* CPU rated to 1GHz, not 1.2GHz as per the default settings */ + operating-points = < + /* kHz uV */ + 166666 850000 + 400000 900000 + 800000 1050000 + 1000000 1200000 + >; +}; + &esdhc1 { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_esdhc1>; diff --git a/arch/arm/boot/dts/sama5d3_emac.dtsi b/arch/arm/boot/dts/sama5d3_emac.dtsi index 7cb235ef0fb6..6e9e1c2f9def 100644 --- a/arch/arm/boot/dts/sama5d3_emac.dtsi +++ b/arch/arm/boot/dts/sama5d3_emac.dtsi @@ -41,7 +41,7 @@ }; macb1: ethernet@f802c000 { - compatible = "cdns,at91sam9260-macb", "cdns,macb"; + compatible = "atmel,sama5d3-macb", "cdns,at91sam9260-macb", "cdns,macb"; reg = <0xf802c000 0x100>; interrupts = <35 IRQ_TYPE_LEVEL_HIGH 3>; pinctrl-names = "default"; diff --git a/arch/arm/boot/dts/stm32mp157c.dtsi b/arch/arm/boot/dts/stm32mp157c.dtsi index 661be948ab74..185541a5b69f 100644 --- a/arch/arm/boot/dts/stm32mp157c.dtsi +++ b/arch/arm/boot/dts/stm32mp157c.dtsi @@ -1078,8 +1078,8 @@ interrupts = <GIC_SPI 86 IRQ_TYPE_LEVEL_HIGH>; clocks = <&rcc SPI6_K>; resets = <&rcc SPI6_R>; - dmas = <&mdma1 34 0x0 0x40008 0x0 0x0 0>, - <&mdma1 35 0x0 0x40002 0x0 0x0 0>; + dmas = <&mdma1 34 0x0 0x40008 0x0 0x0>, + <&mdma1 35 0x0 0x40002 0x0 0x0>; dma-names = "rx", "tx"; status = "disabled"; }; diff --git a/arch/arm/boot/dts/sun8i-r40.dtsi b/arch/arm/boot/dts/sun8i-r40.dtsi index ffd9f00f74a4..5f547c161baf 100644 --- a/arch/arm/boot/dts/sun8i-r40.dtsi +++ b/arch/arm/boot/dts/sun8i-r40.dtsi @@ -800,8 +800,7 @@ }; hdmi_phy: hdmi-phy@1ef0000 { - compatible = "allwinner,sun8i-r40-hdmi-phy", - "allwinner,sun50i-a64-hdmi-phy"; + compatible = "allwinner,sun8i-r40-hdmi-phy"; reg = <0x01ef0000 0x10000>; clocks = <&ccu CLK_BUS_HDMI1>, <&ccu CLK_HDMI_SLOW>, <&ccu 7>, <&ccu 16>; diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index 79906cecb091..3ad482d2f1eb 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -223,7 +223,6 @@ int __kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu, struct kvm_vcpu_events *events); #define KVM_ARCH_WANT_MMU_NOTIFIER -int kvm_unmap_hva(struct kvm *kvm, unsigned long hva); int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end); void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte); diff --git a/arch/arm/kernel/vmlinux.lds.h b/arch/arm/kernel/vmlinux.lds.h index ae5fdff18406..8247bc15addc 100644 --- a/arch/arm/kernel/vmlinux.lds.h +++ b/arch/arm/kernel/vmlinux.lds.h @@ -49,6 +49,8 @@ #define ARM_DISCARD \ *(.ARM.exidx.exit.text) \ *(.ARM.extab.exit.text) \ + *(.ARM.exidx.text.exit) \ + *(.ARM.extab.text.exit) \ ARM_CPU_DISCARD(*(.ARM.exidx.cpuexit.text)) \ ARM_CPU_DISCARD(*(.ARM.extab.cpuexit.text)) \ ARM_EXIT_DISCARD(EXIT_TEXT) \ diff --git a/arch/arm/kvm/coproc.c b/arch/arm/kvm/coproc.c index 450c7a4fbc8a..cb094e55dc5f 100644 --- a/arch/arm/kvm/coproc.c +++ b/arch/arm/kvm/coproc.c @@ -478,15 +478,15 @@ static const struct coproc_reg cp15_regs[] = { /* ICC_SGI1R */ { CRm64(12), Op1( 0), is64, access_gic_sgi}, - /* ICC_ASGI1R */ - { CRm64(12), Op1( 1), is64, access_gic_sgi}, - /* ICC_SGI0R */ - { CRm64(12), Op1( 2), is64, access_gic_sgi}, /* VBAR: swapped by interrupt.S. */ { CRn(12), CRm( 0), Op1( 0), Op2( 0), is32, NULL, reset_val, c12_VBAR, 0x00000000 }, + /* ICC_ASGI1R */ + { CRm64(12), Op1( 1), is64, access_gic_sgi}, + /* ICC_SGI0R */ + { CRm64(12), Op1( 2), is64, access_gic_sgi}, /* ICC_SRE */ { CRn(12), CRm(12), Op1( 0), Op2(5), is32, access_gic_sre }, diff --git a/arch/arm/mm/ioremap.c b/arch/arm/mm/ioremap.c index fc91205ff46c..5bf9443cfbaa 100644 --- a/arch/arm/mm/ioremap.c +++ b/arch/arm/mm/ioremap.c @@ -473,7 +473,7 @@ void pci_ioremap_set_mem_type(int mem_type) int pci_ioremap_io(unsigned int offset, phys_addr_t phys_addr) { - BUG_ON(offset + SZ_64K > IO_SPACE_LIMIT); + BUG_ON(offset + SZ_64K - 1 > IO_SPACE_LIMIT); return ioremap_page_range(PCI_IO_VIRT_BASE + offset, PCI_IO_VIRT_BASE + offset + SZ_64K, diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl index fbc74b5fa3ed..8edf93b4490f 100644 --- a/arch/arm/tools/syscall.tbl +++ b/arch/arm/tools/syscall.tbl @@ -413,3 +413,4 @@ 396 common pkey_free sys_pkey_free 397 common statx sys_statx 398 common rseq sys_rseq +399 common io_pgetevents sys_io_pgetevents diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h6-pine-h64.dts b/arch/arm64/boot/dts/allwinner/sun50i-h6-pine-h64.dts index ceffc40810ee..48daec7f78ba 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-h6-pine-h64.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-h6-pine-h64.dts @@ -46,6 +46,7 @@ pinctrl-0 = <&mmc0_pins>; vmmc-supply = <®_cldo1>; cd-gpios = <&pio 5 6 GPIO_ACTIVE_LOW>; + bus-width = <4>; status = "okay"; }; @@ -56,6 +57,7 @@ vqmmc-supply = <®_bldo2>; non-removable; cap-mmc-hw-reset; + bus-width = <8>; status = "okay"; }; diff --git a/arch/arm64/include/asm/jump_label.h b/arch/arm64/include/asm/jump_label.h index 1b5e0e843c3a..7e2b3e360086 100644 --- a/arch/arm64/include/asm/jump_label.h +++ b/arch/arm64/include/asm/jump_label.h @@ -28,7 +28,7 @@ static __always_inline bool arch_static_branch(struct static_key *key, bool branch) { - asm goto("1: nop\n\t" + asm_volatile_goto("1: nop\n\t" ".pushsection __jump_table, \"aw\"\n\t" ".align 3\n\t" ".quad 1b, %l[l_yes], %c0\n\t" @@ -42,7 +42,7 @@ l_yes: static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch) { - asm goto("1: b %l[l_yes]\n\t" + asm_volatile_goto("1: b %l[l_yes]\n\t" ".pushsection __jump_table, \"aw\"\n\t" ".align 3\n\t" ".quad 1b, %l[l_yes], %c0\n\t" diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 15501921fc75..2842bf149029 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -61,8 +61,7 @@ struct kvm_arch { u64 vmid_gen; u32 vmid; - /* 1-level 2nd stage table and lock */ - spinlock_t pgd_lock; + /* 1-level 2nd stage table, protected by kvm->mmu_lock */ pgd_t *pgd; /* VTTBR value associated with above pgd and vmid */ @@ -357,7 +356,6 @@ int __kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu, struct kvm_vcpu_events *events); #define KVM_ARCH_WANT_MMU_NOTIFIER -int kvm_unmap_hva(struct kvm *kvm, unsigned long hva); int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end); void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte); diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 95ac7374d723..4c8b13bede80 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -54,6 +54,7 @@ arm64-obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o \ arm64-obj-$(CONFIG_ARM64_RELOC_TEST) += arm64-reloc-test.o arm64-reloc-test-y := reloc_test_core.o reloc_test_syms.o arm64-obj-$(CONFIG_CRASH_DUMP) += crash_dump.o +arm64-obj-$(CONFIG_CRASH_CORE) += crash_core.o arm64-obj-$(CONFIG_ARM_SDE_INTERFACE) += sdei.o arm64-obj-$(CONFIG_ARM64_SSBD) += ssbd.o diff --git a/arch/arm64/kernel/crash_core.c b/arch/arm64/kernel/crash_core.c new file mode 100644 index 000000000000..ca4c3e12d8c5 --- /dev/null +++ b/arch/arm64/kernel/crash_core.c @@ -0,0 +1,19 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) Linaro. + * Copyright (C) Huawei Futurewei Technologies. + */ + +#include <linux/crash_core.h> +#include <asm/memory.h> + +void arch_crash_save_vmcoreinfo(void) +{ + VMCOREINFO_NUMBER(VA_BITS); + /* Please note VMCOREINFO_NUMBER() uses "%d", not "%x" */ + vmcoreinfo_append_str("NUMBER(kimage_voffset)=0x%llx\n", + kimage_voffset); + vmcoreinfo_append_str("NUMBER(PHYS_OFFSET)=0x%llx\n", + PHYS_OFFSET); + vmcoreinfo_append_str("KERNELOFFSET=%lx\n", kaslr_offset()); +} diff --git a/arch/arm64/kernel/machine_kexec.c b/arch/arm64/kernel/machine_kexec.c index f6a5c6bc1434..922add8adb74 100644 --- a/arch/arm64/kernel/machine_kexec.c +++ b/arch/arm64/kernel/machine_kexec.c @@ -358,14 +358,3 @@ void crash_free_reserved_phys_range(unsigned long begin, unsigned long end) } } #endif /* CONFIG_HIBERNATION */ - -void arch_crash_save_vmcoreinfo(void) -{ - VMCOREINFO_NUMBER(VA_BITS); - /* Please note VMCOREINFO_NUMBER() uses "%d", not "%x" */ - vmcoreinfo_append_str("NUMBER(kimage_voffset)=0x%llx\n", - kimage_voffset); - vmcoreinfo_append_str("NUMBER(PHYS_OFFSET)=0x%llx\n", - PHYS_OFFSET); - vmcoreinfo_append_str("KERNELOFFSET=%lx\n", kaslr_offset()); -} diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index 8e38d5267f22..e213f8e867f6 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -966,6 +966,12 @@ static int armv8pmu_set_event_filter(struct hw_perf_event *event, return 0; } +static int armv8pmu_filter_match(struct perf_event *event) +{ + unsigned long evtype = event->hw.config_base & ARMV8_PMU_EVTYPE_EVENT; + return evtype != ARMV8_PMUV3_PERFCTR_CHAIN; +} + static void armv8pmu_reset(void *info) { struct arm_pmu *cpu_pmu = (struct arm_pmu *)info; @@ -1114,6 +1120,7 @@ static int armv8_pmu_init(struct arm_pmu *cpu_pmu) cpu_pmu->stop = armv8pmu_stop, cpu_pmu->reset = armv8pmu_reset, cpu_pmu->set_event_filter = armv8pmu_set_event_filter; + cpu_pmu->filter_match = armv8pmu_filter_match; return 0; } diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 95670a1eafb0..d0f62dd24c90 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -64,6 +64,9 @@ #include <asm/xen/hypervisor.h> #include <asm/mmu_context.h> +static int num_standard_resources; +static struct resource *standard_resources; + phys_addr_t __fdt_pointer __initdata; /* @@ -206,14 +209,19 @@ static void __init request_standard_resources(void) { struct memblock_region *region; struct resource *res; + unsigned long i = 0; kernel_code.start = __pa_symbol(_text); kernel_code.end = __pa_symbol(__init_begin - 1); kernel_data.start = __pa_symbol(_sdata); kernel_data.end = __pa_symbol(_end - 1); + num_standard_resources = memblock.memory.cnt; + standard_resources = alloc_bootmem_low(num_standard_resources * + sizeof(*standard_resources)); + for_each_memblock(memory, region) { - res = alloc_bootmem_low(sizeof(*res)); + res = &standard_resources[i++]; if (memblock_is_nomap(region)) { res->name = "reserved"; res->flags = IORESOURCE_MEM; @@ -243,36 +251,26 @@ static void __init request_standard_resources(void) static int __init reserve_memblock_reserved_regions(void) { - phys_addr_t start, end, roundup_end = 0; - struct resource *mem, *res; - u64 i; - - for_each_reserved_mem_region(i, &start, &end) { - if (end <= roundup_end) - continue; /* done already */ - - start = __pfn_to_phys(PFN_DOWN(start)); - end = __pfn_to_phys(PFN_UP(end)) - 1; - roundup_end = end; - - res = kzalloc(sizeof(*res), GFP_ATOMIC); - if (WARN_ON(!res)) - return -ENOMEM; - res->start = start; - res->end = end; - res->name = "reserved"; - res->flags = IORESOURCE_MEM; - - mem = request_resource_conflict(&iomem_resource, res); - /* - * We expected memblock_reserve() regions to conflict with - * memory created by request_standard_resources(). - */ - if (WARN_ON_ONCE(!mem)) + u64 i, j; + + for (i = 0; i < num_standard_resources; ++i) { + struct resource *mem = &standard_resources[i]; + phys_addr_t r_start, r_end, mem_size = resource_size(mem); + + if (!memblock_is_region_reserved(mem->start, mem_size)) continue; - kfree(res); - reserve_region_with_split(mem, start, end, "reserved"); + for_each_reserved_mem_region(j, &r_start, &r_end) { + resource_size_t start, end; + + start = max(PFN_PHYS(PFN_DOWN(r_start)), mem->start); + end = min(PFN_PHYS(PFN_UP(r_end)) - 1, mem->end); + + if (start > mem->end || end < mem->start) + continue; + + reserve_region_with_split(mem, start, end, "reserved"); + } } return 0; diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index 07256b08226c..a6c9fbaeaefc 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -57,6 +57,45 @@ static u64 core_reg_offset_from_id(u64 id) return id & ~(KVM_REG_ARCH_MASK | KVM_REG_SIZE_MASK | KVM_REG_ARM_CORE); } +static int validate_core_offset(const struct kvm_one_reg *reg) +{ + u64 off = core_reg_offset_from_id(reg->id); + int size; + + switch (off) { + case KVM_REG_ARM_CORE_REG(regs.regs[0]) ... + KVM_REG_ARM_CORE_REG(regs.regs[30]): + case KVM_REG_ARM_CORE_REG(regs.sp): + case KVM_REG_ARM_CORE_REG(regs.pc): + case KVM_REG_ARM_CORE_REG(regs.pstate): + case KVM_REG_ARM_CORE_REG(sp_el1): + case KVM_REG_ARM_CORE_REG(elr_el1): + case KVM_REG_ARM_CORE_REG(spsr[0]) ... + KVM_REG_ARM_CORE_REG(spsr[KVM_NR_SPSR - 1]): + size = sizeof(__u64); + break; + + case KVM_REG_ARM_CORE_REG(fp_regs.vregs[0]) ... + KVM_REG_ARM_CORE_REG(fp_regs.vregs[31]): + size = sizeof(__uint128_t); + break; + + case KVM_REG_ARM_CORE_REG(fp_regs.fpsr): + case KVM_REG_ARM_CORE_REG(fp_regs.fpcr): + size = sizeof(__u32); + break; + + default: + return -EINVAL; + } + + if (KVM_REG_SIZE(reg->id) == size && + IS_ALIGNED(off, size / sizeof(__u32))) + return 0; + + return -EINVAL; +} + static int get_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) { /* @@ -76,6 +115,9 @@ static int get_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) (off + (KVM_REG_SIZE(reg->id) / sizeof(__u32))) >= nr_regs) return -ENOENT; + if (validate_core_offset(reg)) + return -EINVAL; + if (copy_to_user(uaddr, ((u32 *)regs) + off, KVM_REG_SIZE(reg->id))) return -EFAULT; @@ -98,6 +140,9 @@ static int set_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) (off + (KVM_REG_SIZE(reg->id) / sizeof(__u32))) >= nr_regs) return -ENOENT; + if (validate_core_offset(reg)) + return -EINVAL; + if (KVM_REG_SIZE(reg->id) > sizeof(tmp)) return -EINVAL; @@ -107,17 +152,25 @@ static int set_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) } if (off == KVM_REG_ARM_CORE_REG(regs.pstate)) { - u32 mode = (*(u32 *)valp) & PSR_AA32_MODE_MASK; + u64 mode = (*(u64 *)valp) & PSR_AA32_MODE_MASK; switch (mode) { case PSR_AA32_MODE_USR: + if (!system_supports_32bit_el0()) + return -EINVAL; + break; case PSR_AA32_MODE_FIQ: case PSR_AA32_MODE_IRQ: case PSR_AA32_MODE_SVC: case PSR_AA32_MODE_ABT: case PSR_AA32_MODE_UND: + if (!vcpu_el1_is_32bit(vcpu)) + return -EINVAL; + break; case PSR_MODE_EL0t: case PSR_MODE_EL1t: case PSR_MODE_EL1h: + if (vcpu_el1_is_32bit(vcpu)) + return -EINVAL; break; default: err = -EINVAL; diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c index d496ef579859..ca46153d7915 100644 --- a/arch/arm64/kvm/hyp/switch.c +++ b/arch/arm64/kvm/hyp/switch.c @@ -98,8 +98,10 @@ static void activate_traps_vhe(struct kvm_vcpu *vcpu) val = read_sysreg(cpacr_el1); val |= CPACR_EL1_TTA; val &= ~CPACR_EL1_ZEN; - if (!update_fp_enabled(vcpu)) + if (!update_fp_enabled(vcpu)) { val &= ~CPACR_EL1_FPEN; + __activate_traps_fpsimd32(vcpu); + } write_sysreg(val, cpacr_el1); @@ -114,8 +116,10 @@ static void __hyp_text __activate_traps_nvhe(struct kvm_vcpu *vcpu) val = CPTR_EL2_DEFAULT; val |= CPTR_EL2_TTA | CPTR_EL2_TZ; - if (!update_fp_enabled(vcpu)) + if (!update_fp_enabled(vcpu)) { val |= CPTR_EL2_TFP; + __activate_traps_fpsimd32(vcpu); + } write_sysreg(val, cptr_el2); } @@ -129,7 +133,6 @@ static void __hyp_text __activate_traps(struct kvm_vcpu *vcpu) if (cpus_have_const_cap(ARM64_HAS_RAS_EXTN) && (hcr & HCR_VSE)) write_sysreg_s(vcpu->arch.vsesr_el2, SYS_VSESR_EL2); - __activate_traps_fpsimd32(vcpu); if (has_vhe()) activate_traps_vhe(vcpu); else diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c index 192b3ba07075..f58ea503ad01 100644 --- a/arch/arm64/mm/hugetlbpage.c +++ b/arch/arm64/mm/hugetlbpage.c @@ -117,11 +117,14 @@ static pte_t get_clear_flush(struct mm_struct *mm, /* * If HW_AFDBM is enabled, then the HW could turn on - * the dirty bit for any page in the set, so check - * them all. All hugetlb entries are already young. + * the dirty or accessed bit for any page in the set, + * so check them all. */ if (pte_dirty(pte)) orig_pte = pte_mkdirty(orig_pte); + + if (pte_young(pte)) + orig_pte = pte_mkyoung(orig_pte); } if (valid) { @@ -320,11 +323,40 @@ pte_t huge_ptep_get_and_clear(struct mm_struct *mm, return get_clear_flush(mm, addr, ptep, pgsize, ncontig); } +/* + * huge_ptep_set_access_flags will update access flags (dirty, accesssed) + * and write permission. + * + * For a contiguous huge pte range we need to check whether or not write + * permission has to change only on the first pte in the set. Then for + * all the contiguous ptes we need to check whether or not there is a + * discrepancy between dirty or young. + */ +static int __cont_access_flags_changed(pte_t *ptep, pte_t pte, int ncontig) +{ + int i; + + if (pte_write(pte) != pte_write(huge_ptep_get(ptep))) + return 1; + + for (i = 0; i < ncontig; i++) { + pte_t orig_pte = huge_ptep_get(ptep + i); + + if (pte_dirty(pte) != pte_dirty(orig_pte)) + return 1; + + if (pte_young(pte) != pte_young(orig_pte)) + return 1; + } + + return 0; +} + int huge_ptep_set_access_flags(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep, pte_t pte, int dirty) { - int ncontig, i, changed = 0; + int ncontig, i; size_t pgsize = 0; unsigned long pfn = pte_pfn(pte), dpfn; pgprot_t hugeprot; @@ -336,19 +368,23 @@ int huge_ptep_set_access_flags(struct vm_area_struct *vma, ncontig = find_num_contig(vma->vm_mm, addr, ptep, &pgsize); dpfn = pgsize >> PAGE_SHIFT; + if (!__cont_access_flags_changed(ptep, pte, ncontig)) + return 0; + orig_pte = get_clear_flush(vma->vm_mm, addr, ptep, pgsize, ncontig); - if (!pte_same(orig_pte, pte)) - changed = 1; - /* Make sure we don't lose the dirty state */ + /* Make sure we don't lose the dirty or young state */ if (pte_dirty(orig_pte)) pte = pte_mkdirty(pte); + if (pte_young(orig_pte)) + pte = pte_mkyoung(pte); + hugeprot = pte_pgprot(pte); for (i = 0; i < ncontig; i++, ptep++, addr += pgsize, pfn += dpfn) set_pte_at(vma->vm_mm, addr, ptep, pfn_pte(pfn, hugeprot)); - return changed; + return 1; } void huge_ptep_set_wrprotect(struct mm_struct *mm, diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 6deb836a102a..9498c15b847b 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -987,8 +987,9 @@ int pmd_free_pte_page(pmd_t *pmdp, unsigned long addr) pmd = READ_ONCE(*pmdp); - /* No-op for empty entry and WARN_ON for valid entry */ - if (!pmd_present(pmd) || !pmd_table(pmd)) { + if (!pmd_present(pmd)) + return 1; + if (!pmd_table(pmd)) { VM_WARN_ON(!pmd_table(pmd)); return 1; } @@ -1009,8 +1010,9 @@ int pud_free_pmd_page(pud_t *pudp, unsigned long addr) pud = READ_ONCE(*pudp); - /* No-op for empty entry and WARN_ON for valid entry */ - if (!pud_present(pud) || !pud_table(pud)) { + if (!pud_present(pud)) + return 1; + if (!pud_table(pud)) { VM_WARN_ON(!pud_table(pud)); return 1; } diff --git a/arch/hexagon/include/asm/bitops.h b/arch/hexagon/include/asm/bitops.h index 5e4a59b3ec1b..2691a1857d20 100644 --- a/arch/hexagon/include/asm/bitops.h +++ b/arch/hexagon/include/asm/bitops.h @@ -211,7 +211,7 @@ static inline long ffz(int x) * This is defined the same way as ffs. * Note fls(0) = 0, fls(1) = 1, fls(0x80000000) = 32. */ -static inline long fls(int x) +static inline int fls(int x) { int r; @@ -232,7 +232,7 @@ static inline long fls(int x) * the libc and compiler builtin ffs routines, therefore * differs in spirit from the above ffz (man ffs). */ -static inline long ffs(int x) +static inline int ffs(int x) { int r; diff --git a/arch/hexagon/kernel/dma.c b/arch/hexagon/kernel/dma.c index 77459df34e2e..7ebe7ad19d15 100644 --- a/arch/hexagon/kernel/dma.c +++ b/arch/hexagon/kernel/dma.c @@ -60,7 +60,7 @@ static void *hexagon_dma_alloc_coherent(struct device *dev, size_t size, panic("Can't create %s() memory pool!", __func__); else gen_pool_add(coherent_pool, - pfn_to_virt(max_low_pfn), + (unsigned long)pfn_to_virt(max_low_pfn), hexagon_coherent_pool_size, -1); } diff --git a/arch/m68k/mm/mcfmmu.c b/arch/m68k/mm/mcfmmu.c index 70dde040779b..f5453d944ff5 100644 --- a/arch/m68k/mm/mcfmmu.c +++ b/arch/m68k/mm/mcfmmu.c @@ -172,7 +172,7 @@ void __init cf_bootmem_alloc(void) high_memory = (void *)_ramend; /* Reserve kernel text/data/bss */ - memblock_reserve(memstart, memstart - _rambase); + memblock_reserve(_rambase, memstart - _rambase); m68k_virt_to_node_shift = fls(_ramend - 1) - 6; module_fixup(NULL, __start_fixup, __stop_fixup); diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h index a9af1d2dcd69..2c1c53d12179 100644 --- a/arch/mips/include/asm/kvm_host.h +++ b/arch/mips/include/asm/kvm_host.h @@ -931,7 +931,6 @@ enum kvm_mips_fault_result kvm_trap_emul_gva_fault(struct kvm_vcpu *vcpu, bool write); #define KVM_ARCH_WANT_MMU_NOTIFIER -int kvm_unmap_hva(struct kvm *kvm, unsigned long hva); int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end); void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte); diff --git a/arch/mips/include/asm/mach-lantiq/xway/xway_dma.h b/arch/mips/include/asm/mach-lantiq/xway/xway_dma.h index 4901833498f7..8441b2698e64 100644 --- a/arch/mips/include/asm/mach-lantiq/xway/xway_dma.h +++ b/arch/mips/include/asm/mach-lantiq/xway/xway_dma.h @@ -40,6 +40,7 @@ struct ltq_dma_channel { int desc; /* the current descriptor */ struct ltq_dma_desc *desc_base; /* the descriptor base */ int phys; /* physical addr */ + struct device *dev; }; enum { diff --git a/arch/mips/include/asm/processor.h b/arch/mips/include/asm/processor.h index b2fa62922d88..49d6046ca1d0 100644 --- a/arch/mips/include/asm/processor.h +++ b/arch/mips/include/asm/processor.h @@ -13,6 +13,7 @@ #include <linux/atomic.h> #include <linux/cpumask.h> +#include <linux/sizes.h> #include <linux/threads.h> #include <asm/cachectl.h> @@ -80,11 +81,10 @@ extern unsigned int vced_count, vcei_count; #endif -/* - * One page above the stack is used for branch delay slot "emulation". - * See dsemul.c for details. - */ -#define STACK_TOP ((TASK_SIZE & PAGE_MASK) - PAGE_SIZE) +#define VDSO_RANDOMIZE_SIZE (TASK_IS_32BIT_ADDR ? SZ_1M : SZ_256M) + +extern unsigned long mips_stack_top(void); +#define STACK_TOP mips_stack_top() /* * This decides where the kernel will search for a free chunk of vm diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c index 8fc69891e117..d4f7fd4550e1 100644 --- a/arch/mips/kernel/process.c +++ b/arch/mips/kernel/process.c @@ -32,6 +32,7 @@ #include <linux/nmi.h> #include <linux/cpu.h> +#include <asm/abi.h> #include <asm/asm.h> #include <asm/bootinfo.h> #include <asm/cpu.h> @@ -39,6 +40,7 @@ #include <asm/dsp.h> #include <asm/fpu.h> #include <asm/irq.h> +#include <asm/mips-cps.h> #include <asm/msa.h> #include <asm/pgtable.h> #include <asm/mipsregs.h> @@ -645,6 +647,29 @@ out: return pc; } +unsigned long mips_stack_top(void) +{ + unsigned long top = TASK_SIZE & PAGE_MASK; + + /* One page for branch delay slot "emulation" */ + top -= PAGE_SIZE; + + /* Space for the VDSO, data page & GIC user page */ + top -= PAGE_ALIGN(current->thread.abi->vdso->size); + top -= PAGE_SIZE; + top -= mips_gic_present() ? PAGE_SIZE : 0; + + /* Space for cache colour alignment */ + if (cpu_has_dc_aliases) + top -= shm_align_mask + 1; + + /* Space to randomize the VDSO base */ + if (current->flags & PF_RANDOMIZE) + top -= VDSO_RANDOMIZE_SIZE; + + return top; +} + /* * Don't forget that the stack pointer must be aligned on a 8 bytes * boundary for 32-bits ABI and 16 bytes for 64-bits ABI. diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c index c71d1eb7da59..8aaaa42f91ed 100644 --- a/arch/mips/kernel/setup.c +++ b/arch/mips/kernel/setup.c @@ -846,6 +846,34 @@ static void __init arch_mem_init(char **cmdline_p) struct memblock_region *reg; extern void plat_mem_setup(void); + /* + * Initialize boot_command_line to an innocuous but non-empty string in + * order to prevent early_init_dt_scan_chosen() from copying + * CONFIG_CMDLINE into it without our knowledge. We handle + * CONFIG_CMDLINE ourselves below & don't want to duplicate its + * content because repeating arguments can be problematic. + */ + strlcpy(boot_command_line, " ", COMMAND_LINE_SIZE); + + /* call board setup routine */ + plat_mem_setup(); + + /* + * Make sure all kernel memory is in the maps. The "UP" and + * "DOWN" are opposite for initdata since if it crosses over + * into another memory section you don't want that to be + * freed when the initdata is freed. + */ + arch_mem_addpart(PFN_DOWN(__pa_symbol(&_text)) << PAGE_SHIFT, + PFN_UP(__pa_symbol(&_edata)) << PAGE_SHIFT, + BOOT_MEM_RAM); + arch_mem_addpart(PFN_UP(__pa_symbol(&__init_begin)) << PAGE_SHIFT, + PFN_DOWN(__pa_symbol(&__init_end)) << PAGE_SHIFT, + BOOT_MEM_INIT_RAM); + + pr_info("Determined physical RAM map:\n"); + print_memory_map(); + #if defined(CONFIG_CMDLINE_BOOL) && defined(CONFIG_CMDLINE_OVERRIDE) strlcpy(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE); #else @@ -873,26 +901,6 @@ static void __init arch_mem_init(char **cmdline_p) } #endif #endif - - /* call board setup routine */ - plat_mem_setup(); - - /* - * Make sure all kernel memory is in the maps. The "UP" and - * "DOWN" are opposite for initdata since if it crosses over - * into another memory section you don't want that to be - * freed when the initdata is freed. - */ - arch_mem_addpart(PFN_DOWN(__pa_symbol(&_text)) << PAGE_SHIFT, - PFN_UP(__pa_symbol(&_edata)) << PAGE_SHIFT, - BOOT_MEM_RAM); - arch_mem_addpart(PFN_UP(__pa_symbol(&__init_begin)) << PAGE_SHIFT, - PFN_DOWN(__pa_symbol(&__init_end)) << PAGE_SHIFT, - BOOT_MEM_INIT_RAM); - - pr_info("Determined physical RAM map:\n"); - print_memory_map(); - strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE); *cmdline_p = command_line; diff --git a/arch/mips/kernel/vdso.c b/arch/mips/kernel/vdso.c index 019035d7225c..48a9c6b90e07 100644 --- a/arch/mips/kernel/vdso.c +++ b/arch/mips/kernel/vdso.c @@ -13,13 +13,16 @@ #include <linux/err.h> #include <linux/init.h> #include <linux/ioport.h> +#include <linux/kernel.h> #include <linux/mm.h> +#include <linux/random.h> #include <linux/sched.h> #include <linux/slab.h> #include <linux/timekeeper_internal.h> #include <asm/abi.h> #include <asm/mips-cps.h> +#include <asm/page.h> #include <asm/vdso.h> /* Kernel-provided data used by the VDSO. */ @@ -95,6 +98,21 @@ void update_vsyscall_tz(void) } } +static unsigned long vdso_base(void) +{ + unsigned long base; + + /* Skip the delay slot emulation page */ + base = STACK_TOP + PAGE_SIZE; + + if (current->flags & PF_RANDOMIZE) { + base += get_random_int() & (VDSO_RANDOMIZE_SIZE - 1); + base = PAGE_ALIGN(base); + } + + return base; +} + int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) { struct mips_vdso_image *image = current->thread.abi->vdso; @@ -128,12 +146,30 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) vvar_size = gic_size + PAGE_SIZE; size = vvar_size + image->size; - base = get_unmapped_area(NULL, 0, size, 0, 0); + /* + * Find a region that's large enough for us to perform the + * colour-matching alignment below. + */ + if (cpu_has_dc_aliases) + size += shm_align_mask + 1; + + base = get_unmapped_area(NULL, vdso_base(), size, 0, 0); if (IS_ERR_VALUE(base)) { ret = base; goto out; } + /* + * If we suffer from dcache aliasing, ensure that the VDSO data page + * mapping is coloured the same as the kernel's mapping of that memory. + * This ensures that when the kernel updates the VDSO data userland + * will observe it without requiring cache invalidations. + */ + if (cpu_has_dc_aliases) { + base = __ALIGN_MASK(base, shm_align_mask); + base += ((unsigned long)&vdso_data - gic_size) & shm_align_mask; + } + data_addr = base + gic_size; vdso_addr = data_addr + PAGE_SIZE; diff --git a/arch/mips/kvm/mmu.c b/arch/mips/kvm/mmu.c index ee64db032793..d8dcdb350405 100644 --- a/arch/mips/kvm/mmu.c +++ b/arch/mips/kvm/mmu.c @@ -512,16 +512,6 @@ static int kvm_unmap_hva_handler(struct kvm *kvm, gfn_t gfn, gfn_t gfn_end, return 1; } -int kvm_unmap_hva(struct kvm *kvm, unsigned long hva) -{ - unsigned long end = hva + PAGE_SIZE; - - handle_hva_to_gpa(kvm, hva, end, &kvm_unmap_hva_handler, NULL); - - kvm_mips_callbacks->flush_shadow_all(kvm); - return 0; -} - int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end) { handle_hva_to_gpa(kvm, start, end, &kvm_unmap_hva_handler, NULL); diff --git a/arch/mips/lantiq/xway/dma.c b/arch/mips/lantiq/xway/dma.c index 4b9fbb6744ad..664f2f7f55c1 100644 --- a/arch/mips/lantiq/xway/dma.c +++ b/arch/mips/lantiq/xway/dma.c @@ -130,7 +130,7 @@ ltq_dma_alloc(struct ltq_dma_channel *ch) unsigned long flags; ch->desc = 0; - ch->desc_base = dma_zalloc_coherent(NULL, + ch->desc_base = dma_zalloc_coherent(ch->dev, LTQ_DESC_NUM * LTQ_DESC_SIZE, &ch->phys, GFP_ATOMIC); @@ -182,7 +182,7 @@ ltq_dma_free(struct ltq_dma_channel *ch) if (!ch->desc_base) return; ltq_dma_close(ch); - dma_free_coherent(NULL, LTQ_DESC_NUM * LTQ_DESC_SIZE, + dma_free_coherent(ch->dev, LTQ_DESC_NUM * LTQ_DESC_SIZE, ch->desc_base, ch->phys); } EXPORT_SYMBOL_GPL(ltq_dma_free); diff --git a/arch/mips/lib/memset.S b/arch/mips/lib/memset.S index 3a6f34ef5ffc..069acec3df9f 100644 --- a/arch/mips/lib/memset.S +++ b/arch/mips/lib/memset.S @@ -280,9 +280,11 @@ * unset_bytes = end_addr - current_addr + 1 * a2 = t1 - a0 + 1 */ + .set reorder PTR_SUBU a2, t1, a0 + PTR_ADDIU a2, 1 jr ra - PTR_ADDIU a2, 1 + .set noreorder .endm diff --git a/arch/nds32/Kconfig b/arch/nds32/Kconfig index 1d4248fa55e9..7068f341133d 100644 --- a/arch/nds32/Kconfig +++ b/arch/nds32/Kconfig @@ -40,6 +40,10 @@ config NDS32 select NO_IOPORT_MAP select RTC_LIB select THREAD_INFO_IN_TASK + select HAVE_FUNCTION_TRACER + select HAVE_FUNCTION_GRAPH_TRACER + select HAVE_FTRACE_MCOUNT_RECORD + select HAVE_DYNAMIC_FTRACE help Andes(nds32) Linux support. diff --git a/arch/nds32/Makefile b/arch/nds32/Makefile index 63f4f173e5f4..3509fac10491 100644 --- a/arch/nds32/Makefile +++ b/arch/nds32/Makefile @@ -5,6 +5,10 @@ KBUILD_DEFCONFIG := defconfig comma = , +ifdef CONFIG_FUNCTION_TRACER +arch-y += -malways-save-lp -mno-relax +endif + KBUILD_CFLAGS += $(call cc-option, -mno-sched-prolog-epilog) KBUILD_CFLAGS += -mcmodel=large diff --git a/arch/nds32/include/asm/elf.h b/arch/nds32/include/asm/elf.h index 56c479058802..f5f9cf7e0544 100644 --- a/arch/nds32/include/asm/elf.h +++ b/arch/nds32/include/asm/elf.h @@ -121,9 +121,9 @@ struct elf32_hdr; */ #define ELF_CLASS ELFCLASS32 #ifdef __NDS32_EB__ -#define ELF_DATA ELFDATA2MSB; +#define ELF_DATA ELFDATA2MSB #else -#define ELF_DATA ELFDATA2LSB; +#define ELF_DATA ELFDATA2LSB #endif #define ELF_ARCH EM_NDS32 #define USE_ELF_CORE_DUMP diff --git a/arch/nds32/include/asm/ftrace.h b/arch/nds32/include/asm/ftrace.h new file mode 100644 index 000000000000..2f96cc96aa35 --- /dev/null +++ b/arch/nds32/include/asm/ftrace.h @@ -0,0 +1,46 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef __ASM_NDS32_FTRACE_H +#define __ASM_NDS32_FTRACE_H + +#ifdef CONFIG_FUNCTION_TRACER + +#define HAVE_FUNCTION_GRAPH_FP_TEST + +#define MCOUNT_ADDR ((unsigned long)(_mcount)) +/* mcount call is composed of three instructions: + * sethi + ori + jral + */ +#define MCOUNT_INSN_SIZE 12 + +extern void _mcount(unsigned long parent_ip); + +#ifdef CONFIG_DYNAMIC_FTRACE + +#define FTRACE_ADDR ((unsigned long)_ftrace_caller) + +#ifdef __NDS32_EL__ +#define INSN_NOP 0x09000040 +#define INSN_SIZE(insn) (((insn & 0x00000080) == 0) ? 4 : 2) +#define IS_SETHI(insn) ((insn & 0x000000fe) == 0x00000046) +#define ENDIAN_CONVERT(insn) be32_to_cpu(insn) +#else /* __NDS32_EB__ */ +#define INSN_NOP 0x40000009 +#define INSN_SIZE(insn) (((insn & 0x80000000) == 0) ? 4 : 2) +#define IS_SETHI(insn) ((insn & 0xfe000000) == 0x46000000) +#define ENDIAN_CONVERT(insn) (insn) +#endif + +extern void _ftrace_caller(unsigned long parent_ip); +static inline unsigned long ftrace_call_adjust(unsigned long addr) +{ + return addr; +} +struct dyn_arch_ftrace { +}; + +#endif /* CONFIG_DYNAMIC_FTRACE */ + +#endif /* CONFIG_FUNCTION_TRACER */ + +#endif /* __ASM_NDS32_FTRACE_H */ diff --git a/arch/nds32/include/asm/nds32.h b/arch/nds32/include/asm/nds32.h index 19b19394a936..68c38151c3e4 100644 --- a/arch/nds32/include/asm/nds32.h +++ b/arch/nds32/include/asm/nds32.h @@ -17,6 +17,7 @@ #else #define FP_OFFSET (-2) #endif +#define LP_OFFSET (-1) extern void __init early_trap_init(void); static inline void GIE_ENABLE(void) diff --git a/arch/nds32/include/asm/uaccess.h b/arch/nds32/include/asm/uaccess.h index 18a009f3804d..362a32d9bd16 100644 --- a/arch/nds32/include/asm/uaccess.h +++ b/arch/nds32/include/asm/uaccess.h @@ -38,7 +38,7 @@ struct exception_table_entry { extern int fixup_exception(struct pt_regs *regs); #define KERNEL_DS ((mm_segment_t) { ~0UL }) -#define USER_DS ((mm_segment_t) {TASK_SIZE - 1}) +#define USER_DS ((mm_segment_t) {TASK_SIZE - 1}) #define get_ds() (KERNEL_DS) #define get_fs() (current_thread_info()->addr_limit) @@ -49,11 +49,11 @@ static inline void set_fs(mm_segment_t fs) current_thread_info()->addr_limit = fs; } -#define segment_eq(a, b) ((a) == (b)) +#define segment_eq(a, b) ((a) == (b)) #define __range_ok(addr, size) (size <= get_fs() && addr <= (get_fs() -size)) -#define access_ok(type, addr, size) \ +#define access_ok(type, addr, size) \ __range_ok((unsigned long)addr, (unsigned long)size) /* * Single-value transfer routines. They automatically use the right @@ -75,70 +75,73 @@ static inline void set_fs(mm_segment_t fs) * versions are void (ie, don't return a value as such). */ -#define get_user(x,p) \ -({ \ - long __e = -EFAULT; \ - if(likely(access_ok(VERIFY_READ, p, sizeof(*p)))) { \ - __e = __get_user(x,p); \ - } else \ - x = 0; \ - __e; \ -}) -#define __get_user(x,ptr) \ +#define get_user __get_user \ + +#define __get_user(x, ptr) \ ({ \ long __gu_err = 0; \ - __get_user_err((x),(ptr),__gu_err); \ + __get_user_check((x), (ptr), __gu_err); \ __gu_err; \ }) -#define __get_user_error(x,ptr,err) \ +#define __get_user_error(x, ptr, err) \ ({ \ - __get_user_err((x),(ptr),err); \ - (void) 0; \ + __get_user_check((x), (ptr), (err)); \ + (void)0; \ }) -#define __get_user_err(x,ptr,err) \ +#define __get_user_check(x, ptr, err) \ +({ \ + const __typeof__(*(ptr)) __user *__p = (ptr); \ + might_fault(); \ + if (access_ok(VERIFY_READ, __p, sizeof(*__p))) { \ + __get_user_err((x), __p, (err)); \ + } else { \ + (x) = 0; (err) = -EFAULT; \ + } \ +}) + +#define __get_user_err(x, ptr, err) \ do { \ - unsigned long __gu_addr = (unsigned long)(ptr); \ unsigned long __gu_val; \ __chk_user_ptr(ptr); \ switch (sizeof(*(ptr))) { \ case 1: \ - __get_user_asm("lbi",__gu_val,__gu_addr,err); \ + __get_user_asm("lbi", __gu_val, (ptr), (err)); \ break; \ case 2: \ - __get_user_asm("lhi",__gu_val,__gu_addr,err); \ + __get_user_asm("lhi", __gu_val, (ptr), (err)); \ break; \ case 4: \ - __get_user_asm("lwi",__gu_val,__gu_addr,err); \ + __get_user_asm("lwi", __gu_val, (ptr), (err)); \ break; \ case 8: \ - __get_user_asm_dword(__gu_val,__gu_addr,err); \ + __get_user_asm_dword(__gu_val, (ptr), (err)); \ break; \ default: \ BUILD_BUG(); \ break; \ } \ - (x) = (__typeof__(*(ptr)))__gu_val; \ + (x) = (__force __typeof__(*(ptr)))__gu_val; \ } while (0) -#define __get_user_asm(inst,x,addr,err) \ - asm volatile( \ - "1: "inst" %1,[%2]\n" \ - "2:\n" \ - " .section .fixup,\"ax\"\n" \ - " .align 2\n" \ - "3: move %0, %3\n" \ - " move %1, #0\n" \ - " b 2b\n" \ - " .previous\n" \ - " .section __ex_table,\"a\"\n" \ - " .align 3\n" \ - " .long 1b, 3b\n" \ - " .previous" \ - : "+r" (err), "=&r" (x) \ - : "r" (addr), "i" (-EFAULT) \ - : "cc") +#define __get_user_asm(inst, x, addr, err) \ + __asm__ __volatile__ ( \ + "1: "inst" %1,[%2]\n" \ + "2:\n" \ + " .section .fixup,\"ax\"\n" \ + " .align 2\n" \ + "3: move %0, %3\n" \ + " move %1, #0\n" \ + " b 2b\n" \ + " .previous\n" \ + " .section __ex_table,\"a\"\n" \ + " .align 3\n" \ + " .long 1b, 3b\n" \ + " .previous" \ + : "+r" (err), "=&r" (x) \ + : "r" (addr), "i" (-EFAULT) \ + : "cc") #ifdef __NDS32_EB__ #define __gu_reg_oper0 "%H1" @@ -149,61 +152,66 @@ do { \ #endif #define __get_user_asm_dword(x, addr, err) \ - asm volatile( \ - "\n1:\tlwi " __gu_reg_oper0 ",[%2]\n" \ - "\n2:\tlwi " __gu_reg_oper1 ",[%2+4]\n" \ - "3:\n" \ - " .section .fixup,\"ax\"\n" \ - " .align 2\n" \ - "4: move %0, %3\n" \ - " b 3b\n" \ - " .previous\n" \ - " .section __ex_table,\"a\"\n" \ - " .align 3\n" \ - " .long 1b, 4b\n" \ - " .long 2b, 4b\n" \ - " .previous" \ - : "+r"(err), "=&r"(x) \ - : "r"(addr), "i"(-EFAULT) \ - : "cc") -#define put_user(x,p) \ -({ \ - long __e = -EFAULT; \ - if(likely(access_ok(VERIFY_WRITE, p, sizeof(*p)))) { \ - __e = __put_user(x,p); \ - } \ - __e; \ -}) -#define __put_user(x,ptr) \ + __asm__ __volatile__ ( \ + "\n1:\tlwi " __gu_reg_oper0 ",[%2]\n" \ + "\n2:\tlwi " __gu_reg_oper1 ",[%2+4]\n" \ + "3:\n" \ + " .section .fixup,\"ax\"\n" \ + " .align 2\n" \ + "4: move %0, %3\n" \ + " b 3b\n" \ + " .previous\n" \ + " .section __ex_table,\"a\"\n" \ + " .align 3\n" \ + " .long 1b, 4b\n" \ + " .long 2b, 4b\n" \ + " .previous" \ + : "+r"(err), "=&r"(x) \ + : "r"(addr), "i"(-EFAULT) \ + : "cc") + +#define put_user __put_user \ + +#define __put_user(x, ptr) \ ({ \ long __pu_err = 0; \ - __put_user_err((x),(ptr),__pu_err); \ + __put_user_err((x), (ptr), __pu_err); \ __pu_err; \ }) -#define __put_user_error(x,ptr,err) \ +#define __put_user_error(x, ptr, err) \ +({ \ + __put_user_err((x), (ptr), (err)); \ + (void)0; \ +}) + +#define __put_user_check(x, ptr, err) \ ({ \ - __put_user_err((x),(ptr),err); \ - (void) 0; \ + __typeof__(*(ptr)) __user *__p = (ptr); \ + might_fault(); \ + if (access_ok(VERIFY_WRITE, __p, sizeof(*__p))) { \ + __put_user_err((x), __p, (err)); \ + } else { \ + (err) = -EFAULT; \ + } \ }) -#define __put_user_err(x,ptr,err) \ +#define __put_user_err(x, ptr, err) \ do { \ - unsigned long __pu_addr = (unsigned long)(ptr); \ __typeof__(*(ptr)) __pu_val = (x); \ __chk_user_ptr(ptr); \ switch (sizeof(*(ptr))) { \ case 1: \ - __put_user_asm("sbi",__pu_val,__pu_addr,err); \ + __put_user_asm("sbi", __pu_val, (ptr), (err)); \ break; \ case 2: \ - __put_user_asm("shi",__pu_val,__pu_addr,err); \ + __put_user_asm("shi", __pu_val, (ptr), (err)); \ break; \ case 4: \ - __put_user_asm("swi",__pu_val,__pu_addr,err); \ + __put_user_asm("swi", __pu_val, (ptr), (err)); \ break; \ case 8: \ - __put_user_asm_dword(__pu_val,__pu_addr,err); \ + __put_user_asm_dword(__pu_val, (ptr), (err)); \ break; \ default: \ BUILD_BUG(); \ @@ -211,22 +219,22 @@ do { \ } \ } while (0) -#define __put_user_asm(inst,x,addr,err) \ - asm volatile( \ - "1: "inst" %1,[%2]\n" \ - "2:\n" \ - " .section .fixup,\"ax\"\n" \ - " .align 2\n" \ - "3: move %0, %3\n" \ - " b 2b\n" \ - " .previous\n" \ - " .section __ex_table,\"a\"\n" \ - " .align 3\n" \ - " .long 1b, 3b\n" \ - " .previous" \ - : "+r" (err) \ - : "r" (x), "r" (addr), "i" (-EFAULT) \ - : "cc") +#define __put_user_asm(inst, x, addr, err) \ + __asm__ __volatile__ ( \ + "1: "inst" %1,[%2]\n" \ + "2:\n" \ + " .section .fixup,\"ax\"\n" \ + " .align 2\n" \ + "3: move %0, %3\n" \ + " b 2b\n" \ + " .previous\n" \ + " .section __ex_table,\"a\"\n" \ + " .align 3\n" \ + " .long 1b, 3b\n" \ + " .previous" \ + : "+r" (err) \ + : "r" (x), "r" (addr), "i" (-EFAULT) \ + : "cc") #ifdef __NDS32_EB__ #define __pu_reg_oper0 "%H2" @@ -237,23 +245,24 @@ do { \ #endif #define __put_user_asm_dword(x, addr, err) \ - asm volatile( \ - "\n1:\tswi " __pu_reg_oper0 ",[%1]\n" \ - "\n2:\tswi " __pu_reg_oper1 ",[%1+4]\n" \ - "3:\n" \ - " .section .fixup,\"ax\"\n" \ - " .align 2\n" \ - "4: move %0, %3\n" \ - " b 3b\n" \ - " .previous\n" \ - " .section __ex_table,\"a\"\n" \ - " .align 3\n" \ - " .long 1b, 4b\n" \ - " .long 2b, 4b\n" \ - " .previous" \ - : "+r"(err) \ - : "r"(addr), "r"(x), "i"(-EFAULT) \ - : "cc") + __asm__ __volatile__ ( \ + "\n1:\tswi " __pu_reg_oper0 ",[%1]\n" \ + "\n2:\tswi " __pu_reg_oper1 ",[%1+4]\n" \ + "3:\n" \ + " .section .fixup,\"ax\"\n" \ + " .align 2\n" \ + "4: move %0, %3\n" \ + " b 3b\n" \ + " .previous\n" \ + " .section __ex_table,\"a\"\n" \ + " .align 3\n" \ + " .long 1b, 4b\n" \ + " .long 2b, 4b\n" \ + " .previous" \ + : "+r"(err) \ + : "r"(addr), "r"(x), "i"(-EFAULT) \ + : "cc") + extern unsigned long __arch_clear_user(void __user * addr, unsigned long n); extern long strncpy_from_user(char *dest, const char __user * src, long count); extern __must_check long strlen_user(const char __user * str); diff --git a/arch/nds32/kernel/Makefile b/arch/nds32/kernel/Makefile index 42792743e8b9..27cded39fa66 100644 --- a/arch/nds32/kernel/Makefile +++ b/arch/nds32/kernel/Makefile @@ -21,3 +21,9 @@ extra-y := head.o vmlinux.lds obj-y += vdso/ + +obj-$(CONFIG_FUNCTION_TRACER) += ftrace.o + +ifdef CONFIG_FUNCTION_TRACER +CFLAGS_REMOVE_ftrace.o = $(CC_FLAGS_FTRACE) +endif diff --git a/arch/nds32/kernel/atl2c.c b/arch/nds32/kernel/atl2c.c index 0c6d031a1c4a..0c5386e72098 100644 --- a/arch/nds32/kernel/atl2c.c +++ b/arch/nds32/kernel/atl2c.c @@ -9,7 +9,8 @@ void __iomem *atl2c_base; static const struct of_device_id atl2c_ids[] __initconst = { - {.compatible = "andestech,atl2c",} + {.compatible = "andestech,atl2c",}, + {} }; static int __init atl2c_of_init(void) diff --git a/arch/nds32/kernel/ex-entry.S b/arch/nds32/kernel/ex-entry.S index b8ae4e9a6b93..21a144071566 100644 --- a/arch/nds32/kernel/ex-entry.S +++ b/arch/nds32/kernel/ex-entry.S @@ -118,7 +118,7 @@ common_exception_handler: /* interrupt */ 2: #ifdef CONFIG_TRACE_IRQFLAGS - jal trace_hardirqs_off + jal __trace_hardirqs_off #endif move $r0, $sp sethi $lp, hi20(ret_from_intr) diff --git a/arch/nds32/kernel/ex-exit.S b/arch/nds32/kernel/ex-exit.S index 03e4f7788a18..f00af92f7e22 100644 --- a/arch/nds32/kernel/ex-exit.S +++ b/arch/nds32/kernel/ex-exit.S @@ -138,8 +138,8 @@ no_work_pending: #ifdef CONFIG_TRACE_IRQFLAGS lwi $p0, [$sp+(#IPSW_OFFSET)] andi $p0, $p0, #0x1 - la $r10, trace_hardirqs_off - la $r9, trace_hardirqs_on + la $r10, __trace_hardirqs_off + la $r9, __trace_hardirqs_on cmovz $r9, $p0, $r10 jral $r9 #endif diff --git a/arch/nds32/kernel/ftrace.c b/arch/nds32/kernel/ftrace.c new file mode 100644 index 000000000000..a0a9679ad5de --- /dev/null +++ b/arch/nds32/kernel/ftrace.c @@ -0,0 +1,309 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/ftrace.h> +#include <linux/uaccess.h> +#include <asm/cacheflush.h> + +#ifndef CONFIG_DYNAMIC_FTRACE +extern void (*ftrace_trace_function)(unsigned long, unsigned long, + struct ftrace_ops*, struct pt_regs*); +extern int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace); +extern void ftrace_graph_caller(void); + +noinline void __naked ftrace_stub(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *op, struct pt_regs *regs) +{ + __asm__ (""); /* avoid to optimize as pure function */ +} + +noinline void _mcount(unsigned long parent_ip) +{ + /* save all state by the compiler prologue */ + + unsigned long ip = (unsigned long)__builtin_return_address(0); + + if (ftrace_trace_function != ftrace_stub) + ftrace_trace_function(ip - MCOUNT_INSN_SIZE, parent_ip, + NULL, NULL); + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + if (ftrace_graph_return != (trace_func_graph_ret_t)ftrace_stub + || ftrace_graph_entry != ftrace_graph_entry_stub) + ftrace_graph_caller(); +#endif + + /* restore all state by the compiler epilogue */ +} +EXPORT_SYMBOL(_mcount); + +#else /* CONFIG_DYNAMIC_FTRACE */ + +noinline void __naked ftrace_stub(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *op, struct pt_regs *regs) +{ + __asm__ (""); /* avoid to optimize as pure function */ +} + +noinline void __naked _mcount(unsigned long parent_ip) +{ + __asm__ (""); /* avoid to optimize as pure function */ +} +EXPORT_SYMBOL(_mcount); + +#define XSTR(s) STR(s) +#define STR(s) #s +void _ftrace_caller(unsigned long parent_ip) +{ + /* save all state needed by the compiler prologue */ + + /* + * prepare arguments for real tracing function + * first arg : __builtin_return_address(0) - MCOUNT_INSN_SIZE + * second arg : parent_ip + */ + __asm__ __volatile__ ( + "move $r1, %0 \n\t" + "addi $r0, %1, #-" XSTR(MCOUNT_INSN_SIZE) "\n\t" + : + : "r" (parent_ip), "r" (__builtin_return_address(0))); + + /* a placeholder for the call to a real tracing function */ + __asm__ __volatile__ ( + "ftrace_call: \n\t" + "nop \n\t" + "nop \n\t" + "nop \n\t"); + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + /* a placeholder for the call to ftrace_graph_caller */ + __asm__ __volatile__ ( + "ftrace_graph_call: \n\t" + "nop \n\t" + "nop \n\t" + "nop \n\t"); +#endif + /* restore all state needed by the compiler epilogue */ +} + +int __init ftrace_dyn_arch_init(void) +{ + return 0; +} + +int ftrace_arch_code_modify_prepare(void) +{ + set_all_modules_text_rw(); + return 0; +} + +int ftrace_arch_code_modify_post_process(void) +{ + set_all_modules_text_ro(); + return 0; +} + +static unsigned long gen_sethi_insn(unsigned long addr) +{ + unsigned long opcode = 0x46000000; + unsigned long imm = addr >> 12; + unsigned long rt_num = 0xf << 20; + + return ENDIAN_CONVERT(opcode | rt_num | imm); +} + +static unsigned long gen_ori_insn(unsigned long addr) +{ + unsigned long opcode = 0x58000000; + unsigned long imm = addr & 0x0000fff; + unsigned long rt_num = 0xf << 20; + unsigned long ra_num = 0xf << 15; + + return ENDIAN_CONVERT(opcode | rt_num | ra_num | imm); +} + +static unsigned long gen_jral_insn(unsigned long addr) +{ + unsigned long opcode = 0x4a000001; + unsigned long rt_num = 0x1e << 20; + unsigned long rb_num = 0xf << 10; + + return ENDIAN_CONVERT(opcode | rt_num | rb_num); +} + +static void ftrace_gen_call_insn(unsigned long *call_insns, + unsigned long addr) +{ + call_insns[0] = gen_sethi_insn(addr); /* sethi $r15, imm20u */ + call_insns[1] = gen_ori_insn(addr); /* ori $r15, $r15, imm15u */ + call_insns[2] = gen_jral_insn(addr); /* jral $lp, $r15 */ +} + +static int __ftrace_modify_code(unsigned long pc, unsigned long *old_insn, + unsigned long *new_insn, bool validate) +{ + unsigned long orig_insn[3]; + + if (validate) { + if (probe_kernel_read(orig_insn, (void *)pc, MCOUNT_INSN_SIZE)) + return -EFAULT; + if (memcmp(orig_insn, old_insn, MCOUNT_INSN_SIZE)) + return -EINVAL; + } + + if (probe_kernel_write((void *)pc, new_insn, MCOUNT_INSN_SIZE)) + return -EPERM; + + return 0; +} + +static int ftrace_modify_code(unsigned long pc, unsigned long *old_insn, + unsigned long *new_insn, bool validate) +{ + int ret; + + ret = __ftrace_modify_code(pc, old_insn, new_insn, validate); + if (ret) + return ret; + + flush_icache_range(pc, pc + MCOUNT_INSN_SIZE); + + return ret; +} + +int ftrace_update_ftrace_func(ftrace_func_t func) +{ + unsigned long pc = (unsigned long)&ftrace_call; + unsigned long old_insn[3] = {INSN_NOP, INSN_NOP, INSN_NOP}; + unsigned long new_insn[3] = {INSN_NOP, INSN_NOP, INSN_NOP}; + + if (func != ftrace_stub) + ftrace_gen_call_insn(new_insn, (unsigned long)func); + + return ftrace_modify_code(pc, old_insn, new_insn, false); +} + +int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) +{ + unsigned long pc = rec->ip; + unsigned long nop_insn[3] = {INSN_NOP, INSN_NOP, INSN_NOP}; + unsigned long call_insn[3] = {INSN_NOP, INSN_NOP, INSN_NOP}; + + ftrace_gen_call_insn(call_insn, addr); + + return ftrace_modify_code(pc, nop_insn, call_insn, true); +} + +int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, + unsigned long addr) +{ + unsigned long pc = rec->ip; + unsigned long nop_insn[3] = {INSN_NOP, INSN_NOP, INSN_NOP}; + unsigned long call_insn[3] = {INSN_NOP, INSN_NOP, INSN_NOP}; + + ftrace_gen_call_insn(call_insn, addr); + + return ftrace_modify_code(pc, call_insn, nop_insn, true); +} +#endif /* CONFIG_DYNAMIC_FTRACE */ + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr, + unsigned long frame_pointer) +{ + unsigned long return_hooker = (unsigned long)&return_to_handler; + struct ftrace_graph_ent trace; + unsigned long old; + int err; + + if (unlikely(atomic_read(¤t->tracing_graph_pause))) + return; + + old = *parent; + + trace.func = self_addr; + trace.depth = current->curr_ret_stack + 1; + + /* Only trace if the calling function expects to */ + if (!ftrace_graph_entry(&trace)) + return; + + err = ftrace_push_return_trace(old, self_addr, &trace.depth, + frame_pointer, NULL); + + if (err == -EBUSY) + return; + + *parent = return_hooker; +} + +noinline void ftrace_graph_caller(void) +{ + unsigned long *parent_ip = + (unsigned long *)(__builtin_frame_address(2) - 4); + + unsigned long selfpc = + (unsigned long)(__builtin_return_address(1) - MCOUNT_INSN_SIZE); + + unsigned long frame_pointer = + (unsigned long)__builtin_frame_address(3); + + prepare_ftrace_return(parent_ip, selfpc, frame_pointer); +} + +extern unsigned long ftrace_return_to_handler(unsigned long frame_pointer); +void __naked return_to_handler(void) +{ + __asm__ __volatile__ ( + /* save state needed by the ABI */ + "smw.adm $r0,[$sp],$r1,#0x0 \n\t" + + /* get original return address */ + "move $r0, $fp \n\t" + "bal ftrace_return_to_handler\n\t" + "move $lp, $r0 \n\t" + + /* restore state nedded by the ABI */ + "lmw.bim $r0,[$sp],$r1,#0x0 \n\t"); +} + +#ifdef CONFIG_DYNAMIC_FTRACE +extern unsigned long ftrace_graph_call; + +static int ftrace_modify_graph_caller(bool enable) +{ + unsigned long pc = (unsigned long)&ftrace_graph_call; + unsigned long nop_insn[3] = {INSN_NOP, INSN_NOP, INSN_NOP}; + unsigned long call_insn[3] = {INSN_NOP, INSN_NOP, INSN_NOP}; + + ftrace_gen_call_insn(call_insn, (unsigned long)ftrace_graph_caller); + + if (enable) + return ftrace_modify_code(pc, nop_insn, call_insn, true); + else + return ftrace_modify_code(pc, call_insn, nop_insn, true); +} + +int ftrace_enable_ftrace_graph_caller(void) +{ + return ftrace_modify_graph_caller(true); +} + +int ftrace_disable_ftrace_graph_caller(void) +{ + return ftrace_modify_graph_caller(false); +} +#endif /* CONFIG_DYNAMIC_FTRACE */ + +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ + + +#ifdef CONFIG_TRACE_IRQFLAGS +noinline void __trace_hardirqs_off(void) +{ + trace_hardirqs_off(); +} +noinline void __trace_hardirqs_on(void) +{ + trace_hardirqs_on(); +} +#endif /* CONFIG_TRACE_IRQFLAGS */ diff --git a/arch/nds32/kernel/module.c b/arch/nds32/kernel/module.c index 4167283d8293..1e31829cbc2a 100644 --- a/arch/nds32/kernel/module.c +++ b/arch/nds32/kernel/module.c @@ -40,7 +40,7 @@ void do_reloc16(unsigned int val, unsigned int *loc, unsigned int val_mask, tmp2 = tmp & loc_mask; if (partial_in_place) { - tmp &= (!loc_mask); + tmp &= (~loc_mask); tmp = tmp2 | ((tmp + ((val & val_mask) >> val_shift)) & val_mask); } else { @@ -70,7 +70,7 @@ void do_reloc32(unsigned int val, unsigned int *loc, unsigned int val_mask, tmp2 = tmp & loc_mask; if (partial_in_place) { - tmp &= (!loc_mask); + tmp &= (~loc_mask); tmp = tmp2 | ((tmp + ((val & val_mask) >> val_shift)) & val_mask); } else { diff --git a/arch/nds32/kernel/stacktrace.c b/arch/nds32/kernel/stacktrace.c index 8b231e910ea6..d974c0c1c65f 100644 --- a/arch/nds32/kernel/stacktrace.c +++ b/arch/nds32/kernel/stacktrace.c @@ -4,6 +4,7 @@ #include <linux/sched/debug.h> #include <linux/sched/task_stack.h> #include <linux/stacktrace.h> +#include <linux/ftrace.h> void save_stack_trace(struct stack_trace *trace) { @@ -16,6 +17,7 @@ void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) unsigned long *fpn; int skip = trace->skip; int savesched; + int graph_idx = 0; if (tsk == current) { __asm__ __volatile__("\tori\t%0, $fp, #0\n":"=r"(fpn)); @@ -29,10 +31,12 @@ void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) && (fpn >= (unsigned long *)TASK_SIZE)) { unsigned long lpp, fpp; - lpp = fpn[-1]; + lpp = fpn[LP_OFFSET]; fpp = fpn[FP_OFFSET]; if (!__kernel_text_address(lpp)) break; + else + lpp = ftrace_graph_ret_addr(tsk, &graph_idx, lpp, NULL); if (savesched || !in_sched_functions(lpp)) { if (skip) { diff --git a/arch/nds32/kernel/traps.c b/arch/nds32/kernel/traps.c index a6205fd4db52..1496aab48998 100644 --- a/arch/nds32/kernel/traps.c +++ b/arch/nds32/kernel/traps.c @@ -8,6 +8,7 @@ #include <linux/kdebug.h> #include <linux/sched/task_stack.h> #include <linux/uaccess.h> +#include <linux/ftrace.h> #include <asm/proc-fns.h> #include <asm/unistd.h> @@ -94,28 +95,6 @@ static void dump_instr(struct pt_regs *regs) set_fs(fs); } -#ifdef CONFIG_FUNCTION_GRAPH_TRACER -#include <linux/ftrace.h> -static void -get_real_ret_addr(unsigned long *addr, struct task_struct *tsk, int *graph) -{ - if (*addr == (unsigned long)return_to_handler) { - int index = tsk->curr_ret_stack; - - if (tsk->ret_stack && index >= *graph) { - index -= *graph; - *addr = tsk->ret_stack[index].ret; - (*graph)++; - } - } -} -#else -static inline void -get_real_ret_addr(unsigned long *addr, struct task_struct *tsk, int *graph) -{ -} -#endif - #define LOOP_TIMES (100) static void __dump(struct task_struct *tsk, unsigned long *base_reg) { @@ -126,7 +105,8 @@ static void __dump(struct task_struct *tsk, unsigned long *base_reg) while (!kstack_end(base_reg)) { ret_addr = *base_reg++; if (__kernel_text_address(ret_addr)) { - get_real_ret_addr(&ret_addr, tsk, &graph); + ret_addr = ftrace_graph_ret_addr( + tsk, &graph, ret_addr, NULL); print_ip_sym(ret_addr); } if (--cnt < 0) @@ -137,15 +117,12 @@ static void __dump(struct task_struct *tsk, unsigned long *base_reg) !((unsigned long)base_reg & 0x3) && ((unsigned long)base_reg >= TASK_SIZE)) { unsigned long next_fp; -#if !defined(NDS32_ABI_2) - ret_addr = base_reg[0]; - next_fp = base_reg[1]; -#else - ret_addr = base_reg[-1]; + ret_addr = base_reg[LP_OFFSET]; next_fp = base_reg[FP_OFFSET]; -#endif if (__kernel_text_address(ret_addr)) { - get_real_ret_addr(&ret_addr, tsk, &graph); + + ret_addr = ftrace_graph_ret_addr( + tsk, &graph, ret_addr, NULL); print_ip_sym(ret_addr); } if (--cnt < 0) @@ -196,11 +173,10 @@ void die(const char *str, struct pt_regs *regs, int err) pr_emerg("CPU: %i\n", smp_processor_id()); show_regs(regs); pr_emerg("Process %s (pid: %d, stack limit = 0x%p)\n", - tsk->comm, tsk->pid, task_thread_info(tsk) + 1); + tsk->comm, tsk->pid, end_of_stack(tsk)); if (!user_mode(regs) || in_interrupt()) { - dump_mem("Stack: ", regs->sp, - THREAD_SIZE + (unsigned long)task_thread_info(tsk)); + dump_mem("Stack: ", regs->sp, (regs->sp + PAGE_SIZE) & PAGE_MASK); dump_instr(regs); dump_stack(); } diff --git a/arch/nds32/kernel/vmlinux.lds.S b/arch/nds32/kernel/vmlinux.lds.S index 288313b886ef..9e90f30a181d 100644 --- a/arch/nds32/kernel/vmlinux.lds.S +++ b/arch/nds32/kernel/vmlinux.lds.S @@ -13,14 +13,26 @@ OUTPUT_ARCH(nds32) ENTRY(_stext_lma) jiffies = jiffies_64; +#if defined(CONFIG_GCOV_KERNEL) +#define NDS32_EXIT_KEEP(x) x +#else +#define NDS32_EXIT_KEEP(x) +#endif + SECTIONS { _stext_lma = TEXTADDR - LOAD_OFFSET; . = TEXTADDR; __init_begin = .; HEAD_TEXT_SECTION + .exit.text : { + NDS32_EXIT_KEEP(EXIT_TEXT) + } INIT_TEXT_SECTION(PAGE_SIZE) INIT_DATA_SECTION(16) + .exit.data : { + NDS32_EXIT_KEEP(EXIT_DATA) + } PERCPU_SECTION(L1_CACHE_BYTES) __init_end = .; diff --git a/arch/parisc/kernel/unwind.c b/arch/parisc/kernel/unwind.c index f329b466e68f..2d14f17838d2 100644 --- a/arch/parisc/kernel/unwind.c +++ b/arch/parisc/kernel/unwind.c @@ -426,7 +426,7 @@ void unwind_frame_init_task(struct unwind_frame_info *info, r.gr[30] = get_parisc_stackpointer(); regs = &r; } - unwind_frame_init(info, task, &r); + unwind_frame_init(info, task, regs); } else { unwind_frame_init_from_blocked_task(info, task); } diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index 13a688fc8cd0..2a2486526d1f 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -114,7 +114,7 @@ */ #define _HPAGE_CHG_MASK (PTE_RPN_MASK | _PAGE_HPTEFLAGS | _PAGE_DIRTY | \ _PAGE_ACCESSED | H_PAGE_THP_HUGE | _PAGE_PTE | \ - _PAGE_SOFT_DIRTY) + _PAGE_SOFT_DIRTY | _PAGE_DEVMAP) /* * user access blocked by key */ @@ -132,7 +132,7 @@ */ #define _PAGE_CHG_MASK (PTE_RPN_MASK | _PAGE_HPTEFLAGS | _PAGE_DIRTY | \ _PAGE_ACCESSED | _PAGE_SPECIAL | _PAGE_PTE | \ - _PAGE_SOFT_DIRTY) + _PAGE_SOFT_DIRTY | _PAGE_DEVMAP) #define H_PTE_PKEY (H_PTE_PKEY_BIT0 | H_PTE_PKEY_BIT1 | H_PTE_PKEY_BIT2 | \ H_PTE_PKEY_BIT3 | H_PTE_PKEY_BIT4) @@ -1051,7 +1051,6 @@ static inline void vmemmap_remove_mapping(unsigned long start, return hash__vmemmap_remove_mapping(start, page_size); } #endif -struct page *realmode_pfn_to_page(unsigned long pfn); static inline pte_t pmd_pte(pmd_t pmd) { diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h index ab3a4fba38e3..3d4b88cb8599 100644 --- a/arch/powerpc/include/asm/iommu.h +++ b/arch/powerpc/include/asm/iommu.h @@ -220,8 +220,6 @@ extern void iommu_del_device(struct device *dev); extern int __init tce_iommu_bus_notifier_init(void); extern long iommu_tce_xchg(struct iommu_table *tbl, unsigned long entry, unsigned long *hpa, enum dma_data_direction *direction); -extern long iommu_tce_xchg_rm(struct iommu_table *tbl, unsigned long entry, - unsigned long *hpa, enum dma_data_direction *direction); #else static inline void iommu_register_group(struct iommu_table_group *table_group, int pci_domain_number, diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h index b2f89b621b15..b694d6af1150 100644 --- a/arch/powerpc/include/asm/mmu_context.h +++ b/arch/powerpc/include/asm/mmu_context.h @@ -38,6 +38,7 @@ extern long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem, unsigned long ua, unsigned int pageshift, unsigned long *hpa); extern long mm_iommu_ua_to_hpa_rm(struct mm_iommu_table_group_mem_t *mem, unsigned long ua, unsigned int pageshift, unsigned long *hpa); +extern void mm_iommu_ua_mark_dirty_rm(struct mm_struct *mm, unsigned long ua); extern long mm_iommu_mapped_inc(struct mm_iommu_table_group_mem_t *mem); extern void mm_iommu_mapped_dec(struct mm_iommu_table_group_mem_t *mem); #endif diff --git a/arch/powerpc/include/asm/setup.h b/arch/powerpc/include/asm/setup.h index 1a951b00465d..1fffbba8d6a5 100644 --- a/arch/powerpc/include/asm/setup.h +++ b/arch/powerpc/include/asm/setup.h @@ -9,6 +9,7 @@ extern void ppc_printk_progress(char *s, unsigned short hex); extern unsigned int rtas_data; extern unsigned long long memory_limit; +extern bool init_mem_is_free; extern unsigned long klimit; extern void *zalloc_maybe_bootmem(size_t size, gfp_t mask); diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index ea04dfb8c092..2d8fc8c9da7a 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -1314,9 +1314,7 @@ EXC_REAL_BEGIN(denorm_exception_hv, 0x1500, 0x100) #ifdef CONFIG_PPC_DENORMALISATION mfspr r10,SPRN_HSRR1 - mfspr r11,SPRN_HSRR0 /* save HSRR0 */ andis. r10,r10,(HSRR1_DENORM)@h /* denorm? */ - addi r11,r11,-4 /* HSRR0 is next instruction */ bne+ denorm_assist #endif @@ -1382,6 +1380,8 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) */ XVCPSGNDP32(32) denorm_done: + mfspr r11,SPRN_HSRR0 + subi r11,r11,4 mtspr SPRN_HSRR0,r11 mtcrf 0x80,r9 ld r9,PACA_EXGEN+EX_R9(r13) diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index af7a20dc6e09..19b4c628f3be 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -1013,31 +1013,6 @@ long iommu_tce_xchg(struct iommu_table *tbl, unsigned long entry, } EXPORT_SYMBOL_GPL(iommu_tce_xchg); -#ifdef CONFIG_PPC_BOOK3S_64 -long iommu_tce_xchg_rm(struct iommu_table *tbl, unsigned long entry, - unsigned long *hpa, enum dma_data_direction *direction) -{ - long ret; - - ret = tbl->it_ops->exchange_rm(tbl, entry, hpa, direction); - - if (!ret && ((*direction == DMA_FROM_DEVICE) || - (*direction == DMA_BIDIRECTIONAL))) { - struct page *pg = realmode_pfn_to_page(*hpa >> PAGE_SHIFT); - - if (likely(pg)) { - SetPageDirty(pg); - } else { - tbl->it_ops->exchange_rm(tbl, entry, hpa, direction); - ret = -EFAULT; - } - } - - return ret; -} -EXPORT_SYMBOL_GPL(iommu_tce_xchg_rm); -#endif - int iommu_take_ownership(struct iommu_table *tbl) { unsigned long flags, i, sz = (tbl->it_size + 7) >> 3; diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 913c5725cdb2..bb6ac471a784 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1306,6 +1306,16 @@ void show_user_instructions(struct pt_regs *regs) pc = regs->nip - (instructions_to_print * 3 / 4 * sizeof(int)); + /* + * Make sure the NIP points at userspace, not kernel text/data or + * elsewhere. + */ + if (!__access_ok(pc, instructions_to_print * sizeof(int), USER_DS)) { + pr_info("%s[%d]: Bad NIP, not dumping instructions.\n", + current->comm, current->pid); + return; + } + pr_info("%s[%d]: code: ", current->comm, current->pid); for (i = 0; i < instructions_to_print; i++) { diff --git a/arch/powerpc/kernel/tm.S b/arch/powerpc/kernel/tm.S index 6bffbc5affe7..7716374786bd 100644 --- a/arch/powerpc/kernel/tm.S +++ b/arch/powerpc/kernel/tm.S @@ -176,13 +176,27 @@ _GLOBAL(tm_reclaim) std r1, PACATMSCRATCH(r13) ld r1, PACAR1(r13) - /* Store the PPR in r11 and reset to decent value */ std r11, GPR11(r1) /* Temporary stash */ + /* + * Move the saved user r1 to the kernel stack in case PACATMSCRATCH is + * clobbered by an exception once we turn on MSR_RI below. + */ + ld r11, PACATMSCRATCH(r13) + std r11, GPR1(r1) + + /* + * Store r13 away so we can free up the scratch SPR for the SLB fault + * handler (needed once we start accessing the thread_struct). + */ + GET_SCRATCH0(r11) + std r11, GPR13(r1) + /* Reset MSR RI so we can take SLB faults again */ li r11, MSR_RI mtmsrd r11, 1 + /* Store the PPR in r11 and reset to decent value */ mfspr r11, SPRN_PPR HMT_MEDIUM @@ -207,11 +221,11 @@ _GLOBAL(tm_reclaim) SAVE_GPR(8, r7) /* user r8 */ SAVE_GPR(9, r7) /* user r9 */ SAVE_GPR(10, r7) /* user r10 */ - ld r3, PACATMSCRATCH(r13) /* user r1 */ + ld r3, GPR1(r1) /* user r1 */ ld r4, GPR7(r1) /* user r7 */ ld r5, GPR11(r1) /* user r11 */ ld r6, GPR12(r1) /* user r12 */ - GET_SCRATCH0(8) /* user r13 */ + ld r8, GPR13(r1) /* user r13 */ std r3, GPR1(r7) std r4, GPR7(r7) std r5, GPR11(r7) diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index 3c0e8fb2b773..68e14afecac8 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -358,7 +358,7 @@ static int kvmppc_mmu_book3s_64_hv_xlate(struct kvm_vcpu *vcpu, gva_t eaddr, unsigned long pp, key; unsigned long v, orig_v, gr; __be64 *hptep; - int index; + long int index; int virtmode = vcpu->arch.shregs.msr & (data ? MSR_DR : MSR_IR); if (kvm_is_radix(vcpu->kvm)) diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c index 0af1c0aea1fe..998f8d089ac7 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_radix.c +++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c @@ -525,8 +525,8 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, unsigned long ea, unsigned long dsisr) { struct kvm *kvm = vcpu->kvm; - unsigned long mmu_seq, pte_size; - unsigned long gpa, gfn, hva, pfn; + unsigned long mmu_seq; + unsigned long gpa, gfn, hva; struct kvm_memory_slot *memslot; struct page *page = NULL; long ret; @@ -623,9 +623,10 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, */ hva = gfn_to_hva_memslot(memslot, gfn); if (upgrade_p && __get_user_pages_fast(hva, 1, 1, &page) == 1) { - pfn = page_to_pfn(page); upgrade_write = true; } else { + unsigned long pfn; + /* Call KVM generic code to do the slow-path check */ pfn = __gfn_to_pfn_memslot(memslot, gfn, false, NULL, writing, upgrade_p); @@ -639,63 +640,55 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, } } - /* See if we can insert a 1GB or 2MB large PTE here */ - level = 0; - if (page && PageCompound(page)) { - pte_size = PAGE_SIZE << compound_order(compound_head(page)); - if (pte_size >= PUD_SIZE && - (gpa & (PUD_SIZE - PAGE_SIZE)) == - (hva & (PUD_SIZE - PAGE_SIZE))) { - level = 2; - pfn &= ~((PUD_SIZE >> PAGE_SHIFT) - 1); - } else if (pte_size >= PMD_SIZE && - (gpa & (PMD_SIZE - PAGE_SIZE)) == - (hva & (PMD_SIZE - PAGE_SIZE))) { - level = 1; - pfn &= ~((PMD_SIZE >> PAGE_SHIFT) - 1); - } - } - /* - * Compute the PTE value that we need to insert. + * Read the PTE from the process' radix tree and use that + * so we get the shift and attribute bits. */ - if (page) { - pgflags = _PAGE_READ | _PAGE_EXEC | _PAGE_PRESENT | _PAGE_PTE | - _PAGE_ACCESSED; - if (writing || upgrade_write) - pgflags |= _PAGE_WRITE | _PAGE_DIRTY; - pte = pfn_pte(pfn, __pgprot(pgflags)); - } else { - /* - * Read the PTE from the process' radix tree and use that - * so we get the attribute bits. - */ - local_irq_disable(); - ptep = __find_linux_pte(vcpu->arch.pgdir, hva, NULL, &shift); - pte = *ptep; + local_irq_disable(); + ptep = __find_linux_pte(vcpu->arch.pgdir, hva, NULL, &shift); + /* + * If the PTE disappeared temporarily due to a THP + * collapse, just return and let the guest try again. + */ + if (!ptep) { local_irq_enable(); - if (shift == PUD_SHIFT && - (gpa & (PUD_SIZE - PAGE_SIZE)) == - (hva & (PUD_SIZE - PAGE_SIZE))) { - level = 2; - } else if (shift == PMD_SHIFT && - (gpa & (PMD_SIZE - PAGE_SIZE)) == - (hva & (PMD_SIZE - PAGE_SIZE))) { - level = 1; - } else if (shift && shift != PAGE_SHIFT) { - /* Adjust PFN */ - unsigned long mask = (1ul << shift) - PAGE_SIZE; - pte = __pte(pte_val(pte) | (hva & mask)); - } - pte = __pte(pte_val(pte) | _PAGE_EXEC | _PAGE_ACCESSED); - if (writing || upgrade_write) { - if (pte_val(pte) & _PAGE_WRITE) - pte = __pte(pte_val(pte) | _PAGE_DIRTY); - } else { - pte = __pte(pte_val(pte) & ~(_PAGE_WRITE | _PAGE_DIRTY)); + if (page) + put_page(page); + return RESUME_GUEST; + } + pte = *ptep; + local_irq_enable(); + + /* Get pte level from shift/size */ + if (shift == PUD_SHIFT && + (gpa & (PUD_SIZE - PAGE_SIZE)) == + (hva & (PUD_SIZE - PAGE_SIZE))) { + level = 2; + } else if (shift == PMD_SHIFT && + (gpa & (PMD_SIZE - PAGE_SIZE)) == + (hva & (PMD_SIZE - PAGE_SIZE))) { + level = 1; + } else { + level = 0; + if (shift > PAGE_SHIFT) { + /* + * If the pte maps more than one page, bring over + * bits from the virtual address to get the real + * address of the specific single page we want. + */ + unsigned long rpnmask = (1ul << shift) - PAGE_SIZE; + pte = __pte(pte_val(pte) | (hva & rpnmask)); } } + pte = __pte(pte_val(pte) | _PAGE_EXEC | _PAGE_ACCESSED); + if (writing || upgrade_write) { + if (pte_val(pte) & _PAGE_WRITE) + pte = __pte(pte_val(pte) | _PAGE_DIRTY); + } else { + pte = __pte(pte_val(pte) & ~(_PAGE_WRITE | _PAGE_DIRTY)); + } + /* Allocate space in the tree and write the PTE */ ret = kvmppc_create_pte(kvm, pte, gpa, level, mmu_seq); @@ -725,10 +718,10 @@ int kvm_unmap_radix(struct kvm *kvm, struct kvm_memory_slot *memslot, gpa, shift); kvmppc_radix_tlbie_page(kvm, gpa, shift); if ((old & _PAGE_DIRTY) && memslot->dirty_bitmap) { - unsigned long npages = 1; + unsigned long psize = PAGE_SIZE; if (shift) - npages = 1ul << (shift - PAGE_SHIFT); - kvmppc_update_dirty_map(memslot, gfn, npages); + psize = 1ul << shift; + kvmppc_update_dirty_map(memslot, gfn, psize); } } return 0; diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c index 506a4d400458..6821ead4b4eb 100644 --- a/arch/powerpc/kvm/book3s_64_vio_hv.c +++ b/arch/powerpc/kvm/book3s_64_vio_hv.c @@ -187,12 +187,35 @@ long kvmppc_gpa_to_ua(struct kvm *kvm, unsigned long gpa, EXPORT_SYMBOL_GPL(kvmppc_gpa_to_ua); #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE -static void kvmppc_rm_clear_tce(struct iommu_table *tbl, unsigned long entry) +static long iommu_tce_xchg_rm(struct mm_struct *mm, struct iommu_table *tbl, + unsigned long entry, unsigned long *hpa, + enum dma_data_direction *direction) +{ + long ret; + + ret = tbl->it_ops->exchange_rm(tbl, entry, hpa, direction); + + if (!ret && ((*direction == DMA_FROM_DEVICE) || + (*direction == DMA_BIDIRECTIONAL))) { + __be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY_RM(tbl, entry); + /* + * kvmppc_rm_tce_iommu_do_map() updates the UA cache after + * calling this so we still get here a valid UA. + */ + if (pua && *pua) + mm_iommu_ua_mark_dirty_rm(mm, be64_to_cpu(*pua)); + } + + return ret; +} + +static void kvmppc_rm_clear_tce(struct kvm *kvm, struct iommu_table *tbl, + unsigned long entry) { unsigned long hpa = 0; enum dma_data_direction dir = DMA_NONE; - iommu_tce_xchg_rm(tbl, entry, &hpa, &dir); + iommu_tce_xchg_rm(kvm->mm, tbl, entry, &hpa, &dir); } static long kvmppc_rm_tce_iommu_mapped_dec(struct kvm *kvm, @@ -224,7 +247,7 @@ static long kvmppc_rm_tce_iommu_do_unmap(struct kvm *kvm, unsigned long hpa = 0; long ret; - if (iommu_tce_xchg_rm(tbl, entry, &hpa, &dir)) + if (iommu_tce_xchg_rm(kvm->mm, tbl, entry, &hpa, &dir)) /* * real mode xchg can fail if struct page crosses * a page boundary @@ -236,7 +259,7 @@ static long kvmppc_rm_tce_iommu_do_unmap(struct kvm *kvm, ret = kvmppc_rm_tce_iommu_mapped_dec(kvm, tbl, entry); if (ret) - iommu_tce_xchg_rm(tbl, entry, &hpa, &dir); + iommu_tce_xchg_rm(kvm->mm, tbl, entry, &hpa, &dir); return ret; } @@ -282,7 +305,7 @@ static long kvmppc_rm_tce_iommu_do_map(struct kvm *kvm, struct iommu_table *tbl, if (WARN_ON_ONCE_RM(mm_iommu_mapped_inc(mem))) return H_CLOSED; - ret = iommu_tce_xchg_rm(tbl, entry, &hpa, &dir); + ret = iommu_tce_xchg_rm(kvm->mm, tbl, entry, &hpa, &dir); if (ret) { mm_iommu_mapped_dec(mem); /* @@ -371,7 +394,7 @@ long kvmppc_rm_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn, return ret; WARN_ON_ONCE_RM(1); - kvmppc_rm_clear_tce(stit->tbl, entry); + kvmppc_rm_clear_tce(vcpu->kvm, stit->tbl, entry); } kvmppc_tce_put(stt, entry, tce); @@ -520,7 +543,7 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu, goto unlock_exit; WARN_ON_ONCE_RM(1); - kvmppc_rm_clear_tce(stit->tbl, entry); + kvmppc_rm_clear_tce(vcpu->kvm, stit->tbl, entry); } kvmppc_tce_put(stt, entry + i, tce); @@ -571,7 +594,7 @@ long kvmppc_rm_h_stuff_tce(struct kvm_vcpu *vcpu, return ret; WARN_ON_ONCE_RM(1); - kvmppc_rm_clear_tce(stit->tbl, entry); + kvmppc_rm_clear_tce(vcpu->kvm, stit->tbl, entry); } } diff --git a/arch/powerpc/lib/checksum_64.S b/arch/powerpc/lib/checksum_64.S index 886ed94b9c13..d05c8af4ac51 100644 --- a/arch/powerpc/lib/checksum_64.S +++ b/arch/powerpc/lib/checksum_64.S @@ -443,6 +443,9 @@ _GLOBAL(csum_ipv6_magic) addc r0, r8, r9 ld r10, 0(r4) ld r11, 8(r4) +#ifdef CONFIG_CPU_LITTLE_ENDIAN + rotldi r5, r5, 8 +#endif adde r0, r0, r10 add r5, r5, r7 adde r0, r0, r11 diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c index 850f3b8f4da5..5ffee298745f 100644 --- a/arch/powerpc/lib/code-patching.c +++ b/arch/powerpc/lib/code-patching.c @@ -142,7 +142,7 @@ static inline int unmap_patch_area(unsigned long addr) return 0; } -int patch_instruction(unsigned int *addr, unsigned int instr) +static int do_patch_instruction(unsigned int *addr, unsigned int instr) { int err; unsigned int *patch_addr = NULL; @@ -182,12 +182,22 @@ out: } #else /* !CONFIG_STRICT_KERNEL_RWX */ -int patch_instruction(unsigned int *addr, unsigned int instr) +static int do_patch_instruction(unsigned int *addr, unsigned int instr) { return raw_patch_instruction(addr, instr); } #endif /* CONFIG_STRICT_KERNEL_RWX */ + +int patch_instruction(unsigned int *addr, unsigned int instr) +{ + /* Make sure we aren't patching a freed init section */ + if (init_mem_is_free && init_section_contains(addr, 4)) { + pr_debug("Skipping init section patching addr: 0x%px\n", addr); + return 0; + } + return do_patch_instruction(addr, instr); +} NOKPROBE_SYMBOL(patch_instruction); int patch_branch(unsigned int *addr, unsigned long target, int flags) diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index 51ce091914f9..7a9886f98b0c 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c @@ -308,55 +308,6 @@ void register_page_bootmem_memmap(unsigned long section_nr, { } -/* - * We do not have access to the sparsemem vmemmap, so we fallback to - * walking the list of sparsemem blocks which we already maintain for - * the sake of crashdump. In the long run, we might want to maintain - * a tree if performance of that linear walk becomes a problem. - * - * realmode_pfn_to_page functions can fail due to: - * 1) As real sparsemem blocks do not lay in RAM continously (they - * are in virtual address space which is not available in the real mode), - * the requested page struct can be split between blocks so get_page/put_page - * may fail. - * 2) When huge pages are used, the get_page/put_page API will fail - * in real mode as the linked addresses in the page struct are virtual - * too. - */ -struct page *realmode_pfn_to_page(unsigned long pfn) -{ - struct vmemmap_backing *vmem_back; - struct page *page; - unsigned long page_size = 1 << mmu_psize_defs[mmu_vmemmap_psize].shift; - unsigned long pg_va = (unsigned long) pfn_to_page(pfn); - - for (vmem_back = vmemmap_list; vmem_back; vmem_back = vmem_back->list) { - if (pg_va < vmem_back->virt_addr) - continue; - - /* After vmemmap_list entry free is possible, need check all */ - if ((pg_va + sizeof(struct page)) <= - (vmem_back->virt_addr + page_size)) { - page = (struct page *) (vmem_back->phys + pg_va - - vmem_back->virt_addr); - return page; - } - } - - /* Probably that page struct is split between real pages */ - return NULL; -} -EXPORT_SYMBOL_GPL(realmode_pfn_to_page); - -#else - -struct page *realmode_pfn_to_page(unsigned long pfn) -{ - struct page *page = pfn_to_page(pfn); - return page; -} -EXPORT_SYMBOL_GPL(realmode_pfn_to_page); - #endif /* CONFIG_SPARSEMEM_VMEMMAP */ #ifdef CONFIG_PPC_BOOK3S_64 diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 5c8530d0c611..04ccb274a620 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -63,6 +63,7 @@ #endif unsigned long long memory_limit; +bool init_mem_is_free; #ifdef CONFIG_HIGHMEM pte_t *kmap_pte; @@ -396,6 +397,7 @@ void free_initmem(void) { ppc_md.progress = ppc_printk_progress; mark_initmem_nx(); + init_mem_is_free = true; free_initmem_default(POISON_FREE_INITMEM); } diff --git a/arch/powerpc/mm/mmu_context_iommu.c b/arch/powerpc/mm/mmu_context_iommu.c index c9ee9e23845f..56c2234cc6ae 100644 --- a/arch/powerpc/mm/mmu_context_iommu.c +++ b/arch/powerpc/mm/mmu_context_iommu.c @@ -18,11 +18,15 @@ #include <linux/migrate.h> #include <linux/hugetlb.h> #include <linux/swap.h> +#include <linux/sizes.h> #include <asm/mmu_context.h> #include <asm/pte-walk.h> static DEFINE_MUTEX(mem_list_mutex); +#define MM_IOMMU_TABLE_GROUP_PAGE_DIRTY 0x1 +#define MM_IOMMU_TABLE_GROUP_PAGE_MASK ~(SZ_4K - 1) + struct mm_iommu_table_group_mem_t { struct list_head next; struct rcu_head rcu; @@ -263,6 +267,9 @@ static void mm_iommu_unpin(struct mm_iommu_table_group_mem_t *mem) if (!page) continue; + if (mem->hpas[i] & MM_IOMMU_TABLE_GROUP_PAGE_DIRTY) + SetPageDirty(page); + put_page(page); mem->hpas[i] = 0; } @@ -360,7 +367,6 @@ struct mm_iommu_table_group_mem_t *mm_iommu_lookup_rm(struct mm_struct *mm, return ret; } -EXPORT_SYMBOL_GPL(mm_iommu_lookup_rm); struct mm_iommu_table_group_mem_t *mm_iommu_find(struct mm_struct *mm, unsigned long ua, unsigned long entries) @@ -390,7 +396,7 @@ long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem, if (pageshift > mem->pageshift) return -EFAULT; - *hpa = *va | (ua & ~PAGE_MASK); + *hpa = (*va & MM_IOMMU_TABLE_GROUP_PAGE_MASK) | (ua & ~PAGE_MASK); return 0; } @@ -413,11 +419,31 @@ long mm_iommu_ua_to_hpa_rm(struct mm_iommu_table_group_mem_t *mem, if (!pa) return -EFAULT; - *hpa = *pa | (ua & ~PAGE_MASK); + *hpa = (*pa & MM_IOMMU_TABLE_GROUP_PAGE_MASK) | (ua & ~PAGE_MASK); return 0; } -EXPORT_SYMBOL_GPL(mm_iommu_ua_to_hpa_rm); + +extern void mm_iommu_ua_mark_dirty_rm(struct mm_struct *mm, unsigned long ua) +{ + struct mm_iommu_table_group_mem_t *mem; + long entry; + void *va; + unsigned long *pa; + + mem = mm_iommu_lookup_rm(mm, ua, PAGE_SIZE); + if (!mem) + return; + + entry = (ua - mem->ua) >> PAGE_SHIFT; + va = &mem->hpas[entry]; + + pa = (void *) vmalloc_to_phys(va); + if (!pa) + return; + + *pa |= MM_IOMMU_TABLE_GROUP_PAGE_DIRTY; +} long mm_iommu_mapped_inc(struct mm_iommu_table_group_mem_t *mem) { diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 35ac5422903a..055b211b7126 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -1204,7 +1204,9 @@ int find_and_online_cpu_nid(int cpu) int new_nid; /* Use associativity from first thread for all siblings */ - vphn_get_associativity(cpu, associativity); + if (vphn_get_associativity(cpu, associativity)) + return cpu_to_node(cpu); + new_nid = associativity_to_nid(associativity); if (new_nid < 0 || !node_possible(new_nid)) new_nid = first_online_node; @@ -1215,9 +1217,10 @@ int find_and_online_cpu_nid(int cpu) * Need to ensure that NODE_DATA is initialized for a node from * available memory (see memblock_alloc_try_nid). If unable to * init the node, then default to nearest node that has memory - * installed. + * installed. Skip onlining a node if the subsystems are not + * yet initialized. */ - if (try_online_node(new_nid)) + if (!topology_inited || try_online_node(new_nid)) new_nid = first_online_node; #else /* @@ -1452,7 +1455,8 @@ static struct timer_list topology_timer; static void reset_topology_timer(void) { - mod_timer(&topology_timer, jiffies + topology_timer_secs * HZ); + if (vphn_enabled) + mod_timer(&topology_timer, jiffies + topology_timer_secs * HZ); } #ifdef CONFIG_SMP diff --git a/arch/powerpc/mm/pkeys.c b/arch/powerpc/mm/pkeys.c index 333b1f80c435..b271b283c785 100644 --- a/arch/powerpc/mm/pkeys.c +++ b/arch/powerpc/mm/pkeys.c @@ -45,7 +45,7 @@ static void scan_pkey_feature(void) * Since any pkey can be used for data or execute, we will just treat * all keys as equal and track them as one entity. */ - pkeys_total = be32_to_cpu(vals[0]); + pkeys_total = vals[0]; pkeys_devtree_defined = true; } diff --git a/arch/powerpc/platforms/powernv/pci-ioda-tce.c b/arch/powerpc/platforms/powernv/pci-ioda-tce.c index 6c5db1acbe8d..fe9691040f54 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda-tce.c +++ b/arch/powerpc/platforms/powernv/pci-ioda-tce.c @@ -276,7 +276,7 @@ long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset, level_shift = entries_shift + 3; level_shift = max_t(unsigned int, level_shift, PAGE_SHIFT); - if ((level_shift - 3) * levels + page_shift >= 60) + if ((level_shift - 3) * levels + page_shift >= 55) return -EINVAL; /* Allocate TCE table */ diff --git a/arch/riscv/include/asm/asm-prototypes.h b/arch/riscv/include/asm/asm-prototypes.h new file mode 100644 index 000000000000..c9fecd120d18 --- /dev/null +++ b/arch/riscv/include/asm/asm-prototypes.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_RISCV_PROTOTYPES_H + +#include <linux/ftrace.h> +#include <asm-generic/asm-prototypes.h> + +#endif /* _ASM_RISCV_PROTOTYPES_H */ diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c index db20dc630e7e..b2d26d9d8489 100644 --- a/arch/riscv/kernel/setup.c +++ b/arch/riscv/kernel/setup.c @@ -85,15 +85,8 @@ atomic_t hart_lottery; #ifdef CONFIG_BLK_DEV_INITRD static void __init setup_initrd(void) { - extern char __initramfs_start[]; - extern unsigned long __initramfs_size; unsigned long size; - if (__initramfs_size > 0) { - initrd_start = (unsigned long)(&__initramfs_start); - initrd_end = initrd_start + __initramfs_size; - } - if (initrd_start >= initrd_end) { printk(KERN_INFO "initrd not found or empty"); goto disable; @@ -193,7 +186,7 @@ static void __init setup_bootmem(void) BUG_ON(mem_size == 0); set_max_mapnr(PFN_DOWN(mem_size)); - max_low_pfn = pfn_base + PFN_DOWN(mem_size); + max_low_pfn = memblock_end_of_DRAM(); #ifdef CONFIG_BLK_DEV_INITRD setup_initrd(); diff --git a/arch/s390/crypto/paes_s390.c b/arch/s390/crypto/paes_s390.c index 80b27294c1de..ab9a0ebecc19 100644 --- a/arch/s390/crypto/paes_s390.c +++ b/arch/s390/crypto/paes_s390.c @@ -208,7 +208,7 @@ static int cbc_paes_crypt(struct blkcipher_desc *desc, unsigned long modifier, walk->dst.virt.addr, walk->src.virt.addr, n); if (k) ret = blkcipher_walk_done(desc, walk, nbytes - k); - if (n < k) { + if (k < n) { if (__cbc_paes_set_key(ctx) != 0) return blkcipher_walk_done(desc, walk, -EIO); memcpy(param.key, ctx->pk.protkey, MAXPROTKEYSIZE); diff --git a/arch/s390/include/asm/mmu.h b/arch/s390/include/asm/mmu.h index f31a15044c24..a8418e1379eb 100644 --- a/arch/s390/include/asm/mmu.h +++ b/arch/s390/include/asm/mmu.h @@ -16,7 +16,13 @@ typedef struct { unsigned long asce; unsigned long asce_limit; unsigned long vdso_base; - /* The mmu context allocates 4K page tables. */ + /* + * The following bitfields need a down_write on the mm + * semaphore when they are written to. As they are only + * written once, they can be read without a lock. + * + * The mmu context allocates 4K page tables. + */ unsigned int alloc_pgste:1; /* The mmu context uses extended page tables. */ unsigned int has_pgste:1; diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h index 3cae9168f63c..e44a8d7959f5 100644 --- a/arch/s390/include/asm/sclp.h +++ b/arch/s390/include/asm/sclp.h @@ -108,7 +108,8 @@ int sclp_early_get_core_info(struct sclp_core_info *info); void sclp_early_get_ipl_info(struct sclp_ipl_info *info); void sclp_early_detect(void); void sclp_early_printk(const char *s); -void __sclp_early_printk(const char *s, unsigned int len); +void sclp_early_printk_force(const char *s); +void __sclp_early_printk(const char *s, unsigned int len, unsigned int force); int _sclp_get_core_info(struct sclp_core_info *info); int sclp_core_configure(u8 core); diff --git a/arch/s390/kernel/early_printk.c b/arch/s390/kernel/early_printk.c index 9431784d7796..40c1dfec944e 100644 --- a/arch/s390/kernel/early_printk.c +++ b/arch/s390/kernel/early_printk.c @@ -10,7 +10,7 @@ static void sclp_early_write(struct console *con, const char *s, unsigned int len) { - __sclp_early_printk(s, len); + __sclp_early_printk(s, len, 0); } static struct console sclp_early_console = { diff --git a/arch/s390/kernel/swsusp.S b/arch/s390/kernel/swsusp.S index a049a7b9d6e8..c1a080b11ae9 100644 --- a/arch/s390/kernel/swsusp.S +++ b/arch/s390/kernel/swsusp.S @@ -198,12 +198,10 @@ pgm_check_entry: /* Suspend CPU not available -> panic */ larl %r15,init_thread_union - ahi %r15,1<<(PAGE_SHIFT+THREAD_SIZE_ORDER) + aghi %r15,1<<(PAGE_SHIFT+THREAD_SIZE_ORDER) + aghi %r15,-STACK_FRAME_OVERHEAD larl %r2,.Lpanic_string - lghi %r1,0 - sam31 - sigp %r1,%r0,SIGP_SET_ARCHITECTURE - brasl %r14,sclp_early_printk + brasl %r14,sclp_early_printk_force larl %r3,.Ldisabled_wait_31 lpsw 0(%r3) 4: diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 91ad4a9425c0..ac5da6b0b862 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -481,7 +481,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) break; case KVM_CAP_S390_HPAGE_1M: r = 0; - if (hpage) + if (hpage && !kvm_is_ucontrol(kvm)) r = 1; break; case KVM_CAP_S390_MEM_OP: @@ -691,11 +691,13 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap) mutex_lock(&kvm->lock); if (kvm->created_vcpus) r = -EBUSY; - else if (!hpage || kvm->arch.use_cmma) + else if (!hpage || kvm->arch.use_cmma || kvm_is_ucontrol(kvm)) r = -EINVAL; else { r = 0; + down_write(&kvm->mm->mmap_sem); kvm->mm->context.allow_gmap_hpage_1m = 1; + up_write(&kvm->mm->mmap_sem); /* * We might have to create fake 4k page * tables. To avoid that the hardware works on diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index d68f10441a16..8679bd74d337 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -280,9 +280,11 @@ retry: goto retry; } } - if (rc) - return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); up_read(¤t->mm->mmap_sem); + if (rc == -EFAULT) + return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + if (rc < 0) + return rc; vcpu->run->s.regs.gprs[reg1] &= ~0xff; vcpu->run->s.regs.gprs[reg1] |= key; return 0; @@ -324,9 +326,11 @@ retry: goto retry; } } - if (rc < 0) - return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); up_read(¤t->mm->mmap_sem); + if (rc == -EFAULT) + return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + if (rc < 0) + return rc; kvm_s390_set_psw_cc(vcpu, rc); return 0; } @@ -390,12 +394,12 @@ static int handle_sske(struct kvm_vcpu *vcpu) FAULT_FLAG_WRITE, &unlocked); rc = !rc ? -EAGAIN : rc; } + up_read(¤t->mm->mmap_sem); if (rc == -EFAULT) return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); - - up_read(¤t->mm->mmap_sem); - if (rc >= 0) - start += PAGE_SIZE; + if (rc < 0) + return rc; + start += PAGE_SIZE; } if (m3 & (SSKE_MC | SSKE_MR)) { @@ -1002,13 +1006,15 @@ static int handle_pfmf(struct kvm_vcpu *vcpu) FAULT_FLAG_WRITE, &unlocked); rc = !rc ? -EAGAIN : rc; } + up_read(¤t->mm->mmap_sem); if (rc == -EFAULT) return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); - - up_read(¤t->mm->mmap_sem); - if (rc >= 0) - start += PAGE_SIZE; + if (rc == -EAGAIN) + continue; + if (rc < 0) + return rc; } + start += PAGE_SIZE; } if (vcpu->run->s.regs.gprs[reg1] & PFMF_FSC) { if (psw_bits(vcpu->arch.sie_block->gpsw).eaba == PSW_BITS_AMODE_64BIT) { diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c index 63844b95c22c..a2b28cd1e3fe 100644 --- a/arch/s390/kvm/vsie.c +++ b/arch/s390/kvm/vsie.c @@ -173,7 +173,8 @@ static int shadow_crycb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) return set_validity_icpt(scb_s, 0x0039U); /* copy only the wrapping keys */ - if (read_guest_real(vcpu, crycb_addr + 72, &vsie_page->crycb, 56)) + if (read_guest_real(vcpu, crycb_addr + 72, + vsie_page->crycb.dea_wrapping_key_mask, 56)) return set_validity_icpt(scb_s, 0x0035U); scb_s->ecb3 |= ecb3_flags; diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c index bb44990c8212..911c7ded35f1 100644 --- a/arch/s390/mm/gmap.c +++ b/arch/s390/mm/gmap.c @@ -708,11 +708,13 @@ void gmap_discard(struct gmap *gmap, unsigned long from, unsigned long to) vmaddr |= gaddr & ~PMD_MASK; /* Find vma in the parent mm */ vma = find_vma(gmap->mm, vmaddr); + if (!vma) + continue; /* * We do not discard pages that are backed by * hugetlbfs, so we don't have to refault them. */ - if (vma && is_vm_hugetlb_page(vma)) + if (is_vm_hugetlb_page(vma)) continue; size = min(to - gaddr, PMD_SIZE - (gaddr & ~PMD_MASK)); zap_page_range(vma, vmaddr, size); diff --git a/arch/sparc/include/asm/cpudata_64.h b/arch/sparc/include/asm/cpudata_64.h index 666d6b5c0440..9c3fc03abe9a 100644 --- a/arch/sparc/include/asm/cpudata_64.h +++ b/arch/sparc/include/asm/cpudata_64.h @@ -28,7 +28,7 @@ typedef struct { unsigned short sock_id; /* physical package */ unsigned short core_id; unsigned short max_cache_id; /* groupings of highest shared cache */ - unsigned short proc_id; /* strand (aka HW thread) id */ + signed short proc_id; /* strand (aka HW thread) id */ } cpuinfo_sparc; DECLARE_PER_CPU(cpuinfo_sparc, __cpu_data); diff --git a/arch/sparc/include/uapi/asm/unistd.h b/arch/sparc/include/uapi/asm/unistd.h index 09acf0ddec10..45b4bf1875e6 100644 --- a/arch/sparc/include/uapi/asm/unistd.h +++ b/arch/sparc/include/uapi/asm/unistd.h @@ -427,8 +427,9 @@ #define __NR_preadv2 358 #define __NR_pwritev2 359 #define __NR_statx 360 +#define __NR_io_pgetevents 361 -#define NR_syscalls 361 +#define NR_syscalls 362 /* Bitmask values returned from kern_features system call. */ #define KERN_FEATURE_MIXED_MODE_STACK 0x00000001 diff --git a/arch/sparc/kernel/kgdb_32.c b/arch/sparc/kernel/kgdb_32.c index 5868fc333ea8..639c8e54530a 100644 --- a/arch/sparc/kernel/kgdb_32.c +++ b/arch/sparc/kernel/kgdb_32.c @@ -122,7 +122,7 @@ int kgdb_arch_handle_exception(int e_vector, int signo, int err_code, linux_regs->pc = addr; linux_regs->npc = addr + 4; } - /* fallthru */ + /* fall through */ case 'D': case 'k': diff --git a/arch/sparc/kernel/kgdb_64.c b/arch/sparc/kernel/kgdb_64.c index d5f7dc6323d5..a68bbddbdba4 100644 --- a/arch/sparc/kernel/kgdb_64.c +++ b/arch/sparc/kernel/kgdb_64.c @@ -148,7 +148,7 @@ int kgdb_arch_handle_exception(int e_vector, int signo, int err_code, linux_regs->tpc = addr; linux_regs->tnpc = addr + 4; } - /* fallthru */ + /* fall through */ case 'D': case 'k': diff --git a/arch/sparc/kernel/of_device_32.c b/arch/sparc/kernel/of_device_32.c index 3641a294ed54..e4abe9b8f97a 100644 --- a/arch/sparc/kernel/of_device_32.c +++ b/arch/sparc/kernel/of_device_32.c @@ -9,6 +9,7 @@ #include <linux/irq.h> #include <linux/of_device.h> #include <linux/of_platform.h> +#include <linux/dma-mapping.h> #include <asm/leon.h> #include <asm/leon_amba.h> @@ -381,6 +382,9 @@ static struct platform_device * __init scan_one_device(struct device_node *dp, else dev_set_name(&op->dev, "%08x", dp->phandle); + op->dev.coherent_dma_mask = DMA_BIT_MASK(32); + op->dev.dma_mask = &op->dev.coherent_dma_mask; + if (of_device_register(op)) { printk("%s: Could not register of device.\n", dp->full_name); diff --git a/arch/sparc/kernel/of_device_64.c b/arch/sparc/kernel/of_device_64.c index 44e4d4435bed..6df6086968c6 100644 --- a/arch/sparc/kernel/of_device_64.c +++ b/arch/sparc/kernel/of_device_64.c @@ -2,6 +2,7 @@ #include <linux/string.h> #include <linux/kernel.h> #include <linux/of.h> +#include <linux/dma-mapping.h> #include <linux/init.h> #include <linux/export.h> #include <linux/mod_devicetable.h> @@ -675,6 +676,8 @@ static struct platform_device * __init scan_one_device(struct device_node *dp, dev_set_name(&op->dev, "root"); else dev_set_name(&op->dev, "%08x", dp->phandle); + op->dev.coherent_dma_mask = DMA_BIT_MASK(32); + op->dev.dma_mask = &op->dev.coherent_dma_mask; if (of_device_register(op)) { printk("%s: Could not register of device.\n", diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c index d3149baaa33c..67b3e6b3ce5d 100644 --- a/arch/sparc/kernel/perf_event.c +++ b/arch/sparc/kernel/perf_event.c @@ -24,6 +24,7 @@ #include <asm/cpudata.h> #include <linux/uaccess.h> #include <linux/atomic.h> +#include <linux/sched/clock.h> #include <asm/nmi.h> #include <asm/pcr.h> #include <asm/cacheflush.h> @@ -927,6 +928,8 @@ static void read_in_all_counters(struct cpu_hw_events *cpuc) sparc_perf_event_update(cp, &cp->hw, cpuc->current_idx[i]); cpuc->current_idx[i] = PIC_NO_INDEX; + if (cp->hw.state & PERF_HES_STOPPED) + cp->hw.state |= PERF_HES_ARCH; } } } @@ -959,10 +962,12 @@ static void calculate_single_pcr(struct cpu_hw_events *cpuc) enc = perf_event_get_enc(cpuc->events[i]); cpuc->pcr[0] &= ~mask_for_index(idx); - if (hwc->state & PERF_HES_STOPPED) + if (hwc->state & PERF_HES_ARCH) { cpuc->pcr[0] |= nop_for_index(idx); - else + } else { cpuc->pcr[0] |= event_encoding(enc, idx); + hwc->state = 0; + } } out: cpuc->pcr[0] |= cpuc->event[0]->hw.config_base; @@ -988,6 +993,9 @@ static void calculate_multiple_pcrs(struct cpu_hw_events *cpuc) cpuc->current_idx[i] = idx; + if (cp->hw.state & PERF_HES_ARCH) + continue; + sparc_pmu_start(cp, PERF_EF_RELOAD); } out: @@ -1079,6 +1087,8 @@ static void sparc_pmu_start(struct perf_event *event, int flags) event->hw.state = 0; sparc_pmu_enable_event(cpuc, &event->hw, idx); + + perf_event_update_userpage(event); } static void sparc_pmu_stop(struct perf_event *event, int flags) @@ -1371,9 +1381,9 @@ static int sparc_pmu_add(struct perf_event *event, int ef_flags) cpuc->events[n0] = event->hw.event_base; cpuc->current_idx[n0] = PIC_NO_INDEX; - event->hw.state = PERF_HES_UPTODATE; + event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED; if (!(ef_flags & PERF_EF_START)) - event->hw.state |= PERF_HES_STOPPED; + event->hw.state |= PERF_HES_ARCH; /* * If group events scheduling transaction was started, @@ -1603,6 +1613,8 @@ static int __kprobes perf_event_nmi_handler(struct notifier_block *self, struct perf_sample_data data; struct cpu_hw_events *cpuc; struct pt_regs *regs; + u64 finish_clock; + u64 start_clock; int i; if (!atomic_read(&active_events)) @@ -1616,6 +1628,8 @@ static int __kprobes perf_event_nmi_handler(struct notifier_block *self, return NOTIFY_DONE; } + start_clock = sched_clock(); + regs = args->regs; cpuc = this_cpu_ptr(&cpu_hw_events); @@ -1654,6 +1668,10 @@ static int __kprobes perf_event_nmi_handler(struct notifier_block *self, sparc_pmu_stop(event, 0); } + finish_clock = sched_clock(); + + perf_sample_event_took(finish_clock - start_clock); + return NOTIFY_STOP; } diff --git a/arch/sparc/kernel/rtrap_64.S b/arch/sparc/kernel/rtrap_64.S index f6528884a2c8..4073e2b87dd0 100644 --- a/arch/sparc/kernel/rtrap_64.S +++ b/arch/sparc/kernel/rtrap_64.S @@ -84,8 +84,9 @@ __handle_signal: ldx [%sp + PTREGS_OFF + PT_V9_TSTATE], %l1 sethi %hi(0xf << 20), %l4 and %l1, %l4, %l4 + andn %l1, %l4, %l1 ba,pt %xcc, __handle_preemption_continue - andn %l1, %l4, %l1 + srl %l4, 20, %l4 /* When returning from a NMI (%pil==15) interrupt we want to * avoid running softirqs, doing IRQ tracing, preempting, etc. diff --git a/arch/sparc/kernel/systbls_32.S b/arch/sparc/kernel/systbls_32.S index 12bee14b552c..621a363098ec 100644 --- a/arch/sparc/kernel/systbls_32.S +++ b/arch/sparc/kernel/systbls_32.S @@ -90,4 +90,4 @@ sys_call_table: /*345*/ .long sys_renameat2, sys_seccomp, sys_getrandom, sys_memfd_create, sys_bpf /*350*/ .long sys_execveat, sys_membarrier, sys_userfaultfd, sys_bind, sys_listen /*355*/ .long sys_setsockopt, sys_mlock2, sys_copy_file_range, sys_preadv2, sys_pwritev2 -/*360*/ .long sys_statx +/*360*/ .long sys_statx, sys_io_pgetevents diff --git a/arch/sparc/kernel/systbls_64.S b/arch/sparc/kernel/systbls_64.S index 387ef993880a..bb68c805b891 100644 --- a/arch/sparc/kernel/systbls_64.S +++ b/arch/sparc/kernel/systbls_64.S @@ -91,7 +91,7 @@ sys_call_table32: .word sys_renameat2, sys_seccomp, sys_getrandom, sys_memfd_create, sys_bpf /*350*/ .word sys32_execveat, sys_membarrier, sys_userfaultfd, sys_bind, sys_listen .word compat_sys_setsockopt, sys_mlock2, sys_copy_file_range, compat_sys_preadv2, compat_sys_pwritev2 -/*360*/ .word sys_statx +/*360*/ .word sys_statx, compat_sys_io_pgetevents #endif /* CONFIG_COMPAT */ @@ -173,4 +173,4 @@ sys_call_table: .word sys_renameat2, sys_seccomp, sys_getrandom, sys_memfd_create, sys_bpf /*350*/ .word sys64_execveat, sys_membarrier, sys_userfaultfd, sys_bind, sys_listen .word sys_setsockopt, sys_mlock2, sys_copy_file_range, sys_preadv2, sys_pwritev2 -/*360*/ .word sys_statx +/*360*/ .word sys_statx, sys_io_pgetevents diff --git a/arch/sparc/kernel/viohs.c b/arch/sparc/kernel/viohs.c index 635d67ffc9a3..7db5aabe9708 100644 --- a/arch/sparc/kernel/viohs.c +++ b/arch/sparc/kernel/viohs.c @@ -180,11 +180,17 @@ static int send_dreg(struct vio_driver_state *vio) struct vio_dring_register pkt; char all[sizeof(struct vio_dring_register) + (sizeof(struct ldc_trans_cookie) * - dr->ncookies)]; + VIO_MAX_RING_COOKIES)]; } u; + size_t bytes = sizeof(struct vio_dring_register) + + (sizeof(struct ldc_trans_cookie) * + dr->ncookies); int i; - memset(&u, 0, sizeof(u)); + if (WARN_ON(bytes > sizeof(u))) + return -EINVAL; + + memset(&u, 0, bytes); init_tag(&u.pkt.tag, VIO_TYPE_CTRL, VIO_SUBTYPE_INFO, VIO_DRING_REG); u.pkt.dring_ident = 0; u.pkt.num_descr = dr->num_entries; @@ -206,7 +212,7 @@ static int send_dreg(struct vio_driver_state *vio) (unsigned long long) u.pkt.cookies[i].cookie_size); } - return send_ctrl(vio, &u.pkt.tag, sizeof(u)); + return send_ctrl(vio, &u.pkt.tag, bytes); } static int send_rdx(struct vio_driver_state *vio) diff --git a/arch/sparc/vdso/Makefile b/arch/sparc/vdso/Makefile index dd0b5a92ffd0..dc85570d8839 100644 --- a/arch/sparc/vdso/Makefile +++ b/arch/sparc/vdso/Makefile @@ -31,23 +31,21 @@ obj-y += $(vdso_img_objs) targets += $(vdso_img_cfiles) targets += $(vdso_img_sodbg) $(vdso_img-y:%=vdso%.so) -export CPPFLAGS_vdso.lds += -P -C +CPPFLAGS_vdso.lds += -P -C VDSO_LDFLAGS_vdso.lds = -m64 -Wl,-soname=linux-vdso.so.1 \ -Wl,--no-undefined \ -Wl,-z,max-page-size=8192 -Wl,-z,common-page-size=8192 \ $(DISABLE_LTO) -$(obj)/vdso64.so.dbg: $(src)/vdso.lds $(vobjs) FORCE +$(obj)/vdso64.so.dbg: $(obj)/vdso.lds $(vobjs) FORCE $(call if_changed,vdso) HOST_EXTRACFLAGS += -I$(srctree)/tools/include hostprogs-y += vdso2c quiet_cmd_vdso2c = VDSO2C $@ -define cmd_vdso2c - $(obj)/vdso2c $< $(<:%.dbg=%) $@ -endef + cmd_vdso2c = $(obj)/vdso2c $< $(<:%.dbg=%) $@ $(obj)/vdso-image-%.c: $(obj)/vdso%.so.dbg $(obj)/vdso%.so $(obj)/vdso2c FORCE $(call if_changed,vdso2c) diff --git a/arch/sparc/vdso/vclock_gettime.c b/arch/sparc/vdso/vclock_gettime.c index 3feb3d960ca5..75dca9aab737 100644 --- a/arch/sparc/vdso/vclock_gettime.c +++ b/arch/sparc/vdso/vclock_gettime.c @@ -33,9 +33,19 @@ #define TICK_PRIV_BIT (1ULL << 63) #endif +#ifdef CONFIG_SPARC64 #define SYSCALL_STRING \ "ta 0x6d;" \ - "sub %%g0, %%o0, %%o0;" \ + "bcs,a 1f;" \ + " sub %%g0, %%o0, %%o0;" \ + "1:" +#else +#define SYSCALL_STRING \ + "ta 0x10;" \ + "bcs,a 1f;" \ + " sub %%g0, %%o0, %%o0;" \ + "1:" +#endif #define SYSCALL_CLOBBERS \ "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", \ diff --git a/arch/sparc/vdso/vma.c b/arch/sparc/vdso/vma.c index f51595f861b8..5eaff3c1aa0c 100644 --- a/arch/sparc/vdso/vma.c +++ b/arch/sparc/vdso/vma.c @@ -262,7 +262,9 @@ static __init int vdso_setup(char *s) unsigned long val; err = kstrtoul(s, 10, &val); + if (err) + return err; vdso_enabled = val; - return err; + return 0; } __setup("vdso=", vdso_setup); diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index 28764dacf018..466f66c8a7f8 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -37,6 +37,7 @@ KBUILD_CFLAGS += $(call cc-option,-ffreestanding) KBUILD_CFLAGS += $(call cc-option,-fno-stack-protector) KBUILD_CFLAGS += $(call cc-disable-warning, address-of-packed-member) KBUILD_CFLAGS += $(call cc-disable-warning, gnu) +KBUILD_CFLAGS += -Wno-pointer-sign KBUILD_AFLAGS := $(KBUILD_CFLAGS) -D__ASSEMBLY__ GCOV_PROFILE := n diff --git a/arch/x86/boot/compressed/mem_encrypt.S b/arch/x86/boot/compressed/mem_encrypt.S index eaa843a52907..a480356e0ed8 100644 --- a/arch/x86/boot/compressed/mem_encrypt.S +++ b/arch/x86/boot/compressed/mem_encrypt.S @@ -25,20 +25,6 @@ ENTRY(get_sev_encryption_bit) push %ebx push %ecx push %edx - push %edi - - /* - * RIP-relative addressing is needed to access the encryption bit - * variable. Since we are running in 32-bit mode we need this call/pop - * sequence to get the proper relative addressing. - */ - call 1f -1: popl %edi - subl $1b, %edi - - movl enc_bit(%edi), %eax - cmpl $0, %eax - jge .Lsev_exit /* Check if running under a hypervisor */ movl $1, %eax @@ -69,15 +55,12 @@ ENTRY(get_sev_encryption_bit) movl %ebx, %eax andl $0x3f, %eax /* Return the encryption bit location */ - movl %eax, enc_bit(%edi) jmp .Lsev_exit .Lno_sev: xor %eax, %eax - movl %eax, enc_bit(%edi) .Lsev_exit: - pop %edi pop %edx pop %ecx pop %ebx @@ -113,8 +96,6 @@ ENTRY(set_sev_encryption_mask) ENDPROC(set_sev_encryption_mask) .data -enc_bit: - .int 0xffffffff #ifdef CONFIG_AMD_MEM_ENCRYPT .balign 8 diff --git a/arch/x86/crypto/aegis128-aesni-glue.c b/arch/x86/crypto/aegis128-aesni-glue.c index acd11b3bf639..2a356b948720 100644 --- a/arch/x86/crypto/aegis128-aesni-glue.c +++ b/arch/x86/crypto/aegis128-aesni-glue.c @@ -379,7 +379,6 @@ static int __init crypto_aegis128_aesni_module_init(void) { if (!boot_cpu_has(X86_FEATURE_XMM2) || !boot_cpu_has(X86_FEATURE_AES) || - !boot_cpu_has(X86_FEATURE_OSXSAVE) || !cpu_has_xfeatures(XFEATURE_MASK_SSE, NULL)) return -ENODEV; diff --git a/arch/x86/crypto/aegis128l-aesni-glue.c b/arch/x86/crypto/aegis128l-aesni-glue.c index 2071c3d1ae07..dbe8bb980da1 100644 --- a/arch/x86/crypto/aegis128l-aesni-glue.c +++ b/arch/x86/crypto/aegis128l-aesni-glue.c @@ -379,7 +379,6 @@ static int __init crypto_aegis128l_aesni_module_init(void) { if (!boot_cpu_has(X86_FEATURE_XMM2) || !boot_cpu_has(X86_FEATURE_AES) || - !boot_cpu_has(X86_FEATURE_OSXSAVE) || !cpu_has_xfeatures(XFEATURE_MASK_SSE, NULL)) return -ENODEV; diff --git a/arch/x86/crypto/aegis256-aesni-glue.c b/arch/x86/crypto/aegis256-aesni-glue.c index b5f2a8fd5a71..8bebda2de92f 100644 --- a/arch/x86/crypto/aegis256-aesni-glue.c +++ b/arch/x86/crypto/aegis256-aesni-glue.c @@ -379,7 +379,6 @@ static int __init crypto_aegis256_aesni_module_init(void) { if (!boot_cpu_has(X86_FEATURE_XMM2) || !boot_cpu_has(X86_FEATURE_AES) || - !boot_cpu_has(X86_FEATURE_OSXSAVE) || !cpu_has_xfeatures(XFEATURE_MASK_SSE, NULL)) return -ENODEV; diff --git a/arch/x86/crypto/morus1280-sse2-glue.c b/arch/x86/crypto/morus1280-sse2-glue.c index 95cf857d2cbb..f40244eaf14d 100644 --- a/arch/x86/crypto/morus1280-sse2-glue.c +++ b/arch/x86/crypto/morus1280-sse2-glue.c @@ -40,7 +40,6 @@ MORUS1280_DECLARE_ALGS(sse2, "morus1280-sse2", 350); static int __init crypto_morus1280_sse2_module_init(void) { if (!boot_cpu_has(X86_FEATURE_XMM2) || - !boot_cpu_has(X86_FEATURE_OSXSAVE) || !cpu_has_xfeatures(XFEATURE_MASK_SSE, NULL)) return -ENODEV; diff --git a/arch/x86/crypto/morus640-sse2-glue.c b/arch/x86/crypto/morus640-sse2-glue.c index 615fb7bc9a32..9afaf8f8565a 100644 --- a/arch/x86/crypto/morus640-sse2-glue.c +++ b/arch/x86/crypto/morus640-sse2-glue.c @@ -40,7 +40,6 @@ MORUS640_DECLARE_ALGS(sse2, "morus640-sse2", 400); static int __init crypto_morus640_sse2_module_init(void) { if (!boot_cpu_has(X86_FEATURE_XMM2) || - !boot_cpu_has(X86_FEATURE_OSXSAVE) || !cpu_has_xfeatures(XFEATURE_MASK_SSE, NULL)) return -ENODEV; diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index 2767c625a52c..fbbf1ba57ec6 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -389,6 +389,13 @@ * that register for the time this macro runs */ + /* + * The high bits of the CS dword (__csh) are used for + * CS_FROM_ENTRY_STACK and CS_FROM_USER_CR3. Clear them in case + * hardware didn't do this for us. + */ + andl $(0x0000ffff), PT_CS(%esp) + /* Are we on the entry stack? Bail out if not! */ movl PER_CPU_VAR(cpu_entry_area), %ecx addl $CPU_ENTRY_AREA_entry_stack + SIZEOF_entry_stack, %ecx @@ -407,12 +414,6 @@ /* Load top of task-stack into %edi */ movl TSS_entry2task_stack(%edi), %edi - /* - * Clear unused upper bits of the dword containing the word-sized CS - * slot in pt_regs in case hardware didn't clear it for us. - */ - andl $(0x0000ffff), PT_CS(%esp) - /* Special case - entry from kernel mode via entry stack */ #ifdef CONFIG_VM86 movl PT_EFLAGS(%esp), %ecx # mix EFLAGS and CS diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 957dfb693ecc..f95dcb209fdf 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -1187,6 +1187,16 @@ ENTRY(paranoid_entry) xorl %ebx, %ebx 1: + /* + * Always stash CR3 in %r14. This value will be restored, + * verbatim, at exit. Needed if paranoid_entry interrupted + * another entry that already switched to the user CR3 value + * but has not yet returned to userspace. + * + * This is also why CS (stashed in the "iret frame" by the + * hardware at entry) can not be used: this may be a return + * to kernel code, but with a user CR3 value. + */ SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg=%rax save_reg=%r14 ret @@ -1211,11 +1221,13 @@ ENTRY(paranoid_exit) testl %ebx, %ebx /* swapgs needed? */ jnz .Lparanoid_exit_no_swapgs TRACE_IRQS_IRETQ + /* Always restore stashed CR3 value (see paranoid_entry) */ RESTORE_CR3 scratch_reg=%rbx save_reg=%r14 SWAPGS_UNSAFE_STACK jmp .Lparanoid_exit_restore .Lparanoid_exit_no_swapgs: TRACE_IRQS_IRETQ_DEBUG + /* Always restore stashed CR3 value (see paranoid_entry) */ RESTORE_CR3 scratch_reg=%rbx save_reg=%r14 .Lparanoid_exit_restore: jmp restore_regs_and_return_to_kernel @@ -1626,6 +1638,7 @@ end_repeat_nmi: movq $-1, %rsi call do_nmi + /* Always restore stashed CR3 value (see paranoid_entry) */ RESTORE_CR3 scratch_reg=%r15 save_reg=%r14 testl %ebx, %ebx /* swapgs needed? */ diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile index fa3f439f0a92..141d415a8c80 100644 --- a/arch/x86/entry/vdso/Makefile +++ b/arch/x86/entry/vdso/Makefile @@ -68,7 +68,13 @@ $(obj)/vdso-image-%.c: $(obj)/vdso%.so.dbg $(obj)/vdso%.so $(obj)/vdso2c FORCE CFL := $(PROFILING) -mcmodel=small -fPIC -O2 -fasynchronous-unwind-tables -m64 \ $(filter -g%,$(KBUILD_CFLAGS)) $(call cc-option, -fno-stack-protector) \ -fno-omit-frame-pointer -foptimize-sibling-calls \ - -DDISABLE_BRANCH_PROFILING -DBUILD_VDSO $(RETPOLINE_VDSO_CFLAGS) + -DDISABLE_BRANCH_PROFILING -DBUILD_VDSO + +ifdef CONFIG_RETPOLINE +ifneq ($(RETPOLINE_VDSO_CFLAGS),) + CFL += $(RETPOLINE_VDSO_CFLAGS) +endif +endif $(vobjs): KBUILD_CFLAGS := $(filter-out $(GCC_PLUGINS_CFLAGS) $(RETPOLINE_CFLAGS),$(KBUILD_CFLAGS)) $(CFL) @@ -138,7 +144,13 @@ KBUILD_CFLAGS_32 += $(call cc-option, -fno-stack-protector) KBUILD_CFLAGS_32 += $(call cc-option, -foptimize-sibling-calls) KBUILD_CFLAGS_32 += -fno-omit-frame-pointer KBUILD_CFLAGS_32 += -DDISABLE_BRANCH_PROFILING -KBUILD_CFLAGS_32 += $(RETPOLINE_VDSO_CFLAGS) + +ifdef CONFIG_RETPOLINE +ifneq ($(RETPOLINE_VDSO_CFLAGS),) + KBUILD_CFLAGS_32 += $(RETPOLINE_VDSO_CFLAGS) +endif +endif + $(obj)/vdso32.so.dbg: KBUILD_CFLAGS = $(KBUILD_CFLAGS_32) $(obj)/vdso32.so.dbg: FORCE \ diff --git a/arch/x86/entry/vdso/vclock_gettime.c b/arch/x86/entry/vdso/vclock_gettime.c index f19856d95c60..e48ca3afa091 100644 --- a/arch/x86/entry/vdso/vclock_gettime.c +++ b/arch/x86/entry/vdso/vclock_gettime.c @@ -43,8 +43,9 @@ extern u8 hvclock_page notrace static long vdso_fallback_gettime(long clock, struct timespec *ts) { long ret; - asm("syscall" : "=a" (ret) : - "0" (__NR_clock_gettime), "D" (clock), "S" (ts) : "memory"); + asm ("syscall" : "=a" (ret), "=m" (*ts) : + "0" (__NR_clock_gettime), "D" (clock), "S" (ts) : + "memory", "rcx", "r11"); return ret; } @@ -52,8 +53,9 @@ notrace static long vdso_fallback_gtod(struct timeval *tv, struct timezone *tz) { long ret; - asm("syscall" : "=a" (ret) : - "0" (__NR_gettimeofday), "D" (tv), "S" (tz) : "memory"); + asm ("syscall" : "=a" (ret), "=m" (*tv), "=m" (*tz) : + "0" (__NR_gettimeofday), "D" (tv), "S" (tz) : + "memory", "rcx", "r11"); return ret; } @@ -64,13 +66,13 @@ notrace static long vdso_fallback_gettime(long clock, struct timespec *ts) { long ret; - asm( + asm ( "mov %%ebx, %%edx \n" - "mov %2, %%ebx \n" + "mov %[clock], %%ebx \n" "call __kernel_vsyscall \n" "mov %%edx, %%ebx \n" - : "=a" (ret) - : "0" (__NR_clock_gettime), "g" (clock), "c" (ts) + : "=a" (ret), "=m" (*ts) + : "0" (__NR_clock_gettime), [clock] "g" (clock), "c" (ts) : "memory", "edx"); return ret; } @@ -79,13 +81,13 @@ notrace static long vdso_fallback_gtod(struct timeval *tv, struct timezone *tz) { long ret; - asm( + asm ( "mov %%ebx, %%edx \n" - "mov %2, %%ebx \n" + "mov %[tv], %%ebx \n" "call __kernel_vsyscall \n" "mov %%edx, %%ebx \n" - : "=a" (ret) - : "0" (__NR_gettimeofday), "g" (tv), "c" (tz) + : "=a" (ret), "=m" (*tv), "=m" (*tz) + : "0" (__NR_gettimeofday), [tv] "g" (tv), "c" (tz) : "memory", "edx"); return ret; } diff --git a/arch/x86/events/amd/uncore.c b/arch/x86/events/amd/uncore.c index 981ba5e8241b..8671de126eac 100644 --- a/arch/x86/events/amd/uncore.c +++ b/arch/x86/events/amd/uncore.c @@ -36,6 +36,7 @@ static int num_counters_llc; static int num_counters_nb; +static bool l3_mask; static HLIST_HEAD(uncore_unused_list); @@ -209,6 +210,13 @@ static int amd_uncore_event_init(struct perf_event *event) hwc->config = event->attr.config & AMD64_RAW_EVENT_MASK_NB; hwc->idx = -1; + /* + * SliceMask and ThreadMask need to be set for certain L3 events in + * Family 17h. For other events, the two fields do not affect the count. + */ + if (l3_mask) + hwc->config |= (AMD64_L3_SLICE_MASK | AMD64_L3_THREAD_MASK); + if (event->cpu < 0) return -EINVAL; @@ -525,6 +533,7 @@ static int __init amd_uncore_init(void) amd_llc_pmu.name = "amd_l3"; format_attr_event_df.show = &event_show_df; format_attr_event_l3.show = &event_show_l3; + l3_mask = true; } else { num_counters_nb = NUM_COUNTERS_NB; num_counters_llc = NUM_COUNTERS_L2; @@ -532,6 +541,7 @@ static int __init amd_uncore_init(void) amd_llc_pmu.name = "amd_l2"; format_attr_event_df = format_attr_event; format_attr_event_l3 = format_attr_event; + l3_mask = false; } amd_nb_pmu.attr_groups = amd_uncore_attr_groups_df; diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c index f3e006bed9a7..c88ed39582a1 100644 --- a/arch/x86/events/intel/lbr.c +++ b/arch/x86/events/intel/lbr.c @@ -1272,4 +1272,8 @@ void intel_pmu_lbr_init_knl(void) x86_pmu.lbr_sel_mask = LBR_SEL_MASK; x86_pmu.lbr_sel_map = snb_lbr_sel_map; + + /* Knights Landing does have MISPREDICT bit */ + if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_LIP) + x86_pmu.intel_cap.lbr_format = LBR_FORMAT_EIP_FLAGS; } diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c index 51d7c117e3c7..c07bee31abe8 100644 --- a/arch/x86/events/intel/uncore_snbep.c +++ b/arch/x86/events/intel/uncore_snbep.c @@ -3061,7 +3061,7 @@ static struct event_constraint bdx_uncore_pcu_constraints[] = { void bdx_uncore_cpu_init(void) { - int pkg = topology_phys_to_logical_pkg(0); + int pkg = topology_phys_to_logical_pkg(boot_cpu_data.phys_proc_id); if (bdx_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores) bdx_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores; @@ -3931,16 +3931,16 @@ static const struct pci_device_id skx_uncore_pci_ids[] = { .driver_data = UNCORE_PCI_DEV_FULL_DATA(21, 5, SKX_PCI_UNCORE_M2PCIE, 3), }, { /* M3UPI0 Link 0 */ - PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x204C), - .driver_data = UNCORE_PCI_DEV_FULL_DATA(18, 0, SKX_PCI_UNCORE_M3UPI, 0), + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x204D), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(18, 1, SKX_PCI_UNCORE_M3UPI, 0), }, { /* M3UPI0 Link 1 */ - PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x204D), - .driver_data = UNCORE_PCI_DEV_FULL_DATA(18, 1, SKX_PCI_UNCORE_M3UPI, 1), + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x204E), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(18, 2, SKX_PCI_UNCORE_M3UPI, 1), }, { /* M3UPI1 Link 2 */ - PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x204C), - .driver_data = UNCORE_PCI_DEV_FULL_DATA(18, 4, SKX_PCI_UNCORE_M3UPI, 2), + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x204D), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(18, 5, SKX_PCI_UNCORE_M3UPI, 2), }, { /* end: all zeroes */ } }; diff --git a/arch/x86/hyperv/hv_apic.c b/arch/x86/hyperv/hv_apic.c index 5b0f613428c2..2c43e3055948 100644 --- a/arch/x86/hyperv/hv_apic.c +++ b/arch/x86/hyperv/hv_apic.c @@ -95,8 +95,8 @@ static void hv_apic_eoi_write(u32 reg, u32 val) */ static bool __send_ipi_mask_ex(const struct cpumask *mask, int vector) { - struct ipi_arg_ex **arg; - struct ipi_arg_ex *ipi_arg; + struct hv_send_ipi_ex **arg; + struct hv_send_ipi_ex *ipi_arg; unsigned long flags; int nr_bank = 0; int ret = 1; @@ -105,7 +105,7 @@ static bool __send_ipi_mask_ex(const struct cpumask *mask, int vector) return false; local_irq_save(flags); - arg = (struct ipi_arg_ex **)this_cpu_ptr(hyperv_pcpu_input_arg); + arg = (struct hv_send_ipi_ex **)this_cpu_ptr(hyperv_pcpu_input_arg); ipi_arg = *arg; if (unlikely(!ipi_arg)) @@ -135,7 +135,7 @@ ipi_mask_ex_done: static bool __send_ipi_mask(const struct cpumask *mask, int vector) { int cur_cpu, vcpu; - struct ipi_arg_non_ex ipi_arg; + struct hv_send_ipi ipi_arg; int ret = 1; trace_hyperv_send_ipi_mask(mask, vector); diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h index b143717b92b3..ce84388e540c 100644 --- a/arch/x86/include/asm/atomic.h +++ b/arch/x86/include/asm/atomic.h @@ -80,11 +80,11 @@ static __always_inline void arch_atomic_sub(int i, atomic_t *v) * true if the result is zero, or false for all * other cases. */ -#define arch_atomic_sub_and_test arch_atomic_sub_and_test static __always_inline bool arch_atomic_sub_and_test(int i, atomic_t *v) { GEN_BINARY_RMWcc(LOCK_PREFIX "subl", v->counter, "er", i, "%0", e); } +#define arch_atomic_sub_and_test arch_atomic_sub_and_test /** * arch_atomic_inc - increment atomic variable @@ -92,12 +92,12 @@ static __always_inline bool arch_atomic_sub_and_test(int i, atomic_t *v) * * Atomically increments @v by 1. */ -#define arch_atomic_inc arch_atomic_inc static __always_inline void arch_atomic_inc(atomic_t *v) { asm volatile(LOCK_PREFIX "incl %0" : "+m" (v->counter)); } +#define arch_atomic_inc arch_atomic_inc /** * arch_atomic_dec - decrement atomic variable @@ -105,12 +105,12 @@ static __always_inline void arch_atomic_inc(atomic_t *v) * * Atomically decrements @v by 1. */ -#define arch_atomic_dec arch_atomic_dec static __always_inline void arch_atomic_dec(atomic_t *v) { asm volatile(LOCK_PREFIX "decl %0" : "+m" (v->counter)); } +#define arch_atomic_dec arch_atomic_dec /** * arch_atomic_dec_and_test - decrement and test @@ -120,11 +120,11 @@ static __always_inline void arch_atomic_dec(atomic_t *v) * returns true if the result is 0, or false for all other * cases. */ -#define arch_atomic_dec_and_test arch_atomic_dec_and_test static __always_inline bool arch_atomic_dec_and_test(atomic_t *v) { GEN_UNARY_RMWcc(LOCK_PREFIX "decl", v->counter, "%0", e); } +#define arch_atomic_dec_and_test arch_atomic_dec_and_test /** * arch_atomic_inc_and_test - increment and test @@ -134,11 +134,11 @@ static __always_inline bool arch_atomic_dec_and_test(atomic_t *v) * and returns true if the result is zero, or false for all * other cases. */ -#define arch_atomic_inc_and_test arch_atomic_inc_and_test static __always_inline bool arch_atomic_inc_and_test(atomic_t *v) { GEN_UNARY_RMWcc(LOCK_PREFIX "incl", v->counter, "%0", e); } +#define arch_atomic_inc_and_test arch_atomic_inc_and_test /** * arch_atomic_add_negative - add and test if negative @@ -149,11 +149,11 @@ static __always_inline bool arch_atomic_inc_and_test(atomic_t *v) * if the result is negative, or false when * result is greater than or equal to zero. */ -#define arch_atomic_add_negative arch_atomic_add_negative static __always_inline bool arch_atomic_add_negative(int i, atomic_t *v) { GEN_BINARY_RMWcc(LOCK_PREFIX "addl", v->counter, "er", i, "%0", s); } +#define arch_atomic_add_negative arch_atomic_add_negative /** * arch_atomic_add_return - add integer and return diff --git a/arch/x86/include/asm/atomic64_32.h b/arch/x86/include/asm/atomic64_32.h index ef959f02d070..6a5b0ec460da 100644 --- a/arch/x86/include/asm/atomic64_32.h +++ b/arch/x86/include/asm/atomic64_32.h @@ -205,12 +205,12 @@ static inline long long arch_atomic64_sub(long long i, atomic64_t *v) * * Atomically increments @v by 1. */ -#define arch_atomic64_inc arch_atomic64_inc static inline void arch_atomic64_inc(atomic64_t *v) { __alternative_atomic64(inc, inc_return, /* no output */, "S" (v) : "memory", "eax", "ecx", "edx"); } +#define arch_atomic64_inc arch_atomic64_inc /** * arch_atomic64_dec - decrement atomic64 variable @@ -218,12 +218,12 @@ static inline void arch_atomic64_inc(atomic64_t *v) * * Atomically decrements @v by 1. */ -#define arch_atomic64_dec arch_atomic64_dec static inline void arch_atomic64_dec(atomic64_t *v) { __alternative_atomic64(dec, dec_return, /* no output */, "S" (v) : "memory", "eax", "ecx", "edx"); } +#define arch_atomic64_dec arch_atomic64_dec /** * arch_atomic64_add_unless - add unless the number is a given value @@ -245,7 +245,6 @@ static inline int arch_atomic64_add_unless(atomic64_t *v, long long a, return (int)a; } -#define arch_atomic64_inc_not_zero arch_atomic64_inc_not_zero static inline int arch_atomic64_inc_not_zero(atomic64_t *v) { int r; @@ -253,8 +252,8 @@ static inline int arch_atomic64_inc_not_zero(atomic64_t *v) "S" (v) : "ecx", "edx", "memory"); return r; } +#define arch_atomic64_inc_not_zero arch_atomic64_inc_not_zero -#define arch_atomic64_dec_if_positive arch_atomic64_dec_if_positive static inline long long arch_atomic64_dec_if_positive(atomic64_t *v) { long long r; @@ -262,6 +261,7 @@ static inline long long arch_atomic64_dec_if_positive(atomic64_t *v) "S" (v) : "ecx", "memory"); return r; } +#define arch_atomic64_dec_if_positive arch_atomic64_dec_if_positive #undef alternative_atomic64 #undef __alternative_atomic64 diff --git a/arch/x86/include/asm/atomic64_64.h b/arch/x86/include/asm/atomic64_64.h index 4343d9b4f30e..5f851d92eecd 100644 --- a/arch/x86/include/asm/atomic64_64.h +++ b/arch/x86/include/asm/atomic64_64.h @@ -71,11 +71,11 @@ static inline void arch_atomic64_sub(long i, atomic64_t *v) * true if the result is zero, or false for all * other cases. */ -#define arch_atomic64_sub_and_test arch_atomic64_sub_and_test static inline bool arch_atomic64_sub_and_test(long i, atomic64_t *v) { GEN_BINARY_RMWcc(LOCK_PREFIX "subq", v->counter, "er", i, "%0", e); } +#define arch_atomic64_sub_and_test arch_atomic64_sub_and_test /** * arch_atomic64_inc - increment atomic64 variable @@ -83,13 +83,13 @@ static inline bool arch_atomic64_sub_and_test(long i, atomic64_t *v) * * Atomically increments @v by 1. */ -#define arch_atomic64_inc arch_atomic64_inc static __always_inline void arch_atomic64_inc(atomic64_t *v) { asm volatile(LOCK_PREFIX "incq %0" : "=m" (v->counter) : "m" (v->counter)); } +#define arch_atomic64_inc arch_atomic64_inc /** * arch_atomic64_dec - decrement atomic64 variable @@ -97,13 +97,13 @@ static __always_inline void arch_atomic64_inc(atomic64_t *v) * * Atomically decrements @v by 1. */ -#define arch_atomic64_dec arch_atomic64_dec static __always_inline void arch_atomic64_dec(atomic64_t *v) { asm volatile(LOCK_PREFIX "decq %0" : "=m" (v->counter) : "m" (v->counter)); } +#define arch_atomic64_dec arch_atomic64_dec /** * arch_atomic64_dec_and_test - decrement and test @@ -113,11 +113,11 @@ static __always_inline void arch_atomic64_dec(atomic64_t *v) * returns true if the result is 0, or false for all other * cases. */ -#define arch_atomic64_dec_and_test arch_atomic64_dec_and_test static inline bool arch_atomic64_dec_and_test(atomic64_t *v) { GEN_UNARY_RMWcc(LOCK_PREFIX "decq", v->counter, "%0", e); } +#define arch_atomic64_dec_and_test arch_atomic64_dec_and_test /** * arch_atomic64_inc_and_test - increment and test @@ -127,11 +127,11 @@ static inline bool arch_atomic64_dec_and_test(atomic64_t *v) * and returns true if the result is zero, or false for all * other cases. */ -#define arch_atomic64_inc_and_test arch_atomic64_inc_and_test static inline bool arch_atomic64_inc_and_test(atomic64_t *v) { GEN_UNARY_RMWcc(LOCK_PREFIX "incq", v->counter, "%0", e); } +#define arch_atomic64_inc_and_test arch_atomic64_inc_and_test /** * arch_atomic64_add_negative - add and test if negative @@ -142,11 +142,11 @@ static inline bool arch_atomic64_inc_and_test(atomic64_t *v) * if the result is negative, or false when * result is greater than or equal to zero. */ -#define arch_atomic64_add_negative arch_atomic64_add_negative static inline bool arch_atomic64_add_negative(long i, atomic64_t *v) { GEN_BINARY_RMWcc(LOCK_PREFIX "addq", v->counter, "er", i, "%0", s); } +#define arch_atomic64_add_negative arch_atomic64_add_negative /** * arch_atomic64_add_return - add and return diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h index e203169931c7..6390bd8c141b 100644 --- a/arch/x86/include/asm/fixmap.h +++ b/arch/x86/include/asm/fixmap.h @@ -14,6 +14,16 @@ #ifndef _ASM_X86_FIXMAP_H #define _ASM_X86_FIXMAP_H +/* + * Exposed to assembly code for setting up initial page tables. Cannot be + * calculated in assembly code (fixmap entries are an enum), but is sanity + * checked in the actual fixmap C code to make sure that the fixmap is + * covered fully. + */ +#define FIXMAP_PMD_NUM 2 +/* fixmap starts downwards from the 507th entry in level2_fixmap_pgt */ +#define FIXMAP_PMD_TOP 507 + #ifndef __ASSEMBLY__ #include <linux/kernel.h> #include <asm/acpi.h> diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index a38bf5a1e37a..69dcdf195b61 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -528,7 +528,7 @@ static inline void fpregs_activate(struct fpu *fpu) static inline void switch_fpu_prepare(struct fpu *old_fpu, int cpu) { - if (old_fpu->initialized) { + if (static_cpu_has(X86_FEATURE_FPU) && old_fpu->initialized) { if (!copy_fpregs_to_fpstate(old_fpu)) old_fpu->last_cpu = -1; else diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h index e977b6b3a538..00e01d215f74 100644 --- a/arch/x86/include/asm/hyperv-tlfs.h +++ b/arch/x86/include/asm/hyperv-tlfs.h @@ -726,19 +726,21 @@ struct hv_enlightened_vmcs { #define HV_STIMER_AUTOENABLE (1ULL << 3) #define HV_STIMER_SINT(config) (__u8)(((config) >> 16) & 0x0F) -struct ipi_arg_non_ex { - u32 vector; - u32 reserved; - u64 cpu_mask; -}; - struct hv_vpset { u64 format; u64 valid_bank_mask; u64 bank_contents[]; }; -struct ipi_arg_ex { +/* HvCallSendSyntheticClusterIpi hypercall */ +struct hv_send_ipi { + u32 vector; + u32 reserved; + u64 cpu_mask; +}; + +/* HvCallSendSyntheticClusterIpiEx hypercall */ +struct hv_send_ipi_ex { u32 vector; u32 reserved; struct hv_vpset vp_set; diff --git a/arch/x86/include/asm/kdebug.h b/arch/x86/include/asm/kdebug.h index 395c9631e000..75f1e35e7c15 100644 --- a/arch/x86/include/asm/kdebug.h +++ b/arch/x86/include/asm/kdebug.h @@ -22,10 +22,20 @@ enum die_val { DIE_NMIUNKNOWN, }; +enum show_regs_mode { + SHOW_REGS_SHORT, + /* + * For when userspace crashed, but we don't think it's our fault, and + * therefore don't print kernel registers. + */ + SHOW_REGS_USER, + SHOW_REGS_ALL +}; + extern void die(const char *, struct pt_regs *,long); extern int __must_check __die(const char *, struct pt_regs *, long); extern void show_stack_regs(struct pt_regs *regs); -extern void __show_regs(struct pt_regs *regs, int all); +extern void __show_regs(struct pt_regs *regs, enum show_regs_mode); extern void show_iret_regs(struct pt_regs *regs); extern unsigned long oops_begin(void); extern void oops_end(unsigned long, struct pt_regs *, int signr); diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 00ddb0c9e612..09b2e3e2cf1b 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -869,6 +869,8 @@ struct kvm_arch { bool x2apic_format; bool x2apic_broadcast_quirk_disabled; + + bool guest_can_read_msr_platform_info; }; struct kvm_vm_stat { @@ -1022,6 +1024,7 @@ struct kvm_x86_ops { void (*refresh_apicv_exec_ctrl)(struct kvm_vcpu *vcpu); void (*hwapic_irr_update)(struct kvm_vcpu *vcpu, int max_irr); void (*hwapic_isr_update)(struct kvm_vcpu *vcpu, int isr); + bool (*guest_apic_has_interrupt)(struct kvm_vcpu *vcpu); void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap); void (*set_virtual_apic_mode)(struct kvm_vcpu *vcpu); void (*set_apic_access_page_addr)(struct kvm_vcpu *vcpu, hpa_t hpa); @@ -1055,6 +1058,7 @@ struct kvm_x86_ops { bool (*umip_emulated)(void); int (*check_nested_events)(struct kvm_vcpu *vcpu, bool external_intr); + void (*request_immediate_exit)(struct kvm_vcpu *vcpu); void (*sched_in)(struct kvm_vcpu *kvm, int cpu); @@ -1237,19 +1241,12 @@ enum emulation_result { #define EMULTYPE_NO_DECODE (1 << 0) #define EMULTYPE_TRAP_UD (1 << 1) #define EMULTYPE_SKIP (1 << 2) -#define EMULTYPE_RETRY (1 << 3) -#define EMULTYPE_NO_REEXECUTE (1 << 4) -#define EMULTYPE_NO_UD_ON_FAIL (1 << 5) -#define EMULTYPE_VMWARE (1 << 6) -int x86_emulate_instruction(struct kvm_vcpu *vcpu, unsigned long cr2, - int emulation_type, void *insn, int insn_len); - -static inline int emulate_instruction(struct kvm_vcpu *vcpu, - int emulation_type) -{ - return x86_emulate_instruction(vcpu, 0, - emulation_type | EMULTYPE_NO_REEXECUTE, NULL, 0); -} +#define EMULTYPE_ALLOW_RETRY (1 << 3) +#define EMULTYPE_NO_UD_ON_FAIL (1 << 4) +#define EMULTYPE_VMWARE (1 << 5) +int kvm_emulate_instruction(struct kvm_vcpu *vcpu, int emulation_type); +int kvm_emulate_instruction_from_buffer(struct kvm_vcpu *vcpu, + void *insn, int insn_len); void kvm_enable_efer_bits(u64); bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer); @@ -1450,7 +1447,6 @@ asmlinkage void kvm_spurious_fault(void); ____kvm_handle_fault_on_reboot(insn, "") #define KVM_ARCH_WANT_MMU_NOTIFIER -int kvm_unmap_hva(struct kvm *kvm, unsigned long hva); int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end); int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end); int kvm_test_age_hva(struct kvm *kvm, unsigned long hva); @@ -1463,7 +1459,7 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event); void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu); int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low, - unsigned long ipi_bitmap_high, int min, + unsigned long ipi_bitmap_high, u32 min, unsigned long icr, int op_64_bit); u64 kvm_get_arch_capabilities(void); @@ -1490,6 +1486,7 @@ extern bool kvm_find_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn); int kvm_skip_emulated_instruction(struct kvm_vcpu *vcpu); int kvm_complete_insn_gp(struct kvm_vcpu *vcpu, int err); +void __kvm_request_immediate_exit(struct kvm_vcpu *vcpu); int kvm_is_in_guest(void); diff --git a/arch/x86/include/asm/mem_encrypt.h b/arch/x86/include/asm/mem_encrypt.h index c0643831706e..616f8e637bc3 100644 --- a/arch/x86/include/asm/mem_encrypt.h +++ b/arch/x86/include/asm/mem_encrypt.h @@ -48,10 +48,13 @@ int __init early_set_memory_encrypted(unsigned long vaddr, unsigned long size); /* Architecture __weak replacement functions */ void __init mem_encrypt_init(void); +void __init mem_encrypt_free_decrypted_mem(void); bool sme_active(void); bool sev_active(void); +#define __bss_decrypted __attribute__((__section__(".bss..decrypted"))) + #else /* !CONFIG_AMD_MEM_ENCRYPT */ #define sme_me_mask 0ULL @@ -77,6 +80,8 @@ early_set_memory_decrypted(unsigned long vaddr, unsigned long size) { return 0; static inline int __init early_set_memory_encrypted(unsigned long vaddr, unsigned long size) { return 0; } +#define __bss_decrypted + #endif /* CONFIG_AMD_MEM_ENCRYPT */ /* @@ -88,6 +93,8 @@ early_set_memory_encrypted(unsigned long vaddr, unsigned long size) { return 0; #define __sme_pa(x) (__pa(x) | sme_me_mask) #define __sme_pa_nodebug(x) (__pa_nodebug(x) | sme_me_mask) +extern char __start_bss_decrypted[], __end_bss_decrypted[], __start_bss_decrypted_unused[]; + #endif /* __ASSEMBLY__ */ #endif /* __X86_MEM_ENCRYPT_H__ */ diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index e9202a0de8f0..1a19d11cfbbd 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -185,22 +185,22 @@ do { \ typeof(var) pfo_ret__; \ switch (sizeof(var)) { \ case 1: \ - asm(op "b "__percpu_arg(1)",%0" \ + asm volatile(op "b "__percpu_arg(1)",%0"\ : "=q" (pfo_ret__) \ : "m" (var)); \ break; \ case 2: \ - asm(op "w "__percpu_arg(1)",%0" \ + asm volatile(op "w "__percpu_arg(1)",%0"\ : "=r" (pfo_ret__) \ : "m" (var)); \ break; \ case 4: \ - asm(op "l "__percpu_arg(1)",%0" \ + asm volatile(op "l "__percpu_arg(1)",%0"\ : "=r" (pfo_ret__) \ : "m" (var)); \ break; \ case 8: \ - asm(op "q "__percpu_arg(1)",%0" \ + asm volatile(op "q "__percpu_arg(1)",%0"\ : "=r" (pfo_ret__) \ : "m" (var)); \ break; \ diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index 12f54082f4c8..78241b736f2a 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -46,6 +46,14 @@ #define INTEL_ARCH_EVENT_MASK \ (ARCH_PERFMON_EVENTSEL_UMASK | ARCH_PERFMON_EVENTSEL_EVENT) +#define AMD64_L3_SLICE_SHIFT 48 +#define AMD64_L3_SLICE_MASK \ + ((0xFULL) << AMD64_L3_SLICE_SHIFT) + +#define AMD64_L3_THREAD_SHIFT 56 +#define AMD64_L3_THREAD_MASK \ + ((0xFFULL) << AMD64_L3_THREAD_SHIFT) + #define X86_RAW_EVENT_MASK \ (ARCH_PERFMON_EVENTSEL_EVENT | \ ARCH_PERFMON_EVENTSEL_UMASK | \ diff --git a/arch/x86/include/asm/pgtable-2level.h b/arch/x86/include/asm/pgtable-2level.h index 24c6cf5f16b7..60d0f9015317 100644 --- a/arch/x86/include/asm/pgtable-2level.h +++ b/arch/x86/include/asm/pgtable-2level.h @@ -19,9 +19,6 @@ static inline void native_set_pte(pte_t *ptep , pte_t pte) static inline void native_set_pmd(pmd_t *pmdp, pmd_t pmd) { -#ifdef CONFIG_PAGE_TABLE_ISOLATION - pmd.pud.p4d.pgd = pti_set_user_pgtbl(&pmdp->pud.p4d.pgd, pmd.pud.p4d.pgd); -#endif *pmdp = pmd; } @@ -61,9 +58,6 @@ static inline pte_t native_ptep_get_and_clear(pte_t *xp) #ifdef CONFIG_SMP static inline pmd_t native_pmdp_get_and_clear(pmd_t *xp) { -#ifdef CONFIG_PAGE_TABLE_ISOLATION - pti_set_user_pgtbl(&xp->pud.p4d.pgd, __pgd(0)); -#endif return __pmd(xchg((pmdval_t *)xp, 0)); } #else @@ -73,9 +67,6 @@ static inline pmd_t native_pmdp_get_and_clear(pmd_t *xp) #ifdef CONFIG_SMP static inline pud_t native_pudp_get_and_clear(pud_t *xp) { -#ifdef CONFIG_PAGE_TABLE_ISOLATION - pti_set_user_pgtbl(&xp->p4d.pgd, __pgd(0)); -#endif return __pud(xchg((pudval_t *)xp, 0)); } #else diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index e4ffa565a69f..690c0307afed 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -1195,7 +1195,7 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma, return xchg(pmdp, pmd); } else { pmd_t old = *pmdp; - *pmdp = pmd; + WRITE_ONCE(*pmdp, pmd); return old; } } diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index f773d5e6c8cc..9c85b54bf03c 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h @@ -14,6 +14,7 @@ #include <asm/processor.h> #include <linux/bitops.h> #include <linux/threads.h> +#include <asm/fixmap.h> extern p4d_t level4_kernel_pgt[512]; extern p4d_t level4_ident_pgt[512]; @@ -22,7 +23,7 @@ extern pud_t level3_ident_pgt[512]; extern pmd_t level2_kernel_pgt[512]; extern pmd_t level2_fixmap_pgt[512]; extern pmd_t level2_ident_pgt[512]; -extern pte_t level1_fixmap_pgt[512]; +extern pte_t level1_fixmap_pgt[512 * FIXMAP_PMD_NUM]; extern pgd_t init_top_pgt[]; #define swapper_pg_dir init_top_pgt @@ -55,15 +56,15 @@ struct mm_struct; void set_pte_vaddr_p4d(p4d_t *p4d_page, unsigned long vaddr, pte_t new_pte); void set_pte_vaddr_pud(pud_t *pud_page, unsigned long vaddr, pte_t new_pte); -static inline void native_pte_clear(struct mm_struct *mm, unsigned long addr, - pte_t *ptep) +static inline void native_set_pte(pte_t *ptep, pte_t pte) { - *ptep = native_make_pte(0); + WRITE_ONCE(*ptep, pte); } -static inline void native_set_pte(pte_t *ptep, pte_t pte) +static inline void native_pte_clear(struct mm_struct *mm, unsigned long addr, + pte_t *ptep) { - *ptep = pte; + native_set_pte(ptep, native_make_pte(0)); } static inline void native_set_pte_atomic(pte_t *ptep, pte_t pte) @@ -73,7 +74,7 @@ static inline void native_set_pte_atomic(pte_t *ptep, pte_t pte) static inline void native_set_pmd(pmd_t *pmdp, pmd_t pmd) { - *pmdp = pmd; + WRITE_ONCE(*pmdp, pmd); } static inline void native_pmd_clear(pmd_t *pmd) @@ -109,7 +110,7 @@ static inline pmd_t native_pmdp_get_and_clear(pmd_t *xp) static inline void native_set_pud(pud_t *pudp, pud_t pud) { - *pudp = pud; + WRITE_ONCE(*pudp, pud); } static inline void native_pud_clear(pud_t *pud) @@ -137,13 +138,13 @@ static inline void native_set_p4d(p4d_t *p4dp, p4d_t p4d) pgd_t pgd; if (pgtable_l5_enabled() || !IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION)) { - *p4dp = p4d; + WRITE_ONCE(*p4dp, p4d); return; } pgd = native_make_pgd(native_p4d_val(p4d)); pgd = pti_set_user_pgtbl((pgd_t *)p4dp, pgd); - *p4dp = native_make_p4d(native_pgd_val(pgd)); + WRITE_ONCE(*p4dp, native_make_p4d(native_pgd_val(pgd))); } static inline void native_p4d_clear(p4d_t *p4d) @@ -153,7 +154,7 @@ static inline void native_p4d_clear(p4d_t *p4d) static inline void native_set_pgd(pgd_t *pgdp, pgd_t pgd) { - *pgdp = pti_set_user_pgtbl(pgdp, pgd); + WRITE_ONCE(*pgdp, pti_set_user_pgtbl(pgdp, pgd)); } static inline void native_pgd_clear(pgd_t *pgd) diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index b64acb08a62b..106b7d0e2dae 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -124,7 +124,7 @@ */ #define _PAGE_CHG_MASK (PTE_PFN_MASK | _PAGE_PCD | _PAGE_PWT | \ _PAGE_SPECIAL | _PAGE_ACCESSED | _PAGE_DIRTY | \ - _PAGE_SOFT_DIRTY) + _PAGE_SOFT_DIRTY | _PAGE_DEVMAP) #define _HPAGE_CHG_MASK (_PAGE_CHG_MASK | _PAGE_PSE) /* diff --git a/arch/x86/include/asm/uv/uv.h b/arch/x86/include/asm/uv/uv.h index a80c0673798f..e60c45fd3679 100644 --- a/arch/x86/include/asm/uv/uv.h +++ b/arch/x86/include/asm/uv/uv.h @@ -10,8 +10,13 @@ struct cpumask; struct mm_struct; #ifdef CONFIG_X86_UV +#include <linux/efi.h> extern enum uv_system_type get_uv_system_type(void); +static inline bool is_early_uv_system(void) +{ + return !((efi.uv_systab == EFI_INVALID_TABLE_ADDR) || !efi.uv_systab); +} extern int is_uv_system(void); extern int is_uv_hubless(void); extern void uv_cpu_init(void); @@ -23,6 +28,7 @@ extern const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask, #else /* X86_UV */ static inline enum uv_system_type get_uv_system_type(void) { return UV_NONE; } +static inline bool is_early_uv_system(void) { return 0; } static inline int is_uv_system(void) { return 0; } static inline int is_uv_hubless(void) { return 0; } static inline void uv_cpu_init(void) { } diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h index 86299efa804a..fd23d5778ea1 100644 --- a/arch/x86/include/uapi/asm/kvm.h +++ b/arch/x86/include/uapi/asm/kvm.h @@ -377,6 +377,7 @@ struct kvm_sync_regs { #define KVM_X86_QUIRK_LINT0_REENABLED (1 << 0) #define KVM_X86_QUIRK_CD_NW_CLEARED (1 << 1) +#define KVM_X86_QUIRK_LAPIC_MMIO_HOLE (1 << 2) #define KVM_STATE_NESTED_GUEST_MODE 0x00000001 #define KVM_STATE_NESTED_RUN_PENDING 0x00000002 diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index 9f148e3d45b4..7654febd5102 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -413,7 +413,7 @@ static int activate_managed(struct irq_data *irqd) if (WARN_ON_ONCE(cpumask_empty(vector_searchmask))) { /* Something in the core code broke! Survive gracefully */ pr_err("Managed startup for irq %u, but no CPU\n", irqd->irq); - return EINVAL; + return -EINVAL; } ret = assign_managed_vector(irqd, vector_searchmask); diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c index ec00d1ff5098..f7151cd03cb0 100644 --- a/arch/x86/kernel/apm_32.c +++ b/arch/x86/kernel/apm_32.c @@ -1640,6 +1640,7 @@ static int do_open(struct inode *inode, struct file *filp) return 0; } +#ifdef CONFIG_PROC_FS static int proc_apm_show(struct seq_file *m, void *v) { unsigned short bx; @@ -1719,6 +1720,7 @@ static int proc_apm_show(struct seq_file *m, void *v) units); return 0; } +#endif static int apm(void *unused) { diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 22ab408177b2..eeea634bee0a 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -922,7 +922,7 @@ static void init_amd(struct cpuinfo_x86 *c) static unsigned int amd_size_cache(struct cpuinfo_x86 *c, unsigned int size) { /* AMD errata T13 (order #21922) */ - if ((c->x86 == 6)) { + if (c->x86 == 6) { /* Duron Rev A0 */ if (c->x86_model == 3 && c->x86_stepping == 0) size = 64; diff --git a/arch/x86/kernel/cpu/intel_rdt.h b/arch/x86/kernel/cpu/intel_rdt.h index 4e588f36228f..3736f6dc9545 100644 --- a/arch/x86/kernel/cpu/intel_rdt.h +++ b/arch/x86/kernel/cpu/intel_rdt.h @@ -382,6 +382,11 @@ static inline bool is_mbm_event(int e) e <= QOS_L3_MBM_LOCAL_EVENT_ID); } +struct rdt_parse_data { + struct rdtgroup *rdtgrp; + char *buf; +}; + /** * struct rdt_resource - attributes of an RDT resource * @rid: The index of the resource @@ -423,16 +428,19 @@ struct rdt_resource { struct rdt_cache cache; struct rdt_membw membw; const char *format_str; - int (*parse_ctrlval) (void *data, struct rdt_resource *r, - struct rdt_domain *d); + int (*parse_ctrlval)(struct rdt_parse_data *data, + struct rdt_resource *r, + struct rdt_domain *d); struct list_head evt_list; int num_rmid; unsigned int mon_scale; unsigned long fflags; }; -int parse_cbm(void *_data, struct rdt_resource *r, struct rdt_domain *d); -int parse_bw(void *_buf, struct rdt_resource *r, struct rdt_domain *d); +int parse_cbm(struct rdt_parse_data *data, struct rdt_resource *r, + struct rdt_domain *d); +int parse_bw(struct rdt_parse_data *data, struct rdt_resource *r, + struct rdt_domain *d); extern struct mutex rdtgroup_mutex; @@ -521,14 +529,14 @@ ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of, int rdtgroup_schemata_show(struct kernfs_open_file *of, struct seq_file *s, void *v); bool rdtgroup_cbm_overlaps(struct rdt_resource *r, struct rdt_domain *d, - u32 _cbm, int closid, bool exclusive); + unsigned long cbm, int closid, bool exclusive); unsigned int rdtgroup_cbm_to_size(struct rdt_resource *r, struct rdt_domain *d, - u32 cbm); + unsigned long cbm); enum rdtgrp_mode rdtgroup_mode_by_closid(int closid); int rdtgroup_tasks_assigned(struct rdtgroup *r); int rdtgroup_locksetup_enter(struct rdtgroup *rdtgrp); int rdtgroup_locksetup_exit(struct rdtgroup *rdtgrp); -bool rdtgroup_cbm_overlaps_pseudo_locked(struct rdt_domain *d, u32 _cbm); +bool rdtgroup_cbm_overlaps_pseudo_locked(struct rdt_domain *d, unsigned long cbm); bool rdtgroup_pseudo_locked_in_hierarchy(struct rdt_domain *d); int rdt_pseudo_lock_init(void); void rdt_pseudo_lock_release(void); @@ -536,6 +544,7 @@ int rdtgroup_pseudo_lock_create(struct rdtgroup *rdtgrp); void rdtgroup_pseudo_lock_remove(struct rdtgroup *rdtgrp); struct rdt_domain *get_domain_from_cpu(int cpu, struct rdt_resource *r); int update_domains(struct rdt_resource *r, int closid); +int closids_supported(void); void closid_free(int closid); int alloc_rmid(void); void free_rmid(u32 rmid); diff --git a/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c b/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c index af358ca05160..0f53049719cd 100644 --- a/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c +++ b/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c @@ -64,19 +64,19 @@ static bool bw_validate(char *buf, unsigned long *data, struct rdt_resource *r) return true; } -int parse_bw(void *_buf, struct rdt_resource *r, struct rdt_domain *d) +int parse_bw(struct rdt_parse_data *data, struct rdt_resource *r, + struct rdt_domain *d) { - unsigned long data; - char *buf = _buf; + unsigned long bw_val; if (d->have_new_ctrl) { rdt_last_cmd_printf("duplicate domain %d\n", d->id); return -EINVAL; } - if (!bw_validate(buf, &data, r)) + if (!bw_validate(data->buf, &bw_val, r)) return -EINVAL; - d->new_ctrl = data; + d->new_ctrl = bw_val; d->have_new_ctrl = true; return 0; @@ -123,18 +123,13 @@ static bool cbm_validate(char *buf, u32 *data, struct rdt_resource *r) return true; } -struct rdt_cbm_parse_data { - struct rdtgroup *rdtgrp; - char *buf; -}; - /* * Read one cache bit mask (hex). Check that it is valid for the current * resource type. */ -int parse_cbm(void *_data, struct rdt_resource *r, struct rdt_domain *d) +int parse_cbm(struct rdt_parse_data *data, struct rdt_resource *r, + struct rdt_domain *d) { - struct rdt_cbm_parse_data *data = _data; struct rdtgroup *rdtgrp = data->rdtgrp; u32 cbm_val; @@ -195,11 +190,17 @@ int parse_cbm(void *_data, struct rdt_resource *r, struct rdt_domain *d) static int parse_line(char *line, struct rdt_resource *r, struct rdtgroup *rdtgrp) { - struct rdt_cbm_parse_data data; + struct rdt_parse_data data; char *dom = NULL, *id; struct rdt_domain *d; unsigned long dom_id; + if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP && + r->rid == RDT_RESOURCE_MBA) { + rdt_last_cmd_puts("Cannot pseudo-lock MBA resource\n"); + return -EINVAL; + } + next: if (!line || line[0] == '\0') return 0; diff --git a/arch/x86/kernel/cpu/intel_rdt_pseudo_lock.c b/arch/x86/kernel/cpu/intel_rdt_pseudo_lock.c index 40f3903ae5d9..f8c260d522ca 100644 --- a/arch/x86/kernel/cpu/intel_rdt_pseudo_lock.c +++ b/arch/x86/kernel/cpu/intel_rdt_pseudo_lock.c @@ -797,25 +797,27 @@ int rdtgroup_locksetup_exit(struct rdtgroup *rdtgrp) /** * rdtgroup_cbm_overlaps_pseudo_locked - Test if CBM or portion is pseudo-locked * @d: RDT domain - * @_cbm: CBM to test + * @cbm: CBM to test * - * @d represents a cache instance and @_cbm a capacity bitmask that is - * considered for it. Determine if @_cbm overlaps with any existing + * @d represents a cache instance and @cbm a capacity bitmask that is + * considered for it. Determine if @cbm overlaps with any existing * pseudo-locked region on @d. * - * Return: true if @_cbm overlaps with pseudo-locked region on @d, false + * @cbm is unsigned long, even if only 32 bits are used, to make the + * bitmap functions work correctly. + * + * Return: true if @cbm overlaps with pseudo-locked region on @d, false * otherwise. */ -bool rdtgroup_cbm_overlaps_pseudo_locked(struct rdt_domain *d, u32 _cbm) +bool rdtgroup_cbm_overlaps_pseudo_locked(struct rdt_domain *d, unsigned long cbm) { - unsigned long *cbm = (unsigned long *)&_cbm; - unsigned long *cbm_b; unsigned int cbm_len; + unsigned long cbm_b; if (d->plr) { cbm_len = d->plr->r->cache.cbm_len; - cbm_b = (unsigned long *)&d->plr->cbm; - if (bitmap_intersects(cbm, cbm_b, cbm_len)) + cbm_b = d->plr->cbm; + if (bitmap_intersects(&cbm, &cbm_b, cbm_len)) return true; } return false; diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c index b799c00bef09..b140c68bc14b 100644 --- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c +++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c @@ -97,6 +97,12 @@ void rdt_last_cmd_printf(const char *fmt, ...) * limited as the number of resources grows. */ static int closid_free_map; +static int closid_free_map_len; + +int closids_supported(void) +{ + return closid_free_map_len; +} static void closid_init(void) { @@ -111,6 +117,7 @@ static void closid_init(void) /* CLOSID 0 is always reserved for the default group */ closid_free_map &= ~1; + closid_free_map_len = rdt_min_closid; } static int closid_alloc(void) @@ -802,7 +809,7 @@ static int rdt_bit_usage_show(struct kernfs_open_file *of, sw_shareable = 0; exclusive = 0; seq_printf(seq, "%d=", dom->id); - for (i = 0; i < r->num_closid; i++, ctrl++) { + for (i = 0; i < closids_supported(); i++, ctrl++) { if (!closid_allocated(i)) continue; mode = rdtgroup_mode_by_closid(i); @@ -968,33 +975,34 @@ static int rdtgroup_mode_show(struct kernfs_open_file *of, * is false then overlaps with any resource group or hardware entities * will be considered. * + * @cbm is unsigned long, even if only 32 bits are used, to make the + * bitmap functions work correctly. + * * Return: false if CBM does not overlap, true if it does. */ bool rdtgroup_cbm_overlaps(struct rdt_resource *r, struct rdt_domain *d, - u32 _cbm, int closid, bool exclusive) + unsigned long cbm, int closid, bool exclusive) { - unsigned long *cbm = (unsigned long *)&_cbm; - unsigned long *ctrl_b; enum rdtgrp_mode mode; + unsigned long ctrl_b; u32 *ctrl; int i; /* Check for any overlap with regions used by hardware directly */ if (!exclusive) { - if (bitmap_intersects(cbm, - (unsigned long *)&r->cache.shareable_bits, - r->cache.cbm_len)) + ctrl_b = r->cache.shareable_bits; + if (bitmap_intersects(&cbm, &ctrl_b, r->cache.cbm_len)) return true; } /* Check for overlap with other resource groups */ ctrl = d->ctrl_val; - for (i = 0; i < r->num_closid; i++, ctrl++) { - ctrl_b = (unsigned long *)ctrl; + for (i = 0; i < closids_supported(); i++, ctrl++) { + ctrl_b = *ctrl; mode = rdtgroup_mode_by_closid(i); if (closid_allocated(i) && i != closid && mode != RDT_MODE_PSEUDO_LOCKSETUP) { - if (bitmap_intersects(cbm, ctrl_b, r->cache.cbm_len)) { + if (bitmap_intersects(&cbm, &ctrl_b, r->cache.cbm_len)) { if (exclusive) { if (mode == RDT_MODE_EXCLUSIVE) return true; @@ -1024,16 +1032,27 @@ static bool rdtgroup_mode_test_exclusive(struct rdtgroup *rdtgrp) { int closid = rdtgrp->closid; struct rdt_resource *r; + bool has_cache = false; struct rdt_domain *d; for_each_alloc_enabled_rdt_resource(r) { + if (r->rid == RDT_RESOURCE_MBA) + continue; + has_cache = true; list_for_each_entry(d, &r->domains, list) { if (rdtgroup_cbm_overlaps(r, d, d->ctrl_val[closid], - rdtgrp->closid, false)) + rdtgrp->closid, false)) { + rdt_last_cmd_puts("schemata overlaps\n"); return false; + } } } + if (!has_cache) { + rdt_last_cmd_puts("cannot be exclusive without CAT/CDP\n"); + return false; + } + return true; } @@ -1085,7 +1104,6 @@ static ssize_t rdtgroup_mode_write(struct kernfs_open_file *of, rdtgrp->mode = RDT_MODE_SHAREABLE; } else if (!strcmp(buf, "exclusive")) { if (!rdtgroup_mode_test_exclusive(rdtgrp)) { - rdt_last_cmd_printf("schemata overlaps\n"); ret = -EINVAL; goto out; } @@ -1121,15 +1139,18 @@ out: * computed by first dividing the total cache size by the CBM length to * determine how many bytes each bit in the bitmask represents. The result * is multiplied with the number of bits set in the bitmask. + * + * @cbm is unsigned long, even if only 32 bits are used to make the + * bitmap functions work correctly. */ unsigned int rdtgroup_cbm_to_size(struct rdt_resource *r, - struct rdt_domain *d, u32 cbm) + struct rdt_domain *d, unsigned long cbm) { struct cpu_cacheinfo *ci; unsigned int size = 0; int num_b, i; - num_b = bitmap_weight((unsigned long *)&cbm, r->cache.cbm_len); + num_b = bitmap_weight(&cbm, r->cache.cbm_len); ci = get_cpu_cacheinfo(cpumask_any(&d->cpu_mask)); for (i = 0; i < ci->num_leaves; i++) { if (ci->info_list[i].level == r->cache_level) { @@ -1155,8 +1176,8 @@ static int rdtgroup_size_show(struct kernfs_open_file *of, struct rdt_resource *r; struct rdt_domain *d; unsigned int size; - bool sep = false; - u32 cbm; + bool sep; + u32 ctrl; rdtgrp = rdtgroup_kn_lock_live(of->kn); if (!rdtgrp) { @@ -1174,6 +1195,7 @@ static int rdtgroup_size_show(struct kernfs_open_file *of, } for_each_alloc_enabled_rdt_resource(r) { + sep = false; seq_printf(s, "%*s:", max_name_width, r->name); list_for_each_entry(d, &r->domains, list) { if (sep) @@ -1181,8 +1203,13 @@ static int rdtgroup_size_show(struct kernfs_open_file *of, if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) { size = 0; } else { - cbm = d->ctrl_val[rdtgrp->closid]; - size = rdtgroup_cbm_to_size(r, d, cbm); + ctrl = (!is_mba_sc(r) ? + d->ctrl_val[rdtgrp->closid] : + d->mbps_val[rdtgrp->closid]); + if (r->rid == RDT_RESOURCE_MBA) + size = ctrl; + else + size = rdtgroup_cbm_to_size(r, d, ctrl); } seq_printf(s, "%d=%u", d->id, size); sep = true; @@ -2330,18 +2357,25 @@ static int rdtgroup_init_alloc(struct rdtgroup *rdtgrp) u32 used_b = 0, unused_b = 0; u32 closid = rdtgrp->closid; struct rdt_resource *r; + unsigned long tmp_cbm; enum rdtgrp_mode mode; struct rdt_domain *d; int i, ret; u32 *ctrl; for_each_alloc_enabled_rdt_resource(r) { + /* + * Only initialize default allocations for CBM cache + * resources + */ + if (r->rid == RDT_RESOURCE_MBA) + continue; list_for_each_entry(d, &r->domains, list) { d->have_new_ctrl = false; d->new_ctrl = r->cache.shareable_bits; used_b = r->cache.shareable_bits; ctrl = d->ctrl_val; - for (i = 0; i < r->num_closid; i++, ctrl++) { + for (i = 0; i < closids_supported(); i++, ctrl++) { if (closid_allocated(i) && i != closid) { mode = rdtgroup_mode_by_closid(i); if (mode == RDT_MODE_PSEUDO_LOCKSETUP) @@ -2361,9 +2395,14 @@ static int rdtgroup_init_alloc(struct rdtgroup *rdtgrp) * modify the CBM based on system availability. */ cbm_ensure_valid(&d->new_ctrl, r); - if (bitmap_weight((unsigned long *) &d->new_ctrl, - r->cache.cbm_len) < - r->cache.min_cbm_bits) { + /* + * Assign the u32 CBM to an unsigned long to ensure + * that bitmap_weight() does not access out-of-bound + * memory. + */ + tmp_cbm = d->new_ctrl; + if (bitmap_weight(&tmp_cbm, r->cache.cbm_len) < + r->cache.min_cbm_bits) { rdt_last_cmd_printf("no space on %s:%d\n", r->name, d->id); return -ENOSPC; @@ -2373,6 +2412,12 @@ static int rdtgroup_init_alloc(struct rdtgroup *rdtgrp) } for_each_alloc_enabled_rdt_resource(r) { + /* + * Only initialize default allocations for CBM cache + * resources + */ + if (r->rid == RDT_RESOURCE_MBA) + continue; ret = update_domains(r, rdtgrp->closid); if (ret < 0) { rdt_last_cmd_puts("failed to initialize allocations\n"); diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c index 0624957aa068..07b5fc00b188 100644 --- a/arch/x86/kernel/cpu/microcode/amd.c +++ b/arch/x86/kernel/cpu/microcode/amd.c @@ -504,6 +504,7 @@ static enum ucode_state apply_microcode_amd(int cpu) struct microcode_amd *mc_amd; struct ucode_cpu_info *uci; struct ucode_patch *p; + enum ucode_state ret; u32 rev, dummy; BUG_ON(raw_smp_processor_id() != cpu); @@ -521,9 +522,8 @@ static enum ucode_state apply_microcode_amd(int cpu) /* need to apply patch? */ if (rev >= mc_amd->hdr.patch_id) { - c->microcode = rev; - uci->cpu_sig.rev = rev; - return UCODE_OK; + ret = UCODE_OK; + goto out; } if (__apply_microcode_amd(mc_amd)) { @@ -531,13 +531,21 @@ static enum ucode_state apply_microcode_amd(int cpu) cpu, mc_amd->hdr.patch_id); return UCODE_ERROR; } - pr_info("CPU%d: new patch_level=0x%08x\n", cpu, - mc_amd->hdr.patch_id); - uci->cpu_sig.rev = mc_amd->hdr.patch_id; - c->microcode = mc_amd->hdr.patch_id; + rev = mc_amd->hdr.patch_id; + ret = UCODE_UPDATED; + + pr_info("CPU%d: new patch_level=0x%08x\n", cpu, rev); - return UCODE_UPDATED; +out: + uci->cpu_sig.rev = rev; + c->microcode = rev; + + /* Update boot_cpu_data's revision too, if we're on the BSP: */ + if (c->cpu_index == boot_cpu_data.cpu_index) + boot_cpu_data.microcode = rev; + + return ret; } static int install_equiv_cpu_table(const u8 *buf) diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c index 97ccf4c3b45b..16936a24795c 100644 --- a/arch/x86/kernel/cpu/microcode/intel.c +++ b/arch/x86/kernel/cpu/microcode/intel.c @@ -795,6 +795,7 @@ static enum ucode_state apply_microcode_intel(int cpu) struct ucode_cpu_info *uci = ucode_cpu_info + cpu; struct cpuinfo_x86 *c = &cpu_data(cpu); struct microcode_intel *mc; + enum ucode_state ret; static int prev_rev; u32 rev; @@ -817,9 +818,8 @@ static enum ucode_state apply_microcode_intel(int cpu) */ rev = intel_get_microcode_revision(); if (rev >= mc->hdr.rev) { - uci->cpu_sig.rev = rev; - c->microcode = rev; - return UCODE_OK; + ret = UCODE_OK; + goto out; } /* @@ -848,10 +848,17 @@ static enum ucode_state apply_microcode_intel(int cpu) prev_rev = rev; } + ret = UCODE_UPDATED; + +out: uci->cpu_sig.rev = rev; - c->microcode = rev; + c->microcode = rev; + + /* Update boot_cpu_data's revision too, if we're on the BSP: */ + if (c->cpu_index == boot_cpu_data.cpu_index) + boot_cpu_data.microcode = rev; - return UCODE_UPDATED; + return ret; } static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size, diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index f56895106ccf..2b5886401e5f 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -146,7 +146,7 @@ static void show_regs_if_on_stack(struct stack_info *info, struct pt_regs *regs, * they can be printed in the right context. */ if (!partial && on_stack(info, regs, sizeof(*regs))) { - __show_regs(regs, 0); + __show_regs(regs, SHOW_REGS_SHORT); } else if (partial && on_stack(info, (void *)regs + IRET_FRAME_OFFSET, IRET_FRAME_SIZE)) { @@ -344,7 +344,7 @@ void oops_end(unsigned long flags, struct pt_regs *regs, int signr) oops_exit(); /* Executive summary in case the oops scrolled away */ - __show_regs(&exec_summary_regs, true); + __show_regs(&exec_summary_regs, SHOW_REGS_ALL); if (!signr) return; @@ -407,14 +407,9 @@ void die(const char *str, struct pt_regs *regs, long err) void show_regs(struct pt_regs *regs) { - bool all = true; - show_regs_print_info(KERN_DEFAULT); - if (IS_ENABLED(CONFIG_X86_32)) - all = !user_mode(regs); - - __show_regs(regs, all); + __show_regs(regs, user_mode(regs) ? SHOW_REGS_USER : SHOW_REGS_ALL); /* * When in-kernel, we also print out the stack at the time of the fault.. diff --git a/arch/x86/kernel/eisa.c b/arch/x86/kernel/eisa.c index f260e452e4f8..e8c8c5d78dbd 100644 --- a/arch/x86/kernel/eisa.c +++ b/arch/x86/kernel/eisa.c @@ -7,11 +7,17 @@ #include <linux/eisa.h> #include <linux/io.h> +#include <xen/xen.h> + static __init int eisa_bus_probe(void) { - void __iomem *p = ioremap(0x0FFFD9, 4); + void __iomem *p; + + if (xen_pv_domain() && !xen_initial_domain()) + return 0; - if (readl(p) == 'E' + ('I'<<8) + ('S'<<16) + ('A'<<24)) + p = ioremap(0x0FFFD9, 4); + if (p && readl(p) == 'E' + ('I' << 8) + ('S' << 16) + ('A' << 24)) EISA_bus = 1; iounmap(p); return 0; diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index 23f1691670b6..61a949d84dfa 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c @@ -314,7 +314,6 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) * thread's fpu state, reconstruct fxstate from the fsave * header. Validate and sanitize the copied state. */ - struct fpu *fpu = &tsk->thread.fpu; struct user_i387_ia32_struct env; int err = 0; diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 8047379e575a..ddee1f0870c4 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -35,6 +35,7 @@ #include <asm/bootparam_utils.h> #include <asm/microcode.h> #include <asm/kasan.h> +#include <asm/fixmap.h> /* * Manage page tables very early on. @@ -112,6 +113,7 @@ static bool __head check_la57_support(unsigned long physaddr) unsigned long __head __startup_64(unsigned long physaddr, struct boot_params *bp) { + unsigned long vaddr, vaddr_end; unsigned long load_delta, *p; unsigned long pgtable_flags; pgdval_t *pgd; @@ -165,7 +167,8 @@ unsigned long __head __startup_64(unsigned long physaddr, pud[511] += load_delta; pmd = fixup_pointer(level2_fixmap_pgt, physaddr); - pmd[506] += load_delta; + for (i = FIXMAP_PMD_TOP; i > FIXMAP_PMD_TOP - FIXMAP_PMD_NUM; i--) + pmd[i] += load_delta; /* * Set up the identity mapping for the switchover. These @@ -235,6 +238,21 @@ unsigned long __head __startup_64(unsigned long physaddr, sme_encrypt_kernel(bp); /* + * Clear the memory encryption mask from the .bss..decrypted section. + * The bss section will be memset to zero later in the initialization so + * there is no need to zero it after changing the memory encryption + * attribute. + */ + if (mem_encrypt_active()) { + vaddr = (unsigned long)__start_bss_decrypted; + vaddr_end = (unsigned long)__end_bss_decrypted; + for (; vaddr < vaddr_end; vaddr += PMD_SIZE) { + i = pmd_index(vaddr); + pmd[i] -= sme_get_me_mask(); + } + } + + /* * Return the SME encryption mask (if SME is active) to be used as a * modifier for the initial pgdir entry programmed into CR3. */ diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 15ebc2fc166e..a3618cf04cf6 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -24,6 +24,7 @@ #include "../entry/calling.h" #include <asm/export.h> #include <asm/nospec-branch.h> +#include <asm/fixmap.h> #ifdef CONFIG_PARAVIRT #include <asm/asm-offsets.h> @@ -445,13 +446,20 @@ NEXT_PAGE(level2_kernel_pgt) KERNEL_IMAGE_SIZE/PMD_SIZE) NEXT_PAGE(level2_fixmap_pgt) - .fill 506,8,0 - .quad level1_fixmap_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC - /* 8MB reserved for vsyscalls + a 2MB hole = 4 + 1 entries */ - .fill 5,8,0 + .fill (512 - 4 - FIXMAP_PMD_NUM),8,0 + pgtno = 0 + .rept (FIXMAP_PMD_NUM) + .quad level1_fixmap_pgt + (pgtno << PAGE_SHIFT) - __START_KERNEL_map \ + + _PAGE_TABLE_NOENC; + pgtno = pgtno + 1 + .endr + /* 6 MB reserved space + a 2MB hole */ + .fill 4,8,0 NEXT_PAGE(level1_fixmap_pgt) + .rept (FIXMAP_PMD_NUM) .fill 512,8,0 + .endr #undef PMDS diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index 1e6764648af3..013fe3d21dbb 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c @@ -28,6 +28,7 @@ #include <linux/sched/clock.h> #include <linux/mm.h> #include <linux/slab.h> +#include <linux/set_memory.h> #include <asm/hypervisor.h> #include <asm/mem_encrypt.h> @@ -61,9 +62,10 @@ early_param("no-kvmclock-vsyscall", parse_no_kvmclock_vsyscall); (PAGE_SIZE / sizeof(struct pvclock_vsyscall_time_info)) static struct pvclock_vsyscall_time_info - hv_clock_boot[HVC_BOOT_ARRAY_SIZE] __aligned(PAGE_SIZE); -static struct pvclock_wall_clock wall_clock; + hv_clock_boot[HVC_BOOT_ARRAY_SIZE] __bss_decrypted __aligned(PAGE_SIZE); +static struct pvclock_wall_clock wall_clock __bss_decrypted; static DEFINE_PER_CPU(struct pvclock_vsyscall_time_info *, hv_clock_per_cpu); +static struct pvclock_vsyscall_time_info *hvclock_mem; static inline struct pvclock_vcpu_time_info *this_cpu_pvti(void) { @@ -236,6 +238,45 @@ static void kvm_shutdown(void) native_machine_shutdown(); } +static void __init kvmclock_init_mem(void) +{ + unsigned long ncpus; + unsigned int order; + struct page *p; + int r; + + if (HVC_BOOT_ARRAY_SIZE >= num_possible_cpus()) + return; + + ncpus = num_possible_cpus() - HVC_BOOT_ARRAY_SIZE; + order = get_order(ncpus * sizeof(*hvclock_mem)); + + p = alloc_pages(GFP_KERNEL, order); + if (!p) { + pr_warn("%s: failed to alloc %d pages", __func__, (1U << order)); + return; + } + + hvclock_mem = page_address(p); + + /* + * hvclock is shared between the guest and the hypervisor, must + * be mapped decrypted. + */ + if (sev_active()) { + r = set_memory_decrypted((unsigned long) hvclock_mem, + 1UL << order); + if (r) { + __free_pages(p, order); + hvclock_mem = NULL; + pr_warn("kvmclock: set_memory_decrypted() failed. Disabling\n"); + return; + } + } + + memset(hvclock_mem, 0, PAGE_SIZE << order); +} + static int __init kvm_setup_vsyscall_timeinfo(void) { #ifdef CONFIG_X86_64 @@ -250,6 +291,9 @@ static int __init kvm_setup_vsyscall_timeinfo(void) kvm_clock.archdata.vclock_mode = VCLOCK_PVCLOCK; #endif + + kvmclock_init_mem(); + return 0; } early_initcall(kvm_setup_vsyscall_timeinfo); @@ -269,8 +313,10 @@ static int kvmclock_setup_percpu(unsigned int cpu) /* Use the static page for the first CPUs, allocate otherwise */ if (cpu < HVC_BOOT_ARRAY_SIZE) p = &hv_clock_boot[cpu]; + else if (hvclock_mem) + p = hvclock_mem + cpu - HVC_BOOT_ARRAY_SIZE; else - p = kzalloc(sizeof(*p), GFP_KERNEL); + return -ENOMEM; per_cpu(hv_clock_per_cpu, cpu) = p; return p ? 0 : -ENOMEM; diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index afdb303285f8..8dc69d82567e 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -91,7 +91,7 @@ unsigned paravirt_patch_call(void *insnbuf, if (len < 5) { #ifdef CONFIG_RETPOLINE - WARN_ONCE("Failing to patch indirect CALL in %ps\n", (void *)addr); + WARN_ONCE(1, "Failing to patch indirect CALL in %ps\n", (void *)addr); #endif return len; /* call too long for patch site */ } @@ -111,7 +111,7 @@ unsigned paravirt_patch_jmp(void *insnbuf, const void *target, if (len < 5) { #ifdef CONFIG_RETPOLINE - WARN_ONCE("Failing to patch indirect JMP in %ps\n", (void *)addr); + WARN_ONCE(1, "Failing to patch indirect JMP in %ps\n", (void *)addr); #endif return len; /* call too long for patch site */ } diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c index 661583662430..71c0b01d93b1 100644 --- a/arch/x86/kernel/pci-swiotlb.c +++ b/arch/x86/kernel/pci-swiotlb.c @@ -42,10 +42,8 @@ IOMMU_INIT_FINISH(pci_swiotlb_detect_override, int __init pci_swiotlb_detect_4gb(void) { /* don't initialize swiotlb if iommu=off (no_iommu=1) */ -#ifdef CONFIG_X86_64 if (!no_iommu && max_possible_pfn > MAX_DMA32_PFN) swiotlb = 1; -#endif /* * If SME is active then swiotlb will be set to 1 so that bounce diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 2924fd447e61..5046a3c9dec2 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -59,7 +59,7 @@ #include <asm/intel_rdt_sched.h> #include <asm/proto.h> -void __show_regs(struct pt_regs *regs, int all) +void __show_regs(struct pt_regs *regs, enum show_regs_mode mode) { unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L; unsigned long d0, d1, d2, d3, d6, d7; @@ -85,7 +85,7 @@ void __show_regs(struct pt_regs *regs, int all) printk(KERN_DEFAULT "DS: %04x ES: %04x FS: %04x GS: %04x SS: %04x EFLAGS: %08lx\n", (u16)regs->ds, (u16)regs->es, (u16)regs->fs, gs, ss, regs->flags); - if (!all) + if (mode != SHOW_REGS_ALL) return; cr0 = read_cr0(); diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index a451bc374b9b..ea5ea850348d 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -62,7 +62,7 @@ __visible DEFINE_PER_CPU(unsigned long, rsp_scratch); /* Prints also some state that isn't saved in the pt_regs */ -void __show_regs(struct pt_regs *regs, int all) +void __show_regs(struct pt_regs *regs, enum show_regs_mode mode) { unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs; unsigned long d0, d1, d2, d3, d6, d7; @@ -87,9 +87,17 @@ void __show_regs(struct pt_regs *regs, int all) printk(KERN_DEFAULT "R13: %016lx R14: %016lx R15: %016lx\n", regs->r13, regs->r14, regs->r15); - if (!all) + if (mode == SHOW_REGS_SHORT) return; + if (mode == SHOW_REGS_USER) { + rdmsrl(MSR_FS_BASE, fs); + rdmsrl(MSR_KERNEL_GS_BASE, shadowgs); + printk(KERN_DEFAULT "FS: %016lx GS: %016lx\n", + fs, shadowgs); + return; + } + asm("movl %%ds,%0" : "=r" (ds)); asm("movl %%cs,%0" : "=r" (cs)); asm("movl %%es,%0" : "=r" (es)); diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c index be01328eb755..fddaefc51fb6 100644 --- a/arch/x86/kernel/time.c +++ b/arch/x86/kernel/time.c @@ -25,7 +25,7 @@ #include <asm/time.h> #ifdef CONFIG_X86_64 -__visible volatile unsigned long jiffies __cacheline_aligned = INITIAL_JIFFIES; +__visible volatile unsigned long jiffies __cacheline_aligned_in_smp = INITIAL_JIFFIES; #endif unsigned long profile_pc(struct pt_regs *regs) diff --git a/arch/x86/kernel/topology.c b/arch/x86/kernel/topology.c index 12cbe2b88c0f..738bf42b0218 100644 --- a/arch/x86/kernel/topology.c +++ b/arch/x86/kernel/topology.c @@ -111,8 +111,10 @@ int arch_register_cpu(int num) /* * Currently CPU0 is only hotpluggable on Intel platforms. Other * vendors can add hotplug support later. + * Xen PV guests don't support CPU0 hotplug at all. */ - if (c->x86_vendor != X86_VENDOR_INTEL) + if (c->x86_vendor != X86_VENDOR_INTEL || + boot_cpu_has(X86_FEATURE_XENPV)) cpu0_hotpluggable = 0; /* diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 1463468ba9a0..6d5dc5dabfd7 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -26,6 +26,7 @@ #include <asm/apic.h> #include <asm/intel-family.h> #include <asm/i8259.h> +#include <asm/uv/uv.h> unsigned int __read_mostly cpu_khz; /* TSC clocks / usec, not used here */ EXPORT_SYMBOL(cpu_khz); @@ -57,7 +58,7 @@ struct cyc2ns { static DEFINE_PER_CPU_ALIGNED(struct cyc2ns, cyc2ns); -void cyc2ns_read_begin(struct cyc2ns_data *data) +void __always_inline cyc2ns_read_begin(struct cyc2ns_data *data) { int seq, idx; @@ -74,7 +75,7 @@ void cyc2ns_read_begin(struct cyc2ns_data *data) } while (unlikely(seq != this_cpu_read(cyc2ns.seq.sequence))); } -void cyc2ns_read_end(void) +void __always_inline cyc2ns_read_end(void) { preempt_enable_notrace(); } @@ -103,7 +104,7 @@ void cyc2ns_read_end(void) * -johnstul@us.ibm.com "math is hard, lets go shopping!" */ -static inline unsigned long long cycles_2_ns(unsigned long long cyc) +static __always_inline unsigned long long cycles_2_ns(unsigned long long cyc) { struct cyc2ns_data data; unsigned long long ns; @@ -1415,7 +1416,7 @@ static bool __init determine_cpu_tsc_frequencies(bool early) static unsigned long __init get_loops_per_jiffy(void) { - unsigned long lpj = tsc_khz * KHZ; + u64 lpj = (u64)tsc_khz * KHZ; do_div(lpj, HZ); return lpj; @@ -1433,6 +1434,9 @@ void __init tsc_early_init(void) { if (!boot_cpu_has(X86_FEATURE_TSC)) return; + /* Don't change UV TSC multi-chassis synchronization */ + if (is_early_uv_system()) + return; if (!determine_cpu_tsc_frequencies(true)) return; loops_per_jiffy = get_loops_per_jiffy(); diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 8bde0a419f86..5dd3317d761f 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -65,6 +65,23 @@ jiffies_64 = jiffies; #define ALIGN_ENTRY_TEXT_BEGIN . = ALIGN(PMD_SIZE); #define ALIGN_ENTRY_TEXT_END . = ALIGN(PMD_SIZE); +/* + * This section contains data which will be mapped as decrypted. Memory + * encryption operates on a page basis. Make this section PMD-aligned + * to avoid splitting the pages while mapping the section early. + * + * Note: We use a separate section so that only this section gets + * decrypted to avoid exposing more than we wish. + */ +#define BSS_DECRYPTED \ + . = ALIGN(PMD_SIZE); \ + __start_bss_decrypted = .; \ + *(.bss..decrypted); \ + . = ALIGN(PAGE_SIZE); \ + __start_bss_decrypted_unused = .; \ + . = ALIGN(PMD_SIZE); \ + __end_bss_decrypted = .; \ + #else #define X86_ALIGN_RODATA_BEGIN @@ -74,6 +91,7 @@ jiffies_64 = jiffies; #define ALIGN_ENTRY_TEXT_BEGIN #define ALIGN_ENTRY_TEXT_END +#define BSS_DECRYPTED #endif @@ -355,6 +373,7 @@ SECTIONS __bss_start = .; *(.bss..page_aligned) *(.bss) + BSS_DECRYPTED . = ALIGN(PAGE_SIZE); __bss_stop = .; } diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 0cefba28c864..fbb0e6df121b 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -548,7 +548,7 @@ int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq, } int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low, - unsigned long ipi_bitmap_high, int min, + unsigned long ipi_bitmap_high, u32 min, unsigned long icr, int op_64_bit) { int i; @@ -571,18 +571,31 @@ int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low, rcu_read_lock(); map = rcu_dereference(kvm->arch.apic_map); + if (min > map->max_apic_id) + goto out; /* Bits above cluster_size are masked in the caller. */ - for_each_set_bit(i, &ipi_bitmap_low, BITS_PER_LONG) { - vcpu = map->phys_map[min + i]->vcpu; - count += kvm_apic_set_irq(vcpu, &irq, NULL); + for_each_set_bit(i, &ipi_bitmap_low, + min((u32)BITS_PER_LONG, (map->max_apic_id - min + 1))) { + if (map->phys_map[min + i]) { + vcpu = map->phys_map[min + i]->vcpu; + count += kvm_apic_set_irq(vcpu, &irq, NULL); + } } min += cluster_size; - for_each_set_bit(i, &ipi_bitmap_high, BITS_PER_LONG) { - vcpu = map->phys_map[min + i]->vcpu; - count += kvm_apic_set_irq(vcpu, &irq, NULL); + + if (min > map->max_apic_id) + goto out; + + for_each_set_bit(i, &ipi_bitmap_high, + min((u32)BITS_PER_LONG, (map->max_apic_id - min + 1))) { + if (map->phys_map[min + i]) { + vcpu = map->phys_map[min + i]->vcpu; + count += kvm_apic_set_irq(vcpu, &irq, NULL); + } } +out: rcu_read_unlock(); return count; } @@ -1331,9 +1344,8 @@ EXPORT_SYMBOL_GPL(kvm_lapic_reg_read); static int apic_mmio_in_range(struct kvm_lapic *apic, gpa_t addr) { - return kvm_apic_hw_enabled(apic) && - addr >= apic->base_address && - addr < apic->base_address + LAPIC_MMIO_LENGTH; + return addr >= apic->base_address && + addr < apic->base_address + LAPIC_MMIO_LENGTH; } static int apic_mmio_read(struct kvm_vcpu *vcpu, struct kvm_io_device *this, @@ -1345,6 +1357,15 @@ static int apic_mmio_read(struct kvm_vcpu *vcpu, struct kvm_io_device *this, if (!apic_mmio_in_range(apic, address)) return -EOPNOTSUPP; + if (!kvm_apic_hw_enabled(apic) || apic_x2apic_mode(apic)) { + if (!kvm_check_has_quirk(vcpu->kvm, + KVM_X86_QUIRK_LAPIC_MMIO_HOLE)) + return -EOPNOTSUPP; + + memset(data, 0xff, len); + return 0; + } + kvm_lapic_reg_read(apic, offset, len, data); return 0; @@ -1904,6 +1925,14 @@ static int apic_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *this, if (!apic_mmio_in_range(apic, address)) return -EOPNOTSUPP; + if (!kvm_apic_hw_enabled(apic) || apic_x2apic_mode(apic)) { + if (!kvm_check_has_quirk(vcpu->kvm, + KVM_X86_QUIRK_LAPIC_MMIO_HOLE)) + return -EOPNOTSUPP; + + return 0; + } + /* * APIC register must be aligned on 128-bits boundary. * 32/64/128 bits registers must be accessed thru 32 bits. diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index a282321329b5..51b953ad9d4e 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -249,6 +249,17 @@ static u64 __read_mostly shadow_nonpresent_or_rsvd_mask; */ static const u64 shadow_nonpresent_or_rsvd_mask_len = 5; +/* + * In some cases, we need to preserve the GFN of a non-present or reserved + * SPTE when we usurp the upper five bits of the physical address space to + * defend against L1TF, e.g. for MMIO SPTEs. To preserve the GFN, we'll + * shift bits of the GFN that overlap with shadow_nonpresent_or_rsvd_mask + * left into the reserved bits, i.e. the GFN in the SPTE will be split into + * high and low parts. This mask covers the lower bits of the GFN. + */ +static u64 __read_mostly shadow_nonpresent_or_rsvd_lower_gfn_mask; + + static void mmu_spte_set(u64 *sptep, u64 spte); static union kvm_mmu_page_role kvm_mmu_calc_root_page_role(struct kvm_vcpu *vcpu); @@ -357,9 +368,7 @@ static bool is_mmio_spte(u64 spte) static gfn_t get_mmio_spte_gfn(u64 spte) { - u64 mask = generation_mmio_spte_mask(MMIO_GEN_MASK) | shadow_mmio_mask | - shadow_nonpresent_or_rsvd_mask; - u64 gpa = spte & ~mask; + u64 gpa = spte & shadow_nonpresent_or_rsvd_lower_gfn_mask; gpa |= (spte >> shadow_nonpresent_or_rsvd_mask_len) & shadow_nonpresent_or_rsvd_mask; @@ -423,6 +432,8 @@ EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes); static void kvm_mmu_reset_all_pte_masks(void) { + u8 low_phys_bits; + shadow_user_mask = 0; shadow_accessed_mask = 0; shadow_dirty_mask = 0; @@ -437,12 +448,17 @@ static void kvm_mmu_reset_all_pte_masks(void) * appropriate mask to guard against L1TF attacks. Otherwise, it is * assumed that the CPU is not vulnerable to L1TF. */ + low_phys_bits = boot_cpu_data.x86_phys_bits; if (boot_cpu_data.x86_phys_bits < - 52 - shadow_nonpresent_or_rsvd_mask_len) + 52 - shadow_nonpresent_or_rsvd_mask_len) { shadow_nonpresent_or_rsvd_mask = rsvd_bits(boot_cpu_data.x86_phys_bits - shadow_nonpresent_or_rsvd_mask_len, boot_cpu_data.x86_phys_bits - 1); + low_phys_bits -= shadow_nonpresent_or_rsvd_mask_len; + } + shadow_nonpresent_or_rsvd_lower_gfn_mask = + GENMASK_ULL(low_phys_bits - 1, PAGE_SHIFT); } static int is_cpuid_PSE36(void) @@ -899,7 +915,7 @@ static void walk_shadow_page_lockless_end(struct kvm_vcpu *vcpu) { /* * Make sure the write to vcpu->mode is not reordered in front of - * reads to sptes. If it does, kvm_commit_zap_page() can see us + * reads to sptes. If it does, kvm_mmu_commit_zap_page() can see us * OUTSIDE_GUEST_MODE and proceed to free the shadow page table. */ smp_store_release(&vcpu->mode, OUTSIDE_GUEST_MODE); @@ -1853,11 +1869,6 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva, return kvm_handle_hva_range(kvm, hva, hva + 1, data, handler); } -int kvm_unmap_hva(struct kvm *kvm, unsigned long hva) -{ - return kvm_handle_hva(kvm, hva, 0, kvm_unmap_rmapp); -} - int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end) { return kvm_handle_hva_range(kvm, start, end, 0, kvm_unmap_rmapp); @@ -5217,7 +5228,7 @@ static int make_mmu_pages_available(struct kvm_vcpu *vcpu) int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code, void *insn, int insn_len) { - int r, emulation_type = EMULTYPE_RETRY; + int r, emulation_type = 0; enum emulation_result er; bool direct = vcpu->arch.mmu.direct_map; @@ -5230,10 +5241,8 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code, r = RET_PF_INVALID; if (unlikely(error_code & PFERR_RSVD_MASK)) { r = handle_mmio_page_fault(vcpu, cr2, direct); - if (r == RET_PF_EMULATE) { - emulation_type = 0; + if (r == RET_PF_EMULATE) goto emulate; - } } if (r == RET_PF_INVALID) { @@ -5260,8 +5269,19 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code, return 1; } - if (mmio_info_in_cache(vcpu, cr2, direct)) - emulation_type = 0; + /* + * vcpu->arch.mmu.page_fault returned RET_PF_EMULATE, but we can still + * optimistically try to just unprotect the page and let the processor + * re-execute the instruction that caused the page fault. Do not allow + * retrying MMIO emulation, as it's not only pointless but could also + * cause us to enter an infinite loop because the processor will keep + * faulting on the non-existent MMIO address. Retrying an instruction + * from a nested guest is also pointless and dangerous as we are only + * explicitly shadowing L1's page tables, i.e. unprotecting something + * for L1 isn't going to magically fix whatever issue cause L2 to fail. + */ + if (!mmio_info_in_cache(vcpu, cr2, direct) && !is_guest_mode(vcpu)) + emulation_type = EMULTYPE_ALLOW_RETRY; emulate: /* * On AMD platforms, under certain conditions insn_len may be zero on #NPF. @@ -5413,7 +5433,12 @@ void kvm_mmu_setup(struct kvm_vcpu *vcpu) { MMU_WARN_ON(VALID_PAGE(vcpu->arch.mmu.root_hpa)); - kvm_init_mmu(vcpu, true); + /* + * kvm_mmu_setup() is called only on vCPU initialization. + * Therefore, no need to reset mmu roots as they are not yet + * initialized. + */ + kvm_init_mmu(vcpu, false); } static void kvm_mmu_invalidate_zap_pages_in_memslot(struct kvm *kvm, diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 6276140044d0..61ccfb13899e 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -436,14 +436,18 @@ static inline struct kvm_svm *to_kvm_svm(struct kvm *kvm) static inline bool svm_sev_enabled(void) { - return max_sev_asid; + return IS_ENABLED(CONFIG_KVM_AMD_SEV) ? max_sev_asid : 0; } static inline bool sev_guest(struct kvm *kvm) { +#ifdef CONFIG_KVM_AMD_SEV struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; return sev->active; +#else + return false; +#endif } static inline int sev_get_asid(struct kvm *kvm) @@ -776,7 +780,7 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu) } if (!svm->next_rip) { - if (emulate_instruction(vcpu, EMULTYPE_SKIP) != + if (kvm_emulate_instruction(vcpu, EMULTYPE_SKIP) != EMULATE_DONE) printk(KERN_DEBUG "%s: NOP\n", __func__); return; @@ -1226,8 +1230,7 @@ static __init int sev_hardware_setup(void) min_sev_asid = cpuid_edx(0x8000001F); /* Initialize SEV ASID bitmap */ - sev_asid_bitmap = kcalloc(BITS_TO_LONGS(max_sev_asid), - sizeof(unsigned long), GFP_KERNEL); + sev_asid_bitmap = bitmap_zalloc(max_sev_asid, GFP_KERNEL); if (!sev_asid_bitmap) return 1; @@ -1405,7 +1408,7 @@ static __exit void svm_hardware_unsetup(void) int cpu; if (svm_sev_enabled()) - kfree(sev_asid_bitmap); + bitmap_free(sev_asid_bitmap); for_each_possible_cpu(cpu) svm_cpu_uninit(cpu); @@ -2715,7 +2718,7 @@ static int gp_interception(struct vcpu_svm *svm) WARN_ON_ONCE(!enable_vmware_backdoor); - er = emulate_instruction(vcpu, + er = kvm_emulate_instruction(vcpu, EMULTYPE_VMWARE | EMULTYPE_NO_UD_ON_FAIL); if (er == EMULATE_USER_EXIT) return 0; @@ -2819,7 +2822,7 @@ static int io_interception(struct vcpu_svm *svm) string = (io_info & SVM_IOIO_STR_MASK) != 0; in = (io_info & SVM_IOIO_TYPE_MASK) != 0; if (string) - return emulate_instruction(vcpu, 0) == EMULATE_DONE; + return kvm_emulate_instruction(vcpu, 0) == EMULATE_DONE; port = io_info >> 16; size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT; @@ -3861,7 +3864,7 @@ static int iret_interception(struct vcpu_svm *svm) static int invlpg_interception(struct vcpu_svm *svm) { if (!static_cpu_has(X86_FEATURE_DECODEASSISTS)) - return emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE; + return kvm_emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE; kvm_mmu_invlpg(&svm->vcpu, svm->vmcb->control.exit_info_1); return kvm_skip_emulated_instruction(&svm->vcpu); @@ -3869,13 +3872,13 @@ static int invlpg_interception(struct vcpu_svm *svm) static int emulate_on_interception(struct vcpu_svm *svm) { - return emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE; + return kvm_emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE; } static int rsm_interception(struct vcpu_svm *svm) { - return x86_emulate_instruction(&svm->vcpu, 0, 0, - rsm_ins_bytes, 2) == EMULATE_DONE; + return kvm_emulate_instruction_from_buffer(&svm->vcpu, + rsm_ins_bytes, 2) == EMULATE_DONE; } static int rdpmc_interception(struct vcpu_svm *svm) @@ -4700,7 +4703,7 @@ static int avic_unaccelerated_access_interception(struct vcpu_svm *svm) ret = avic_unaccel_trap_write(svm); } else { /* Handling Fault */ - ret = (emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE); + ret = (kvm_emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE); } return ret; @@ -6747,7 +6750,7 @@ e_free: static int sev_dbg_crypt(struct kvm *kvm, struct kvm_sev_cmd *argp, bool dec) { unsigned long vaddr, vaddr_end, next_vaddr; - unsigned long dst_vaddr, dst_vaddr_end; + unsigned long dst_vaddr; struct page **src_p, **dst_p; struct kvm_sev_dbg debug; unsigned long n; @@ -6763,7 +6766,6 @@ static int sev_dbg_crypt(struct kvm *kvm, struct kvm_sev_cmd *argp, bool dec) size = debug.len; vaddr_end = vaddr + size; dst_vaddr = debug.dst_uaddr; - dst_vaddr_end = dst_vaddr + size; for (; vaddr < vaddr_end; vaddr = next_vaddr) { int len, s_off, d_off; @@ -7150,6 +7152,8 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = { .check_intercept = svm_check_intercept, .handle_external_intr = svm_handle_external_intr, + .request_immediate_exit = __kvm_request_immediate_exit, + .sched_in = svm_sched_in, .pmu_ops = &amd_pmu_ops, diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 1d26f3c4985b..e665aa7167cf 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -121,7 +121,6 @@ module_param_named(pml, enable_pml, bool, S_IRUGO); #define MSR_BITMAP_MODE_X2APIC 1 #define MSR_BITMAP_MODE_X2APIC_APICV 2 -#define MSR_BITMAP_MODE_LM 4 #define KVM_VMX_TSC_MULTIPLIER_MAX 0xffffffffffffffffULL @@ -397,6 +396,7 @@ struct loaded_vmcs { int cpu; bool launched; bool nmi_known_unmasked; + bool hv_timer_armed; /* Support for vnmi-less CPUs */ int soft_vnmi_blocked; ktime_t entry_time; @@ -856,6 +856,7 @@ struct nested_vmx { /* to migrate it to L2 if VM_ENTRY_LOAD_DEBUG_CONTROLS is off */ u64 vmcs01_debugctl; + u64 vmcs01_guest_bndcfgs; u16 vpid02; u16 last_vpid; @@ -1019,6 +1020,8 @@ struct vcpu_vmx { int ple_window; bool ple_window_dirty; + bool req_immediate_exit; + /* Support for PML */ #define PML_ENTITY_NUM 512 struct page *pml_pg; @@ -1569,8 +1572,12 @@ static int vmx_hv_remote_flush_tlb(struct kvm *kvm) goto out; } + /* + * FLUSH_GUEST_PHYSICAL_ADDRESS_SPACE hypercall needs the address of the + * base of EPT PML4 table, strip off EPT configuration information. + */ ret = hyperv_flush_guest_mapping( - to_vmx(kvm_get_vcpu(kvm, 0))->ept_pointer); + to_vmx(kvm_get_vcpu(kvm, 0))->ept_pointer & PAGE_MASK); out: spin_unlock(&to_kvm_vmx(kvm)->ept_pointer_lock); @@ -2864,6 +2871,8 @@ static void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu) u16 fs_sel, gs_sel; int i; + vmx->req_immediate_exit = false; + if (vmx->loaded_cpu_state) return; @@ -2894,8 +2903,7 @@ static void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu) vmx->msr_host_kernel_gs_base = read_msr(MSR_KERNEL_GS_BASE); } - if (is_long_mode(&vmx->vcpu)) - wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base); + wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base); #else savesegment(fs, fs_sel); savesegment(gs, gs_sel); @@ -2946,8 +2954,7 @@ static void vmx_prepare_switch_to_host(struct vcpu_vmx *vmx) vmx->loaded_cpu_state = NULL; #ifdef CONFIG_X86_64 - if (is_long_mode(&vmx->vcpu)) - rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base); + rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base); #endif if (host_state->ldt_sel || (host_state->gs_sel & 7)) { kvm_load_ldt(host_state->ldt_sel); @@ -2975,24 +2982,19 @@ static void vmx_prepare_switch_to_host(struct vcpu_vmx *vmx) #ifdef CONFIG_X86_64 static u64 vmx_read_guest_kernel_gs_base(struct vcpu_vmx *vmx) { - if (is_long_mode(&vmx->vcpu)) { - preempt_disable(); - if (vmx->loaded_cpu_state) - rdmsrl(MSR_KERNEL_GS_BASE, - vmx->msr_guest_kernel_gs_base); - preempt_enable(); - } + preempt_disable(); + if (vmx->loaded_cpu_state) + rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base); + preempt_enable(); return vmx->msr_guest_kernel_gs_base; } static void vmx_write_guest_kernel_gs_base(struct vcpu_vmx *vmx, u64 data) { - if (is_long_mode(&vmx->vcpu)) { - preempt_disable(); - if (vmx->loaded_cpu_state) - wrmsrl(MSR_KERNEL_GS_BASE, data); - preempt_enable(); - } + preempt_disable(); + if (vmx->loaded_cpu_state) + wrmsrl(MSR_KERNEL_GS_BASE, data); + preempt_enable(); vmx->msr_guest_kernel_gs_base = data; } #endif @@ -3528,9 +3530,6 @@ static void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, bool apicv) VM_EXIT_LOAD_IA32_EFER | VM_EXIT_SAVE_IA32_EFER | VM_EXIT_SAVE_VMX_PREEMPTION_TIMER | VM_EXIT_ACK_INTR_ON_EXIT; - if (kvm_mpx_supported()) - msrs->exit_ctls_high |= VM_EXIT_CLEAR_BNDCFGS; - /* We support free control of debug control saving. */ msrs->exit_ctls_low &= ~VM_EXIT_SAVE_DEBUG_CONTROLS; @@ -3547,8 +3546,6 @@ static void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, bool apicv) VM_ENTRY_LOAD_IA32_PAT; msrs->entry_ctls_high |= (VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR | VM_ENTRY_LOAD_IA32_EFER); - if (kvm_mpx_supported()) - msrs->entry_ctls_high |= VM_ENTRY_LOAD_BNDCFGS; /* We support free control of debug control loading. */ msrs->entry_ctls_low &= ~VM_ENTRY_LOAD_DEBUG_CONTROLS; @@ -3596,12 +3593,12 @@ static void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, bool apicv) msrs->secondary_ctls_high); msrs->secondary_ctls_low = 0; msrs->secondary_ctls_high &= - SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | SECONDARY_EXEC_DESC | SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | SECONDARY_EXEC_APIC_REGISTER_VIRT | SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | SECONDARY_EXEC_WBINVD_EXITING; + /* * We can emulate "VMCS shadowing," even if the hardware * doesn't support it. @@ -3658,6 +3655,10 @@ static void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, bool apicv) msrs->secondary_ctls_high |= SECONDARY_EXEC_UNRESTRICTED_GUEST; + if (flexpriority_enabled) + msrs->secondary_ctls_high |= + SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; + /* miscellaneous data */ rdmsr(MSR_IA32_VMX_MISC, msrs->misc_low, @@ -5068,19 +5069,6 @@ static void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer) if (!msr) return; - /* - * MSR_KERNEL_GS_BASE is not intercepted when the guest is in - * 64-bit mode as a 64-bit kernel may frequently access the - * MSR. This means we need to manually save/restore the MSR - * when switching between guest and host state, but only if - * the guest is in 64-bit mode. Sync our cached value if the - * guest is transitioning to 32-bit mode and the CPU contains - * guest state, i.e. the cache is stale. - */ -#ifdef CONFIG_X86_64 - if (!(efer & EFER_LMA)) - (void)vmx_read_guest_kernel_gs_base(vmx); -#endif vcpu->arch.efer = efer; if (efer & EFER_LMA) { vm_entry_controls_setbit(to_vmx(vcpu), VM_ENTRY_IA32E_MODE); @@ -5393,9 +5381,10 @@ static int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) * To use VMXON (and later other VMX instructions), a guest * must first be able to turn on cr4.VMXE (see handle_vmon()). * So basically the check on whether to allow nested VMX - * is here. + * is here. We operate under the default treatment of SMM, + * so VMX cannot be enabled under SMM. */ - if (!nested_vmx_allowed(vcpu)) + if (!nested_vmx_allowed(vcpu) || is_smm(vcpu)) return 1; } @@ -6072,9 +6061,6 @@ static u8 vmx_msr_bitmap_mode(struct kvm_vcpu *vcpu) mode |= MSR_BITMAP_MODE_X2APIC_APICV; } - if (is_long_mode(vcpu)) - mode |= MSR_BITMAP_MODE_LM; - return mode; } @@ -6115,9 +6101,6 @@ static void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu) if (!changed) return; - vmx_set_intercept_for_msr(msr_bitmap, MSR_KERNEL_GS_BASE, MSR_TYPE_RW, - !(mode & MSR_BITMAP_MODE_LM)); - if (changed & (MSR_BITMAP_MODE_X2APIC | MSR_BITMAP_MODE_X2APIC_APICV)) vmx_update_msr_bitmap_x2apic(msr_bitmap, mode); @@ -6183,6 +6166,32 @@ static void vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu) nested_mark_vmcs12_pages_dirty(vcpu); } +static u8 vmx_get_rvi(void) +{ + return vmcs_read16(GUEST_INTR_STATUS) & 0xff; +} + +static bool vmx_guest_apic_has_interrupt(struct kvm_vcpu *vcpu) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + void *vapic_page; + u32 vppr; + int rvi; + + if (WARN_ON_ONCE(!is_guest_mode(vcpu)) || + !nested_cpu_has_vid(get_vmcs12(vcpu)) || + WARN_ON_ONCE(!vmx->nested.virtual_apic_page)) + return false; + + rvi = vmx_get_rvi(); + + vapic_page = kmap(vmx->nested.virtual_apic_page); + vppr = *((u32 *)(vapic_page + APIC_PROCPRI)); + kunmap(vmx->nested.virtual_apic_page); + + return ((rvi & 0xf0) > (vppr & 0xf0)); +} + static inline bool kvm_vcpu_trigger_posted_interrupt(struct kvm_vcpu *vcpu, bool nested) { @@ -6983,7 +6992,7 @@ static int handle_rmode_exception(struct kvm_vcpu *vcpu, * Cause the #SS fault with 0 error code in VM86 mode. */ if (((vec == GP_VECTOR) || (vec == SS_VECTOR)) && err_code == 0) { - if (emulate_instruction(vcpu, 0) == EMULATE_DONE) { + if (kvm_emulate_instruction(vcpu, 0) == EMULATE_DONE) { if (vcpu->arch.halt_request) { vcpu->arch.halt_request = 0; return kvm_vcpu_halt(vcpu); @@ -7054,7 +7063,7 @@ static int handle_exception(struct kvm_vcpu *vcpu) if (!vmx->rmode.vm86_active && is_gp_fault(intr_info)) { WARN_ON_ONCE(!enable_vmware_backdoor); - er = emulate_instruction(vcpu, + er = kvm_emulate_instruction(vcpu, EMULTYPE_VMWARE | EMULTYPE_NO_UD_ON_FAIL); if (er == EMULATE_USER_EXIT) return 0; @@ -7157,7 +7166,7 @@ static int handle_io(struct kvm_vcpu *vcpu) ++vcpu->stat.io_exits; if (string) - return emulate_instruction(vcpu, 0) == EMULATE_DONE; + return kvm_emulate_instruction(vcpu, 0) == EMULATE_DONE; port = exit_qualification >> 16; size = (exit_qualification & 7) + 1; @@ -7231,7 +7240,7 @@ static int handle_set_cr4(struct kvm_vcpu *vcpu, unsigned long val) static int handle_desc(struct kvm_vcpu *vcpu) { WARN_ON(!(vcpu->arch.cr4 & X86_CR4_UMIP)); - return emulate_instruction(vcpu, 0) == EMULATE_DONE; + return kvm_emulate_instruction(vcpu, 0) == EMULATE_DONE; } static int handle_cr(struct kvm_vcpu *vcpu) @@ -7480,7 +7489,7 @@ static int handle_vmcall(struct kvm_vcpu *vcpu) static int handle_invd(struct kvm_vcpu *vcpu) { - return emulate_instruction(vcpu, 0) == EMULATE_DONE; + return kvm_emulate_instruction(vcpu, 0) == EMULATE_DONE; } static int handle_invlpg(struct kvm_vcpu *vcpu) @@ -7547,7 +7556,7 @@ static int handle_apic_access(struct kvm_vcpu *vcpu) return kvm_skip_emulated_instruction(vcpu); } } - return emulate_instruction(vcpu, 0) == EMULATE_DONE; + return kvm_emulate_instruction(vcpu, 0) == EMULATE_DONE; } static int handle_apic_eoi_induced(struct kvm_vcpu *vcpu) @@ -7704,8 +7713,8 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu) if (!static_cpu_has(X86_FEATURE_HYPERVISOR)) return kvm_skip_emulated_instruction(vcpu); else - return x86_emulate_instruction(vcpu, gpa, EMULTYPE_SKIP, - NULL, 0) == EMULATE_DONE; + return kvm_emulate_instruction(vcpu, EMULTYPE_SKIP) == + EMULATE_DONE; } return kvm_mmu_page_fault(vcpu, gpa, PFERR_RSVD_MASK, NULL, 0); @@ -7748,7 +7757,7 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu) if (kvm_test_request(KVM_REQ_EVENT, vcpu)) return 1; - err = emulate_instruction(vcpu, 0); + err = kvm_emulate_instruction(vcpu, 0); if (err == EMULATE_USER_EXIT) { ++vcpu->stat.mmio_exits; @@ -7966,6 +7975,9 @@ static __init int hardware_setup(void) kvm_x86_ops->enable_log_dirty_pt_masked = NULL; } + if (!cpu_has_vmx_preemption_timer()) + kvm_x86_ops->request_immediate_exit = __kvm_request_immediate_exit; + if (cpu_has_vmx_preemption_timer() && enable_preemption_timer) { u64 vmx_msr; @@ -9208,7 +9220,8 @@ static int handle_pml_full(struct kvm_vcpu *vcpu) static int handle_preemption_timer(struct kvm_vcpu *vcpu) { - kvm_lapic_expired_hv_timer(vcpu); + if (!to_vmx(vcpu)->req_immediate_exit) + kvm_lapic_expired_hv_timer(vcpu); return 1; } @@ -10214,15 +10227,16 @@ static void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu) if (!lapic_in_kernel(vcpu)) return; + if (!flexpriority_enabled && + !cpu_has_vmx_virtualize_x2apic_mode()) + return; + /* Postpone execution until vmcs01 is the current VMCS. */ if (is_guest_mode(vcpu)) { to_vmx(vcpu)->nested.change_vmcs01_virtual_apic_mode = true; return; } - if (!cpu_need_tpr_shadow(vcpu)) - return; - sec_exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL); sec_exec_control &= ~(SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE); @@ -10344,6 +10358,14 @@ static int vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu) return max_irr; } +static u8 vmx_has_apicv_interrupt(struct kvm_vcpu *vcpu) +{ + u8 rvi = vmx_get_rvi(); + u8 vppr = kvm_lapic_get_reg(vcpu->arch.apic, APIC_PROCPRI); + + return ((rvi & 0xf0) > (vppr & 0xf0)); +} + static void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap) { if (!kvm_vcpu_apicv_active(vcpu)) @@ -10595,24 +10617,43 @@ static void atomic_switch_perf_msrs(struct vcpu_vmx *vmx) msrs[i].host, false); } -static void vmx_arm_hv_timer(struct kvm_vcpu *vcpu) +static void vmx_arm_hv_timer(struct vcpu_vmx *vmx, u32 val) +{ + vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, val); + if (!vmx->loaded_vmcs->hv_timer_armed) + vmcs_set_bits(PIN_BASED_VM_EXEC_CONTROL, + PIN_BASED_VMX_PREEMPTION_TIMER); + vmx->loaded_vmcs->hv_timer_armed = true; +} + +static void vmx_update_hv_timer(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); u64 tscl; u32 delta_tsc; - if (vmx->hv_deadline_tsc == -1) + if (vmx->req_immediate_exit) { + vmx_arm_hv_timer(vmx, 0); return; + } - tscl = rdtsc(); - if (vmx->hv_deadline_tsc > tscl) - /* sure to be 32 bit only because checked on set_hv_timer */ - delta_tsc = (u32)((vmx->hv_deadline_tsc - tscl) >> - cpu_preemption_timer_multi); - else - delta_tsc = 0; + if (vmx->hv_deadline_tsc != -1) { + tscl = rdtsc(); + if (vmx->hv_deadline_tsc > tscl) + /* set_hv_timer ensures the delta fits in 32-bits */ + delta_tsc = (u32)((vmx->hv_deadline_tsc - tscl) >> + cpu_preemption_timer_multi); + else + delta_tsc = 0; - vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, delta_tsc); + vmx_arm_hv_timer(vmx, delta_tsc); + return; + } + + if (vmx->loaded_vmcs->hv_timer_armed) + vmcs_clear_bits(PIN_BASED_VM_EXEC_CONTROL, + PIN_BASED_VMX_PREEMPTION_TIMER); + vmx->loaded_vmcs->hv_timer_armed = false; } static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) @@ -10672,7 +10713,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) atomic_switch_perf_msrs(vmx); - vmx_arm_hv_timer(vcpu); + vmx_update_hv_timer(vcpu); /* * If this vCPU has touched SPEC_CTRL, restore the guest's value if @@ -11214,6 +11255,23 @@ static void nested_vmx_cr_fixed1_bits_update(struct kvm_vcpu *vcpu) #undef cr4_fixed1_update } +static void nested_vmx_entry_exit_ctls_update(struct kvm_vcpu *vcpu) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + + if (kvm_mpx_supported()) { + bool mpx_enabled = guest_cpuid_has(vcpu, X86_FEATURE_MPX); + + if (mpx_enabled) { + vmx->nested.msrs.entry_ctls_high |= VM_ENTRY_LOAD_BNDCFGS; + vmx->nested.msrs.exit_ctls_high |= VM_EXIT_CLEAR_BNDCFGS; + } else { + vmx->nested.msrs.entry_ctls_high &= ~VM_ENTRY_LOAD_BNDCFGS; + vmx->nested.msrs.exit_ctls_high &= ~VM_EXIT_CLEAR_BNDCFGS; + } + } +} + static void vmx_cpuid_update(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); @@ -11230,8 +11288,10 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu) to_vmx(vcpu)->msr_ia32_feature_control_valid_bits &= ~FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX; - if (nested_vmx_allowed(vcpu)) + if (nested_vmx_allowed(vcpu)) { nested_vmx_cr_fixed1_bits_update(vcpu); + nested_vmx_entry_exit_ctls_update(vcpu); + } } static void vmx_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry) @@ -11427,16 +11487,18 @@ static void vmx_start_preemption_timer(struct kvm_vcpu *vcpu) u64 preemption_timeout = get_vmcs12(vcpu)->vmx_preemption_timer_value; struct vcpu_vmx *vmx = to_vmx(vcpu); - if (vcpu->arch.virtual_tsc_khz == 0) - return; - - /* Make sure short timeouts reliably trigger an immediate vmexit. - * hrtimer_start does not guarantee this. */ - if (preemption_timeout <= 1) { + /* + * A timer value of zero is architecturally guaranteed to cause + * a VMExit prior to executing any instructions in the guest. + */ + if (preemption_timeout == 0) { vmx_preemption_timer_fn(&vmx->nested.preemption_timer); return; } + if (vcpu->arch.virtual_tsc_khz == 0) + return; + preemption_timeout <<= VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE; preemption_timeout *= 1000000; do_div(preemption_timeout, vcpu->arch.virtual_tsc_khz); @@ -11646,11 +11708,15 @@ static int nested_vmx_check_apicv_controls(struct kvm_vcpu *vcpu, * bits 15:8 should be zero in posted_intr_nv, * the descriptor address has been already checked * in nested_get_vmcs12_pages. + * + * bits 5:0 of posted_intr_desc_addr should be zero. */ if (nested_cpu_has_posted_intr(vmcs12) && (!nested_cpu_has_vid(vmcs12) || !nested_exit_intr_ack_set(vcpu) || - vmcs12->posted_intr_nv & 0xff00)) + (vmcs12->posted_intr_nv & 0xff00) || + (vmcs12->posted_intr_desc_addr & 0x3f) || + (!page_address_valid(vcpu, vmcs12->posted_intr_desc_addr)))) return -EINVAL; /* tpr shadow is needed by all apicv features. */ @@ -11993,8 +12059,13 @@ static void prepare_vmcs02_full(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) set_cr4_guest_host_mask(vmx); - if (vmx_mpx_supported()) - vmcs_write64(GUEST_BNDCFGS, vmcs12->guest_bndcfgs); + if (kvm_mpx_supported()) { + if (vmx->nested.nested_run_pending && + (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS)) + vmcs_write64(GUEST_BNDCFGS, vmcs12->guest_bndcfgs); + else + vmcs_write64(GUEST_BNDCFGS, vmx->nested.vmcs01_guest_bndcfgs); + } if (enable_vpid) { if (nested_cpu_has_vpid(vmcs12) && vmx->nested.vpid02) @@ -12076,11 +12147,10 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, exec_control = vmcs12->pin_based_vm_exec_control; - /* Preemption timer setting is only taken from vmcs01. */ - exec_control &= ~PIN_BASED_VMX_PREEMPTION_TIMER; + /* Preemption timer setting is computed directly in vmx_vcpu_run. */ exec_control |= vmcs_config.pin_based_exec_ctrl; - if (vmx->hv_deadline_tsc == -1) - exec_control &= ~PIN_BASED_VMX_PREEMPTION_TIMER; + exec_control &= ~PIN_BASED_VMX_PREEMPTION_TIMER; + vmx->loaded_vmcs->hv_timer_armed = false; /* Posted interrupts setting is only taken from vmcs12. */ if (nested_cpu_has_posted_intr(vmcs12)) { @@ -12318,6 +12388,9 @@ static int check_vmentry_prereqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) vmcs12->guest_activity_state != GUEST_ACTIVITY_HLT) return VMXERR_ENTRY_INVALID_CONTROL_FIELD; + if (nested_cpu_has_vpid(vmcs12) && !vmcs12->virtual_processor_id) + return VMXERR_ENTRY_INVALID_CONTROL_FIELD; + if (nested_vmx_check_io_bitmap_controls(vcpu, vmcs12)) return VMXERR_ENTRY_INVALID_CONTROL_FIELD; @@ -12537,12 +12610,21 @@ static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu, u32 *exit_qual) struct vmcs12 *vmcs12 = get_vmcs12(vcpu); bool from_vmentry = !!exit_qual; u32 dummy_exit_qual; + bool evaluate_pending_interrupts; int r = 0; + evaluate_pending_interrupts = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL) & + (CPU_BASED_VIRTUAL_INTR_PENDING | CPU_BASED_VIRTUAL_NMI_PENDING); + if (likely(!evaluate_pending_interrupts) && kvm_vcpu_apicv_active(vcpu)) + evaluate_pending_interrupts |= vmx_has_apicv_interrupt(vcpu); + enter_guest_mode(vcpu); if (!(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS)) vmx->nested.vmcs01_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL); + if (kvm_mpx_supported() && + !(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS)) + vmx->nested.vmcs01_guest_bndcfgs = vmcs_read64(GUEST_BNDCFGS); vmx_switch_vmcs(vcpu, &vmx->nested.vmcs02); vmx_segment_cache_clear(vmx); @@ -12575,6 +12657,23 @@ static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu, u32 *exit_qual) } /* + * If L1 had a pending IRQ/NMI until it executed + * VMLAUNCH/VMRESUME which wasn't delivered because it was + * disallowed (e.g. interrupts disabled), L0 needs to + * evaluate if this pending event should cause an exit from L2 + * to L1 or delivered directly to L2 (e.g. In case L1 don't + * intercept EXTERNAL_INTERRUPT). + * + * Usually this would be handled by the processor noticing an + * IRQ/NMI window request, or checking RVI during evaluation of + * pending virtual interrupts. However, this setting was done + * on VMCS01 and now VMCS02 is active instead. Thus, we force L0 + * to perform pending event evaluation by requesting a KVM_REQ_EVENT. + */ + if (unlikely(evaluate_pending_interrupts)) + kvm_make_request(KVM_REQ_EVENT, vcpu); + + /* * Note no nested_vmx_succeed or nested_vmx_fail here. At this point * we are no longer running L1, and VMLAUNCH/VMRESUME has not yet * returned as far as L1 is concerned. It will only return (and set @@ -12841,6 +12940,11 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu, bool external_intr) return 0; } +static void vmx_request_immediate_exit(struct kvm_vcpu *vcpu) +{ + to_vmx(vcpu)->req_immediate_exit = true; +} + static u32 vmx_get_preemption_timer_value(struct kvm_vcpu *vcpu) { ktime_t remaining = @@ -13231,12 +13335,7 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.host.nr); vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.guest.nr); vmcs_write64(TSC_OFFSET, vcpu->arch.tsc_offset); - if (vmx->hv_deadline_tsc == -1) - vmcs_clear_bits(PIN_BASED_VM_EXEC_CONTROL, - PIN_BASED_VMX_PREEMPTION_TIMER); - else - vmcs_set_bits(PIN_BASED_VM_EXEC_CONTROL, - PIN_BASED_VMX_PREEMPTION_TIMER); + if (kvm_has_tsc_control) decache_tsc_multiplier(vmx); @@ -13440,18 +13539,12 @@ static int vmx_set_hv_timer(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc) return -ERANGE; vmx->hv_deadline_tsc = tscl + delta_tsc; - vmcs_set_bits(PIN_BASED_VM_EXEC_CONTROL, - PIN_BASED_VMX_PREEMPTION_TIMER); - return delta_tsc == 0; } static void vmx_cancel_hv_timer(struct kvm_vcpu *vcpu) { - struct vcpu_vmx *vmx = to_vmx(vcpu); - vmx->hv_deadline_tsc = -1; - vmcs_clear_bits(PIN_BASED_VM_EXEC_CONTROL, - PIN_BASED_VMX_PREEMPTION_TIMER); + to_vmx(vcpu)->hv_deadline_tsc = -1; } #endif @@ -13932,6 +14025,14 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu, ~(KVM_STATE_NESTED_SMM_GUEST_MODE | KVM_STATE_NESTED_SMM_VMXON)) return -EINVAL; + /* + * SMM temporarily disables VMX, so we cannot be in guest mode, + * nor can VMLAUNCH/VMRESUME be pending. Outside SMM, SMM flags + * must be zero. + */ + if (is_smm(vcpu) ? kvm_state->flags : kvm_state->vmx.smm.flags) + return -EINVAL; + if ((kvm_state->vmx.smm.flags & KVM_STATE_NESTED_SMM_GUEST_MODE) && !(kvm_state->vmx.smm.flags & KVM_STATE_NESTED_SMM_VMXON)) return -EINVAL; @@ -13988,9 +14089,6 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu, check_vmentry_postreqs(vcpu, vmcs12, &exit_qual)) return -EINVAL; - if (kvm_state->flags & KVM_STATE_NESTED_RUN_PENDING) - vmx->nested.nested_run_pending = 1; - vmx->nested.dirty_vmcs12 = true; ret = enter_vmx_non_root_mode(vcpu, NULL); if (ret) @@ -14078,6 +14176,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = { .apicv_post_state_restore = vmx_apicv_post_state_restore, .hwapic_irr_update = vmx_hwapic_irr_update, .hwapic_isr_update = vmx_hwapic_isr_update, + .guest_apic_has_interrupt = vmx_guest_apic_has_interrupt, .sync_pir_to_irr = vmx_sync_pir_to_irr, .deliver_posted_interrupt = vmx_deliver_posted_interrupt, @@ -14111,6 +14210,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = { .umip_emulated = vmx_umip_emulated, .check_nested_events = vmx_check_nested_events, + .request_immediate_exit = vmx_request_immediate_exit, .sched_in = vmx_sched_in, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 506bd2b4b8bb..ca717737347e 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -628,7 +628,7 @@ bool pdptrs_changed(struct kvm_vcpu *vcpu) gfn_t gfn; int r; - if (is_long_mode(vcpu) || !is_pae(vcpu)) + if (is_long_mode(vcpu) || !is_pae(vcpu) || !is_paging(vcpu)) return false; if (!test_bit(VCPU_EXREG_PDPTR, @@ -2537,7 +2537,6 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) break; case MSR_PLATFORM_INFO: if (!msr_info->host_initiated || - data & ~MSR_PLATFORM_INFO_CPUID_FAULT || (!(data & MSR_PLATFORM_INFO_CPUID_FAULT) && cpuid_fault_enabled(vcpu))) return 1; @@ -2780,6 +2779,9 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) msr_info->data = vcpu->arch.osvw.status; break; case MSR_PLATFORM_INFO: + if (!msr_info->host_initiated && + !vcpu->kvm->arch.guest_can_read_msr_platform_info) + return 1; msr_info->data = vcpu->arch.msr_platform_info; break; case MSR_MISC_FEATURES_ENABLES: @@ -2927,6 +2929,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_SPLIT_IRQCHIP: case KVM_CAP_IMMEDIATE_EXIT: case KVM_CAP_GET_MSR_FEATURES: + case KVM_CAP_MSR_PLATFORM_INFO: r = 1; break; case KVM_CAP_SYNC_REGS: @@ -4007,19 +4010,23 @@ long kvm_arch_vcpu_ioctl(struct file *filp, break; BUILD_BUG_ON(sizeof(user_data_size) != sizeof(user_kvm_nested_state->size)); + r = -EFAULT; if (get_user(user_data_size, &user_kvm_nested_state->size)) - return -EFAULT; + break; r = kvm_x86_ops->get_nested_state(vcpu, user_kvm_nested_state, user_data_size); if (r < 0) - return r; + break; if (r > user_data_size) { if (put_user(r, &user_kvm_nested_state->size)) - return -EFAULT; - return -E2BIG; + r = -EFAULT; + else + r = -E2BIG; + break; } + r = 0; break; } @@ -4031,19 +4038,21 @@ long kvm_arch_vcpu_ioctl(struct file *filp, if (!kvm_x86_ops->set_nested_state) break; + r = -EFAULT; if (copy_from_user(&kvm_state, user_kvm_nested_state, sizeof(kvm_state))) - return -EFAULT; + break; + r = -EINVAL; if (kvm_state.size < sizeof(kvm_state)) - return -EINVAL; + break; if (kvm_state.flags & ~(KVM_STATE_NESTED_RUN_PENDING | KVM_STATE_NESTED_GUEST_MODE)) - return -EINVAL; + break; /* nested_run_pending implies guest_mode. */ if (kvm_state.flags == KVM_STATE_NESTED_RUN_PENDING) - return -EINVAL; + break; r = kvm_x86_ops->set_nested_state(vcpu, user_kvm_nested_state, &kvm_state); break; @@ -4350,6 +4359,10 @@ split_irqchip_unlock: kvm->arch.pause_in_guest = true; r = 0; break; + case KVM_CAP_MSR_PLATFORM_INFO: + kvm->arch.guest_can_read_msr_platform_info = cap->args[0]; + r = 0; + break; default: r = -EINVAL; break; @@ -4685,7 +4698,7 @@ static void kvm_init_msr_list(void) */ switch (msrs_to_save[i]) { case MSR_IA32_BNDCFGS: - if (!kvm_x86_ops->mpx_supported()) + if (!kvm_mpx_supported()) continue; break; case MSR_TSC_AUX: @@ -4987,7 +5000,7 @@ int handle_ud(struct kvm_vcpu *vcpu) emul_type = 0; } - er = emulate_instruction(vcpu, emul_type); + er = kvm_emulate_instruction(vcpu, emul_type); if (er == EMULATE_USER_EXIT) return 0; if (er != EMULATE_DONE) @@ -5870,7 +5883,10 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t cr2, gpa_t gpa = cr2; kvm_pfn_t pfn; - if (emulation_type & EMULTYPE_NO_REEXECUTE) + if (!(emulation_type & EMULTYPE_ALLOW_RETRY)) + return false; + + if (WARN_ON_ONCE(is_guest_mode(vcpu))) return false; if (!vcpu->arch.mmu.direct_map) { @@ -5958,7 +5974,10 @@ static bool retry_instruction(struct x86_emulate_ctxt *ctxt, */ vcpu->arch.last_retry_eip = vcpu->arch.last_retry_addr = 0; - if (!(emulation_type & EMULTYPE_RETRY)) + if (!(emulation_type & EMULTYPE_ALLOW_RETRY)) + return false; + + if (WARN_ON_ONCE(is_guest_mode(vcpu))) return false; if (x86_page_table_writing_insn(ctxt)) @@ -6276,7 +6295,19 @@ restart: return r; } -EXPORT_SYMBOL_GPL(x86_emulate_instruction); + +int kvm_emulate_instruction(struct kvm_vcpu *vcpu, int emulation_type) +{ + return x86_emulate_instruction(vcpu, 0, emulation_type, NULL, 0); +} +EXPORT_SYMBOL_GPL(kvm_emulate_instruction); + +int kvm_emulate_instruction_from_buffer(struct kvm_vcpu *vcpu, + void *insn, int insn_len) +{ + return x86_emulate_instruction(vcpu, 0, 0, insn, insn_len); +} +EXPORT_SYMBOL_GPL(kvm_emulate_instruction_from_buffer); static int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port) @@ -7343,6 +7374,12 @@ void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu) } EXPORT_SYMBOL_GPL(kvm_vcpu_reload_apic_access_page); +void __kvm_request_immediate_exit(struct kvm_vcpu *vcpu) +{ + smp_send_reschedule(vcpu->cpu); +} +EXPORT_SYMBOL_GPL(__kvm_request_immediate_exit); + /* * Returns 1 to let vcpu_run() continue the guest execution loop without * exiting to the userspace. Otherwise, the value will be returned to the @@ -7547,7 +7584,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) if (req_immediate_exit) { kvm_make_request(KVM_REQ_EVENT, vcpu); - smp_send_reschedule(vcpu->cpu); + kvm_x86_ops->request_immediate_exit(vcpu); } trace_kvm_entry(vcpu->vcpu_id); @@ -7734,7 +7771,7 @@ static inline int complete_emulated_io(struct kvm_vcpu *vcpu) { int r; vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); - r = emulate_instruction(vcpu, EMULTYPE_NO_DECODE); + r = kvm_emulate_instruction(vcpu, EMULTYPE_NO_DECODE); srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); if (r != EMULATE_DONE) return 0; @@ -7811,6 +7848,29 @@ static int complete_emulated_mmio(struct kvm_vcpu *vcpu) return 0; } +/* Swap (qemu) user FPU context for the guest FPU context. */ +static void kvm_load_guest_fpu(struct kvm_vcpu *vcpu) +{ + preempt_disable(); + copy_fpregs_to_fpstate(&vcpu->arch.user_fpu); + /* PKRU is separately restored in kvm_x86_ops->run. */ + __copy_kernel_to_fpregs(&vcpu->arch.guest_fpu.state, + ~XFEATURE_MASK_PKRU); + preempt_enable(); + trace_kvm_fpu(1); +} + +/* When vcpu_run ends, restore user space FPU context. */ +static void kvm_put_guest_fpu(struct kvm_vcpu *vcpu) +{ + preempt_disable(); + copy_fpregs_to_fpstate(&vcpu->arch.guest_fpu); + copy_kernel_to_fpregs(&vcpu->arch.user_fpu.state); + preempt_enable(); + ++vcpu->stat.fpu_reload; + trace_kvm_fpu(0); +} + int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { int r; @@ -8159,7 +8219,7 @@ static int __set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) kvm_update_cpuid(vcpu); idx = srcu_read_lock(&vcpu->kvm->srcu); - if (!is_long_mode(vcpu) && is_pae(vcpu)) { + if (!is_long_mode(vcpu) && is_pae(vcpu) && is_paging(vcpu)) { load_pdptrs(vcpu, vcpu->arch.walk_mmu, kvm_read_cr3(vcpu)); mmu_reset_needed = 1; } @@ -8388,29 +8448,6 @@ static void fx_init(struct kvm_vcpu *vcpu) vcpu->arch.cr0 |= X86_CR0_ET; } -/* Swap (qemu) user FPU context for the guest FPU context. */ -void kvm_load_guest_fpu(struct kvm_vcpu *vcpu) -{ - preempt_disable(); - copy_fpregs_to_fpstate(&vcpu->arch.user_fpu); - /* PKRU is separately restored in kvm_x86_ops->run. */ - __copy_kernel_to_fpregs(&vcpu->arch.guest_fpu.state, - ~XFEATURE_MASK_PKRU); - preempt_enable(); - trace_kvm_fpu(1); -} - -/* When vcpu_run ends, restore user space FPU context. */ -void kvm_put_guest_fpu(struct kvm_vcpu *vcpu) -{ - preempt_disable(); - copy_fpregs_to_fpstate(&vcpu->arch.guest_fpu); - copy_kernel_to_fpregs(&vcpu->arch.user_fpu.state); - preempt_enable(); - ++vcpu->stat.fpu_reload; - trace_kvm_fpu(0); -} - void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) { void *wbinvd_dirty_mask = vcpu->arch.wbinvd_dirty_mask; @@ -8834,6 +8871,8 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) kvm->arch.kvmclock_offset = -ktime_get_boot_ns(); pvclock_update_vm_gtod_copy(kvm); + kvm->arch.guest_can_read_msr_platform_info = true; + INIT_DELAYED_WORK(&kvm->arch.kvmclock_update_work, kvmclock_update_fn); INIT_DELAYED_WORK(&kvm->arch.kvmclock_sync_work, kvmclock_sync_fn); @@ -9182,6 +9221,13 @@ void kvm_arch_flush_shadow_memslot(struct kvm *kvm, kvm_page_track_flush_slot(kvm, slot); } +static inline bool kvm_guest_apic_has_interrupt(struct kvm_vcpu *vcpu) +{ + return (is_guest_mode(vcpu) && + kvm_x86_ops->guest_apic_has_interrupt && + kvm_x86_ops->guest_apic_has_interrupt(vcpu)); +} + static inline bool kvm_vcpu_has_events(struct kvm_vcpu *vcpu) { if (!list_empty_careful(&vcpu->async_pf.done)) @@ -9206,7 +9252,8 @@ static inline bool kvm_vcpu_has_events(struct kvm_vcpu *vcpu) return true; if (kvm_arch_interrupt_allowed(vcpu) && - kvm_cpu_has_interrupt(vcpu)) + (kvm_cpu_has_interrupt(vcpu) || + kvm_guest_apic_has_interrupt(vcpu))) return true; if (kvm_hv_has_stimer_pending(vcpu)) diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index 257f27620bc2..67b9568613f3 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -274,6 +274,8 @@ int kvm_mtrr_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata); bool kvm_mtrr_check_gfn_range_consistency(struct kvm_vcpu *vcpu, gfn_t gfn, int page_num); bool kvm_vector_hashing_enabled(void); +int x86_emulate_instruction(struct kvm_vcpu *vcpu, unsigned long cr2, + int emulation_type, void *insn, int insn_len); #define KVM_SUPPORTED_XCR0 (XFEATURE_MASK_FP | XFEATURE_MASK_SSE \ | XFEATURE_MASK_YMM | XFEATURE_MASK_BNDREGS \ diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 7a8fc26c1115..faca978ebf9d 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -815,10 +815,14 @@ void free_kernel_image_pages(void *begin, void *end) set_memory_np_noalias(begin_ul, len_pages); } +void __weak mem_encrypt_free_decrypted_mem(void) { } + void __ref free_initmem(void) { e820__reallocate_tables(); + mem_encrypt_free_decrypted_mem(); + free_kernel_image_pages(&__init_begin, &__init_end); } diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c index b2de398d1fd3..006f373f54ab 100644 --- a/arch/x86/mm/mem_encrypt.c +++ b/arch/x86/mm/mem_encrypt.c @@ -348,6 +348,30 @@ bool sev_active(void) EXPORT_SYMBOL(sev_active); /* Architecture __weak replacement functions */ +void __init mem_encrypt_free_decrypted_mem(void) +{ + unsigned long vaddr, vaddr_end, npages; + int r; + + vaddr = (unsigned long)__start_bss_decrypted_unused; + vaddr_end = (unsigned long)__end_bss_decrypted; + npages = (vaddr_end - vaddr) >> PAGE_SHIFT; + + /* + * The unused memory range was mapped decrypted, change the encryption + * attribute from decrypted to encrypted before freeing it. + */ + if (mem_encrypt_active()) { + r = set_memory_encrypted(vaddr, npages); + if (r) { + pr_warn("failed to free unused decrypted pages\n"); + return; + } + } + + free_init_pages("unused decrypted", vaddr, vaddr_end); +} + void __init mem_encrypt_init(void) { if (!sme_me_mask) diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index e848a4811785..59274e2c1ac4 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -115,6 +115,8 @@ static inline void pgd_list_del(pgd_t *pgd) #define UNSHARED_PTRS_PER_PGD \ (SHARED_KERNEL_PMD ? KERNEL_PGD_BOUNDARY : PTRS_PER_PGD) +#define MAX_UNSHARED_PTRS_PER_PGD \ + max_t(size_t, KERNEL_PGD_BOUNDARY, PTRS_PER_PGD) static void pgd_set_mm(pgd_t *pgd, struct mm_struct *mm) @@ -181,6 +183,7 @@ static void pgd_dtor(pgd_t *pgd) * and initialize the kernel pmds here. */ #define PREALLOCATED_PMDS UNSHARED_PTRS_PER_PGD +#define MAX_PREALLOCATED_PMDS MAX_UNSHARED_PTRS_PER_PGD /* * We allocate separate PMDs for the kernel part of the user page-table @@ -189,6 +192,7 @@ static void pgd_dtor(pgd_t *pgd) */ #define PREALLOCATED_USER_PMDS (static_cpu_has(X86_FEATURE_PTI) ? \ KERNEL_PGD_PTRS : 0) +#define MAX_PREALLOCATED_USER_PMDS KERNEL_PGD_PTRS void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmd) { @@ -210,7 +214,9 @@ void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmd) /* No need to prepopulate any pagetable entries in non-PAE modes. */ #define PREALLOCATED_PMDS 0 +#define MAX_PREALLOCATED_PMDS 0 #define PREALLOCATED_USER_PMDS 0 +#define MAX_PREALLOCATED_USER_PMDS 0 #endif /* CONFIG_X86_PAE */ static void free_pmds(struct mm_struct *mm, pmd_t *pmds[], int count) @@ -269,7 +275,7 @@ static void mop_up_one_pmd(struct mm_struct *mm, pgd_t *pgdp) if (pgd_val(pgd) != 0) { pmd_t *pmd = (pmd_t *)pgd_page_vaddr(pgd); - *pgdp = native_make_pgd(0); + pgd_clear(pgdp); paravirt_release_pmd(pgd_val(pgd) >> PAGE_SHIFT); pmd_free(mm, pmd); @@ -428,8 +434,8 @@ static inline void _pgd_free(pgd_t *pgd) pgd_t *pgd_alloc(struct mm_struct *mm) { pgd_t *pgd; - pmd_t *u_pmds[PREALLOCATED_USER_PMDS]; - pmd_t *pmds[PREALLOCATED_PMDS]; + pmd_t *u_pmds[MAX_PREALLOCATED_USER_PMDS]; + pmd_t *pmds[MAX_PREALLOCATED_PMDS]; pgd = _pgd_alloc(); @@ -494,7 +500,7 @@ int ptep_set_access_flags(struct vm_area_struct *vma, int changed = !pte_same(*ptep, entry); if (changed && dirty) - *ptep = entry; + set_pte(ptep, entry); return changed; } @@ -509,7 +515,7 @@ int pmdp_set_access_flags(struct vm_area_struct *vma, VM_BUG_ON(address & ~HPAGE_PMD_MASK); if (changed && dirty) { - *pmdp = entry; + set_pmd(pmdp, entry); /* * We had a write-protection fault here and changed the pmd * to to more permissive. No need to flush the TLB for that, @@ -529,7 +535,7 @@ int pudp_set_access_flags(struct vm_area_struct *vma, unsigned long address, VM_BUG_ON(address & ~HPAGE_PUD_MASK); if (changed && dirty) { - *pudp = entry; + set_pud(pudp, entry); /* * We had a write-protection fault here and changed the pud * to to more permissive. No need to flush the TLB for that, @@ -637,6 +643,15 @@ void __native_set_fixmap(enum fixed_addresses idx, pte_t pte) { unsigned long address = __fix_to_virt(idx); +#ifdef CONFIG_X86_64 + /* + * Ensure that the static initial page tables are covering the + * fixmap completely. + */ + BUILD_BUG_ON(__end_of_permanent_fixed_addresses > + (FIXMAP_PMD_NUM * PTRS_PER_PTE)); +#endif + if (idx >= __end_of_fixed_addresses) { BUG(); return; diff --git a/arch/x86/platform/efi/efi_32.c b/arch/x86/platform/efi/efi_32.c index 05ca14222463..9959657127f4 100644 --- a/arch/x86/platform/efi/efi_32.c +++ b/arch/x86/platform/efi/efi_32.c @@ -85,10 +85,9 @@ pgd_t * __init efi_call_phys_prolog(void) void __init efi_call_phys_epilog(pgd_t *save_pgd) { + load_fixmap_gdt(0); load_cr3(save_pgd); __flush_tlb_all(); - - load_fixmap_gdt(0); } void __init efi_runtime_update_mappings(void) diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c index 2fe5c9b1816b..dd461c0167ef 100644 --- a/arch/x86/xen/mmu_pv.c +++ b/arch/x86/xen/mmu_pv.c @@ -1907,7 +1907,7 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn) /* L3_k[511] -> level2_fixmap_pgt */ convert_pfn_mfn(level3_kernel_pgt); - /* L3_k[511][506] -> level1_fixmap_pgt */ + /* L3_k[511][508-FIXMAP_PMD_NUM ... 507] -> level1_fixmap_pgt */ convert_pfn_mfn(level2_fixmap_pgt); /* We get [511][511] and have Xen's version of level2_kernel_pgt */ @@ -1952,7 +1952,11 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn) set_page_prot(level2_ident_pgt, PAGE_KERNEL_RO); set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO); set_page_prot(level2_fixmap_pgt, PAGE_KERNEL_RO); - set_page_prot(level1_fixmap_pgt, PAGE_KERNEL_RO); + + for (i = 0; i < FIXMAP_PMD_NUM; i++) { + set_page_prot(level1_fixmap_pgt + i * PTRS_PER_PTE, + PAGE_KERNEL_RO); + } /* Pin down new L4 */ pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE, diff --git a/arch/x86/xen/pmu.c b/arch/x86/xen/pmu.c index 7d00d4ad44d4..95997e6c0696 100644 --- a/arch/x86/xen/pmu.c +++ b/arch/x86/xen/pmu.c @@ -478,7 +478,7 @@ static void xen_convert_regs(const struct xen_pmu_regs *xen_regs, irqreturn_t xen_pmu_irq_handler(int irq, void *dev_id) { int err, ret = IRQ_NONE; - struct pt_regs regs; + struct pt_regs regs = {0}; const struct xen_pmu_data *xenpmu_data = get_xenpmu_data(); uint8_t xenpmu_flags = get_xenpmu_flags(); diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig index 04d038f3b6fa..b9ad83a0ee5d 100644 --- a/arch/xtensa/Kconfig +++ b/arch/xtensa/Kconfig @@ -4,6 +4,7 @@ config ZONE_DMA config XTENSA def_bool y + select ARCH_HAS_SG_CHAIN select ARCH_HAS_SYNC_DMA_FOR_CPU select ARCH_HAS_SYNC_DMA_FOR_DEVICE select ARCH_NO_COHERENT_DMA_MMAP if !MMU diff --git a/arch/xtensa/Makefile b/arch/xtensa/Makefile index 295c120ed099..d67e30faff9c 100644 --- a/arch/xtensa/Makefile +++ b/arch/xtensa/Makefile @@ -64,11 +64,7 @@ endif vardirs := $(patsubst %,arch/xtensa/variants/%/,$(variant-y)) plfdirs := $(patsubst %,arch/xtensa/platforms/%/,$(platform-y)) -ifeq ($(KBUILD_SRC),) -KBUILD_CPPFLAGS += $(patsubst %,-I%include,$(vardirs) $(plfdirs)) -else KBUILD_CPPFLAGS += $(patsubst %,-I$(srctree)/%include,$(vardirs) $(plfdirs)) -endif KBUILD_DEFCONFIG := iss_defconfig diff --git a/arch/xtensa/platforms/iss/setup.c b/arch/xtensa/platforms/iss/setup.c index f4bbb28026f8..58709e89a8ed 100644 --- a/arch/xtensa/platforms/iss/setup.c +++ b/arch/xtensa/platforms/iss/setup.c @@ -78,23 +78,28 @@ static struct notifier_block iss_panic_block = { void __init platform_setup(char **p_cmdline) { + static void *argv[COMMAND_LINE_SIZE / sizeof(void *)] __initdata; + static char cmdline[COMMAND_LINE_SIZE] __initdata; int argc = simc_argc(); int argv_size = simc_argv_size(); if (argc > 1) { - void **argv = alloc_bootmem(argv_size); - char *cmdline = alloc_bootmem(argv_size); - int i; + if (argv_size > sizeof(argv)) { + pr_err("%s: command line too long: argv_size = %d\n", + __func__, argv_size); + } else { + int i; - cmdline[0] = 0; - simc_argv((void *)argv); + cmdline[0] = 0; + simc_argv((void *)argv); - for (i = 1; i < argc; ++i) { - if (i > 1) - strcat(cmdline, " "); - strcat(cmdline, argv[i]); + for (i = 1; i < argc; ++i) { + if (i > 1) + strcat(cmdline, " "); + strcat(cmdline, argv[i]); + } + *p_cmdline = cmdline; } - *p_cmdline = cmdline; } atomic_notifier_chain_register(&panic_notifier_list, &iss_panic_block); |