summaryrefslogtreecommitdiff
path: root/arch
diff options
context:
space:
mode:
Diffstat (limited to 'arch')
-rw-r--r--arch/Kconfig6
-rw-r--r--arch/alpha/include/asm/processor.h2
-rw-r--r--arch/alpha/kernel/process.c5
-rw-r--r--arch/arc/Kconfig1
-rw-r--r--arch/arc/include/asm/cacheflush.h1
-rw-r--r--arch/arc/include/asm/pgtable.h5
-rw-r--r--arch/arc/include/asm/processor.h2
-rw-r--r--arch/arc/kernel/irq.c10
-rw-r--r--arch/arc/kernel/stacktrace.c4
-rw-r--r--arch/arm/Kconfig2
-rw-r--r--arch/arm/boot/compressed/decompress.c3
-rw-r--r--arch/arm/boot/dts/bcm2711-rpi-4-b.dts11
-rw-r--r--arch/arm/boot/dts/bcm2711.dtsi12
-rw-r--r--arch/arm/boot/dts/bcm2835-common.dtsi8
-rw-r--r--arch/arm/boot/dts/bcm283x.dtsi8
-rw-r--r--arch/arm/boot/dts/spear3xx.dtsi2
-rw-r--r--arch/arm/boot/dts/sun7i-a20-olinuxino-lime2.dts2
-rw-r--r--arch/arm/configs/multi_v7_defconfig1
-rw-r--r--arch/arm/configs/oxnas_v6_defconfig1
-rw-r--r--arch/arm/configs/shmobile_defconfig1
-rw-r--r--arch/arm/include/asm/arch_timer.h37
-rw-r--r--arch/arm/include/asm/cacheflush.h1
-rw-r--r--arch/arm/include/asm/processor.h2
-rw-r--r--arch/arm/include/asm/uaccess.h4
-rw-r--r--arch/arm/kernel/entry-armv.S5
-rw-r--r--arch/arm/kernel/head.S4
-rw-r--r--arch/arm/kernel/irq.c14
-rw-r--r--arch/arm/kernel/process.c4
-rw-r--r--arch/arm/kernel/traps.c2
-rw-r--r--arch/arm/kernel/vmlinux-xip.lds.S6
-rw-r--r--arch/arm/mach-bcm/Kconfig4
-rw-r--r--arch/arm/mach-imx/avic.c2
-rw-r--r--arch/arm/mach-imx/src.c40
-rw-r--r--arch/arm/mach-imx/tzic.c2
-rw-r--r--arch/arm/mach-omap1/irq.c2
-rw-r--r--arch/arm/mach-omap2/Kconfig1
-rw-r--r--arch/arm/mach-s3c/irq-s3c24xx.c2
-rw-r--r--arch/arm/mm/proc-macros.S1
-rw-r--r--arch/arm/probes/kprobes/core.c2
-rw-r--r--arch/arm/tools/syscall.tbl1
-rw-r--r--arch/arm64/Kconfig10
-rw-r--r--arch/arm64/Kconfig.platforms4
-rw-r--r--arch/arm64/boot/dts/allwinner/sun50i-h5-nanopi-neo2.dts2
-rw-r--r--arch/arm64/boot/dts/freescale/imx8mm-kontron-n801x-s.dts8
-rw-r--r--arch/arm64/boot/dts/freescale/imx8mm-kontron-n801x-som.dtsi8
-rw-r--r--arch/arm64/boot/dts/qcom/sm8250.dtsi3
-rw-r--r--arch/arm64/configs/defconfig1
-rw-r--r--arch/arm64/include/asm/arch_timer.h52
-rw-r--r--arch/arm64/include/asm/processor.h2
-rw-r--r--arch/arm64/include/asm/unistd.h2
-rw-r--r--arch/arm64/include/asm/unistd32.h2
-rw-r--r--arch/arm64/kernel/entry-common.c52
-rw-r--r--arch/arm64/kernel/process.c4
-rw-r--r--arch/arm64/kernel/topology.c2
-rw-r--r--arch/arm64/kvm/hyp/include/nvhe/gfp.h1
-rw-r--r--arch/arm64/kvm/hyp/nvhe/mem_protect.c13
-rw-r--r--arch/arm64/kvm/hyp/nvhe/page_alloc.c15
-rw-r--r--arch/arm64/kvm/mmu.c6
-rw-r--r--arch/arm64/mm/hugetlbpage.c2
-rw-r--r--arch/arm64/net/bpf_jit_comp.c5
-rw-r--r--arch/csky/Kconfig4
-rw-r--r--arch/csky/include/asm/bitops.h1
-rw-r--r--arch/csky/include/asm/processor.h2
-rw-r--r--arch/csky/kernel/entry.S2
-rw-r--r--arch/csky/kernel/irq.c5
-rw-r--r--arch/csky/kernel/ptrace.c3
-rw-r--r--arch/csky/kernel/signal.c4
-rw-r--r--arch/csky/kernel/stacktrace.c5
-rw-r--r--arch/h8300/include/asm/irq.h2
-rw-r--r--arch/h8300/include/asm/processor.h2
-rw-r--r--arch/h8300/kernel/irq.c1
-rw-r--r--arch/h8300/kernel/process.c5
-rw-r--r--arch/hexagon/include/asm/processor.h2
-rw-r--r--arch/hexagon/kernel/process.c4
-rw-r--r--arch/ia64/include/asm/processor.h2
-rw-r--r--arch/ia64/include/asm/spinlock.h23
-rw-r--r--arch/ia64/kernel/process.c5
-rw-r--r--arch/m68k/emu/nfblock.c12
-rw-r--r--arch/m68k/include/asm/cacheflush_mm.h1
-rw-r--r--arch/m68k/include/asm/processor.h2
-rw-r--r--arch/m68k/kernel/process.c4
-rw-r--r--arch/microblaze/include/asm/processor.h2
-rw-r--r--arch/microblaze/kernel/process.c2
-rw-r--r--arch/mips/Kconfig2
-rw-r--r--arch/mips/cavium-octeon/octeon-irq.c5
-rw-r--r--arch/mips/include/asm/cacheflush.h2
-rw-r--r--arch/mips/include/asm/processor.h2
-rw-r--r--arch/mips/kernel/irq.c8
-rw-r--r--arch/mips/kernel/process.c8
-rw-r--r--arch/mips/kernel/smp-bmips.c3
-rw-r--r--arch/mips/loongson64/smp.c1
-rw-r--r--arch/mips/rb532/prom.c1
-rw-r--r--arch/mips/sibyte/common/cfe.c1
-rw-r--r--arch/mips/sibyte/swarm/setup.c1
-rw-r--r--arch/nds32/Kconfig1
-rw-r--r--arch/nds32/include/asm/cacheflush.h1
-rw-r--r--arch/nds32/include/asm/processor.h2
-rw-r--r--arch/nds32/kernel/ftrace.c2
-rw-r--r--arch/nds32/kernel/process.c7
-rw-r--r--arch/nios2/include/asm/cacheflush.h3
-rw-r--r--arch/nios2/include/asm/irqflags.h4
-rw-r--r--arch/nios2/include/asm/processor.h2
-rw-r--r--arch/nios2/include/asm/registers.h2
-rw-r--r--arch/nios2/kernel/process.c5
-rw-r--r--arch/nios2/platform/Kconfig.platform1
-rw-r--r--arch/openrisc/Kconfig1
-rw-r--r--arch/openrisc/include/asm/processor.h2
-rw-r--r--arch/openrisc/include/asm/spinlock.h3
-rw-r--r--arch/openrisc/kernel/entry.S4
-rw-r--r--arch/openrisc/kernel/irq.c5
-rw-r--r--arch/openrisc/kernel/process.c2
-rw-r--r--arch/openrisc/mm/init.c1
-rw-r--r--arch/parisc/include/asm/cacheflush.h3
-rw-r--r--arch/parisc/include/asm/processor.h2
-rw-r--r--arch/parisc/include/asm/spinlock.h15
-rw-r--r--arch/parisc/kernel/process.c5
-rw-r--r--arch/powerpc/include/asm/mem_encrypt.h5
-rw-r--r--arch/powerpc/include/asm/processor.h2
-rw-r--r--arch/powerpc/include/asm/simple_spinlock.h21
-rw-r--r--arch/powerpc/kernel/idle_book3s.S10
-rw-r--r--arch/powerpc/kernel/process.c9
-rw-r--r--arch/powerpc/kernel/smp.c2
-rw-r--r--arch/powerpc/kvm/book3s_hv_rmhandlers.S28
-rw-r--r--arch/powerpc/perf/isa207-common.c26
-rw-r--r--arch/powerpc/perf/isa207-common.h2
-rw-r--r--arch/powerpc/platforms/cell/spufs/inode.c1
-rw-r--r--arch/powerpc/platforms/pseries/Kconfig1
-rw-r--r--arch/powerpc/platforms/pseries/Makefile2
-rw-r--r--arch/powerpc/platforms/pseries/cc_platform.c26
-rw-r--r--arch/powerpc/platforms/pseries/iommu.c27
-rw-r--r--arch/powerpc/platforms/pseries/svm.c5
-rw-r--r--arch/powerpc/sysdev/xive/common.c3
-rw-r--r--arch/riscv/Kconfig7
-rw-r--r--arch/riscv/include/asm/kasan.h3
-rw-r--r--arch/riscv/include/asm/processor.h2
-rw-r--r--arch/riscv/kernel/entry.S3
-rw-r--r--arch/riscv/kernel/head.S1
-rw-r--r--arch/riscv/kernel/smp.c9
-rw-r--r--arch/riscv/kernel/stacktrace.c12
-rw-r--r--arch/riscv/mm/kasan_init.c14
-rw-r--r--arch/riscv/net/bpf_jit_core.c8
-rw-r--r--arch/s390/include/asm/mem_encrypt.h2
-rw-r--r--arch/s390/include/asm/processor.h2
-rw-r--r--arch/s390/include/asm/spinlock.h8
-rw-r--r--arch/s390/kernel/process.c4
-rw-r--r--arch/s390/kvm/gaccess.c12
-rw-r--r--arch/s390/kvm/intercept.c4
-rw-r--r--arch/s390/kvm/interrupt.c5
-rw-r--r--arch/s390/kvm/kvm-s390.c1
-rw-r--r--arch/s390/lib/string.c15
-rw-r--r--arch/sh/include/asm/cacheflush.h3
-rw-r--r--arch/sh/include/asm/processor_32.h2
-rw-r--r--arch/sh/kernel/process_32.c5
-rw-r--r--arch/sparc/include/asm/processor_32.h2
-rw-r--r--arch/sparc/include/asm/processor_64.h2
-rw-r--r--arch/sparc/kernel/process_32.c5
-rw-r--r--arch/sparc/kernel/process_64.c5
-rw-r--r--arch/um/drivers/ubd_kern.c14
-rw-r--r--arch/um/include/asm/processor-generic.h2
-rw-r--r--arch/um/kernel/process.c5
-rw-r--r--arch/um/kernel/um_arch.c4
-rw-r--r--arch/x86/Kconfig35
-rw-r--r--arch/x86/Kconfig.cpu13
-rw-r--r--arch/x86/Makefile2
-rw-r--r--arch/x86/boot/genimage.sh15
-rw-r--r--arch/x86/boot/mtools.conf.in5
-rw-r--r--arch/x86/crypto/sm4-aesni-avx-asm_64.S6
-rw-r--r--arch/x86/crypto/sm4-aesni-avx2-asm_64.S6
-rw-r--r--arch/x86/entry/syscalls/syscall_32.tbl1
-rw-r--r--arch/x86/entry/syscalls/syscall_64.tbl1
-rw-r--r--arch/x86/events/core.c6
-rw-r--r--arch/x86/events/intel/bts.c6
-rw-r--r--arch/x86/events/intel/core.c21
-rw-r--r--arch/x86/events/intel/ds.c5
-rw-r--r--arch/x86/events/intel/uncore_discovery.h2
-rw-r--r--arch/x86/events/intel/uncore_snbep.c16
-rw-r--r--arch/x86/events/msr.c1
-rw-r--r--arch/x86/events/perf_event.h2
-rw-r--r--arch/x86/ia32/ia32_signal.c15
-rw-r--r--arch/x86/include/asm/GEN-for-each-reg.h14
-rw-r--r--arch/x86/include/asm/alternative.h1
-rw-r--r--arch/x86/include/asm/asm-prototypes.h18
-rw-r--r--arch/x86/include/asm/asm.h55
-rw-r--r--arch/x86/include/asm/cpufeature.h13
-rw-r--r--arch/x86/include/asm/cpufeatures.h2
-rw-r--r--arch/x86/include/asm/extable.h44
-rw-r--r--arch/x86/include/asm/extable_fixup_types.h22
-rw-r--r--arch/x86/include/asm/fpu/api.h58
-rw-r--r--arch/x86/include/asm/fpu/internal.h540
-rw-r--r--arch/x86/include/asm/fpu/sched.h68
-rw-r--r--arch/x86/include/asm/fpu/signal.h13
-rw-r--r--arch/x86/include/asm/fpu/types.h214
-rw-r--r--arch/x86/include/asm/fpu/xcr.h11
-rw-r--r--arch/x86/include/asm/fpu/xstate.h90
-rw-r--r--arch/x86/include/asm/ia32.h2
-rw-r--r--arch/x86/include/asm/io.h8
-rw-r--r--arch/x86/include/asm/irq_stack.h5
-rw-r--r--arch/x86/include/asm/kexec.h2
-rw-r--r--arch/x86/include/asm/kvm_host.h12
-rw-r--r--arch/x86/include/asm/mce.h12
-rw-r--r--arch/x86/include/asm/mem_encrypt.h12
-rw-r--r--arch/x86/include/asm/msr-index.h2
-rw-r--r--arch/x86/include/asm/msr.h4
-rw-r--r--arch/x86/include/asm/nospec-branch.h72
-rw-r--r--arch/x86/include/asm/page_32.h2
-rw-r--r--arch/x86/include/asm/paravirt.h31
-rw-r--r--arch/x86/include/asm/pkru.h2
-rw-r--r--arch/x86/include/asm/processor.h14
-rw-r--r--arch/x86/include/asm/proto.h2
-rw-r--r--arch/x86/include/asm/ptrace.h2
-rw-r--r--arch/x86/include/asm/segment.h2
-rw-r--r--arch/x86/include/asm/smp.h7
-rw-r--r--arch/x86/include/asm/topology.h3
-rw-r--r--arch/x86/include/asm/trace/fpu.h4
-rw-r--r--arch/x86/include/asm/uaccess.h2
-rw-r--r--arch/x86/include/asm/xen/hypercall.h6
-rw-r--r--arch/x86/include/uapi/asm/prctl.h4
-rw-r--r--arch/x86/kernel/Makefile6
-rw-r--r--arch/x86/kernel/alternative.c191
-rw-r--r--arch/x86/kernel/apic/x2apic_cluster.c27
-rw-r--r--arch/x86/kernel/cc_platform.c69
-rw-r--r--arch/x86/kernel/cpu/Makefile1
-rw-r--r--arch/x86/kernel/cpu/amd.c2
-rw-r--r--arch/x86/kernel/cpu/bugs.c9
-rw-r--r--arch/x86/kernel/cpu/cacheinfo.c1
-rw-r--r--arch/x86/kernel/cpu/common.c51
-rw-r--r--arch/x86/kernel/cpu/cpu.h1
-rw-r--r--arch/x86/kernel/cpu/cpuid-deps.c2
-rw-r--r--arch/x86/kernel/cpu/hygon.c2
-rw-r--r--arch/x86/kernel/cpu/mce/amd.c10
-rw-r--r--arch/x86/kernel/cpu/mce/core.c292
-rw-r--r--arch/x86/kernel/cpu/mce/internal.h71
-rw-r--r--arch/x86/kernel/cpu/mce/p5.c6
-rw-r--r--arch/x86/kernel/cpu/mce/severity.c33
-rw-r--r--arch/x86/kernel/cpu/mce/winchip.c6
-rw-r--r--arch/x86/kernel/cpu/vortex.c39
-rw-r--r--arch/x86/kernel/crash_dump_64.c4
-rw-r--r--arch/x86/kernel/devicetree.c5
-rw-r--r--arch/x86/kernel/fpu/bugs.c2
-rw-r--r--arch/x86/kernel/fpu/context.h83
-rw-r--r--arch/x86/kernel/fpu/core.c392
-rw-r--r--arch/x86/kernel/fpu/init.c76
-rw-r--r--arch/x86/kernel/fpu/internal.h28
-rw-r--r--arch/x86/kernel/fpu/legacy.h115
-rw-r--r--arch/x86/kernel/fpu/regset.c36
-rw-r--r--arch/x86/kernel/fpu/signal.c285
-rw-r--r--arch/x86/kernel/fpu/xstate.c898
-rw-r--r--arch/x86/kernel/fpu/xstate.h278
-rw-r--r--arch/x86/kernel/head64.c9
-rw-r--r--arch/x86/kernel/irq_32.c2
-rw-r--r--arch/x86/kernel/irqflags.S2
-rw-r--r--arch/x86/kernel/itmt.c2
-rw-r--r--arch/x86/kernel/kvm.c3
-rw-r--r--arch/x86/kernel/kvmclock.c4
-rw-r--r--arch/x86/kernel/machine_kexec_64.c19
-rw-r--r--arch/x86/kernel/module.c9
-rw-r--r--arch/x86/kernel/paravirt.c45
-rw-r--r--arch/x86/kernel/pci-swiotlb.c9
-rw-r--r--arch/x86/kernel/process.c92
-rw-r--r--arch/x86/kernel/process_32.c5
-rw-r--r--arch/x86/kernel/process_64.c5
-rw-r--r--arch/x86/kernel/ptrace.c2
-rw-r--r--arch/x86/kernel/relocate_kernel_64.S2
-rw-r--r--arch/x86/kernel/sev-shared.c2
-rw-r--r--arch/x86/kernel/sev.c8
-rw-r--r--arch/x86/kernel/signal.c83
-rw-r--r--arch/x86/kernel/smpboot.c48
-rw-r--r--arch/x86/kernel/traps.c40
-rw-r--r--arch/x86/kernel/umip.c8
-rw-r--r--arch/x86/kernel/vmlinux.lds.S14
-rw-r--r--arch/x86/kvm/lapic.c20
-rw-r--r--arch/x86/kvm/mmu/mmu.c6
-rw-r--r--arch/x86/kvm/svm/sev.c31
-rw-r--r--arch/x86/kvm/svm/svm.c10
-rw-r--r--arch/x86/kvm/svm/svm.h10
-rw-r--r--arch/x86/kvm/svm/svm_ops.h4
-rw-r--r--arch/x86/kvm/vmx/evmcs.h4
-rw-r--r--arch/x86/kvm/vmx/vmx.c34
-rw-r--r--arch/x86/kvm/x86.c447
-rw-r--r--arch/x86/kvm/xen.c27
-rw-r--r--arch/x86/lib/copy_mc_64.S8
-rw-r--r--arch/x86/lib/copy_user_64.S13
-rw-r--r--arch/x86/lib/insn.c5
-rw-r--r--arch/x86/lib/retpoline.S56
-rw-r--r--arch/x86/math-emu/fpu_aux.c2
-rw-r--r--arch/x86/math-emu/fpu_entry.c6
-rw-r--r--arch/x86/math-emu/fpu_system.h2
-rw-r--r--arch/x86/mm/extable.c135
-rw-r--r--arch/x86/mm/ioremap.c18
-rw-r--r--arch/x86/mm/mem_encrypt.c55
-rw-r--r--arch/x86/mm/mem_encrypt_identity.c9
-rw-r--r--arch/x86/mm/pat/set_memory.c3
-rw-r--r--arch/x86/net/bpf_jit_comp.c171
-rw-r--r--arch/x86/net/bpf_jit_comp32.c22
-rw-r--r--arch/x86/platform/efi/efi_64.c9
-rw-r--r--arch/x86/power/cpu.c2
-rw-r--r--arch/x86/realmode/init.c8
-rw-r--r--arch/x86/xen/enlighten_pv.c70
-rw-r--r--arch/x86/xen/irq.c31
-rw-r--r--arch/x86/xen/mmu_pv.c93
-rw-r--r--arch/x86/xen/xen-asm.S79
-rw-r--r--arch/x86/xen/xen-head.S34
-rw-r--r--arch/xtensa/include/asm/cacheflush.h5
-rw-r--r--arch/xtensa/include/asm/processor.h2
-rw-r--r--arch/xtensa/kernel/process.c5
-rw-r--r--arch/xtensa/platforms/iss/simdisk.c16
306 files changed, 4404 insertions, 2965 deletions
diff --git a/arch/Kconfig b/arch/Kconfig
index 8df1c7102643..dcd84e3ae7d4 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -1234,6 +1234,9 @@ config RELR
config ARCH_HAS_MEM_ENCRYPT
bool
+config ARCH_HAS_CC_PLATFORM
+ bool
+
config HAVE_SPARSE_SYSCALL_NR
bool
help
@@ -1288,6 +1291,9 @@ config ARCH_HAS_ELFCORE_COMPAT
config ARCH_HAS_PARANOID_L1D_FLUSH
bool
+config DYNAMIC_SIGFRAME
+ bool
+
source "kernel/gcov/Kconfig"
source "scripts/gcc-plugins/Kconfig"
diff --git a/arch/alpha/include/asm/processor.h b/arch/alpha/include/asm/processor.h
index 6100431da07a..090499c99c1c 100644
--- a/arch/alpha/include/asm/processor.h
+++ b/arch/alpha/include/asm/processor.h
@@ -42,7 +42,7 @@ extern void start_thread(struct pt_regs *, unsigned long, unsigned long);
struct task_struct;
extern void release_thread(struct task_struct *);
-unsigned long get_wchan(struct task_struct *p);
+unsigned long __get_wchan(struct task_struct *p);
#define KSTK_EIP(tsk) (task_pt_regs(tsk)->pc)
diff --git a/arch/alpha/kernel/process.c b/arch/alpha/kernel/process.c
index a5123ea426ce..5f8527081da9 100644
--- a/arch/alpha/kernel/process.c
+++ b/arch/alpha/kernel/process.c
@@ -376,12 +376,11 @@ thread_saved_pc(struct task_struct *t)
}
unsigned long
-get_wchan(struct task_struct *p)
+__get_wchan(struct task_struct *p)
{
unsigned long schedule_frame;
unsigned long pc;
- if (!p || p == current || task_is_running(p))
- return 0;
+
/*
* This one depends on the frame size of schedule(). Do a
* "disass schedule" in gdb to find the frame size. Also, the
diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig
index 3a5a80f302e1..b4ae6058902a 100644
--- a/arch/arc/Kconfig
+++ b/arch/arc/Kconfig
@@ -40,7 +40,6 @@ config ARC
select HAVE_KRETPROBES
select HAVE_MOD_ARCH_SPECIFIC
select HAVE_PERF_EVENTS
- select HANDLE_DOMAIN_IRQ
select IRQ_DOMAIN
select MODULES_USE_ELF_RELA
select OF
diff --git a/arch/arc/include/asm/cacheflush.h b/arch/arc/include/asm/cacheflush.h
index e201b4b1655a..e8c2c7469e10 100644
--- a/arch/arc/include/asm/cacheflush.h
+++ b/arch/arc/include/asm/cacheflush.h
@@ -36,6 +36,7 @@ void __flush_dcache_page(phys_addr_t paddr, unsigned long vaddr);
#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
void flush_dcache_page(struct page *page);
+void flush_dcache_folio(struct folio *folio);
void dma_cache_wback_inv(phys_addr_t start, unsigned long sz);
void dma_cache_inv(phys_addr_t start, unsigned long sz);
diff --git a/arch/arc/include/asm/pgtable.h b/arch/arc/include/asm/pgtable.h
index 9320b04c04bf..4cf45a99fd79 100644
--- a/arch/arc/include/asm/pgtable.h
+++ b/arch/arc/include/asm/pgtable.h
@@ -26,11 +26,6 @@ extern char empty_zero_page[PAGE_SIZE];
extern pgd_t swapper_pg_dir[] __aligned(PAGE_SIZE);
-/* Macro to mark a page protection as uncacheable */
-#define pgprot_noncached(prot) (__pgprot(pgprot_val(prot) & ~_PAGE_CACHEABLE))
-
-extern pgd_t swapper_pg_dir[] __aligned(PAGE_SIZE);
-
/* to cope with aliasing VIPT cache */
#define HAVE_ARCH_UNMAPPED_AREA
diff --git a/arch/arc/include/asm/processor.h b/arch/arc/include/asm/processor.h
index f28afcf5c6d1..54db9d7bb562 100644
--- a/arch/arc/include/asm/processor.h
+++ b/arch/arc/include/asm/processor.h
@@ -70,7 +70,7 @@ struct task_struct;
extern void start_thread(struct pt_regs * regs, unsigned long pc,
unsigned long usp);
-extern unsigned int get_wchan(struct task_struct *p);
+extern unsigned int __get_wchan(struct task_struct *p);
#endif /* !__ASSEMBLY__ */
diff --git a/arch/arc/kernel/irq.c b/arch/arc/kernel/irq.c
index ef909dd4b40c..dd09b58ff82d 100644
--- a/arch/arc/kernel/irq.c
+++ b/arch/arc/kernel/irq.c
@@ -6,6 +6,8 @@
#include <linux/interrupt.h>
#include <linux/irqchip.h>
#include <asm/mach_desc.h>
+
+#include <asm/irq_regs.h>
#include <asm/smp.h>
/*
@@ -39,5 +41,11 @@ void __init init_IRQ(void)
*/
void arch_do_IRQ(unsigned int hwirq, struct pt_regs *regs)
{
- handle_domain_irq(NULL, hwirq, regs);
+ struct pt_regs *old_regs;
+
+ irq_enter();
+ old_regs = set_irq_regs(regs);
+ generic_handle_domain_irq(NULL, hwirq);
+ set_irq_regs(old_regs);
+ irq_exit();
}
diff --git a/arch/arc/kernel/stacktrace.c b/arch/arc/kernel/stacktrace.c
index c376ff3147e7..5372dc04e784 100644
--- a/arch/arc/kernel/stacktrace.c
+++ b/arch/arc/kernel/stacktrace.c
@@ -15,7 +15,7 @@
* = specifics of data structs where trace is saved(CONFIG_STACKTRACE etc)
*
* vineetg: March 2009
- * -Implemented correct versions of thread_saved_pc() and get_wchan()
+ * -Implemented correct versions of thread_saved_pc() and __get_wchan()
*
* rajeshwarr: 2008
* -Initial implementation
@@ -248,7 +248,7 @@ void show_stack(struct task_struct *tsk, unsigned long *sp, const char *loglvl)
* Of course just returning schedule( ) would be pointless so unwind until
* the function is not in schedular code
*/
-unsigned int get_wchan(struct task_struct *tsk)
+unsigned int __get_wchan(struct task_struct *tsk)
{
return arc_unwind_core(tsk, NULL, __get_first_nonsched, NULL);
}
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 59baf6c132a7..2c056c0e834e 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -64,7 +64,6 @@ config ARM
select GENERIC_PCI_IOMAP
select GENERIC_SCHED_CLOCK
select GENERIC_SMP_IDLE_THREAD
- select HANDLE_DOMAIN_IRQ
select HARDIRQS_SW_RESEND
select HAVE_ARCH_AUDITSYSCALL if AEABI && !OABI_COMPAT
select HAVE_ARCH_BITREVERSE if (CPU_32v7M || CPU_32v7) && !CPU_32v6
@@ -92,6 +91,7 @@ config ARM
select HAVE_FTRACE_MCOUNT_RECORD if !XIP_KERNEL
select HAVE_FUNCTION_GRAPH_TRACER if !THUMB2_KERNEL && !CC_IS_CLANG
select HAVE_FUNCTION_TRACER if !XIP_KERNEL
+ select HAVE_FUTEX_CMPXCHG if FUTEX
select HAVE_GCC_PLUGINS
select HAVE_HW_BREAKPOINT if PERF_EVENTS && (CPU_V6 || CPU_V6K || CPU_V7)
select HAVE_IRQ_TIME_ACCOUNTING
diff --git a/arch/arm/boot/compressed/decompress.c b/arch/arm/boot/compressed/decompress.c
index aa075d8372ea..74255e819831 100644
--- a/arch/arm/boot/compressed/decompress.c
+++ b/arch/arm/boot/compressed/decompress.c
@@ -47,7 +47,10 @@ extern char * strchrnul(const char *, int);
#endif
#ifdef CONFIG_KERNEL_XZ
+/* Prevent KASAN override of string helpers in decompressor */
+#undef memmove
#define memmove memmove
+#undef memcpy
#define memcpy memcpy
#include "../../../../lib/decompress_unxz.c"
#endif
diff --git a/arch/arm/boot/dts/bcm2711-rpi-4-b.dts b/arch/arm/boot/dts/bcm2711-rpi-4-b.dts
index f24bdd0870a5..72ce80fbf266 100644
--- a/arch/arm/boot/dts/bcm2711-rpi-4-b.dts
+++ b/arch/arm/boot/dts/bcm2711-rpi-4-b.dts
@@ -40,8 +40,8 @@
regulator-always-on;
regulator-settling-time-us = <5000>;
gpios = <&expgpio 4 GPIO_ACTIVE_HIGH>;
- states = <1800000 0x1
- 3300000 0x0>;
+ states = <1800000 0x1>,
+ <3300000 0x0>;
status = "okay";
};
@@ -217,15 +217,16 @@
};
&pcie0 {
- pci@1,0 {
+ pci@0,0 {
+ device_type = "pci";
#address-cells = <3>;
#size-cells = <2>;
ranges;
reg = <0 0 0 0 0>;
- usb@1,0 {
- reg = <0x10000 0 0 0 0>;
+ usb@0,0 {
+ reg = <0 0 0 0 0>;
resets = <&reset RASPBERRYPI_FIRMWARE_RESET_ID_USB>;
};
};
diff --git a/arch/arm/boot/dts/bcm2711.dtsi b/arch/arm/boot/dts/bcm2711.dtsi
index b8a4096192aa..3b60297af7f6 100644
--- a/arch/arm/boot/dts/bcm2711.dtsi
+++ b/arch/arm/boot/dts/bcm2711.dtsi
@@ -300,6 +300,14 @@
status = "disabled";
};
+ vec: vec@7ec13000 {
+ compatible = "brcm,bcm2711-vec";
+ reg = <0x7ec13000 0x1000>;
+ clocks = <&clocks BCM2835_CLOCK_VEC>;
+ interrupts = <GIC_SPI 123 IRQ_TYPE_LEVEL_HIGH>;
+ status = "disabled";
+ };
+
dvp: clock@7ef00000 {
compatible = "brcm,brcm2711-dvp";
reg = <0x7ef00000 0x10>;
@@ -532,8 +540,8 @@
compatible = "brcm,genet-mdio-v5";
reg = <0xe14 0x8>;
reg-names = "mdio";
- #address-cells = <0x0>;
- #size-cells = <0x1>;
+ #address-cells = <0x1>;
+ #size-cells = <0x0>;
};
};
};
diff --git a/arch/arm/boot/dts/bcm2835-common.dtsi b/arch/arm/boot/dts/bcm2835-common.dtsi
index 4119271c979d..c25e797b9060 100644
--- a/arch/arm/boot/dts/bcm2835-common.dtsi
+++ b/arch/arm/boot/dts/bcm2835-common.dtsi
@@ -106,6 +106,14 @@
status = "okay";
};
+ vec: vec@7e806000 {
+ compatible = "brcm,bcm2835-vec";
+ reg = <0x7e806000 0x1000>;
+ clocks = <&clocks BCM2835_CLOCK_VEC>;
+ interrupts = <2 27>;
+ status = "disabled";
+ };
+
pixelvalve@7e807000 {
compatible = "brcm,bcm2835-pixelvalve2";
reg = <0x7e807000 0x100>;
diff --git a/arch/arm/boot/dts/bcm283x.dtsi b/arch/arm/boot/dts/bcm283x.dtsi
index 0f3be55201a5..a3e06b680947 100644
--- a/arch/arm/boot/dts/bcm283x.dtsi
+++ b/arch/arm/boot/dts/bcm283x.dtsi
@@ -464,14 +464,6 @@
status = "disabled";
};
- vec: vec@7e806000 {
- compatible = "brcm,bcm2835-vec";
- reg = <0x7e806000 0x1000>;
- clocks = <&clocks BCM2835_CLOCK_VEC>;
- interrupts = <2 27>;
- status = "disabled";
- };
-
usb: usb@7e980000 {
compatible = "brcm,bcm2835-usb";
reg = <0x7e980000 0x10000>;
diff --git a/arch/arm/boot/dts/spear3xx.dtsi b/arch/arm/boot/dts/spear3xx.dtsi
index f266b7b03482..cc88ebe7a60c 100644
--- a/arch/arm/boot/dts/spear3xx.dtsi
+++ b/arch/arm/boot/dts/spear3xx.dtsi
@@ -47,7 +47,7 @@
};
gmac: eth@e0800000 {
- compatible = "st,spear600-gmac";
+ compatible = "snps,dwmac-3.40a";
reg = <0xe0800000 0x8000>;
interrupts = <23 22>;
interrupt-names = "macirq", "eth_wake_irq";
diff --git a/arch/arm/boot/dts/sun7i-a20-olinuxino-lime2.dts b/arch/arm/boot/dts/sun7i-a20-olinuxino-lime2.dts
index 8077f1716fbc..ecb91fb899ff 100644
--- a/arch/arm/boot/dts/sun7i-a20-olinuxino-lime2.dts
+++ b/arch/arm/boot/dts/sun7i-a20-olinuxino-lime2.dts
@@ -112,7 +112,7 @@
pinctrl-names = "default";
pinctrl-0 = <&gmac_rgmii_pins>;
phy-handle = <&phy1>;
- phy-mode = "rgmii";
+ phy-mode = "rgmii-id";
status = "okay";
};
diff --git a/arch/arm/configs/multi_v7_defconfig b/arch/arm/configs/multi_v7_defconfig
index b4f74454f20f..33572998dbbe 100644
--- a/arch/arm/configs/multi_v7_defconfig
+++ b/arch/arm/configs/multi_v7_defconfig
@@ -197,7 +197,6 @@ CONFIG_PCI_EPF_TEST=m
CONFIG_DEVTMPFS=y
CONFIG_DEVTMPFS_MOUNT=y
CONFIG_OMAP_OCP2SCP=y
-CONFIG_SIMPLE_PM_BUS=y
CONFIG_MTD=y
CONFIG_MTD_CMDLINE_PARTS=y
CONFIG_MTD_BLOCK=y
diff --git a/arch/arm/configs/oxnas_v6_defconfig b/arch/arm/configs/oxnas_v6_defconfig
index cae0db6b4eaf..de37f7e90999 100644
--- a/arch/arm/configs/oxnas_v6_defconfig
+++ b/arch/arm/configs/oxnas_v6_defconfig
@@ -46,7 +46,6 @@ CONFIG_DEVTMPFS=y
CONFIG_DEVTMPFS_MOUNT=y
CONFIG_DMA_CMA=y
CONFIG_CMA_SIZE_MBYTES=64
-CONFIG_SIMPLE_PM_BUS=y
CONFIG_MTD=y
CONFIG_MTD_CMDLINE_PARTS=y
CONFIG_MTD_BLOCK=y
diff --git a/arch/arm/configs/shmobile_defconfig b/arch/arm/configs/shmobile_defconfig
index d9a27e4e0914..18d2a960b2d2 100644
--- a/arch/arm/configs/shmobile_defconfig
+++ b/arch/arm/configs/shmobile_defconfig
@@ -40,7 +40,6 @@ CONFIG_PCI_RCAR_GEN2=y
CONFIG_PCIE_RCAR_HOST=y
CONFIG_DEVTMPFS=y
CONFIG_DEVTMPFS_MOUNT=y
-CONFIG_SIMPLE_PM_BUS=y
CONFIG_MTD=y
CONFIG_MTD_BLOCK=y
CONFIG_MTD_CFI=y
diff --git a/arch/arm/include/asm/arch_timer.h b/arch/arm/include/asm/arch_timer.h
index 99175812d903..bb129b6d2366 100644
--- a/arch/arm/include/asm/arch_timer.h
+++ b/arch/arm/include/asm/arch_timer.h
@@ -7,6 +7,7 @@
#include <asm/hwcap.h>
#include <linux/clocksource.h>
#include <linux/init.h>
+#include <linux/io-64-nonatomic-lo-hi.h>
#include <linux/types.h>
#include <clocksource/arm_arch_timer.h>
@@ -24,29 +25,35 @@ int arch_timer_arch_init(void);
* the code. At least it does so with a recent GCC (4.6.3).
*/
static __always_inline
-void arch_timer_reg_write_cp15(int access, enum arch_timer_reg reg, u32 val)
+void arch_timer_reg_write_cp15(int access, enum arch_timer_reg reg, u64 val)
{
if (access == ARCH_TIMER_PHYS_ACCESS) {
switch (reg) {
case ARCH_TIMER_REG_CTRL:
- asm volatile("mcr p15, 0, %0, c14, c2, 1" : : "r" (val));
+ asm volatile("mcr p15, 0, %0, c14, c2, 1" : : "r" ((u32)val));
+ isb();
break;
- case ARCH_TIMER_REG_TVAL:
- asm volatile("mcr p15, 0, %0, c14, c2, 0" : : "r" (val));
+ case ARCH_TIMER_REG_CVAL:
+ asm volatile("mcrr p15, 2, %Q0, %R0, c14" : : "r" (val));
break;
+ default:
+ BUILD_BUG();
}
} else if (access == ARCH_TIMER_VIRT_ACCESS) {
switch (reg) {
case ARCH_TIMER_REG_CTRL:
- asm volatile("mcr p15, 0, %0, c14, c3, 1" : : "r" (val));
+ asm volatile("mcr p15, 0, %0, c14, c3, 1" : : "r" ((u32)val));
+ isb();
break;
- case ARCH_TIMER_REG_TVAL:
- asm volatile("mcr p15, 0, %0, c14, c3, 0" : : "r" (val));
+ case ARCH_TIMER_REG_CVAL:
+ asm volatile("mcrr p15, 3, %Q0, %R0, c14" : : "r" (val));
break;
+ default:
+ BUILD_BUG();
}
+ } else {
+ BUILD_BUG();
}
-
- isb();
}
static __always_inline
@@ -59,19 +66,19 @@ u32 arch_timer_reg_read_cp15(int access, enum arch_timer_reg reg)
case ARCH_TIMER_REG_CTRL:
asm volatile("mrc p15, 0, %0, c14, c2, 1" : "=r" (val));
break;
- case ARCH_TIMER_REG_TVAL:
- asm volatile("mrc p15, 0, %0, c14, c2, 0" : "=r" (val));
- break;
+ default:
+ BUILD_BUG();
}
} else if (access == ARCH_TIMER_VIRT_ACCESS) {
switch (reg) {
case ARCH_TIMER_REG_CTRL:
asm volatile("mrc p15, 0, %0, c14, c3, 1" : "=r" (val));
break;
- case ARCH_TIMER_REG_TVAL:
- asm volatile("mrc p15, 0, %0, c14, c3, 0" : "=r" (val));
- break;
+ default:
+ BUILD_BUG();
}
+ } else {
+ BUILD_BUG();
}
return val;
diff --git a/arch/arm/include/asm/cacheflush.h b/arch/arm/include/asm/cacheflush.h
index 5e56288e343b..e68fb879e4f9 100644
--- a/arch/arm/include/asm/cacheflush.h
+++ b/arch/arm/include/asm/cacheflush.h
@@ -290,6 +290,7 @@ extern void flush_cache_page(struct vm_area_struct *vma, unsigned long user_addr
*/
#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
extern void flush_dcache_page(struct page *);
+void flush_dcache_folio(struct folio *folio);
#define ARCH_IMPLEMENTS_FLUSH_KERNEL_VMAP_RANGE 1
static inline void flush_kernel_vmap_range(void *addr, int size)
diff --git a/arch/arm/include/asm/processor.h b/arch/arm/include/asm/processor.h
index 9e6b97286307..6af68edfa53a 100644
--- a/arch/arm/include/asm/processor.h
+++ b/arch/arm/include/asm/processor.h
@@ -84,7 +84,7 @@ struct task_struct;
/* Free all resources held by a thread. */
extern void release_thread(struct task_struct *);
-unsigned long get_wchan(struct task_struct *p);
+unsigned long __get_wchan(struct task_struct *p);
#define task_pt_regs(p) \
((struct pt_regs *)(THREAD_START_SP + task_stack_page(p)) - 1)
diff --git a/arch/arm/include/asm/uaccess.h b/arch/arm/include/asm/uaccess.h
index 084d1c07c2d0..36fbc3329252 100644
--- a/arch/arm/include/asm/uaccess.h
+++ b/arch/arm/include/asm/uaccess.h
@@ -176,6 +176,7 @@ extern int __get_user_64t_4(void *);
register unsigned long __l asm("r1") = __limit; \
register int __e asm("r0"); \
unsigned int __ua_flags = uaccess_save_and_enable(); \
+ int __tmp_e; \
switch (sizeof(*(__p))) { \
case 1: \
if (sizeof((x)) >= 8) \
@@ -203,9 +204,10 @@ extern int __get_user_64t_4(void *);
break; \
default: __e = __get_user_bad(); break; \
} \
+ __tmp_e = __e; \
uaccess_restore(__ua_flags); \
x = (typeof(*(p))) __r2; \
- __e; \
+ __tmp_e; \
})
#define get_user(x, p) \
diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
index 241b73d64df7..3d0b6169ab86 100644
--- a/arch/arm/kernel/entry-armv.S
+++ b/arch/arm/kernel/entry-armv.S
@@ -38,14 +38,11 @@
*/
.macro irq_handler
#ifdef CONFIG_GENERIC_IRQ_MULTI_HANDLER
- ldr r1, =handle_arch_irq
mov r0, sp
- badr lr, 9997f
- ldr pc, [r1]
+ bl generic_handle_arch_irq
#else
arch_irq_handler_default
#endif
-9997:
.endm
.macro pabt_helper
diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S
index 29070eb8df7d..3fc7f9750ce4 100644
--- a/arch/arm/kernel/head.S
+++ b/arch/arm/kernel/head.S
@@ -253,7 +253,7 @@ __create_page_tables:
add r0, r4, #KERNEL_OFFSET >> (SECTION_SHIFT - PMD_ORDER)
ldr r6, =(_end - 1)
adr_l r5, kernel_sec_start @ _pa(kernel_sec_start)
-#ifdef CONFIG_CPU_ENDIAN_BE8
+#if defined CONFIG_CPU_ENDIAN_BE8 || defined CONFIG_CPU_ENDIAN_BE32
str r8, [r5, #4] @ Save physical start of kernel (BE)
#else
str r8, [r5] @ Save physical start of kernel (LE)
@@ -266,7 +266,7 @@ __create_page_tables:
bls 1b
eor r3, r3, r7 @ Remove the MMU flags
adr_l r5, kernel_sec_end @ _pa(kernel_sec_end)
-#ifdef CONFIG_CPU_ENDIAN_BE8
+#if defined CONFIG_CPU_ENDIAN_BE8 || defined CONFIG_CPU_ENDIAN_BE32
str r3, [r5, #4] @ Save physical end of kernel (BE)
#else
str r3, [r5] @ Save physical end of kernel (LE)
diff --git a/arch/arm/kernel/irq.c b/arch/arm/kernel/irq.c
index 20ab1e607522..b79975bd988c 100644
--- a/arch/arm/kernel/irq.c
+++ b/arch/arm/kernel/irq.c
@@ -63,11 +63,8 @@ int arch_show_interrupts(struct seq_file *p, int prec)
*/
void handle_IRQ(unsigned int irq, struct pt_regs *regs)
{
- struct pt_regs *old_regs = set_irq_regs(regs);
struct irq_desc *desc;
- irq_enter();
-
/*
* Some hardware gives randomly wrong interrupts. Rather
* than crashing, do something sensible.
@@ -81,9 +78,6 @@ void handle_IRQ(unsigned int irq, struct pt_regs *regs)
handle_irq_desc(desc);
else
ack_bad_irq(irq);
-
- irq_exit();
- set_irq_regs(old_regs);
}
/*
@@ -92,7 +86,15 @@ void handle_IRQ(unsigned int irq, struct pt_regs *regs)
asmlinkage void __exception_irq_entry
asm_do_IRQ(unsigned int irq, struct pt_regs *regs)
{
+ struct pt_regs *old_regs;
+
+ irq_enter();
+ old_regs = set_irq_regs(regs);
+
handle_IRQ(irq, regs);
+
+ set_irq_regs(old_regs);
+ irq_exit();
}
void __init init_IRQ(void)
diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c
index 0e2d3051741e..96f577e59595 100644
--- a/arch/arm/kernel/process.c
+++ b/arch/arm/kernel/process.c
@@ -276,13 +276,11 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start,
return 0;
}
-unsigned long get_wchan(struct task_struct *p)
+unsigned long __get_wchan(struct task_struct *p)
{
struct stackframe frame;
unsigned long stack_page;
int count = 0;
- if (!p || p == current || task_is_running(p))
- return 0;
frame.fp = thread_saved_fp(p);
frame.sp = thread_saved_sp(p);
diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c
index 4a7edc6e848f..195dff58bafc 100644
--- a/arch/arm/kernel/traps.c
+++ b/arch/arm/kernel/traps.c
@@ -136,7 +136,7 @@ static void dump_mem(const char *lvl, const char *str, unsigned long bottom,
for (p = first, i = 0; i < 8 && p < top; i++, p += 4) {
if (p >= bottom && p < top) {
unsigned long val;
- if (get_kernel_nofault(val, (unsigned long *)p))
+ if (!get_kernel_nofault(val, (unsigned long *)p))
sprintf(str + i * 9, " %08lx", val);
else
sprintf(str + i * 9, " ????????");
diff --git a/arch/arm/kernel/vmlinux-xip.lds.S b/arch/arm/kernel/vmlinux-xip.lds.S
index 50136828f5b5..f14c2360ea0b 100644
--- a/arch/arm/kernel/vmlinux-xip.lds.S
+++ b/arch/arm/kernel/vmlinux-xip.lds.S
@@ -40,6 +40,10 @@ SECTIONS
ARM_DISCARD
*(.alt.smp.init)
*(.pv_table)
+#ifndef CONFIG_ARM_UNWIND
+ *(.ARM.exidx) *(.ARM.exidx.*)
+ *(.ARM.extab) *(.ARM.extab.*)
+#endif
}
. = XIP_VIRT_ADDR(CONFIG_XIP_PHYS_ADDR);
@@ -172,7 +176,7 @@ ASSERT((__arch_info_end - __arch_info_begin), "no machine record defined")
ASSERT((_end - __bss_start) >= 12288, ".bss too small for CONFIG_XIP_DEFLATED_DATA")
#endif
-#ifdef CONFIG_ARM_MPU
+#if defined(CONFIG_ARM_MPU) && !defined(CONFIG_COMPILE_TEST)
/*
* Due to PMSAv7 restriction on base address and size we have to
* enforce minimal alignment restrictions. It was seen that weaker
diff --git a/arch/arm/mach-bcm/Kconfig b/arch/arm/mach-bcm/Kconfig
index 2890e61b2b46..bd3f82788ebc 100644
--- a/arch/arm/mach-bcm/Kconfig
+++ b/arch/arm/mach-bcm/Kconfig
@@ -161,7 +161,6 @@ config ARCH_BCM2835
select ARM_TIMER_SP804
select HAVE_ARM_ARCH_TIMER if ARCH_MULTI_V7
select BCM2835_TIMER
- select BRCMSTB_L2_IRQ
select PINCTRL
select PINCTRL_BCM2835
select MFD_CORE
@@ -209,9 +208,6 @@ config ARCH_BRCMSTB
select ARM_GIC
select ARM_ERRATA_798181 if SMP
select HAVE_ARM_ARCH_TIMER
- select BCM7038_L1_IRQ
- select BRCMSTB_L2_IRQ
- select BCM7120_L2_IRQ
select ZONE_DMA if ARM_LPAE
select SOC_BRCMSTB
select SOC_BUS
diff --git a/arch/arm/mach-imx/avic.c b/arch/arm/mach-imx/avic.c
index 21bce4049cec..cf6546ddc7a3 100644
--- a/arch/arm/mach-imx/avic.c
+++ b/arch/arm/mach-imx/avic.c
@@ -154,7 +154,7 @@ static void __exception_irq_entry avic_handle_irq(struct pt_regs *regs)
if (nivector == 0xffff)
break;
- handle_domain_irq(domain, nivector, regs);
+ generic_handle_domain_irq(domain, nivector);
} while (1);
}
diff --git a/arch/arm/mach-imx/src.c b/arch/arm/mach-imx/src.c
index 95fd1fbb0826..59a8e8cc4469 100644
--- a/arch/arm/mach-imx/src.c
+++ b/arch/arm/mach-imx/src.c
@@ -9,6 +9,7 @@
#include <linux/iopoll.h>
#include <linux/of.h>
#include <linux/of_address.h>
+#include <linux/platform_device.h>
#include <linux/reset-controller.h>
#include <linux/smp.h>
#include <asm/smp_plat.h>
@@ -81,11 +82,6 @@ static const struct reset_control_ops imx_src_ops = {
.reset = imx_src_reset_module,
};
-static struct reset_controller_dev imx_reset_controller = {
- .ops = &imx_src_ops,
- .nr_resets = ARRAY_SIZE(sw_reset_bits),
-};
-
static void imx_gpcv2_set_m_core_pgc(bool enable, u32 offset)
{
writel_relaxed(enable, gpc_base + offset);
@@ -177,10 +173,6 @@ void __init imx_src_init(void)
src_base = of_iomap(np, 0);
WARN_ON(!src_base);
- imx_reset_controller.of_node = np;
- if (IS_ENABLED(CONFIG_RESET_CONTROLLER))
- reset_controller_register(&imx_reset_controller);
-
/*
* force warm reset sources to generate cold reset
* for a more reliable restart
@@ -214,3 +206,33 @@ void __init imx7_src_init(void)
if (!gpc_base)
return;
}
+
+static const struct of_device_id imx_src_dt_ids[] = {
+ { .compatible = "fsl,imx51-src" },
+ { /* sentinel */ }
+};
+
+static int imx_src_probe(struct platform_device *pdev)
+{
+ struct reset_controller_dev *rcdev;
+
+ rcdev = devm_kzalloc(&pdev->dev, sizeof(*rcdev), GFP_KERNEL);
+ if (!rcdev)
+ return -ENOMEM;
+
+ rcdev->ops = &imx_src_ops;
+ rcdev->dev = &pdev->dev;
+ rcdev->of_node = pdev->dev.of_node;
+ rcdev->nr_resets = ARRAY_SIZE(sw_reset_bits);
+
+ return devm_reset_controller_register(&pdev->dev, rcdev);
+}
+
+static struct platform_driver imx_src_driver = {
+ .driver = {
+ .name = "imx-src",
+ .of_match_table = imx_src_dt_ids,
+ },
+ .probe = imx_src_probe,
+};
+builtin_platform_driver(imx_src_driver);
diff --git a/arch/arm/mach-imx/tzic.c b/arch/arm/mach-imx/tzic.c
index 479a01bdac56..8b3d98d288d9 100644
--- a/arch/arm/mach-imx/tzic.c
+++ b/arch/arm/mach-imx/tzic.c
@@ -134,7 +134,7 @@ static void __exception_irq_entry tzic_handle_irq(struct pt_regs *regs)
while (stat) {
handled = 1;
irqofs = fls(stat) - 1;
- handle_domain_irq(domain, irqofs + i * 32, regs);
+ generic_handle_domain_irq(domain, irqofs + i * 32);
stat &= ~(1 << irqofs);
}
}
diff --git a/arch/arm/mach-omap1/irq.c b/arch/arm/mach-omap1/irq.c
index b11edc8a46f0..ee6a93083154 100644
--- a/arch/arm/mach-omap1/irq.c
+++ b/arch/arm/mach-omap1/irq.c
@@ -165,7 +165,7 @@ asmlinkage void __exception_irq_entry omap1_handle_irq(struct pt_regs *regs)
}
irq:
if (irqnr)
- handle_domain_irq(domain, irqnr, regs);
+ generic_handle_domain_irq(domain, irqnr);
else
break;
} while (irqnr);
diff --git a/arch/arm/mach-omap2/Kconfig b/arch/arm/mach-omap2/Kconfig
index 7f13adf26e61..02c253de9b6e 100644
--- a/arch/arm/mach-omap2/Kconfig
+++ b/arch/arm/mach-omap2/Kconfig
@@ -112,7 +112,6 @@ config ARCH_OMAP2PLUS
select PM_GENERIC_DOMAINS
select PM_GENERIC_DOMAINS_OF
select RESET_CONTROLLER
- select SIMPLE_PM_BUS
select SOC_BUS
select TI_SYSC
select OMAP_IRQCHIP
diff --git a/arch/arm/mach-s3c/irq-s3c24xx.c b/arch/arm/mach-s3c/irq-s3c24xx.c
index 3edc5f614eef..45dfd546e6fa 100644
--- a/arch/arm/mach-s3c/irq-s3c24xx.c
+++ b/arch/arm/mach-s3c/irq-s3c24xx.c
@@ -354,7 +354,7 @@ static inline int s3c24xx_handle_intc(struct s3c_irq_intc *intc,
if (!(pnd & (1 << offset)))
offset = __ffs(pnd);
- handle_domain_irq(intc->domain, intc_offset + offset, regs);
+ generic_handle_domain_irq(intc->domain, intc_offset + offset);
return true;
}
diff --git a/arch/arm/mm/proc-macros.S b/arch/arm/mm/proc-macros.S
index e2c743aa2eb2..d9f7dfe2a7ed 100644
--- a/arch/arm/mm/proc-macros.S
+++ b/arch/arm/mm/proc-macros.S
@@ -340,6 +340,7 @@ ENTRY(\name\()_cache_fns)
.macro define_tlb_functions name:req, flags_up:req, flags_smp
.type \name\()_tlb_fns, #object
+ .align 2
ENTRY(\name\()_tlb_fns)
.long \name\()_flush_user_tlb_range
.long \name\()_flush_kern_tlb_range
diff --git a/arch/arm/probes/kprobes/core.c b/arch/arm/probes/kprobes/core.c
index 27e0af78e88b..9d8634e2f12f 100644
--- a/arch/arm/probes/kprobes/core.c
+++ b/arch/arm/probes/kprobes/core.c
@@ -439,7 +439,7 @@ static struct undef_hook kprobes_arm_break_hook = {
#endif /* !CONFIG_THUMB2_KERNEL */
-int __init arch_init_kprobes()
+int __init arch_init_kprobes(void)
{
arm_probes_decode_init();
#ifdef CONFIG_THUMB2_KERNEL
diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl
index e842209e135d..543100151f2b 100644
--- a/arch/arm/tools/syscall.tbl
+++ b/arch/arm/tools/syscall.tbl
@@ -462,3 +462,4 @@
446 common landlock_restrict_self sys_landlock_restrict_self
# 447 reserved for memfd_secret
448 common process_mrelease sys_process_mrelease
+449 common futex_waitv sys_futex_waitv
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index fee914c716aa..bc8d3702f4ed 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -133,7 +133,6 @@ config ARM64
select GENERIC_TIME_VSYSCALL
select GENERIC_GETTIMEOFDAY
select GENERIC_VDSO_TIME_NS
- select HANDLE_DOMAIN_IRQ
select HARDIRQS_SW_RESEND
select HAVE_MOVE_PMD
select HAVE_MOVE_PUD
@@ -989,6 +988,15 @@ config SCHED_MC
making when dealing with multi-core CPU chips at a cost of slightly
increased overhead in some places. If unsure say N here.
+config SCHED_CLUSTER
+ bool "Cluster scheduler support"
+ help
+ Cluster scheduler support improves the CPU scheduler's decision
+ making when dealing with machines that have clusters of CPUs.
+ Cluster usually means a couple of CPUs which are placed closely
+ by sharing mid-level caches, last-level cache tags or internal
+ busses.
+
config SCHED_SMT
bool "SMT scheduler support"
help
diff --git a/arch/arm64/Kconfig.platforms b/arch/arm64/Kconfig.platforms
index b0ce18d4cc98..96a81967c3bf 100644
--- a/arch/arm64/Kconfig.platforms
+++ b/arch/arm64/Kconfig.platforms
@@ -44,7 +44,6 @@ config ARCH_BCM2835
select ARM_AMBA
select ARM_GIC
select ARM_TIMER_SP804
- select BRCMSTB_L2_IRQ
help
This enables support for the Broadcom BCM2837 and BCM2711 SoC.
These SoCs are used in the Raspberry Pi 3 and 4 devices.
@@ -82,8 +81,6 @@ config ARCH_BITMAIN
config ARCH_BRCMSTB
bool "Broadcom Set-Top-Box SoCs"
select ARCH_HAS_RESET_CONTROLLER
- select BCM7038_L1_IRQ
- select BRCMSTB_L2_IRQ
select GENERIC_IRQ_CHIP
select PINCTRL
help
@@ -167,7 +164,6 @@ config ARCH_MEDIATEK
config ARCH_MESON
bool "Amlogic Platforms"
select COMMON_CLK
- select MESON_IRQ_GPIO
help
This enables support for the arm64 based Amlogic SoCs
such as the s905, S905X/D, S912, A113X/D or S905X/D2
diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h5-nanopi-neo2.dts b/arch/arm64/boot/dts/allwinner/sun50i-h5-nanopi-neo2.dts
index 02f8e72f0cad..05486cccee1c 100644
--- a/arch/arm64/boot/dts/allwinner/sun50i-h5-nanopi-neo2.dts
+++ b/arch/arm64/boot/dts/allwinner/sun50i-h5-nanopi-neo2.dts
@@ -75,7 +75,7 @@
pinctrl-0 = <&emac_rgmii_pins>;
phy-supply = <&reg_gmac_3v3>;
phy-handle = <&ext_rgmii_phy>;
- phy-mode = "rgmii";
+ phy-mode = "rgmii-id";
status = "okay";
};
diff --git a/arch/arm64/boot/dts/freescale/imx8mm-kontron-n801x-s.dts b/arch/arm64/boot/dts/freescale/imx8mm-kontron-n801x-s.dts
index d17abb515835..e99e7644ff39 100644
--- a/arch/arm64/boot/dts/freescale/imx8mm-kontron-n801x-s.dts
+++ b/arch/arm64/boot/dts/freescale/imx8mm-kontron-n801x-s.dts
@@ -70,7 +70,9 @@
regulator-name = "rst-usb-eth2";
pinctrl-names = "default";
pinctrl-0 = <&pinctrl_usb_eth2>;
- gpio = <&gpio3 2 GPIO_ACTIVE_LOW>;
+ gpio = <&gpio3 2 GPIO_ACTIVE_HIGH>;
+ enable-active-high;
+ regulator-always-on;
};
reg_vdd_5v: regulator-5v {
@@ -95,7 +97,7 @@
clocks = <&osc_can>;
interrupt-parent = <&gpio4>;
interrupts = <28 IRQ_TYPE_EDGE_FALLING>;
- spi-max-frequency = <100000>;
+ spi-max-frequency = <10000000>;
vdd-supply = <&reg_vdd_3v3>;
xceiver-supply = <&reg_vdd_5v>;
};
@@ -111,7 +113,7 @@
&fec1 {
pinctrl-names = "default";
pinctrl-0 = <&pinctrl_enet>;
- phy-connection-type = "rgmii";
+ phy-connection-type = "rgmii-rxid";
phy-handle = <&ethphy>;
status = "okay";
diff --git a/arch/arm64/boot/dts/freescale/imx8mm-kontron-n801x-som.dtsi b/arch/arm64/boot/dts/freescale/imx8mm-kontron-n801x-som.dtsi
index 9db9b90bf2bc..42bbbb3f532b 100644
--- a/arch/arm64/boot/dts/freescale/imx8mm-kontron-n801x-som.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8mm-kontron-n801x-som.dtsi
@@ -91,10 +91,12 @@
reg_vdd_soc: BUCK1 {
regulator-name = "buck1";
regulator-min-microvolt = <800000>;
- regulator-max-microvolt = <900000>;
+ regulator-max-microvolt = <850000>;
regulator-boot-on;
regulator-always-on;
regulator-ramp-delay = <3125>;
+ nxp,dvs-run-voltage = <850000>;
+ nxp,dvs-standby-voltage = <800000>;
};
reg_vdd_arm: BUCK2 {
@@ -111,7 +113,7 @@
reg_vdd_dram: BUCK3 {
regulator-name = "buck3";
regulator-min-microvolt = <850000>;
- regulator-max-microvolt = <900000>;
+ regulator-max-microvolt = <950000>;
regulator-boot-on;
regulator-always-on;
};
@@ -150,7 +152,7 @@
reg_vdd_snvs: LDO2 {
regulator-name = "ldo2";
- regulator-min-microvolt = <850000>;
+ regulator-min-microvolt = <800000>;
regulator-max-microvolt = <900000>;
regulator-boot-on;
regulator-always-on;
diff --git a/arch/arm64/boot/dts/qcom/sm8250.dtsi b/arch/arm64/boot/dts/qcom/sm8250.dtsi
index 8c15d9fed08f..d12e4cbfc852 100644
--- a/arch/arm64/boot/dts/qcom/sm8250.dtsi
+++ b/arch/arm64/boot/dts/qcom/sm8250.dtsi
@@ -2590,9 +2590,10 @@
power-domains = <&dispcc MDSS_GDSC>;
clocks = <&dispcc DISP_CC_MDSS_AHB_CLK>,
+ <&gcc GCC_DISP_HF_AXI_CLK>,
<&gcc GCC_DISP_SF_AXI_CLK>,
<&dispcc DISP_CC_MDSS_MDP_CLK>;
- clock-names = "iface", "nrt_bus", "core";
+ clock-names = "iface", "bus", "nrt_bus", "core";
assigned-clocks = <&dispcc DISP_CC_MDSS_MDP_CLK>;
assigned-clock-rates = <460000000>;
diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig
index 156d96afbbfc..545197bc0501 100644
--- a/arch/arm64/configs/defconfig
+++ b/arch/arm64/configs/defconfig
@@ -245,7 +245,6 @@ CONFIG_DEVTMPFS_MOUNT=y
CONFIG_FW_LOADER_USER_HELPER=y
CONFIG_FW_LOADER_USER_HELPER_FALLBACK=y
CONFIG_HISILICON_LPC=y
-CONFIG_SIMPLE_PM_BUS=y
CONFIG_FSL_MC_BUS=y
CONFIG_TEGRA_ACONNECT=m
CONFIG_GNSS=m
diff --git a/arch/arm64/include/asm/arch_timer.h b/arch/arm64/include/asm/arch_timer.h
index 88d20f04c64a..519ac1f7f859 100644
--- a/arch/arm64/include/asm/arch_timer.h
+++ b/arch/arm64/include/asm/arch_timer.h
@@ -32,7 +32,7 @@
({ \
const struct arch_timer_erratum_workaround *__wa; \
__wa = __this_cpu_read(timer_unstable_counter_workaround); \
- (__wa && __wa->h) ? __wa->h : arch_timer_##h; \
+ (__wa && __wa->h) ? ({ isb(); __wa->h;}) : arch_timer_##h; \
})
#else
@@ -52,8 +52,6 @@ struct arch_timer_erratum_workaround {
enum arch_timer_erratum_match_type match_type;
const void *id;
const char *desc;
- u32 (*read_cntp_tval_el0)(void);
- u32 (*read_cntv_tval_el0)(void);
u64 (*read_cntpct_el0)(void);
u64 (*read_cntvct_el0)(void);
int (*set_next_event_phys)(unsigned long, struct clock_event_device *);
@@ -64,24 +62,15 @@ struct arch_timer_erratum_workaround {
DECLARE_PER_CPU(const struct arch_timer_erratum_workaround *,
timer_unstable_counter_workaround);
-/* inline sysreg accessors that make erratum_handler() work */
-static inline notrace u32 arch_timer_read_cntp_tval_el0(void)
-{
- return read_sysreg(cntp_tval_el0);
-}
-
-static inline notrace u32 arch_timer_read_cntv_tval_el0(void)
-{
- return read_sysreg(cntv_tval_el0);
-}
-
static inline notrace u64 arch_timer_read_cntpct_el0(void)
{
+ isb();
return read_sysreg(cntpct_el0);
}
static inline notrace u64 arch_timer_read_cntvct_el0(void)
{
+ isb();
return read_sysreg(cntvct_el0);
}
@@ -102,51 +91,58 @@ static inline notrace u64 arch_timer_read_cntvct_el0(void)
* the code.
*/
static __always_inline
-void arch_timer_reg_write_cp15(int access, enum arch_timer_reg reg, u32 val)
+void arch_timer_reg_write_cp15(int access, enum arch_timer_reg reg, u64 val)
{
if (access == ARCH_TIMER_PHYS_ACCESS) {
switch (reg) {
case ARCH_TIMER_REG_CTRL:
write_sysreg(val, cntp_ctl_el0);
+ isb();
break;
- case ARCH_TIMER_REG_TVAL:
- write_sysreg(val, cntp_tval_el0);
+ case ARCH_TIMER_REG_CVAL:
+ write_sysreg(val, cntp_cval_el0);
break;
+ default:
+ BUILD_BUG();
}
} else if (access == ARCH_TIMER_VIRT_ACCESS) {
switch (reg) {
case ARCH_TIMER_REG_CTRL:
write_sysreg(val, cntv_ctl_el0);
+ isb();
break;
- case ARCH_TIMER_REG_TVAL:
- write_sysreg(val, cntv_tval_el0);
+ case ARCH_TIMER_REG_CVAL:
+ write_sysreg(val, cntv_cval_el0);
break;
+ default:
+ BUILD_BUG();
}
+ } else {
+ BUILD_BUG();
}
-
- isb();
}
static __always_inline
-u32 arch_timer_reg_read_cp15(int access, enum arch_timer_reg reg)
+u64 arch_timer_reg_read_cp15(int access, enum arch_timer_reg reg)
{
if (access == ARCH_TIMER_PHYS_ACCESS) {
switch (reg) {
case ARCH_TIMER_REG_CTRL:
return read_sysreg(cntp_ctl_el0);
- case ARCH_TIMER_REG_TVAL:
- return arch_timer_reg_read_stable(cntp_tval_el0);
+ default:
+ BUILD_BUG();
}
} else if (access == ARCH_TIMER_VIRT_ACCESS) {
switch (reg) {
case ARCH_TIMER_REG_CTRL:
return read_sysreg(cntv_ctl_el0);
- case ARCH_TIMER_REG_TVAL:
- return arch_timer_reg_read_stable(cntv_tval_el0);
+ default:
+ BUILD_BUG();
}
}
- BUG();
+ BUILD_BUG();
+ unreachable();
}
static inline u32 arch_timer_get_cntfrq(void)
@@ -169,7 +165,6 @@ static __always_inline u64 __arch_counter_get_cntpct_stable(void)
{
u64 cnt;
- isb();
cnt = arch_timer_reg_read_stable(cntpct_el0);
arch_counter_enforce_ordering(cnt);
return cnt;
@@ -189,7 +184,6 @@ static __always_inline u64 __arch_counter_get_cntvct_stable(void)
{
u64 cnt;
- isb();
cnt = arch_timer_reg_read_stable(cntvct_el0);
arch_counter_enforce_ordering(cnt);
return cnt;
diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
index ee2bdc1b9f5b..55ca034238ea 100644
--- a/arch/arm64/include/asm/processor.h
+++ b/arch/arm64/include/asm/processor.h
@@ -257,7 +257,7 @@ struct task_struct;
/* Free all resources held by a thread. */
extern void release_thread(struct task_struct *);
-unsigned long get_wchan(struct task_struct *p);
+unsigned long __get_wchan(struct task_struct *p);
void update_sctlr_el1(u64 sctlr);
diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h
index 3cb206aea3db..6bdb5f5db438 100644
--- a/arch/arm64/include/asm/unistd.h
+++ b/arch/arm64/include/asm/unistd.h
@@ -38,7 +38,7 @@
#define __ARM_NR_compat_set_tls (__ARM_NR_COMPAT_BASE + 5)
#define __ARM_NR_COMPAT_END (__ARM_NR_COMPAT_BASE + 0x800)
-#define __NR_compat_syscalls 449
+#define __NR_compat_syscalls 450
#endif
#define __ARCH_WANT_SYS_CLONE
diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h
index 844f6ae58662..41ea1195e44b 100644
--- a/arch/arm64/include/asm/unistd32.h
+++ b/arch/arm64/include/asm/unistd32.h
@@ -903,6 +903,8 @@ __SYSCALL(__NR_landlock_add_rule, sys_landlock_add_rule)
__SYSCALL(__NR_landlock_restrict_self, sys_landlock_restrict_self)
#define __NR_process_mrelease 448
__SYSCALL(__NR_process_mrelease, sys_process_mrelease)
+#define __NR_futex_waitv 449
+__SYSCALL(__NR_futex_waitv, sys_futex_waitv)
/*
* Please add new compat syscalls above this comment and update
diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c
index 32f9796c4ffe..f7408edf8571 100644
--- a/arch/arm64/kernel/entry-common.c
+++ b/arch/arm64/kernel/entry-common.c
@@ -17,6 +17,7 @@
#include <asm/daifflags.h>
#include <asm/esr.h>
#include <asm/exception.h>
+#include <asm/irq_regs.h>
#include <asm/kprobes.h>
#include <asm/mmu.h>
#include <asm/processor.h>
@@ -219,22 +220,6 @@ static void noinstr arm64_exit_el1_dbg(struct pt_regs *regs)
lockdep_hardirqs_on(CALLER_ADDR0);
}
-static void noinstr enter_el1_irq_or_nmi(struct pt_regs *regs)
-{
- if (IS_ENABLED(CONFIG_ARM64_PSEUDO_NMI) && !interrupts_enabled(regs))
- arm64_enter_nmi(regs);
- else
- enter_from_kernel_mode(regs);
-}
-
-static void noinstr exit_el1_irq_or_nmi(struct pt_regs *regs)
-{
- if (IS_ENABLED(CONFIG_ARM64_PSEUDO_NMI) && !interrupts_enabled(regs))
- arm64_exit_nmi(regs);
- else
- exit_to_kernel_mode(regs);
-}
-
static void __sched arm64_preempt_schedule_irq(void)
{
lockdep_assert_irqs_disabled();
@@ -263,10 +248,14 @@ static void __sched arm64_preempt_schedule_irq(void)
static void do_interrupt_handler(struct pt_regs *regs,
void (*handler)(struct pt_regs *))
{
+ struct pt_regs *old_regs = set_irq_regs(regs);
+
if (on_thread_stack())
call_on_irq_stack(regs, handler);
else
handler(regs);
+
+ set_irq_regs(old_regs);
}
extern void (*handle_arch_irq)(struct pt_regs *);
@@ -432,13 +421,22 @@ asmlinkage void noinstr el1h_64_sync_handler(struct pt_regs *regs)
}
}
-static void noinstr el1_interrupt(struct pt_regs *regs,
- void (*handler)(struct pt_regs *))
+static __always_inline void __el1_pnmi(struct pt_regs *regs,
+ void (*handler)(struct pt_regs *))
{
- write_sysreg(DAIF_PROCCTX_NOIRQ, daif);
+ arm64_enter_nmi(regs);
+ do_interrupt_handler(regs, handler);
+ arm64_exit_nmi(regs);
+}
+
+static __always_inline void __el1_irq(struct pt_regs *regs,
+ void (*handler)(struct pt_regs *))
+{
+ enter_from_kernel_mode(regs);
- enter_el1_irq_or_nmi(regs);
+ irq_enter_rcu();
do_interrupt_handler(regs, handler);
+ irq_exit_rcu();
/*
* Note: thread_info::preempt_count includes both thread_info::count
@@ -449,7 +447,17 @@ static void noinstr el1_interrupt(struct pt_regs *regs,
READ_ONCE(current_thread_info()->preempt_count) == 0)
arm64_preempt_schedule_irq();
- exit_el1_irq_or_nmi(regs);
+ exit_to_kernel_mode(regs);
+}
+static void noinstr el1_interrupt(struct pt_regs *regs,
+ void (*handler)(struct pt_regs *))
+{
+ write_sysreg(DAIF_PROCCTX_NOIRQ, daif);
+
+ if (IS_ENABLED(CONFIG_ARM64_PSEUDO_NMI) && !interrupts_enabled(regs))
+ __el1_pnmi(regs, handler);
+ else
+ __el1_irq(regs, handler);
}
asmlinkage void noinstr el1h_64_irq_handler(struct pt_regs *regs)
@@ -667,7 +675,9 @@ static void noinstr el0_interrupt(struct pt_regs *regs,
if (regs->pc & BIT(55))
arm64_apply_bp_hardening();
+ irq_enter_rcu();
do_interrupt_handler(regs, handler);
+ irq_exit_rcu();
exit_to_user_mode(regs);
}
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index 40adb8cdbf5a..aacf2f5559a8 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -528,13 +528,11 @@ __notrace_funcgraph struct task_struct *__switch_to(struct task_struct *prev,
return last;
}
-unsigned long get_wchan(struct task_struct *p)
+unsigned long __get_wchan(struct task_struct *p)
{
struct stackframe frame;
unsigned long stack_page, ret = 0;
int count = 0;
- if (!p || p == current || task_is_running(p))
- return 0;
stack_page = (unsigned long)try_get_task_stack(p);
if (!stack_page)
diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c
index 4dd14a6620c1..9ab78ad826e2 100644
--- a/arch/arm64/kernel/topology.c
+++ b/arch/arm64/kernel/topology.c
@@ -103,6 +103,8 @@ int __init parse_acpi_topology(void)
cpu_topology[cpu].thread_id = -1;
cpu_topology[cpu].core_id = topology_id;
}
+ topology_id = find_acpi_cpu_topology_cluster(cpu);
+ cpu_topology[cpu].cluster_id = topology_id;
topology_id = find_acpi_cpu_topology_package(cpu);
cpu_topology[cpu].package_id = topology_id;
diff --git a/arch/arm64/kvm/hyp/include/nvhe/gfp.h b/arch/arm64/kvm/hyp/include/nvhe/gfp.h
index fb0f523d1492..0a048dc06a7d 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/gfp.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/gfp.h
@@ -24,6 +24,7 @@ struct hyp_pool {
/* Allocation */
void *hyp_alloc_pages(struct hyp_pool *pool, unsigned short order);
+void hyp_split_page(struct hyp_page *page);
void hyp_get_page(struct hyp_pool *pool, void *addr);
void hyp_put_page(struct hyp_pool *pool, void *addr);
diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
index bacd493a4eac..34eeb524b686 100644
--- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c
+++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
@@ -35,7 +35,18 @@ const u8 pkvm_hyp_id = 1;
static void *host_s2_zalloc_pages_exact(size_t size)
{
- return hyp_alloc_pages(&host_s2_pool, get_order(size));
+ void *addr = hyp_alloc_pages(&host_s2_pool, get_order(size));
+
+ hyp_split_page(hyp_virt_to_page(addr));
+
+ /*
+ * The size of concatenated PGDs is always a power of two of PAGE_SIZE,
+ * so there should be no need to free any of the tail pages to make the
+ * allocation exact.
+ */
+ WARN_ON(size != (PAGE_SIZE << get_order(size)));
+
+ return addr;
}
static void *host_s2_zalloc_page(void *pool)
diff --git a/arch/arm64/kvm/hyp/nvhe/page_alloc.c b/arch/arm64/kvm/hyp/nvhe/page_alloc.c
index 41fc25bdfb34..0bd7701ad1df 100644
--- a/arch/arm64/kvm/hyp/nvhe/page_alloc.c
+++ b/arch/arm64/kvm/hyp/nvhe/page_alloc.c
@@ -152,6 +152,7 @@ static inline void hyp_page_ref_inc(struct hyp_page *p)
static inline int hyp_page_ref_dec_and_test(struct hyp_page *p)
{
+ BUG_ON(!p->refcount);
p->refcount--;
return (p->refcount == 0);
}
@@ -193,6 +194,20 @@ void hyp_get_page(struct hyp_pool *pool, void *addr)
hyp_spin_unlock(&pool->lock);
}
+void hyp_split_page(struct hyp_page *p)
+{
+ unsigned short order = p->order;
+ unsigned int i;
+
+ p->order = 0;
+ for (i = 1; i < (1 << order); i++) {
+ struct hyp_page *tail = p + i;
+
+ tail->order = 0;
+ hyp_set_page_refcounted(tail);
+ }
+}
+
void *hyp_alloc_pages(struct hyp_pool *pool, unsigned short order)
{
unsigned short i = order;
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index 1a94a7ca48f2..69bd1732a299 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -1529,8 +1529,10 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
* when updating the PG_mte_tagged page flag, see
* sanitise_mte_tags for more details.
*/
- if (kvm_has_mte(kvm) && vma->vm_flags & VM_SHARED)
- return -EINVAL;
+ if (kvm_has_mte(kvm) && vma->vm_flags & VM_SHARED) {
+ ret = -EINVAL;
+ break;
+ }
if (vma->vm_flags & VM_PFNMAP) {
/* IO region dirty page logging not allowed */
diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c
index 23505fc35324..a8158c948966 100644
--- a/arch/arm64/mm/hugetlbpage.c
+++ b/arch/arm64/mm/hugetlbpage.c
@@ -43,7 +43,7 @@ void __init arm64_hugetlb_cma_reserve(void)
#ifdef CONFIG_ARM64_4K_PAGES
order = PUD_SHIFT - PAGE_SHIFT;
#else
- order = CONT_PMD_SHIFT + PMD_SHIFT - PAGE_SHIFT;
+ order = CONT_PMD_SHIFT - PAGE_SHIFT;
#endif
/*
* HugeTLB CMA reservation is required for gigantic
diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index 41c23f474ea6..803e7773fa86 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -1136,6 +1136,11 @@ out:
return prog;
}
+u64 bpf_jit_alloc_exec_limit(void)
+{
+ return BPF_JIT_REGION_SIZE;
+}
+
void *bpf_jit_alloc_exec(unsigned long size)
{
return __vmalloc_node_range(size, PAGE_SIZE, BPF_JIT_REGION_START,
diff --git a/arch/csky/Kconfig b/arch/csky/Kconfig
index 9d4d898df76b..aed2b3e734ee 100644
--- a/arch/csky/Kconfig
+++ b/arch/csky/Kconfig
@@ -8,7 +8,7 @@ config CSKY
select ARCH_HAS_SYNC_DMA_FOR_DEVICE
select ARCH_USE_BUILTIN_BSWAP
select ARCH_USE_QUEUED_RWLOCKS
- select ARCH_WANT_FRAME_POINTERS if !CPU_CK610
+ select ARCH_WANT_FRAME_POINTERS if !CPU_CK610 && $(cc-option,-mbacktrace)
select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT
select COMMON_CLK
select CLKSRC_MMIO
@@ -17,7 +17,6 @@ config CSKY
select CSKY_APB_INTC
select DMA_DIRECT_REMAP
select IRQ_DOMAIN
- select HANDLE_DOMAIN_IRQ
select DW_APB_TIMER_OF
select GENERIC_IOREMAP
select GENERIC_LIB_ASHLDI3
@@ -241,6 +240,7 @@ endchoice
menuconfig HAVE_TCM
bool "Tightly-Coupled/Sram Memory"
+ depends on !COMPILE_TEST
help
The implementation are not only used by TCM (Tightly-Coupled Meory)
but also used by sram on SOC bus. It follow existed linux tcm
diff --git a/arch/csky/include/asm/bitops.h b/arch/csky/include/asm/bitops.h
index 91818787d860..02b72a000767 100644
--- a/arch/csky/include/asm/bitops.h
+++ b/arch/csky/include/asm/bitops.h
@@ -74,7 +74,6 @@ static __always_inline unsigned long __fls(unsigned long x)
* bug fix, why only could use atomic!!!!
*/
#include <asm-generic/bitops/non-atomic.h>
-#define __clear_bit(nr, vaddr) clear_bit(nr, vaddr)
#include <asm-generic/bitops/le.h>
#include <asm-generic/bitops/ext2-atomic.h>
diff --git a/arch/csky/include/asm/processor.h b/arch/csky/include/asm/processor.h
index 9e933021fe8e..817dd60ff152 100644
--- a/arch/csky/include/asm/processor.h
+++ b/arch/csky/include/asm/processor.h
@@ -81,7 +81,7 @@ static inline void release_thread(struct task_struct *dead_task)
extern int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags);
-unsigned long get_wchan(struct task_struct *p);
+unsigned long __get_wchan(struct task_struct *p);
#define KSTK_EIP(tsk) (task_pt_regs(tsk)->pc)
#define KSTK_ESP(tsk) (task_pt_regs(tsk)->usp)
diff --git a/arch/csky/kernel/entry.S b/arch/csky/kernel/entry.S
index 00e3c8ebf9b8..a4ababf25e24 100644
--- a/arch/csky/kernel/entry.S
+++ b/arch/csky/kernel/entry.S
@@ -249,7 +249,7 @@ ENTRY(csky_irq)
mov a0, sp
- jbsr csky_do_IRQ
+ jbsr generic_handle_arch_irq
jmpi ret_from_exception
diff --git a/arch/csky/kernel/irq.c b/arch/csky/kernel/irq.c
index 03a1930f1cbb..fcdaf3156286 100644
--- a/arch/csky/kernel/irq.c
+++ b/arch/csky/kernel/irq.c
@@ -15,8 +15,3 @@ void __init init_IRQ(void)
setup_smp_ipi();
#endif
}
-
-asmlinkage void __irq_entry csky_do_IRQ(struct pt_regs *regs)
-{
- handle_arch_irq(regs);
-}
diff --git a/arch/csky/kernel/ptrace.c b/arch/csky/kernel/ptrace.c
index 0105ac81b432..1a5f54e0d272 100644
--- a/arch/csky/kernel/ptrace.c
+++ b/arch/csky/kernel/ptrace.c
@@ -99,7 +99,8 @@ static int gpr_set(struct task_struct *target,
if (ret)
return ret;
- regs.sr = task_pt_regs(target)->sr;
+ /* BIT(0) of regs.sr is Condition Code/Carry bit */
+ regs.sr = (regs.sr & BIT(0)) | (task_pt_regs(target)->sr & ~BIT(0));
#ifdef CONFIG_CPU_HAS_HILO
regs.dcsr = task_pt_regs(target)->dcsr;
#endif
diff --git a/arch/csky/kernel/signal.c b/arch/csky/kernel/signal.c
index bc4238b9f709..c7b763d2f526 100644
--- a/arch/csky/kernel/signal.c
+++ b/arch/csky/kernel/signal.c
@@ -52,10 +52,14 @@ static long restore_sigcontext(struct pt_regs *regs,
struct sigcontext __user *sc)
{
int err = 0;
+ unsigned long sr = regs->sr;
/* sc_pt_regs is structured the same as the start of pt_regs */
err |= __copy_from_user(regs, &sc->sc_pt_regs, sizeof(struct pt_regs));
+ /* BIT(0) of regs->sr is Condition Code/Carry bit */
+ regs->sr = (sr & ~1) | (regs->sr & 1);
+
/* Restore the floating-point state. */
err |= restore_fpu_state(sc);
diff --git a/arch/csky/kernel/stacktrace.c b/arch/csky/kernel/stacktrace.c
index 1b280ef08004..9f78f5d21511 100644
--- a/arch/csky/kernel/stacktrace.c
+++ b/arch/csky/kernel/stacktrace.c
@@ -111,12 +111,11 @@ static bool save_wchan(unsigned long pc, void *arg)
return false;
}
-unsigned long get_wchan(struct task_struct *task)
+unsigned long __get_wchan(struct task_struct *task)
{
unsigned long pc = 0;
- if (likely(task && task != current && !task_is_running(task)))
- walk_stackframe(task, NULL, save_wchan, &pc);
+ walk_stackframe(task, NULL, save_wchan, &pc);
return pc;
}
diff --git a/arch/h8300/include/asm/irq.h b/arch/h8300/include/asm/irq.h
index 5fc5b436dde9..776cf06d7a59 100644
--- a/arch/h8300/include/asm/irq.h
+++ b/arch/h8300/include/asm/irq.h
@@ -2,8 +2,6 @@
#ifndef _H8300_IRQ_H_
#define _H8300_IRQ_H_
-#include <linux/irqchip.h>
-
#if defined(CONFIG_CPU_H8300H)
#define NR_IRQS 64
#define IRQ_CHIP h8300h_irq_chip
diff --git a/arch/h8300/include/asm/processor.h b/arch/h8300/include/asm/processor.h
index a060b41b2d31..141a23eb62b7 100644
--- a/arch/h8300/include/asm/processor.h
+++ b/arch/h8300/include/asm/processor.h
@@ -105,7 +105,7 @@ static inline void release_thread(struct task_struct *dead_task)
{
}
-unsigned long get_wchan(struct task_struct *p);
+unsigned long __get_wchan(struct task_struct *p);
#define KSTK_EIP(tsk) \
({ \
diff --git a/arch/h8300/kernel/irq.c b/arch/h8300/kernel/irq.c
index 834e4d7b1bcf..8ad6d702cd0b 100644
--- a/arch/h8300/kernel/irq.c
+++ b/arch/h8300/kernel/irq.c
@@ -8,6 +8,7 @@
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/irq.h>
+#include <linux/irqchip.h>
#include <linux/irqdomain.h>
#include <linux/of_irq.h>
#include <asm/traps.h>
diff --git a/arch/h8300/kernel/process.c b/arch/h8300/kernel/process.c
index 2ac27e4248a4..8833fa4f5d51 100644
--- a/arch/h8300/kernel/process.c
+++ b/arch/h8300/kernel/process.c
@@ -128,15 +128,12 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
return 0;
}
-unsigned long get_wchan(struct task_struct *p)
+unsigned long __get_wchan(struct task_struct *p)
{
unsigned long fp, pc;
unsigned long stack_page;
int count = 0;
- if (!p || p == current || task_is_running(p))
- return 0;
-
stack_page = (unsigned long)p;
fp = ((struct pt_regs *)p->thread.ksp)->er6;
do {
diff --git a/arch/hexagon/include/asm/processor.h b/arch/hexagon/include/asm/processor.h
index 9f0cc99420be..615f7e49968e 100644
--- a/arch/hexagon/include/asm/processor.h
+++ b/arch/hexagon/include/asm/processor.h
@@ -64,7 +64,7 @@ struct thread_struct {
extern void release_thread(struct task_struct *dead_task);
/* Get wait channel for task P. */
-extern unsigned long get_wchan(struct task_struct *p);
+extern unsigned long __get_wchan(struct task_struct *p);
/* The following stuff is pretty HEXAGON specific. */
diff --git a/arch/hexagon/kernel/process.c b/arch/hexagon/kernel/process.c
index 6a6835fb4242..232dfd8956aa 100644
--- a/arch/hexagon/kernel/process.c
+++ b/arch/hexagon/kernel/process.c
@@ -130,13 +130,11 @@ void flush_thread(void)
* is an identification of the point at which the scheduler
* was invoked by a blocked thread.
*/
-unsigned long get_wchan(struct task_struct *p)
+unsigned long __get_wchan(struct task_struct *p)
{
unsigned long fp, pc;
unsigned long stack_page;
int count = 0;
- if (!p || p == current || task_is_running(p))
- return 0;
stack_page = (unsigned long)task_stack_page(p);
fp = ((struct hexagon_switch_stack *)p->thread.switch_sp)->fp;
diff --git a/arch/ia64/include/asm/processor.h b/arch/ia64/include/asm/processor.h
index 2d8bcdc27d7f..45365c2ef598 100644
--- a/arch/ia64/include/asm/processor.h
+++ b/arch/ia64/include/asm/processor.h
@@ -330,7 +330,7 @@ struct task_struct;
#define release_thread(dead_task)
/* Get wait channel for task P. */
-extern unsigned long get_wchan (struct task_struct *p);
+extern unsigned long __get_wchan (struct task_struct *p);
/* Return instruction pointer of blocked task TSK. */
#define KSTK_EIP(tsk) \
diff --git a/arch/ia64/include/asm/spinlock.h b/arch/ia64/include/asm/spinlock.h
index 864775970c50..0e5c1ad3239c 100644
--- a/arch/ia64/include/asm/spinlock.h
+++ b/arch/ia64/include/asm/spinlock.h
@@ -124,18 +124,13 @@ static __always_inline void arch_spin_unlock(arch_spinlock_t *lock)
__ticket_spin_unlock(lock);
}
-static __always_inline void arch_spin_lock_flags(arch_spinlock_t *lock,
- unsigned long flags)
-{
- arch_spin_lock(lock);
-}
-#define arch_spin_lock_flags arch_spin_lock_flags
-
#ifdef ASM_SUPPORTED
static __always_inline void
-arch_read_lock_flags(arch_rwlock_t *lock, unsigned long flags)
+arch_read_lock(arch_rwlock_t *lock)
{
+ unsigned long flags = 0;
+
__asm__ __volatile__ (
"tbit.nz p6, p0 = %1,%2\n"
"br.few 3f\n"
@@ -157,13 +152,8 @@ arch_read_lock_flags(arch_rwlock_t *lock, unsigned long flags)
: "p6", "p7", "r2", "memory");
}
-#define arch_read_lock_flags arch_read_lock_flags
-#define arch_read_lock(lock) arch_read_lock_flags(lock, 0)
-
#else /* !ASM_SUPPORTED */
-#define arch_read_lock_flags(rw, flags) arch_read_lock(rw)
-
#define arch_read_lock(rw) \
do { \
arch_rwlock_t *__read_lock_ptr = (rw); \
@@ -186,8 +176,10 @@ do { \
#ifdef ASM_SUPPORTED
static __always_inline void
-arch_write_lock_flags(arch_rwlock_t *lock, unsigned long flags)
+arch_write_lock(arch_rwlock_t *lock)
{
+ unsigned long flags = 0;
+
__asm__ __volatile__ (
"tbit.nz p6, p0 = %1, %2\n"
"mov ar.ccv = r0\n"
@@ -210,9 +202,6 @@ arch_write_lock_flags(arch_rwlock_t *lock, unsigned long flags)
: "ar.ccv", "p6", "p7", "r2", "r29", "memory");
}
-#define arch_write_lock_flags arch_write_lock_flags
-#define arch_write_lock(rw) arch_write_lock_flags(rw, 0)
-
#define arch_write_trylock(rw) \
({ \
register long result; \
diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c
index e56d63f4abf9..834df24a88f1 100644
--- a/arch/ia64/kernel/process.c
+++ b/arch/ia64/kernel/process.c
@@ -523,15 +523,12 @@ exit_thread (struct task_struct *tsk)
}
unsigned long
-get_wchan (struct task_struct *p)
+__get_wchan (struct task_struct *p)
{
struct unw_frame_info info;
unsigned long ip;
int count = 0;
- if (!p || p == current || task_is_running(p))
- return 0;
-
/*
* Note: p may not be a blocked task (it could be current or
* another process running on some other CPU. Rather than
diff --git a/arch/m68k/emu/nfblock.c b/arch/m68k/emu/nfblock.c
index 9a8394e96388..9c57b245dc12 100644
--- a/arch/m68k/emu/nfblock.c
+++ b/arch/m68k/emu/nfblock.c
@@ -58,7 +58,7 @@ struct nfhd_device {
struct gendisk *disk;
};
-static blk_qc_t nfhd_submit_bio(struct bio *bio)
+static void nfhd_submit_bio(struct bio *bio)
{
struct nfhd_device *dev = bio->bi_bdev->bd_disk->private_data;
struct bio_vec bvec;
@@ -76,7 +76,6 @@ static blk_qc_t nfhd_submit_bio(struct bio *bio)
sec += len;
}
bio_endio(bio);
- return BLK_QC_T_NONE;
}
static int nfhd_getgeo(struct block_device *bdev, struct hd_geometry *geo)
@@ -100,6 +99,7 @@ static int __init nfhd_init_one(int id, u32 blocks, u32 bsize)
{
struct nfhd_device *dev;
int dev_id = id - NFHD_DEV_OFFSET;
+ int err = -ENOMEM;
pr_info("nfhd%u: found device with %u blocks (%u bytes)\n", dev_id,
blocks, bsize);
@@ -130,16 +130,20 @@ static int __init nfhd_init_one(int id, u32 blocks, u32 bsize)
sprintf(dev->disk->disk_name, "nfhd%u", dev_id);
set_capacity(dev->disk, (sector_t)blocks * (bsize / 512));
blk_queue_logical_block_size(dev->disk->queue, bsize);
- add_disk(dev->disk);
+ err = add_disk(dev->disk);
+ if (err)
+ goto out_cleanup_disk;
list_add_tail(&dev->list, &nfhd_list);
return 0;
+out_cleanup_disk:
+ blk_cleanup_disk(dev->disk);
free_dev:
kfree(dev);
out:
- return -ENOMEM;
+ return err;
}
static int __init nfhd_init(void)
diff --git a/arch/m68k/include/asm/cacheflush_mm.h b/arch/m68k/include/asm/cacheflush_mm.h
index 1ac55e7b47f0..8ab46625ddd3 100644
--- a/arch/m68k/include/asm/cacheflush_mm.h
+++ b/arch/m68k/include/asm/cacheflush_mm.h
@@ -250,6 +250,7 @@ static inline void __flush_page_to_ram(void *vaddr)
#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
#define flush_dcache_page(page) __flush_page_to_ram(page_address(page))
+void flush_dcache_folio(struct folio *folio);
#define flush_dcache_mmap_lock(mapping) do { } while (0)
#define flush_dcache_mmap_unlock(mapping) do { } while (0)
#define flush_icache_page(vma, page) __flush_page_to_ram(page_address(page))
diff --git a/arch/m68k/include/asm/processor.h b/arch/m68k/include/asm/processor.h
index f4d82c619a5c..ffeda9aa526a 100644
--- a/arch/m68k/include/asm/processor.h
+++ b/arch/m68k/include/asm/processor.h
@@ -150,7 +150,7 @@ static inline void release_thread(struct task_struct *dead_task)
{
}
-unsigned long get_wchan(struct task_struct *p);
+unsigned long __get_wchan(struct task_struct *p);
#define KSTK_EIP(tsk) \
({ \
diff --git a/arch/m68k/kernel/process.c b/arch/m68k/kernel/process.c
index 1ab692b952cd..a6030dbaa089 100644
--- a/arch/m68k/kernel/process.c
+++ b/arch/m68k/kernel/process.c
@@ -263,13 +263,11 @@ int dump_fpu (struct pt_regs *regs, struct user_m68kfp_struct *fpu)
}
EXPORT_SYMBOL(dump_fpu);
-unsigned long get_wchan(struct task_struct *p)
+unsigned long __get_wchan(struct task_struct *p)
{
unsigned long fp, pc;
unsigned long stack_page;
int count = 0;
- if (!p || p == current || task_is_running(p))
- return 0;
stack_page = (unsigned long)task_stack_page(p);
fp = ((struct switch_stack *)p->thread.ksp)->a6;
diff --git a/arch/microblaze/include/asm/processor.h b/arch/microblaze/include/asm/processor.h
index 06c6e493590a..7e9e92670df3 100644
--- a/arch/microblaze/include/asm/processor.h
+++ b/arch/microblaze/include/asm/processor.h
@@ -68,7 +68,7 @@ static inline void release_thread(struct task_struct *dead_task)
{
}
-unsigned long get_wchan(struct task_struct *p);
+unsigned long __get_wchan(struct task_struct *p);
/* The size allocated for kernel stacks. This _must_ be a power of two! */
# define KERNEL_STACK_SIZE 0x2000
diff --git a/arch/microblaze/kernel/process.c b/arch/microblaze/kernel/process.c
index 62aa237180b6..5e2b91c1e8ce 100644
--- a/arch/microblaze/kernel/process.c
+++ b/arch/microblaze/kernel/process.c
@@ -112,7 +112,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, unsigned long arg,
return 0;
}
-unsigned long get_wchan(struct task_struct *p)
+unsigned long __get_wchan(struct task_struct *p)
{
/* TBD (used by procfs) */
return 0;
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 6b8f591c5054..95fc35669860 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -47,7 +47,6 @@ config MIPS
select GENERIC_SMP_IDLE_THREAD
select GENERIC_TIME_VSYSCALL
select GUP_GET_PTE_LOW_HIGH if CPU_MIPS32 && PHYS_ADDR_T_64BIT
- select HANDLE_DOMAIN_IRQ
select HAVE_ARCH_COMPILER_H
select HAVE_ARCH_JUMP_LABEL
select HAVE_ARCH_KGDB if MIPS_FP_SUPPORT
@@ -1782,6 +1781,7 @@ config CPU_BMIPS
select CPU_HAS_PREFETCH
select CPU_SUPPORTS_CPUFREQ
select MIPS_EXTERNAL_TIMER
+ select GENERIC_IRQ_MIGRATION if HOTPLUG_CPU
help
Support for BMIPS32/3300/4350/4380 and BMIPS5000 processors.
diff --git a/arch/mips/cavium-octeon/octeon-irq.c b/arch/mips/cavium-octeon/octeon-irq.c
index be5d4afcd30f..844f882096e6 100644
--- a/arch/mips/cavium-octeon/octeon-irq.c
+++ b/arch/mips/cavium-octeon/octeon-irq.c
@@ -2609,7 +2609,10 @@ static void octeon_irq_ciu3_ip2(void)
else
hw = intsn;
- ret = handle_domain_irq(domain, hw, NULL);
+ irq_enter();
+ ret = generic_handle_domain_irq(domain, hw);
+ irq_exit();
+
if (ret < 0) {
union cvmx_ciu3_iscx_w1c isc_w1c;
u64 isc_w1c_addr = ciu3_addr + CIU3_ISC_W1C(intsn);
diff --git a/arch/mips/include/asm/cacheflush.h b/arch/mips/include/asm/cacheflush.h
index b3dc9c589442..f207388541d5 100644
--- a/arch/mips/include/asm/cacheflush.h
+++ b/arch/mips/include/asm/cacheflush.h
@@ -61,6 +61,8 @@ static inline void flush_dcache_page(struct page *page)
SetPageDcacheDirty(page);
}
+void flush_dcache_folio(struct folio *folio);
+
#define flush_dcache_mmap_lock(mapping) do { } while (0)
#define flush_dcache_mmap_unlock(mapping) do { } while (0)
diff --git a/arch/mips/include/asm/processor.h b/arch/mips/include/asm/processor.h
index 0c3550c82b72..252ed38ce8c5 100644
--- a/arch/mips/include/asm/processor.h
+++ b/arch/mips/include/asm/processor.h
@@ -369,7 +369,7 @@ static inline void flush_thread(void)
{
}
-unsigned long get_wchan(struct task_struct *p);
+unsigned long __get_wchan(struct task_struct *p);
#define __KSTK_TOS(tsk) ((unsigned long)task_stack_page(tsk) + \
THREAD_SIZE - 32 - sizeof(struct pt_regs))
diff --git a/arch/mips/kernel/irq.c b/arch/mips/kernel/irq.c
index d20e002b3246..5e11582fe308 100644
--- a/arch/mips/kernel/irq.c
+++ b/arch/mips/kernel/irq.c
@@ -111,15 +111,9 @@ void __irq_entry do_IRQ(unsigned int irq)
#ifdef CONFIG_IRQ_DOMAIN
void __irq_entry do_domain_IRQ(struct irq_domain *domain, unsigned int hwirq)
{
- struct irq_desc *desc;
-
irq_enter();
check_stack_overflow();
-
- desc = irq_resolve_mapping(domain, hwirq);
- if (likely(desc))
- handle_irq_desc(desc);
-
+ generic_handle_domain_irq(domain, hwirq);
irq_exit();
}
#endif
diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c
index 95aa86fa6077..cbff1b974f88 100644
--- a/arch/mips/kernel/process.c
+++ b/arch/mips/kernel/process.c
@@ -511,7 +511,7 @@ static int __init frame_info_init(void)
/*
* Without schedule() frame info, result given by
- * thread_saved_pc() and get_wchan() are not reliable.
+ * thread_saved_pc() and __get_wchan() are not reliable.
*/
if (schedule_mfi.pc_offset < 0)
printk("Can't analyze schedule() prologue at %p\n", schedule);
@@ -652,9 +652,9 @@ unsigned long unwind_stack(struct task_struct *task, unsigned long *sp,
#endif
/*
- * get_wchan - a maintenance nightmare^W^Wpain in the ass ...
+ * __get_wchan - a maintenance nightmare^W^Wpain in the ass ...
*/
-unsigned long get_wchan(struct task_struct *task)
+unsigned long __get_wchan(struct task_struct *task)
{
unsigned long pc = 0;
#ifdef CONFIG_KALLSYMS
@@ -662,8 +662,6 @@ unsigned long get_wchan(struct task_struct *task)
unsigned long ra = 0;
#endif
- if (!task || task == current || task_is_running(task))
- goto out;
if (!task_stack_page(task))
goto out;
diff --git a/arch/mips/kernel/smp-bmips.c b/arch/mips/kernel/smp-bmips.c
index b6ef5f7312cf..f5d7bfa3472a 100644
--- a/arch/mips/kernel/smp-bmips.c
+++ b/arch/mips/kernel/smp-bmips.c
@@ -26,6 +26,7 @@
#include <linux/bug.h>
#include <linux/kernel.h>
#include <linux/kexec.h>
+#include <linux/irq.h>
#include <asm/time.h>
#include <asm/processor.h>
@@ -373,7 +374,7 @@ static int bmips_cpu_disable(void)
set_cpu_online(cpu, false);
calculate_cpu_foreign_map();
- irq_cpu_offline();
+ irq_migrate_all_off_this_cpu();
clear_c0_status(IE_IRQ5);
local_flush_tlb_all();
diff --git a/arch/mips/loongson64/smp.c b/arch/mips/loongson64/smp.c
index 09ebe84a17fe..660e1de4412a 100644
--- a/arch/mips/loongson64/smp.c
+++ b/arch/mips/loongson64/smp.c
@@ -550,7 +550,6 @@ static int loongson3_cpu_disable(void)
set_cpu_online(cpu, false);
calculate_cpu_foreign_map();
local_irq_save(flags);
- irq_cpu_offline();
clear_c0_status(ST0_IM);
local_irq_restore(flags);
local_flush_tlb_all();
diff --git a/arch/mips/rb532/prom.c b/arch/mips/rb532/prom.c
index 23ad8dd9aa5e..b11693715547 100644
--- a/arch/mips/rb532/prom.c
+++ b/arch/mips/rb532/prom.c
@@ -16,7 +16,6 @@
#include <linux/console.h>
#include <linux/memblock.h>
#include <linux/ioport.h>
-#include <linux/blkdev.h>
#include <asm/bootinfo.h>
#include <asm/mach-rc32434/ddr.h>
diff --git a/arch/mips/sibyte/common/cfe.c b/arch/mips/sibyte/common/cfe.c
index a3323f8dcc1b..1a504294d85f 100644
--- a/arch/mips/sibyte/common/cfe.c
+++ b/arch/mips/sibyte/common/cfe.c
@@ -7,7 +7,6 @@
#include <linux/kernel.h>
#include <linux/linkage.h>
#include <linux/mm.h>
-#include <linux/blkdev.h>
#include <linux/memblock.h>
#include <linux/pm.h>
#include <linux/smp.h>
diff --git a/arch/mips/sibyte/swarm/setup.c b/arch/mips/sibyte/swarm/setup.c
index 538a2791b48c..f07b15dd1c1a 100644
--- a/arch/mips/sibyte/swarm/setup.c
+++ b/arch/mips/sibyte/swarm/setup.c
@@ -11,7 +11,6 @@
#include <linux/spinlock.h>
#include <linux/mm.h>
#include <linux/memblock.h>
-#include <linux/blkdev.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/screen_info.h>
diff --git a/arch/nds32/Kconfig b/arch/nds32/Kconfig
index aea26e739543..4d1421b18734 100644
--- a/arch/nds32/Kconfig
+++ b/arch/nds32/Kconfig
@@ -27,7 +27,6 @@ config NDS32
select GENERIC_LIB_MULDI3
select GENERIC_LIB_UCMPDI2
select GENERIC_TIME_VSYSCALL
- select HANDLE_DOMAIN_IRQ
select HAVE_ARCH_TRACEHOOK
select HAVE_DEBUG_KMEMLEAK
select HAVE_EXIT_THREAD
diff --git a/arch/nds32/include/asm/cacheflush.h b/arch/nds32/include/asm/cacheflush.h
index c2a222ebfa2a..3fc0bb7d6487 100644
--- a/arch/nds32/include/asm/cacheflush.h
+++ b/arch/nds32/include/asm/cacheflush.h
@@ -27,6 +27,7 @@ void flush_cache_vunmap(unsigned long start, unsigned long end);
#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
void flush_dcache_page(struct page *page);
+void flush_dcache_folio(struct folio *folio);
void copy_to_user_page(struct vm_area_struct *vma, struct page *page,
unsigned long vaddr, void *dst, void *src, int len);
void copy_from_user_page(struct vm_area_struct *vma, struct page *page,
diff --git a/arch/nds32/include/asm/processor.h b/arch/nds32/include/asm/processor.h
index b82369c7659d..e6bfc74972bb 100644
--- a/arch/nds32/include/asm/processor.h
+++ b/arch/nds32/include/asm/processor.h
@@ -83,7 +83,7 @@ extern struct task_struct *last_task_used_math;
/* Prepare to copy thread state - unlazy all lazy status */
#define prepare_to_copy(tsk) do { } while (0)
-unsigned long get_wchan(struct task_struct *p);
+unsigned long __get_wchan(struct task_struct *p);
#define cpu_relax() barrier()
diff --git a/arch/nds32/kernel/ftrace.c b/arch/nds32/kernel/ftrace.c
index 0e23e3a8df6b..d55b73b18149 100644
--- a/arch/nds32/kernel/ftrace.c
+++ b/arch/nds32/kernel/ftrace.c
@@ -6,7 +6,7 @@
#ifndef CONFIG_DYNAMIC_FTRACE
extern void (*ftrace_trace_function)(unsigned long, unsigned long,
- struct ftrace_ops*, struct pt_regs*);
+ struct ftrace_ops*, struct ftrace_regs*);
extern void ftrace_graph_caller(void);
noinline void __naked ftrace_stub(unsigned long ip, unsigned long parent_ip,
diff --git a/arch/nds32/kernel/process.c b/arch/nds32/kernel/process.c
index 391895b54d13..49fab9e39cbf 100644
--- a/arch/nds32/kernel/process.c
+++ b/arch/nds32/kernel/process.c
@@ -233,15 +233,12 @@ int dump_fpu(struct pt_regs *regs, elf_fpregset_t * fpu)
EXPORT_SYMBOL(dump_fpu);
-unsigned long get_wchan(struct task_struct *p)
+unsigned long __get_wchan(struct task_struct *p)
{
unsigned long fp, lr;
unsigned long stack_start, stack_end;
int count = 0;
- if (!p || p == current || task_is_running(p))
- return 0;
-
if (IS_ENABLED(CONFIG_FRAME_POINTER)) {
stack_start = (unsigned long)end_of_stack(p);
stack_end = (unsigned long)task_stack_page(p) + THREAD_SIZE;
@@ -258,5 +255,3 @@ unsigned long get_wchan(struct task_struct *p)
}
return 0;
}
-
-EXPORT_SYMBOL(get_wchan);
diff --git a/arch/nios2/include/asm/cacheflush.h b/arch/nios2/include/asm/cacheflush.h
index 18eb9f69f806..1999561b22aa 100644
--- a/arch/nios2/include/asm/cacheflush.h
+++ b/arch/nios2/include/asm/cacheflush.h
@@ -28,7 +28,8 @@ extern void flush_cache_range(struct vm_area_struct *vma, unsigned long start,
extern void flush_cache_page(struct vm_area_struct *vma, unsigned long vmaddr,
unsigned long pfn);
#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
-extern void flush_dcache_page(struct page *page);
+void flush_dcache_page(struct page *page);
+void flush_dcache_folio(struct folio *folio);
extern void flush_icache_range(unsigned long start, unsigned long end);
extern void flush_icache_page(struct vm_area_struct *vma, struct page *page);
diff --git a/arch/nios2/include/asm/irqflags.h b/arch/nios2/include/asm/irqflags.h
index b3ec3e510706..25acf27862f9 100644
--- a/arch/nios2/include/asm/irqflags.h
+++ b/arch/nios2/include/asm/irqflags.h
@@ -9,7 +9,7 @@
static inline unsigned long arch_local_save_flags(void)
{
- return RDCTL(CTL_STATUS);
+ return RDCTL(CTL_FSTATUS);
}
/*
@@ -18,7 +18,7 @@ static inline unsigned long arch_local_save_flags(void)
*/
static inline void arch_local_irq_restore(unsigned long flags)
{
- WRCTL(CTL_STATUS, flags);
+ WRCTL(CTL_FSTATUS, flags);
}
static inline void arch_local_irq_disable(void)
diff --git a/arch/nios2/include/asm/processor.h b/arch/nios2/include/asm/processor.h
index 94bcb86f679f..b8125dfbcad2 100644
--- a/arch/nios2/include/asm/processor.h
+++ b/arch/nios2/include/asm/processor.h
@@ -69,7 +69,7 @@ static inline void release_thread(struct task_struct *dead_task)
{
}
-extern unsigned long get_wchan(struct task_struct *p);
+extern unsigned long __get_wchan(struct task_struct *p);
#define task_pt_regs(p) \
((struct pt_regs *)(THREAD_SIZE + task_stack_page(p)) - 1)
diff --git a/arch/nios2/include/asm/registers.h b/arch/nios2/include/asm/registers.h
index 183c720e454d..95b67dd16f81 100644
--- a/arch/nios2/include/asm/registers.h
+++ b/arch/nios2/include/asm/registers.h
@@ -11,7 +11,7 @@
#endif
/* control register numbers */
-#define CTL_STATUS 0
+#define CTL_FSTATUS 0
#define CTL_ESTATUS 1
#define CTL_BSTATUS 2
#define CTL_IENABLE 3
diff --git a/arch/nios2/kernel/process.c b/arch/nios2/kernel/process.c
index 9ff37ba2bb60..f8ea522a1588 100644
--- a/arch/nios2/kernel/process.c
+++ b/arch/nios2/kernel/process.c
@@ -217,15 +217,12 @@ void dump(struct pt_regs *fp)
pr_emerg("\n\n");
}
-unsigned long get_wchan(struct task_struct *p)
+unsigned long __get_wchan(struct task_struct *p)
{
unsigned long fp, pc;
unsigned long stack_page;
int count = 0;
- if (!p || p == current || task_is_running(p))
- return 0;
-
stack_page = (unsigned long)p;
fp = ((struct switch_stack *)p->thread.ksp)->fp; /* ;dgt2 */
do {
diff --git a/arch/nios2/platform/Kconfig.platform b/arch/nios2/platform/Kconfig.platform
index 9e32fb7f3d4c..e849daff6fd1 100644
--- a/arch/nios2/platform/Kconfig.platform
+++ b/arch/nios2/platform/Kconfig.platform
@@ -37,6 +37,7 @@ config NIOS2_DTB_PHYS_ADDR
config NIOS2_DTB_SOURCE_BOOL
bool "Compile and link device tree into kernel image"
+ depends on !COMPILE_TEST
help
This allows you to specify a dts (device tree source) file
which will be compiled and linked into the kernel image.
diff --git a/arch/openrisc/Kconfig b/arch/openrisc/Kconfig
index e804026b4797..c2491b295d60 100644
--- a/arch/openrisc/Kconfig
+++ b/arch/openrisc/Kconfig
@@ -13,7 +13,6 @@ config OPENRISC
select OF
select OF_EARLY_FLATTREE
select IRQ_DOMAIN
- select HANDLE_DOMAIN_IRQ
select GPIOLIB
select HAVE_ARCH_TRACEHOOK
select SPARSE_IRQ
diff --git a/arch/openrisc/include/asm/processor.h b/arch/openrisc/include/asm/processor.h
index ad53b3184885..aa1699c18add 100644
--- a/arch/openrisc/include/asm/processor.h
+++ b/arch/openrisc/include/asm/processor.h
@@ -73,7 +73,7 @@ struct thread_struct {
void start_thread(struct pt_regs *regs, unsigned long nip, unsigned long sp);
void release_thread(struct task_struct *);
-unsigned long get_wchan(struct task_struct *p);
+unsigned long __get_wchan(struct task_struct *p);
#define cpu_relax() barrier()
diff --git a/arch/openrisc/include/asm/spinlock.h b/arch/openrisc/include/asm/spinlock.h
index a8940bdfcb7e..264944a71535 100644
--- a/arch/openrisc/include/asm/spinlock.h
+++ b/arch/openrisc/include/asm/spinlock.h
@@ -19,9 +19,6 @@
#include <asm/qrwlock.h>
-#define arch_read_lock_flags(lock, flags) arch_read_lock(lock)
-#define arch_write_lock_flags(lock, flags) arch_write_lock(lock)
-
#define arch_spin_relax(lock) cpu_relax()
#define arch_read_relax(lock) cpu_relax()
#define arch_write_relax(lock) cpu_relax()
diff --git a/arch/openrisc/kernel/entry.S b/arch/openrisc/kernel/entry.S
index edaa775a648e..59c6d3aa7081 100644
--- a/arch/openrisc/kernel/entry.S
+++ b/arch/openrisc/kernel/entry.S
@@ -569,8 +569,8 @@ EXCEPTION_ENTRY(_external_irq_handler)
#endif
CLEAR_LWA_FLAG(r3)
l.addi r3,r1,0
- l.movhi r8,hi(do_IRQ)
- l.ori r8,r8,lo(do_IRQ)
+ l.movhi r8,hi(generic_handle_arch_irq)
+ l.ori r8,r8,lo(generic_handle_arch_irq)
l.jalr r8
l.nop
l.j _ret_from_intr
diff --git a/arch/openrisc/kernel/irq.c b/arch/openrisc/kernel/irq.c
index c38fa863afa8..f38e10962a84 100644
--- a/arch/openrisc/kernel/irq.c
+++ b/arch/openrisc/kernel/irq.c
@@ -36,8 +36,3 @@ void __init init_IRQ(void)
{
irqchip_init();
}
-
-void __irq_entry do_IRQ(struct pt_regs *regs)
-{
- handle_arch_irq(regs);
-}
diff --git a/arch/openrisc/kernel/process.c b/arch/openrisc/kernel/process.c
index b0698d9ce14f..3c0c91bcdcba 100644
--- a/arch/openrisc/kernel/process.c
+++ b/arch/openrisc/kernel/process.c
@@ -263,7 +263,7 @@ void dump_elf_thread(elf_greg_t *dest, struct pt_regs* regs)
dest[35] = 0;
}
-unsigned long get_wchan(struct task_struct *p)
+unsigned long __get_wchan(struct task_struct *p)
{
/* TODO */
diff --git a/arch/openrisc/mm/init.c b/arch/openrisc/mm/init.c
index cfef61a7b6c2..97305bde1b16 100644
--- a/arch/openrisc/mm/init.c
+++ b/arch/openrisc/mm/init.c
@@ -25,7 +25,6 @@
#include <linux/memblock.h>
#include <linux/init.h>
#include <linux/delay.h>
-#include <linux/blkdev.h> /* for initrd_* */
#include <linux/pagemap.h>
#include <asm/pgalloc.h>
diff --git a/arch/parisc/include/asm/cacheflush.h b/arch/parisc/include/asm/cacheflush.h
index eef0096db5f8..da0cd4b3a28f 100644
--- a/arch/parisc/include/asm/cacheflush.h
+++ b/arch/parisc/include/asm/cacheflush.h
@@ -49,7 +49,8 @@ void invalidate_kernel_vmap_range(void *vaddr, int size);
#define flush_cache_vunmap(start, end) flush_cache_all()
#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
-extern void flush_dcache_page(struct page *page);
+void flush_dcache_page(struct page *page);
+void flush_dcache_folio(struct folio *folio);
#define flush_dcache_mmap_lock(mapping) xa_lock_irq(&mapping->i_pages)
#define flush_dcache_mmap_unlock(mapping) xa_unlock_irq(&mapping->i_pages)
diff --git a/arch/parisc/include/asm/processor.h b/arch/parisc/include/asm/processor.h
index eeb7da064289..85a2dbfe5278 100644
--- a/arch/parisc/include/asm/processor.h
+++ b/arch/parisc/include/asm/processor.h
@@ -273,7 +273,7 @@ struct mm_struct;
/* Free all resources held by a thread. */
extern void release_thread(struct task_struct *);
-extern unsigned long get_wchan(struct task_struct *p);
+extern unsigned long __get_wchan(struct task_struct *p);
#define KSTK_EIP(tsk) ((tsk)->thread.regs.iaoq[0])
#define KSTK_ESP(tsk) ((tsk)->thread.regs.gr[30])
diff --git a/arch/parisc/include/asm/spinlock.h b/arch/parisc/include/asm/spinlock.h
index fa5ee8a45dbd..a6e5d66a7656 100644
--- a/arch/parisc/include/asm/spinlock.h
+++ b/arch/parisc/include/asm/spinlock.h
@@ -23,21 +23,6 @@ static inline void arch_spin_lock(arch_spinlock_t *x)
continue;
}
-static inline void arch_spin_lock_flags(arch_spinlock_t *x,
- unsigned long flags)
-{
- volatile unsigned int *a;
-
- a = __ldcw_align(x);
- while (__ldcw(a) == 0)
- while (*a == 0)
- if (flags & PSW_SM_I) {
- local_irq_enable();
- local_irq_disable();
- }
-}
-#define arch_spin_lock_flags arch_spin_lock_flags
-
static inline void arch_spin_unlock(arch_spinlock_t *x)
{
volatile unsigned int *a;
diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c
index 38ec4ae81239..4f562dee65a2 100644
--- a/arch/parisc/kernel/process.c
+++ b/arch/parisc/kernel/process.c
@@ -240,15 +240,12 @@ copy_thread(unsigned long clone_flags, unsigned long usp,
}
unsigned long
-get_wchan(struct task_struct *p)
+__get_wchan(struct task_struct *p)
{
struct unwind_frame_info info;
unsigned long ip;
int count = 0;
- if (!p || p == current || task_is_running(p))
- return 0;
-
/*
* These bracket the sleeping functions..
*/
diff --git a/arch/powerpc/include/asm/mem_encrypt.h b/arch/powerpc/include/asm/mem_encrypt.h
index ba9dab07c1be..2f26b8fc8d29 100644
--- a/arch/powerpc/include/asm/mem_encrypt.h
+++ b/arch/powerpc/include/asm/mem_encrypt.h
@@ -10,11 +10,6 @@
#include <asm/svm.h>
-static inline bool mem_encrypt_active(void)
-{
- return is_secure_guest();
-}
-
static inline bool force_dma_unencrypted(struct device *dev)
{
return is_secure_guest();
diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h
index f348e564f7dd..e39bd0ff69f3 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -300,7 +300,7 @@ struct thread_struct {
#define task_pt_regs(tsk) ((tsk)->thread.regs)
-unsigned long get_wchan(struct task_struct *p);
+unsigned long __get_wchan(struct task_struct *p);
#define KSTK_EIP(tsk) ((tsk)->thread.regs? (tsk)->thread.regs->nip: 0)
#define KSTK_ESP(tsk) ((tsk)->thread.regs? (tsk)->thread.regs->gpr[1]: 0)
diff --git a/arch/powerpc/include/asm/simple_spinlock.h b/arch/powerpc/include/asm/simple_spinlock.h
index 8985791a2ba5..7ae6aeef8464 100644
--- a/arch/powerpc/include/asm/simple_spinlock.h
+++ b/arch/powerpc/include/asm/simple_spinlock.h
@@ -123,27 +123,6 @@ static inline void arch_spin_lock(arch_spinlock_t *lock)
}
}
-static inline
-void arch_spin_lock_flags(arch_spinlock_t *lock, unsigned long flags)
-{
- unsigned long flags_dis;
-
- while (1) {
- if (likely(__arch_spin_trylock(lock) == 0))
- break;
- local_save_flags(flags_dis);
- local_irq_restore(flags);
- do {
- HMT_low();
- if (is_shared_processor())
- splpar_spin_yield(lock);
- } while (unlikely(lock->slock != 0));
- HMT_medium();
- local_irq_restore(flags_dis);
- }
-}
-#define arch_spin_lock_flags arch_spin_lock_flags
-
static inline void arch_spin_unlock(arch_spinlock_t *lock)
{
__asm__ __volatile__("# arch_spin_unlock\n\t"
diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S
index abb719b21cae..3d97fb833834 100644
--- a/arch/powerpc/kernel/idle_book3s.S
+++ b/arch/powerpc/kernel/idle_book3s.S
@@ -126,14 +126,16 @@ _GLOBAL(idle_return_gpr_loss)
/*
* This is the sequence required to execute idle instructions, as
* specified in ISA v2.07 (and earlier). MSR[IR] and MSR[DR] must be 0.
- *
- * The 0(r1) slot is used to save r2 in isa206, so use that here.
+ * We have to store a GPR somewhere, ptesync, then reload it, and create
+ * a false dependency on the result of the load. It doesn't matter which
+ * GPR we store, or where we store it. We have already stored r2 to the
+ * stack at -8(r1) in isa206_idle_insn_mayloss, so use that.
*/
#define IDLE_STATE_ENTER_SEQ_NORET(IDLE_INST) \
/* Magic NAP/SLEEP/WINKLE mode enter sequence */ \
- std r2,0(r1); \
+ std r2,-8(r1); \
ptesync; \
- ld r2,0(r1); \
+ ld r2,-8(r1); \
236: cmpd cr0,r2,r2; \
bne 236b; \
IDLE_INST; \
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 50436b52c213..406d7ee9e322 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -2111,14 +2111,11 @@ int validate_sp(unsigned long sp, struct task_struct *p,
EXPORT_SYMBOL(validate_sp);
-static unsigned long __get_wchan(struct task_struct *p)
+static unsigned long ___get_wchan(struct task_struct *p)
{
unsigned long ip, sp;
int count = 0;
- if (!p || p == current || task_is_running(p))
- return 0;
-
sp = p->thread.ksp;
if (!validate_sp(sp, p, STACK_FRAME_OVERHEAD))
return 0;
@@ -2137,14 +2134,14 @@ static unsigned long __get_wchan(struct task_struct *p)
return 0;
}
-unsigned long get_wchan(struct task_struct *p)
+unsigned long __get_wchan(struct task_struct *p)
{
unsigned long ret;
if (!try_get_task_stack(p))
return 0;
- ret = __get_wchan(p);
+ ret = ___get_wchan(p);
put_task_stack(p);
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 9cc7d3dbf439..605bab448f84 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -1730,8 +1730,6 @@ void __cpu_die(unsigned int cpu)
void arch_cpu_idle_dead(void)
{
- sched_preempt_enable_no_resched();
-
/*
* Disable on the down path. This will be re-enabled by
* start_secondary() via start_secondary_resume() below
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 90484425a1e6..eb776d0c5d8e 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -255,13 +255,16 @@ kvm_novcpu_exit:
* r3 contains the SRR1 wakeup value, SRR1 is trashed.
*/
_GLOBAL(idle_kvm_start_guest)
- ld r4,PACAEMERGSP(r13)
mfcr r5
mflr r0
- std r1,0(r4)
- std r5,8(r4)
- std r0,16(r4)
- subi r1,r4,STACK_FRAME_OVERHEAD
+ std r5, 8(r1) // Save CR in caller's frame
+ std r0, 16(r1) // Save LR in caller's frame
+ // Create frame on emergency stack
+ ld r4, PACAEMERGSP(r13)
+ stdu r1, -SWITCH_FRAME_SIZE(r4)
+ // Switch to new frame on emergency stack
+ mr r1, r4
+ std r3, 32(r1) // Save SRR1 wakeup value
SAVE_NVGPRS(r1)
/*
@@ -313,6 +316,10 @@ kvm_unsplit_wakeup:
kvm_secondary_got_guest:
+ // About to go to guest, clear saved SRR1
+ li r0, 0
+ std r0, 32(r1)
+
/* Set HSTATE_DSCR(r13) to something sensible */
ld r6, PACA_DSCR_DEFAULT(r13)
std r6, HSTATE_DSCR(r13)
@@ -392,13 +399,12 @@ kvm_no_guest:
mfspr r4, SPRN_LPCR
rlwimi r4, r3, 0, LPCR_PECE0 | LPCR_PECE1
mtspr SPRN_LPCR, r4
- /* set up r3 for return */
- mfspr r3,SPRN_SRR1
+ // Return SRR1 wakeup value, or 0 if we went into the guest
+ ld r3, 32(r1)
REST_NVGPRS(r1)
- addi r1, r1, STACK_FRAME_OVERHEAD
- ld r0, 16(r1)
- ld r5, 8(r1)
- ld r1, 0(r1)
+ ld r1, 0(r1) // Switch back to caller stack
+ ld r0, 16(r1) // Reload LR
+ ld r5, 8(r1) // Reload CR
mtlr r0
mtcr r5
blr
diff --git a/arch/powerpc/perf/isa207-common.c b/arch/powerpc/perf/isa207-common.c
index f92bf5f6b74f..7ea873ab2e6f 100644
--- a/arch/powerpc/perf/isa207-common.c
+++ b/arch/powerpc/perf/isa207-common.c
@@ -238,11 +238,27 @@ static inline u64 isa207_find_source(u64 idx, u32 sub_idx)
ret |= P(SNOOP, HIT);
break;
case 5:
- ret = PH(LVL, REM_CCE1);
- if ((sub_idx == 0) || (sub_idx == 2) || (sub_idx == 4))
- ret |= P(SNOOP, HIT);
- else if ((sub_idx == 1) || (sub_idx == 3) || (sub_idx == 5))
- ret |= P(SNOOP, HITM);
+ if (cpu_has_feature(CPU_FTR_ARCH_31)) {
+ ret = REM | P(HOPS, 0);
+
+ if (sub_idx == 0 || sub_idx == 4)
+ ret |= PH(LVL, L2) | LEVEL(L2) | P(SNOOP, HIT);
+ else if (sub_idx == 1 || sub_idx == 5)
+ ret |= PH(LVL, L2) | LEVEL(L2) | P(SNOOP, HITM);
+ else if (sub_idx == 2 || sub_idx == 6)
+ ret |= PH(LVL, L3) | LEVEL(L3) | P(SNOOP, HIT);
+ else if (sub_idx == 3 || sub_idx == 7)
+ ret |= PH(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM);
+ } else {
+ if (sub_idx == 0)
+ ret = PH(LVL, L2) | LEVEL(L2) | REM | P(SNOOP, HIT) | P(HOPS, 0);
+ else if (sub_idx == 1)
+ ret = PH(LVL, L2) | LEVEL(L2) | REM | P(SNOOP, HITM) | P(HOPS, 0);
+ else if (sub_idx == 2 || sub_idx == 4)
+ ret = PH(LVL, L3) | LEVEL(L3) | REM | P(SNOOP, HIT) | P(HOPS, 0);
+ else if (sub_idx == 3 || sub_idx == 5)
+ ret = PH(LVL, L3) | LEVEL(L3) | REM | P(SNOOP, HITM) | P(HOPS, 0);
+ }
break;
case 6:
ret = PH(LVL, REM_CCE2);
diff --git a/arch/powerpc/perf/isa207-common.h b/arch/powerpc/perf/isa207-common.h
index 4a2cbc3dc047..ff122603989b 100644
--- a/arch/powerpc/perf/isa207-common.h
+++ b/arch/powerpc/perf/isa207-common.h
@@ -273,6 +273,8 @@
#define P(a, b) PERF_MEM_S(a, b)
#define PH(a, b) (P(LVL, HIT) | P(a, b))
#define PM(a, b) (P(LVL, MISS) | P(a, b))
+#define LEVEL(x) P(LVLNUM, x)
+#define REM P(REMOTE, REMOTE)
int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp, u64 event_config1);
int isa207_compute_mmcr(u64 event[], int n_ev,
diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c
index bed05b644c2c..cb25acccd746 100644
--- a/arch/powerpc/platforms/cell/spufs/inode.c
+++ b/arch/powerpc/platforms/cell/spufs/inode.c
@@ -21,6 +21,7 @@
#include <linux/namei.h>
#include <linux/pagemap.h>
#include <linux/poll.h>
+#include <linux/seq_file.h>
#include <linux/slab.h>
#include <asm/prom.h>
diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig
index 5e037df2a3a1..2e57391e0778 100644
--- a/arch/powerpc/platforms/pseries/Kconfig
+++ b/arch/powerpc/platforms/pseries/Kconfig
@@ -159,6 +159,7 @@ config PPC_SVM
select SWIOTLB
select ARCH_HAS_MEM_ENCRYPT
select ARCH_HAS_FORCE_DMA_UNENCRYPTED
+ select ARCH_HAS_CC_PLATFORM
help
There are certain POWER platforms which support secure guests using
the Protected Execution Facility, with the help of an Ultravisor
diff --git a/arch/powerpc/platforms/pseries/Makefile b/arch/powerpc/platforms/pseries/Makefile
index 4cda0ef87be0..41d8aee98da4 100644
--- a/arch/powerpc/platforms/pseries/Makefile
+++ b/arch/powerpc/platforms/pseries/Makefile
@@ -31,3 +31,5 @@ obj-$(CONFIG_FA_DUMP) += rtas-fadump.o
obj-$(CONFIG_SUSPEND) += suspend.o
obj-$(CONFIG_PPC_VAS) += vas.o
+
+obj-$(CONFIG_ARCH_HAS_CC_PLATFORM) += cc_platform.o
diff --git a/arch/powerpc/platforms/pseries/cc_platform.c b/arch/powerpc/platforms/pseries/cc_platform.c
new file mode 100644
index 000000000000..e8021af83a19
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/cc_platform.c
@@ -0,0 +1,26 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Confidential Computing Platform Capability checks
+ *
+ * Copyright (C) 2021 Advanced Micro Devices, Inc.
+ *
+ * Author: Tom Lendacky <thomas.lendacky@amd.com>
+ */
+
+#include <linux/export.h>
+#include <linux/cc_platform.h>
+
+#include <asm/machdep.h>
+#include <asm/svm.h>
+
+bool cc_platform_has(enum cc_attr attr)
+{
+ switch (attr) {
+ case CC_ATTR_MEM_ENCRYPT:
+ return is_secure_guest();
+
+ default:
+ return false;
+ }
+}
+EXPORT_SYMBOL_GPL(cc_platform_has);
diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c
index dab5c56ffd0e..a52af8fbf571 100644
--- a/arch/powerpc/platforms/pseries/iommu.c
+++ b/arch/powerpc/platforms/pseries/iommu.c
@@ -1302,6 +1302,12 @@ static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn)
struct property *default_win;
int reset_win_ext;
+ /* DDW + IOMMU on single window may fail if there is any allocation */
+ if (iommu_table_in_use(tbl)) {
+ dev_warn(&dev->dev, "current IOMMU table in use, can't be replaced.\n");
+ goto out_failed;
+ }
+
default_win = of_find_property(pdn, "ibm,dma-window", NULL);
if (!default_win)
goto out_failed;
@@ -1356,12 +1362,6 @@ static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn)
query.largest_available_block,
1ULL << page_shift);
- /* DDW + IOMMU on single window may fail if there is any allocation */
- if (default_win_removed && iommu_table_in_use(tbl)) {
- dev_dbg(&dev->dev, "current IOMMU table in use, can't be replaced.\n");
- goto out_failed;
- }
-
len = order_base_2(query.largest_available_block << page_shift);
win_name = DMA64_PROPNAME;
} else {
@@ -1411,18 +1411,19 @@ static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn)
} else {
struct iommu_table *newtbl;
int i;
+ unsigned long start = 0, end = 0;
for (i = 0; i < ARRAY_SIZE(pci->phb->mem_resources); i++) {
const unsigned long mask = IORESOURCE_MEM_64 | IORESOURCE_MEM;
/* Look for MMIO32 */
- if ((pci->phb->mem_resources[i].flags & mask) == IORESOURCE_MEM)
+ if ((pci->phb->mem_resources[i].flags & mask) == IORESOURCE_MEM) {
+ start = pci->phb->mem_resources[i].start;
+ end = pci->phb->mem_resources[i].end;
break;
+ }
}
- if (i == ARRAY_SIZE(pci->phb->mem_resources))
- goto out_del_list;
-
/* New table for using DDW instead of the default DMA window */
newtbl = iommu_pseries_alloc_table(pci->phb->node);
if (!newtbl) {
@@ -1432,15 +1433,15 @@ static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn)
iommu_table_setparms_common(newtbl, pci->phb->bus->number, create.liobn, win_addr,
1UL << len, page_shift, NULL, &iommu_table_lpar_multi_ops);
- iommu_init_table(newtbl, pci->phb->node, pci->phb->mem_resources[i].start,
- pci->phb->mem_resources[i].end);
+ iommu_init_table(newtbl, pci->phb->node, start, end);
pci->table_group->tables[1] = newtbl;
/* Keep default DMA window stuct if removed */
if (default_win_removed) {
tbl->it_size = 0;
- kfree(tbl->it_map);
+ vfree(tbl->it_map);
+ tbl->it_map = NULL;
}
set_iommu_table_base(&dev->dev, newtbl);
diff --git a/arch/powerpc/platforms/pseries/svm.c b/arch/powerpc/platforms/pseries/svm.c
index 87f001b4c4e4..c083ecbbae4d 100644
--- a/arch/powerpc/platforms/pseries/svm.c
+++ b/arch/powerpc/platforms/pseries/svm.c
@@ -8,6 +8,7 @@
#include <linux/mm.h>
#include <linux/memblock.h>
+#include <linux/cc_platform.h>
#include <asm/machdep.h>
#include <asm/svm.h>
#include <asm/swiotlb.h>
@@ -63,7 +64,7 @@ void __init svm_swiotlb_init(void)
int set_memory_encrypted(unsigned long addr, int numpages)
{
- if (!mem_encrypt_active())
+ if (!cc_platform_has(CC_ATTR_MEM_ENCRYPT))
return 0;
if (!PAGE_ALIGNED(addr))
@@ -76,7 +77,7 @@ int set_memory_encrypted(unsigned long addr, int numpages)
int set_memory_decrypted(unsigned long addr, int numpages)
{
- if (!mem_encrypt_active())
+ if (!cc_platform_has(CC_ATTR_MEM_ENCRYPT))
return 0;
if (!PAGE_ALIGNED(addr))
diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c
index c732ce5a3e1a..c5d75c02ad8b 100644
--- a/arch/powerpc/sysdev/xive/common.c
+++ b/arch/powerpc/sysdev/xive/common.c
@@ -945,7 +945,8 @@ static int xive_get_irqchip_state(struct irq_data *data,
* interrupt to be inactive in that case.
*/
*state = (pq != XIVE_ESB_INVALID) && !xd->stale_p &&
- (xd->saved_p || !!(pq & XIVE_ESB_VAL_P));
+ (xd->saved_p || (!!(pq & XIVE_ESB_VAL_P) &&
+ !irqd_irq_disabled(data)));
return 0;
default:
return -EINVAL;
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index 6a6fa9e976d5..c28b743eba57 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -62,7 +62,6 @@ config RISCV
select GENERIC_SCHED_CLOCK
select GENERIC_SMP_IDLE_THREAD
select GENERIC_TIME_VSYSCALL if MMU && 64BIT
- select HANDLE_DOMAIN_IRQ
select HAVE_ARCH_AUDITSYSCALL
select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL
select HAVE_ARCH_JUMP_LABEL_RELATIVE if !XIP_KERNEL
@@ -163,6 +162,12 @@ config PAGE_OFFSET
default 0xffffffff80000000 if 64BIT && MAXPHYSMEM_2GB
default 0xffffffe000000000 if 64BIT && MAXPHYSMEM_128GB
+config KASAN_SHADOW_OFFSET
+ hex
+ depends on KASAN_GENERIC
+ default 0xdfffffc800000000 if 64BIT
+ default 0xffffffff if 32BIT
+
config ARCH_FLATMEM_ENABLE
def_bool !NUMA
diff --git a/arch/riscv/include/asm/kasan.h b/arch/riscv/include/asm/kasan.h
index a2b3d9cdbc86..b00f503ec124 100644
--- a/arch/riscv/include/asm/kasan.h
+++ b/arch/riscv/include/asm/kasan.h
@@ -30,8 +30,7 @@
#define KASAN_SHADOW_SIZE (UL(1) << ((CONFIG_VA_BITS - 1) - KASAN_SHADOW_SCALE_SHIFT))
#define KASAN_SHADOW_START KERN_VIRT_START
#define KASAN_SHADOW_END (KASAN_SHADOW_START + KASAN_SHADOW_SIZE)
-#define KASAN_SHADOW_OFFSET (KASAN_SHADOW_END - (1ULL << \
- (64 - KASAN_SHADOW_SCALE_SHIFT)))
+#define KASAN_SHADOW_OFFSET _AC(CONFIG_KASAN_SHADOW_OFFSET, UL)
void kasan_init(void);
asmlinkage void kasan_early_init(void);
diff --git a/arch/riscv/include/asm/processor.h b/arch/riscv/include/asm/processor.h
index 46b492c78cbb..0749924d9e55 100644
--- a/arch/riscv/include/asm/processor.h
+++ b/arch/riscv/include/asm/processor.h
@@ -66,7 +66,7 @@ static inline void release_thread(struct task_struct *dead_task)
{
}
-extern unsigned long get_wchan(struct task_struct *p);
+extern unsigned long __get_wchan(struct task_struct *p);
static inline void wait_for_interrupt(void)
diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S
index 98f502654edd..64236f7efde5 100644
--- a/arch/riscv/kernel/entry.S
+++ b/arch/riscv/kernel/entry.S
@@ -130,8 +130,7 @@ skip_context_tracking:
/* Handle interrupts */
move a0, sp /* pt_regs */
- la a1, handle_arch_irq
- REG_L a1, (a1)
+ la a1, generic_handle_arch_irq
jr a1
1:
/*
diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S
index fce5184b22c3..52c5ff9804c5 100644
--- a/arch/riscv/kernel/head.S
+++ b/arch/riscv/kernel/head.S
@@ -193,6 +193,7 @@ setup_trap_vector:
csrw CSR_SCRATCH, zero
ret
+.align 2
.Lsecondary_park:
/* We lack SMP support or have too many harts, so park this hart */
wfi
diff --git a/arch/riscv/kernel/smp.c b/arch/riscv/kernel/smp.c
index 921d9d7df400..2f6da845c9ae 100644
--- a/arch/riscv/kernel/smp.c
+++ b/arch/riscv/kernel/smp.c
@@ -140,12 +140,9 @@ void arch_irq_work_raise(void)
void handle_IPI(struct pt_regs *regs)
{
- struct pt_regs *old_regs = set_irq_regs(regs);
unsigned long *pending_ipis = &ipi_data[smp_processor_id()].bits;
unsigned long *stats = ipi_data[smp_processor_id()].stats;
- irq_enter();
-
riscv_clear_ipi();
while (true) {
@@ -156,7 +153,7 @@ void handle_IPI(struct pt_regs *regs)
ops = xchg(pending_ipis, 0);
if (ops == 0)
- goto done;
+ return;
if (ops & (1 << IPI_RESCHEDULE)) {
stats[IPI_RESCHEDULE]++;
@@ -189,10 +186,6 @@ void handle_IPI(struct pt_regs *regs)
/* Order data access and bit testing. */
mb();
}
-
-done:
- irq_exit();
- set_irq_regs(old_regs);
}
static const char * const ipi_names[] = {
diff --git a/arch/riscv/kernel/stacktrace.c b/arch/riscv/kernel/stacktrace.c
index 315db3d0229b..0fcdc0233fac 100644
--- a/arch/riscv/kernel/stacktrace.c
+++ b/arch/riscv/kernel/stacktrace.c
@@ -128,16 +128,14 @@ static bool save_wchan(void *arg, unsigned long pc)
return true;
}
-unsigned long get_wchan(struct task_struct *task)
+unsigned long __get_wchan(struct task_struct *task)
{
unsigned long pc = 0;
- if (likely(task && task != current && !task_is_running(task))) {
- if (!try_get_task_stack(task))
- return 0;
- walk_stackframe(task, NULL, save_wchan, &pc);
- put_task_stack(task);
- }
+ if (!try_get_task_stack(task))
+ return 0;
+ walk_stackframe(task, NULL, save_wchan, &pc);
+ put_task_stack(task);
return pc;
}
diff --git a/arch/riscv/mm/kasan_init.c b/arch/riscv/mm/kasan_init.c
index d7189c8714a9..54294f83513d 100644
--- a/arch/riscv/mm/kasan_init.c
+++ b/arch/riscv/mm/kasan_init.c
@@ -17,6 +17,9 @@ asmlinkage void __init kasan_early_init(void)
uintptr_t i;
pgd_t *pgd = early_pg_dir + pgd_index(KASAN_SHADOW_START);
+ BUILD_BUG_ON(KASAN_SHADOW_OFFSET !=
+ KASAN_SHADOW_END - (1UL << (64 - KASAN_SHADOW_SCALE_SHIFT)));
+
for (i = 0; i < PTRS_PER_PTE; ++i)
set_pte(kasan_early_shadow_pte + i,
mk_pte(virt_to_page(kasan_early_shadow_page),
@@ -172,21 +175,10 @@ void __init kasan_init(void)
phys_addr_t p_start, p_end;
u64 i;
- /*
- * Populate all kernel virtual address space with kasan_early_shadow_page
- * except for the linear mapping and the modules/kernel/BPF mapping.
- */
- kasan_populate_early_shadow((void *)KASAN_SHADOW_START,
- (void *)kasan_mem_to_shadow((void *)
- VMEMMAP_END));
if (IS_ENABLED(CONFIG_KASAN_VMALLOC))
kasan_shallow_populate(
(void *)kasan_mem_to_shadow((void *)VMALLOC_START),
(void *)kasan_mem_to_shadow((void *)VMALLOC_END));
- else
- kasan_populate_early_shadow(
- (void *)kasan_mem_to_shadow((void *)VMALLOC_START),
- (void *)kasan_mem_to_shadow((void *)VMALLOC_END));
/* Populate the linear mapping */
for_each_mem_range(i, &p_start, &p_end) {
diff --git a/arch/riscv/net/bpf_jit_core.c b/arch/riscv/net/bpf_jit_core.c
index fed86f42dfbe..753d85bdfad0 100644
--- a/arch/riscv/net/bpf_jit_core.c
+++ b/arch/riscv/net/bpf_jit_core.c
@@ -125,7 +125,8 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
if (i == NR_JIT_ITERATIONS) {
pr_err("bpf-jit: image did not converge in <%d passes!\n", i);
- bpf_jit_binary_free(jit_data->header);
+ if (jit_data->header)
+ bpf_jit_binary_free(jit_data->header);
prog = orig_prog;
goto out_offset;
}
@@ -166,6 +167,11 @@ out:
return prog;
}
+u64 bpf_jit_alloc_exec_limit(void)
+{
+ return BPF_JIT_REGION_SIZE;
+}
+
void *bpf_jit_alloc_exec(unsigned long size)
{
return __vmalloc_node_range(size, PAGE_SIZE, BPF_JIT_REGION_START,
diff --git a/arch/s390/include/asm/mem_encrypt.h b/arch/s390/include/asm/mem_encrypt.h
index 2542cbf7e2d1..08a8b96606d7 100644
--- a/arch/s390/include/asm/mem_encrypt.h
+++ b/arch/s390/include/asm/mem_encrypt.h
@@ -4,8 +4,6 @@
#ifndef __ASSEMBLY__
-static inline bool mem_encrypt_active(void) { return false; }
-
int set_memory_encrypted(unsigned long addr, int numpages);
int set_memory_decrypted(unsigned long addr, int numpages);
diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h
index 879b8e3f609c..f54c152bf2bf 100644
--- a/arch/s390/include/asm/processor.h
+++ b/arch/s390/include/asm/processor.h
@@ -192,7 +192,7 @@ static inline void release_thread(struct task_struct *tsk) { }
void guarded_storage_release(struct task_struct *tsk);
void gs_load_bc_cb(struct pt_regs *regs);
-unsigned long get_wchan(struct task_struct *p);
+unsigned long __get_wchan(struct task_struct *p);
#define task_pt_regs(tsk) ((struct pt_regs *) \
(task_stack_page(tsk) + THREAD_SIZE) - 1)
#define KSTK_EIP(tsk) (task_pt_regs(tsk)->psw.addr)
diff --git a/arch/s390/include/asm/spinlock.h b/arch/s390/include/asm/spinlock.h
index ef59588a3042..888a2f1c9ee3 100644
--- a/arch/s390/include/asm/spinlock.h
+++ b/arch/s390/include/asm/spinlock.h
@@ -67,14 +67,6 @@ static inline void arch_spin_lock(arch_spinlock_t *lp)
arch_spin_lock_wait(lp);
}
-static inline void arch_spin_lock_flags(arch_spinlock_t *lp,
- unsigned long flags)
-{
- if (!arch_spin_trylock_once(lp))
- arch_spin_lock_wait(lp);
-}
-#define arch_spin_lock_flags arch_spin_lock_flags
-
static inline int arch_spin_trylock(arch_spinlock_t *lp)
{
if (!arch_spin_trylock_once(lp))
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
index 350e94d0cac2..e5dd46b1bff8 100644
--- a/arch/s390/kernel/process.c
+++ b/arch/s390/kernel/process.c
@@ -181,12 +181,12 @@ void execve_tail(void)
asm volatile("sfpc %0" : : "d" (0));
}
-unsigned long get_wchan(struct task_struct *p)
+unsigned long __get_wchan(struct task_struct *p)
{
struct unwind_state state;
unsigned long ip = 0;
- if (!p || p == current || task_is_running(p) || !task_stack_page(p))
+ if (!task_stack_page(p))
return 0;
if (!try_get_task_stack(p))
diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
index b9f85b2dc053..6af59c59cc1b 100644
--- a/arch/s390/kvm/gaccess.c
+++ b/arch/s390/kvm/gaccess.c
@@ -894,6 +894,11 @@ int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra,
/**
* guest_translate_address - translate guest logical into guest absolute address
+ * @vcpu: virtual cpu
+ * @gva: Guest virtual address
+ * @ar: Access register
+ * @gpa: Guest physical address
+ * @mode: Translation access mode
*
* Parameter semantics are the same as the ones from guest_translate.
* The memory contents at the guest address are not changed.
@@ -934,6 +939,11 @@ int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar,
/**
* check_gva_range - test a range of guest virtual addresses for accessibility
+ * @vcpu: virtual cpu
+ * @gva: Guest virtual address
+ * @ar: Access register
+ * @length: Length of test range
+ * @mode: Translation access mode
*/
int check_gva_range(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar,
unsigned long length, enum gacc_mode mode)
@@ -956,6 +966,7 @@ int check_gva_range(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar,
/**
* kvm_s390_check_low_addr_prot_real - check for low-address protection
+ * @vcpu: virtual cpu
* @gra: Guest real address
*
* Checks whether an address is subject to low-address protection and set
@@ -979,6 +990,7 @@ int kvm_s390_check_low_addr_prot_real(struct kvm_vcpu *vcpu, unsigned long gra)
* @pgt: pointer to the beginning of the page table for the given address if
* successful (return value 0), or to the first invalid DAT entry in
* case of exceptions (return value > 0)
+ * @dat_protection: referenced memory is write protected
* @fake: pgt references contiguous guest memory block, not a pgtable
*/
static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c
index 72b25b7cc6ae..2bd8f854f1b4 100644
--- a/arch/s390/kvm/intercept.c
+++ b/arch/s390/kvm/intercept.c
@@ -269,6 +269,7 @@ static int handle_prog(struct kvm_vcpu *vcpu)
/**
* handle_external_interrupt - used for external interruption interceptions
+ * @vcpu: virtual cpu
*
* This interception only occurs if the CPUSTAT_EXT_INT bit was set, or if
* the new PSW does not have external interrupts disabled. In the first case,
@@ -315,7 +316,8 @@ static int handle_external_interrupt(struct kvm_vcpu *vcpu)
}
/**
- * Handle MOVE PAGE partial execution interception.
+ * handle_mvpg_pei - Handle MOVE PAGE partial execution interception.
+ * @vcpu: virtual cpu
*
* This interception can only happen for guests with DAT disabled and
* addresses that are currently not mapped in the host. Thus we try to
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index 10722455fd02..2245f4b8d362 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -3053,13 +3053,14 @@ static void __airqs_kick_single_vcpu(struct kvm *kvm, u8 deliverable_mask)
int vcpu_idx, online_vcpus = atomic_read(&kvm->online_vcpus);
struct kvm_s390_gisa_interrupt *gi = &kvm->arch.gisa_int;
struct kvm_vcpu *vcpu;
+ u8 vcpu_isc_mask;
for_each_set_bit(vcpu_idx, kvm->arch.idle_mask, online_vcpus) {
vcpu = kvm_get_vcpu(kvm, vcpu_idx);
if (psw_ioint_disabled(vcpu))
continue;
- deliverable_mask &= (u8)(vcpu->arch.sie_block->gcr[6] >> 24);
- if (deliverable_mask) {
+ vcpu_isc_mask = (u8)(vcpu->arch.sie_block->gcr[6] >> 24);
+ if (deliverable_mask & vcpu_isc_mask) {
/* lately kicked but not yet running */
if (test_and_set_bit(vcpu_idx, gi->kicked_mask))
return;
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 6a6dd5e1daf6..1c97493d21e1 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -3363,6 +3363,7 @@ out_free_sie_block:
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
{
+ clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.gisa_int.kicked_mask);
return kvm_s390_vcpu_has_irq(vcpu, 0);
}
diff --git a/arch/s390/lib/string.c b/arch/s390/lib/string.c
index cfcdf76d6a95..a95ca6df4e5e 100644
--- a/arch/s390/lib/string.c
+++ b/arch/s390/lib/string.c
@@ -259,14 +259,13 @@ EXPORT_SYMBOL(strcmp);
#ifdef __HAVE_ARCH_STRRCHR
char *strrchr(const char *s, int c)
{
- size_t len = __strend(s) - s;
-
- if (len)
- do {
- if (s[len] == (char) c)
- return (char *) s + len;
- } while (--len > 0);
- return NULL;
+ ssize_t len = __strend(s) - s;
+
+ do {
+ if (s[len] == (char)c)
+ return (char *)s + len;
+ } while (--len >= 0);
+ return NULL;
}
EXPORT_SYMBOL(strrchr);
#endif
diff --git a/arch/sh/include/asm/cacheflush.h b/arch/sh/include/asm/cacheflush.h
index 372afa82fee6..c7a97f32432f 100644
--- a/arch/sh/include/asm/cacheflush.h
+++ b/arch/sh/include/asm/cacheflush.h
@@ -42,7 +42,8 @@ extern void flush_cache_page(struct vm_area_struct *vma,
extern void flush_cache_range(struct vm_area_struct *vma,
unsigned long start, unsigned long end);
#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
-extern void flush_dcache_page(struct page *page);
+void flush_dcache_page(struct page *page);
+void flush_dcache_folio(struct folio *folio);
extern void flush_icache_range(unsigned long start, unsigned long end);
#define flush_icache_user_range flush_icache_range
extern void flush_icache_page(struct vm_area_struct *vma,
diff --git a/arch/sh/include/asm/processor_32.h b/arch/sh/include/asm/processor_32.h
index aa92cc933889..45240ec6b85a 100644
--- a/arch/sh/include/asm/processor_32.h
+++ b/arch/sh/include/asm/processor_32.h
@@ -180,7 +180,7 @@ static inline void show_code(struct pt_regs *regs)
}
#endif
-extern unsigned long get_wchan(struct task_struct *p);
+extern unsigned long __get_wchan(struct task_struct *p);
#define KSTK_EIP(tsk) (task_pt_regs(tsk)->pc)
#define KSTK_ESP(tsk) (task_pt_regs(tsk)->regs[15])
diff --git a/arch/sh/kernel/process_32.c b/arch/sh/kernel/process_32.c
index 717de05c81f4..1c28e3cddb60 100644
--- a/arch/sh/kernel/process_32.c
+++ b/arch/sh/kernel/process_32.c
@@ -182,13 +182,10 @@ __switch_to(struct task_struct *prev, struct task_struct *next)
return prev;
}
-unsigned long get_wchan(struct task_struct *p)
+unsigned long __get_wchan(struct task_struct *p)
{
unsigned long pc;
- if (!p || p == current || task_is_running(p))
- return 0;
-
/*
* The same comment as on the Alpha applies here, too ...
*/
diff --git a/arch/sparc/include/asm/processor_32.h b/arch/sparc/include/asm/processor_32.h
index b6242f7771e9..647bf0ac7beb 100644
--- a/arch/sparc/include/asm/processor_32.h
+++ b/arch/sparc/include/asm/processor_32.h
@@ -89,7 +89,7 @@ static inline void start_thread(struct pt_regs * regs, unsigned long pc,
/* Free all resources held by a thread. */
#define release_thread(tsk) do { } while(0)
-unsigned long get_wchan(struct task_struct *);
+unsigned long __get_wchan(struct task_struct *);
#define task_pt_regs(tsk) ((tsk)->thread.kregs)
#define KSTK_EIP(tsk) ((tsk)->thread.kregs->pc)
diff --git a/arch/sparc/include/asm/processor_64.h b/arch/sparc/include/asm/processor_64.h
index 5cf145f18f36..ae851e8fce4c 100644
--- a/arch/sparc/include/asm/processor_64.h
+++ b/arch/sparc/include/asm/processor_64.h
@@ -183,7 +183,7 @@ do { \
/* Free all resources held by a thread. */
#define release_thread(tsk) do { } while (0)
-unsigned long get_wchan(struct task_struct *task);
+unsigned long __get_wchan(struct task_struct *task);
#define task_pt_regs(tsk) (task_thread_info(tsk)->kregs)
#define KSTK_EIP(tsk) (task_pt_regs(tsk)->tpc)
diff --git a/arch/sparc/kernel/process_32.c b/arch/sparc/kernel/process_32.c
index bbbe0cfef746..2dc0bf9fe62e 100644
--- a/arch/sparc/kernel/process_32.c
+++ b/arch/sparc/kernel/process_32.c
@@ -365,7 +365,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, unsigned long arg,
return 0;
}
-unsigned long get_wchan(struct task_struct *task)
+unsigned long __get_wchan(struct task_struct *task)
{
unsigned long pc, fp, bias = 0;
unsigned long task_base = (unsigned long) task;
@@ -373,9 +373,6 @@ unsigned long get_wchan(struct task_struct *task)
struct reg_window32 *rw;
int count = 0;
- if (!task || task == current || task_is_running(task))
- goto out;
-
fp = task_thread_info(task)->ksp + bias;
do {
/* Bogus frame pointer? */
diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c
index d1cc410d2f64..f5b2cac8669f 100644
--- a/arch/sparc/kernel/process_64.c
+++ b/arch/sparc/kernel/process_64.c
@@ -663,7 +663,7 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
return 0;
}
-unsigned long get_wchan(struct task_struct *task)
+unsigned long __get_wchan(struct task_struct *task)
{
unsigned long pc, fp, bias = 0;
struct thread_info *tp;
@@ -671,9 +671,6 @@ unsigned long get_wchan(struct task_struct *task)
unsigned long ret = 0;
int count = 0;
- if (!task || task == current || task_is_running(task))
- goto out;
-
tp = task_thread_info(task);
bias = STACK_BIAS;
fp = task_thread_info(task)->ksp + bias;
diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c
index cd9dc0556e91..69d2d0049a61 100644
--- a/arch/um/drivers/ubd_kern.c
+++ b/arch/um/drivers/ubd_kern.c
@@ -27,6 +27,7 @@
#include <linux/blk-mq.h>
#include <linux/ata.h>
#include <linux/hdreg.h>
+#include <linux/major.h>
#include <linux/cdrom.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
@@ -854,8 +855,8 @@ static const struct attribute_group *ubd_attr_groups[] = {
NULL,
};
-static void ubd_disk_register(int major, u64 size, int unit,
- struct gendisk *disk)
+static int ubd_disk_register(int major, u64 size, int unit,
+ struct gendisk *disk)
{
disk->major = major;
disk->first_minor = unit << UBD_SHIFT;
@@ -872,7 +873,7 @@ static void ubd_disk_register(int major, u64 size, int unit,
disk->private_data = &ubd_devs[unit];
disk->queue = ubd_devs[unit].queue;
- device_add_disk(&ubd_devs[unit].pdev.dev, disk, ubd_attr_groups);
+ return device_add_disk(&ubd_devs[unit].pdev.dev, disk, ubd_attr_groups);
}
#define ROUND_BLOCK(n) ((n + (SECTOR_SIZE - 1)) & (-SECTOR_SIZE))
@@ -919,10 +920,15 @@ static int ubd_add(int n, char **error_out)
blk_queue_write_cache(ubd_dev->queue, true, false);
blk_queue_max_segments(ubd_dev->queue, MAX_SG);
blk_queue_segment_boundary(ubd_dev->queue, PAGE_SIZE - 1);
- ubd_disk_register(UBD_MAJOR, ubd_dev->size, n, disk);
+ err = ubd_disk_register(UBD_MAJOR, ubd_dev->size, n, disk);
+ if (err)
+ goto out_cleanup_disk;
+
ubd_gendisk[n] = disk;
return 0;
+out_cleanup_disk:
+ blk_cleanup_disk(disk);
out_cleanup_tags:
blk_mq_free_tag_set(&ubd_dev->tag_set);
out:
diff --git a/arch/um/include/asm/processor-generic.h b/arch/um/include/asm/processor-generic.h
index b5cf0ed116d9..579692a40a55 100644
--- a/arch/um/include/asm/processor-generic.h
+++ b/arch/um/include/asm/processor-generic.h
@@ -106,6 +106,6 @@ extern struct cpuinfo_um boot_cpu_data;
#define cache_line_size() (boot_cpu_data.cache_alignment)
#define KSTK_REG(tsk, reg) get_thread_reg(reg, &tsk->thread.switch_buf)
-extern unsigned long get_wchan(struct task_struct *p);
+extern unsigned long __get_wchan(struct task_struct *p);
#endif
diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c
index 457a38db368b..82107373ac7e 100644
--- a/arch/um/kernel/process.c
+++ b/arch/um/kernel/process.c
@@ -364,14 +364,11 @@ unsigned long arch_align_stack(unsigned long sp)
}
#endif
-unsigned long get_wchan(struct task_struct *p)
+unsigned long __get_wchan(struct task_struct *p)
{
unsigned long stack_page, sp, ip;
bool seen_sched = 0;
- if ((p == NULL) || (p == current) || task_is_running(p))
- return 0;
-
stack_page = (unsigned long) task_stack_page(p);
/* Bail if the process has no kernel stack for some reason */
if (stack_page == 0)
diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c
index a149a5e9a16a..54447690de11 100644
--- a/arch/um/kernel/um_arch.c
+++ b/arch/um/kernel/um_arch.c
@@ -421,6 +421,10 @@ void __init check_bugs(void)
os_check_bugs();
}
+void apply_retpolines(s32 *start, s32 *end)
+{
+}
+
void apply_alternatives(struct alt_instr *start, struct alt_instr *end)
{
}
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index bd70e8a39fbf..5ddfbbe5c51e 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -125,6 +125,7 @@ config X86
select CLOCKSOURCE_VALIDATE_LAST_CYCLE
select CLOCKSOURCE_WATCHDOG
select DCACHE_WORD_ACCESS
+ select DYNAMIC_SIGFRAME
select EDAC_ATOMIC_SCRUB
select EDAC_SUPPORT
select GENERIC_CLOCKEVENTS_BROADCAST if X86_64 || (X86_32 && X86_LOCAL_APIC)
@@ -610,9 +611,7 @@ config X86_INTEL_MID
depends on X86_IO_APIC
select I2C
select DW_APB_TIMER
- select APB_TIMER
select INTEL_SCU_PCI
- select MFD_INTEL_MSIC
help
Select to build a kernel capable of supporting Intel MID (Mobile
Internet Device) platform systems which do not have the PCI legacy
@@ -1001,6 +1000,17 @@ config NR_CPUS
This is purely to save memory: each supported CPU adds about 8KB
to the kernel image.
+config SCHED_CLUSTER
+ bool "Cluster scheduler support"
+ depends on SMP
+ default y
+ help
+ Cluster scheduler support improves the CPU scheduler's decision
+ making when dealing with machines that have clusters of CPUs.
+ Cluster usually means a couple of CPUs which are placed closely
+ by sharing mid-level caches, last-level cache tags or internal
+ busses.
+
config SCHED_SMT
def_bool y if SMP
@@ -1256,7 +1266,8 @@ config TOSHIBA
config I8K
tristate "Dell i8k legacy laptop support"
- select HWMON
+ depends on HWMON
+ depends on PROC_FS
select SENSORS_DELL_SMM
help
This option enables legacy /proc/i8k userspace interface in hwmon
@@ -1518,6 +1529,7 @@ config AMD_MEM_ENCRYPT
select ARCH_HAS_FORCE_DMA_UNENCRYPTED
select INSTRUCTION_DECODER
select ARCH_HAS_RESTRICTED_VIRTIO_MEMORY_ACCESS
+ select ARCH_HAS_CC_PLATFORM
help
Say yes to enable support for the encryption of system memory.
This requires an AMD processor that supports Secure Memory
@@ -1525,7 +1537,6 @@ config AMD_MEM_ENCRYPT
config AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT
bool "Activate AMD Secure Memory Encryption (SME) by default"
- default y
depends on AMD_MEM_ENCRYPT
help
Say yes to have system memory encrypted by default if running on
@@ -2389,6 +2400,22 @@ config MODIFY_LDT_SYSCALL
Saying 'N' here may make sense for embedded or server kernels.
+config STRICT_SIGALTSTACK_SIZE
+ bool "Enforce strict size checking for sigaltstack"
+ depends on DYNAMIC_SIGFRAME
+ help
+ For historical reasons MINSIGSTKSZ is a constant which became
+ already too small with AVX512 support. Add a mechanism to
+ enforce strict checking of the sigaltstack size against the
+ real size of the FPU frame. This option enables the check
+ by default. It can also be controlled via the kernel command
+ line option 'strict_sas_size' independent of this config
+ switch. Enabling it might break existing applications which
+ allocate a too small sigaltstack but 'work' because they
+ never get a signal delivered.
+
+ Say 'N' unless you want to really enforce this check.
+
source "kernel/livepatch/Kconfig"
endmenu
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index 814fe0d349b0..eefc434351db 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -508,3 +508,16 @@ config CPU_SUP_ZHAOXIN
CPU might render the kernel unbootable.
If unsure, say N.
+
+config CPU_SUP_VORTEX_32
+ default y
+ bool "Support Vortex processors" if PROCESSOR_SELECT
+ depends on X86_32
+ help
+ This enables detection, tunings and quirks for Vortex processors
+
+ You need this enabled if you want your kernel to run on a
+ Vortex CPU. Disabling this option on other types of CPUs
+ makes the kernel a tiny bit smaller.
+
+ If unsure, say N.
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 7488cfbbd2f6..aab70413ae7a 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -299,7 +299,7 @@ define archhelp
echo ' isoimage - Create a boot CD-ROM image (arch/x86/boot/image.iso)'
echo ' bzdisk/fdimage*/hdimage/isoimage also accept:'
echo ' FDARGS="..." arguments for the booted kernel'
- echo ' FDINITRD=file initrd for the booted kernel'
+ echo ' FDINITRD=file initrd for the booted kernel'
echo ''
echo ' kvm_guest.config - Enable Kconfig items for running this kernel as a KVM guest'
echo ' xen.config - Enable Kconfig items for running this kernel as a Xen guest'
diff --git a/arch/x86/boot/genimage.sh b/arch/x86/boot/genimage.sh
index 0673fdfc1a11..c9299aeb7333 100644
--- a/arch/x86/boot/genimage.sh
+++ b/arch/x86/boot/genimage.sh
@@ -120,12 +120,13 @@ efiarch() {
}
# Get the combined sizes in bytes of the files given, counting sparse
-# files as full length, and padding each file to a 4K block size
+# files as full length, and padding each file to cluster size
+cluster=16384
filesizes() {
local t=0
local s
for s in $(ls -lnL "$@" 2>/dev/null | awk '/^-/{ print $5; }'); do
- t=$((t + ((s+4095)/4096)*4096))
+ t=$((t + ((s+cluster-1)/cluster)*cluster))
done
echo $t
}
@@ -230,14 +231,14 @@ genhdimage() {
ptype='-T 0xef' # EFI system partition, no GPT
fi
sizes=$(filesizes "$FBZIMAGE" "${FDINITRDS[@]}" "$efishell")
- # Allow 1% + 1 MiB for filesystem and partition table overhead,
- # syslinux, and config files
+ # Allow 1% + 2 MiB for filesystem and partition table overhead,
+ # syslinux, and config files; this is probably excessive...
megs=$(((sizes + sizes/100 + 2*1024*1024 - 1)/(1024*1024)))
$dd if=/dev/zero of="$FIMAGE" bs=$((1024*1024)) count=$megs 2>/dev/null
- mpartition -I -c -s 32 -h 64 -t $megs $ptype -b 512 -a h:
+ mpartition -I -c -s 32 -h 64 $ptype -b 64 -a p:
$dd if="$mbr" of="$FIMAGE" bs=440 count=1 conv=notrunc 2>/dev/null
- mformat -v 'LINUX_BOOT' -s 32 -h 64 -t $megs h:
- syslinux --offset $((512*512)) "$FIMAGE"
+ mformat -v 'LINUX_BOOT' -s 32 -h 64 -c $((cluster/512)) -t $megs h:
+ syslinux --offset $((64*512)) "$FIMAGE"
do_mcopy h:
}
diff --git a/arch/x86/boot/mtools.conf.in b/arch/x86/boot/mtools.conf.in
index 9e2662d01364..174c60508766 100644
--- a/arch/x86/boot/mtools.conf.in
+++ b/arch/x86/boot/mtools.conf.in
@@ -14,7 +14,8 @@ drive v:
drive w:
file="@OBJ@/fdimage" cylinders=80 heads=2 sectors=36 filter
-# Hard disk
+# Hard disk (h: for the filesystem, p: for format - old mtools bug?)
drive h:
+ file="@OBJ@/hdimage" offset=32768 mformat_only
+drive p:
file="@OBJ@/hdimage" partition=1 mformat_only
-
diff --git a/arch/x86/crypto/sm4-aesni-avx-asm_64.S b/arch/x86/crypto/sm4-aesni-avx-asm_64.S
index 18d2f5199194..1cc72b4804fa 100644
--- a/arch/x86/crypto/sm4-aesni-avx-asm_64.S
+++ b/arch/x86/crypto/sm4-aesni-avx-asm_64.S
@@ -78,7 +78,7 @@
vpxor tmp0, x, x;
-.section .rodata.cst164, "aM", @progbits, 164
+.section .rodata.cst16, "aM", @progbits, 16
.align 16
/*
@@ -133,6 +133,10 @@
.L0f0f0f0f:
.long 0x0f0f0f0f
+/* 12 bytes, only for padding */
+.Lpadding_deadbeef:
+ .long 0xdeadbeef, 0xdeadbeef, 0xdeadbeef
+
.text
.align 16
diff --git a/arch/x86/crypto/sm4-aesni-avx2-asm_64.S b/arch/x86/crypto/sm4-aesni-avx2-asm_64.S
index d2ffd7f76ee2..9c5d3f3ad45a 100644
--- a/arch/x86/crypto/sm4-aesni-avx2-asm_64.S
+++ b/arch/x86/crypto/sm4-aesni-avx2-asm_64.S
@@ -93,7 +93,7 @@
vpxor tmp0, x, x;
-.section .rodata.cst164, "aM", @progbits, 164
+.section .rodata.cst16, "aM", @progbits, 16
.align 16
/*
@@ -148,6 +148,10 @@
.L0f0f0f0f:
.long 0x0f0f0f0f
+/* 12 bytes, only for padding */
+.Lpadding_deadbeef:
+ .long 0xdeadbeef, 0xdeadbeef, 0xdeadbeef
+
.text
.align 16
diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
index 960a021d543e..7e25543693de 100644
--- a/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/arch/x86/entry/syscalls/syscall_32.tbl
@@ -453,3 +453,4 @@
446 i386 landlock_restrict_self sys_landlock_restrict_self
447 i386 memfd_secret sys_memfd_secret
448 i386 process_mrelease sys_process_mrelease
+449 i386 futex_waitv sys_futex_waitv
diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl
index 18b5500ea8bf..fe8f8dd157b4 100644
--- a/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/arch/x86/entry/syscalls/syscall_64.tbl
@@ -370,6 +370,7 @@
446 common landlock_restrict_self sys_landlock_restrict_self
447 common memfd_secret sys_memfd_secret
448 common process_mrelease sys_process_mrelease
+449 common futex_waitv sys_futex_waitv
#
# Due to a historical design error, certain syscalls are numbered differently
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 6dfa8ddaa60f..38b2c779146f 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -66,6 +66,8 @@ DEFINE_STATIC_CALL_NULL(x86_pmu_enable_all, *x86_pmu.enable_all);
DEFINE_STATIC_CALL_NULL(x86_pmu_enable, *x86_pmu.enable);
DEFINE_STATIC_CALL_NULL(x86_pmu_disable, *x86_pmu.disable);
+DEFINE_STATIC_CALL_NULL(x86_pmu_assign, *x86_pmu.assign);
+
DEFINE_STATIC_CALL_NULL(x86_pmu_add, *x86_pmu.add);
DEFINE_STATIC_CALL_NULL(x86_pmu_del, *x86_pmu.del);
DEFINE_STATIC_CALL_NULL(x86_pmu_read, *x86_pmu.read);
@@ -1215,6 +1217,8 @@ static inline void x86_assign_hw_event(struct perf_event *event,
hwc->last_cpu = smp_processor_id();
hwc->last_tag = ++cpuc->tags[i];
+ static_call_cond(x86_pmu_assign)(event, idx);
+
switch (hwc->idx) {
case INTEL_PMC_IDX_FIXED_BTS:
case INTEL_PMC_IDX_FIXED_VLBR:
@@ -2005,6 +2009,8 @@ static void x86_pmu_static_call_update(void)
static_call_update(x86_pmu_enable, x86_pmu.enable);
static_call_update(x86_pmu_disable, x86_pmu.disable);
+ static_call_update(x86_pmu_assign, x86_pmu.assign);
+
static_call_update(x86_pmu_add, x86_pmu.add);
static_call_update(x86_pmu_del, x86_pmu.del);
static_call_update(x86_pmu_read, x86_pmu.read);
diff --git a/arch/x86/events/intel/bts.c b/arch/x86/events/intel/bts.c
index 6320d2cfd9d3..974e917e65b2 100644
--- a/arch/x86/events/intel/bts.c
+++ b/arch/x86/events/intel/bts.c
@@ -209,6 +209,12 @@ static void bts_update(struct bts_ctx *bts)
} else {
local_set(&buf->data_size, head);
}
+
+ /*
+ * Since BTS is coherent, just add compiler barrier to ensure
+ * BTS updating is ordered against bts::handle::event.
+ */
+ barrier();
}
static int
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 9a044438072b..aa3f5b527dc0 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -243,7 +243,8 @@ static struct extra_reg intel_skl_extra_regs[] __read_mostly = {
static struct event_constraint intel_icl_event_constraints[] = {
FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
- FIXED_EVENT_CONSTRAINT(0x01c0, 0), /* INST_RETIRED.PREC_DIST */
+ FIXED_EVENT_CONSTRAINT(0x01c0, 0), /* old INST_RETIRED.PREC_DIST */
+ FIXED_EVENT_CONSTRAINT(0x0100, 0), /* INST_RETIRED.PREC_DIST */
FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
FIXED_EVENT_CONSTRAINT(0x0400, 3), /* SLOTS */
@@ -288,7 +289,7 @@ static struct extra_reg intel_spr_extra_regs[] __read_mostly = {
static struct event_constraint intel_spr_event_constraints[] = {
FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
- FIXED_EVENT_CONSTRAINT(0x01c0, 0), /* INST_RETIRED.PREC_DIST */
+ FIXED_EVENT_CONSTRAINT(0x0100, 0), /* INST_RETIRED.PREC_DIST */
FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
FIXED_EVENT_CONSTRAINT(0x0400, 3), /* SLOTS */
@@ -2403,6 +2404,12 @@ static void intel_pmu_disable_event(struct perf_event *event)
intel_pmu_pebs_disable(event);
}
+static void intel_pmu_assign_event(struct perf_event *event, int idx)
+{
+ if (is_pebs_pt(event))
+ perf_report_aux_output_id(event, idx);
+}
+
static void intel_pmu_del_event(struct perf_event *event)
{
if (needs_branch_stack(event))
@@ -4495,8 +4502,16 @@ static int intel_pmu_check_period(struct perf_event *event, u64 value)
return intel_pmu_has_bts_period(event, value) ? -EINVAL : 0;
}
+static void intel_aux_output_init(void)
+{
+ /* Refer also intel_pmu_aux_output_match() */
+ if (x86_pmu.intel_cap.pebs_output_pt_available)
+ x86_pmu.assign = intel_pmu_assign_event;
+}
+
static int intel_pmu_aux_output_match(struct perf_event *event)
{
+ /* intel_pmu_assign_event() is needed, refer intel_aux_output_init() */
if (!x86_pmu.intel_cap.pebs_output_pt_available)
return 0;
@@ -6302,6 +6317,8 @@ __init int intel_pmu_init(void)
if (is_hybrid())
intel_pmu_check_hybrid_pmus((u64)fixed_mask);
+ intel_aux_output_init();
+
return 0;
}
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index 8647713276a7..4dbb55a43dad 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -923,7 +923,8 @@ struct event_constraint intel_skl_pebs_event_constraints[] = {
};
struct event_constraint intel_icl_pebs_event_constraints[] = {
- INTEL_FLAGS_UEVENT_CONSTRAINT(0x1c0, 0x100000000ULL), /* INST_RETIRED.PREC_DIST */
+ INTEL_FLAGS_UEVENT_CONSTRAINT(0x01c0, 0x100000000ULL), /* old INST_RETIRED.PREC_DIST */
+ INTEL_FLAGS_UEVENT_CONSTRAINT(0x0100, 0x100000000ULL), /* INST_RETIRED.PREC_DIST */
INTEL_FLAGS_UEVENT_CONSTRAINT(0x0400, 0x800000000ULL), /* SLOTS */
INTEL_PLD_CONSTRAINT(0x1cd, 0xff), /* MEM_TRANS_RETIRED.LOAD_LATENCY */
@@ -943,7 +944,7 @@ struct event_constraint intel_icl_pebs_event_constraints[] = {
};
struct event_constraint intel_spr_pebs_event_constraints[] = {
- INTEL_FLAGS_UEVENT_CONSTRAINT(0x1c0, 0x100000000ULL),
+ INTEL_FLAGS_UEVENT_CONSTRAINT(0x100, 0x100000000ULL), /* INST_RETIRED.PREC_DIST */
INTEL_FLAGS_UEVENT_CONSTRAINT(0x0400, 0x800000000ULL),
INTEL_FLAGS_EVENT_CONSTRAINT(0xc0, 0xfe),
diff --git a/arch/x86/events/intel/uncore_discovery.h b/arch/x86/events/intel/uncore_discovery.h
index 7280c8a3c831..6d735611c281 100644
--- a/arch/x86/events/intel/uncore_discovery.h
+++ b/arch/x86/events/intel/uncore_discovery.h
@@ -30,7 +30,7 @@
#define uncore_discovery_invalid_unit(unit) \
- (!unit.table1 || !unit.ctl || !unit.table3 || \
+ (!unit.table1 || !unit.ctl || \
unit.table1 == -1ULL || unit.ctl == -1ULL || \
unit.table3 == -1ULL)
diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c
index 5ddc0f30db6f..eb2c6cea9d0d 100644
--- a/arch/x86/events/intel/uncore_snbep.c
+++ b/arch/x86/events/intel/uncore_snbep.c
@@ -452,7 +452,7 @@
#define ICX_M3UPI_PCI_PMON_BOX_CTL 0xa0
/* ICX IMC */
-#define ICX_NUMBER_IMC_CHN 2
+#define ICX_NUMBER_IMC_CHN 3
#define ICX_IMC_MEM_STRIDE 0x4
/* SPR */
@@ -5076,8 +5076,10 @@ static struct event_constraint icx_uncore_iio_constraints[] = {
UNCORE_EVENT_CONSTRAINT(0x02, 0x3),
UNCORE_EVENT_CONSTRAINT(0x03, 0x3),
UNCORE_EVENT_CONSTRAINT(0x83, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x88, 0xc),
UNCORE_EVENT_CONSTRAINT(0xc0, 0xc),
UNCORE_EVENT_CONSTRAINT(0xc5, 0xc),
+ UNCORE_EVENT_CONSTRAINT(0xd5, 0xc),
EVENT_CONSTRAINT_END
};
@@ -5463,7 +5465,7 @@ static struct intel_uncore_ops icx_uncore_mmio_ops = {
static struct intel_uncore_type icx_uncore_imc = {
.name = "imc",
.num_counters = 4,
- .num_boxes = 8,
+ .num_boxes = 12,
.perf_ctr_bits = 48,
.fixed_ctr_bits = 48,
.fixed_ctr = SNR_IMC_MMIO_PMON_FIXED_CTR,
@@ -5647,6 +5649,7 @@ static struct intel_uncore_type spr_uncore_chabox = {
.event_mask = SPR_CHA_PMON_EVENT_MASK,
.event_mask_ext = SPR_RAW_EVENT_MASK_EXT,
.num_shared_regs = 1,
+ .constraints = skx_uncore_chabox_constraints,
.ops = &spr_uncore_chabox_ops,
.format_group = &spr_uncore_chabox_format_group,
.attr_update = uncore_alias_groups,
@@ -5658,6 +5661,7 @@ static struct intel_uncore_type spr_uncore_iio = {
.event_mask_ext = SNR_IIO_PMON_RAW_EVENT_MASK_EXT,
.format_group = &snr_uncore_iio_format_group,
.attr_update = uncore_alias_groups,
+ .constraints = icx_uncore_iio_constraints,
};
static struct attribute *spr_uncore_raw_formats_attr[] = {
@@ -5686,9 +5690,16 @@ static struct intel_uncore_type spr_uncore_irp = {
};
+static struct event_constraint spr_uncore_m2pcie_constraints[] = {
+ UNCORE_EVENT_CONSTRAINT(0x14, 0x3),
+ UNCORE_EVENT_CONSTRAINT(0x2d, 0x3),
+ EVENT_CONSTRAINT_END
+};
+
static struct intel_uncore_type spr_uncore_m2pcie = {
SPR_UNCORE_COMMON_FORMAT(),
.name = "m2pcie",
+ .constraints = spr_uncore_m2pcie_constraints,
};
static struct intel_uncore_type spr_uncore_pcu = {
@@ -5765,6 +5776,7 @@ static struct intel_uncore_type spr_uncore_upi = {
static struct intel_uncore_type spr_uncore_m3upi = {
SPR_UNCORE_PCI_COMMON_FORMAT(),
.name = "m3upi",
+ .constraints = icx_uncore_m3upi_constraints,
};
static struct intel_uncore_type spr_uncore_mdf = {
diff --git a/arch/x86/events/msr.c b/arch/x86/events/msr.c
index c853b28efa33..96c775abe31f 100644
--- a/arch/x86/events/msr.c
+++ b/arch/x86/events/msr.c
@@ -68,6 +68,7 @@ static bool test_intel(int idx, void *data)
case INTEL_FAM6_BROADWELL_D:
case INTEL_FAM6_BROADWELL_G:
case INTEL_FAM6_BROADWELL_X:
+ case INTEL_FAM6_SAPPHIRERAPIDS_X:
case INTEL_FAM6_ATOM_SILVERMONT:
case INTEL_FAM6_ATOM_SILVERMONT_D:
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index e3ac05c97b5e..7b5167db3e78 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -14,6 +14,7 @@
#include <linux/perf_event.h>
+#include <asm/fpu/xstate.h>
#include <asm/intel_ds.h>
#include <asm/cpu.h>
@@ -726,6 +727,7 @@ struct x86_pmu {
void (*enable_all)(int added);
void (*enable)(struct perf_event *);
void (*disable)(struct perf_event *);
+ void (*assign)(struct perf_event *event, int idx);
void (*add)(struct perf_event *);
void (*del)(struct perf_event *);
void (*read)(struct perf_event *event);
diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c
index 5e3d9b7fd5fb..c9c3859322fa 100644
--- a/arch/x86/ia32/ia32_signal.c
+++ b/arch/x86/ia32/ia32_signal.c
@@ -24,7 +24,6 @@
#include <linux/syscalls.h>
#include <asm/ucontext.h>
#include <linux/uaccess.h>
-#include <asm/fpu/internal.h>
#include <asm/fpu/signal.h>
#include <asm/ptrace.h>
#include <asm/ia32_unistd.h>
@@ -57,8 +56,8 @@ static inline void reload_segments(struct sigcontext_32 *sc)
/*
* Do a signal return; undo the signal stack.
*/
-static int ia32_restore_sigcontext(struct pt_regs *regs,
- struct sigcontext_32 __user *usc)
+static bool ia32_restore_sigcontext(struct pt_regs *regs,
+ struct sigcontext_32 __user *usc)
{
struct sigcontext_32 sc;
@@ -66,7 +65,7 @@ static int ia32_restore_sigcontext(struct pt_regs *regs,
current->restart_block.fn = do_no_restart_syscall;
if (unlikely(copy_from_user(&sc, usc, sizeof(sc))))
- return -EFAULT;
+ return false;
/* Get only the ia32 registers. */
regs->bx = sc.bx;
@@ -111,7 +110,7 @@ COMPAT_SYSCALL_DEFINE0(sigreturn)
set_current_blocked(&set);
- if (ia32_restore_sigcontext(regs, &frame->sc))
+ if (!ia32_restore_sigcontext(regs, &frame->sc))
goto badframe;
return regs->ax;
@@ -135,7 +134,7 @@ COMPAT_SYSCALL_DEFINE0(rt_sigreturn)
set_current_blocked(&set);
- if (ia32_restore_sigcontext(regs, &frame->uc.uc_mcontext))
+ if (!ia32_restore_sigcontext(regs, &frame->uc.uc_mcontext))
goto badframe;
if (compat_restore_altstack(&frame->uc.uc_stack))
@@ -220,8 +219,8 @@ static void __user *get_sigframe(struct ksignal *ksig, struct pt_regs *regs,
sp = fpu__alloc_mathframe(sp, 1, &fx_aligned, &math_size);
*fpstate = (struct _fpstate_32 __user *) sp;
- if (copy_fpstate_to_sigframe(*fpstate, (void __user *)fx_aligned,
- math_size) < 0)
+ if (!copy_fpstate_to_sigframe(*fpstate, (void __user *)fx_aligned,
+ math_size))
return (void __user *) -1L;
sp -= frame_size;
diff --git a/arch/x86/include/asm/GEN-for-each-reg.h b/arch/x86/include/asm/GEN-for-each-reg.h
index 1b07fb102c4e..07949102a08d 100644
--- a/arch/x86/include/asm/GEN-for-each-reg.h
+++ b/arch/x86/include/asm/GEN-for-each-reg.h
@@ -1,11 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * These are in machine order; things rely on that.
+ */
#ifdef CONFIG_64BIT
GEN(rax)
-GEN(rbx)
GEN(rcx)
GEN(rdx)
+GEN(rbx)
+GEN(rsp)
+GEN(rbp)
GEN(rsi)
GEN(rdi)
-GEN(rbp)
GEN(r8)
GEN(r9)
GEN(r10)
@@ -16,10 +21,11 @@ GEN(r14)
GEN(r15)
#else
GEN(eax)
-GEN(ebx)
GEN(ecx)
GEN(edx)
+GEN(ebx)
+GEN(esp)
+GEN(ebp)
GEN(esi)
GEN(edi)
-GEN(ebp)
#endif
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index a3c2315aca12..58eee6402832 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -75,6 +75,7 @@ extern int alternatives_patched;
extern void alternative_instructions(void);
extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end);
+extern void apply_retpolines(s32 *start, s32 *end);
struct module;
diff --git a/arch/x86/include/asm/asm-prototypes.h b/arch/x86/include/asm/asm-prototypes.h
index 4cb726c71ed8..8f80de627c60 100644
--- a/arch/x86/include/asm/asm-prototypes.h
+++ b/arch/x86/include/asm/asm-prototypes.h
@@ -17,21 +17,3 @@
extern void cmpxchg8b_emu(void);
#endif
-#ifdef CONFIG_RETPOLINE
-
-#undef GEN
-#define GEN(reg) \
- extern asmlinkage void __x86_indirect_thunk_ ## reg (void);
-#include <asm/GEN-for-each-reg.h>
-
-#undef GEN
-#define GEN(reg) \
- extern asmlinkage void __x86_indirect_alt_call_ ## reg (void);
-#include <asm/GEN-for-each-reg.h>
-
-#undef GEN
-#define GEN(reg) \
- extern asmlinkage void __x86_indirect_alt_jmp_ ## reg (void);
-#include <asm/GEN-for-each-reg.h>
-
-#endif /* CONFIG_RETPOLINE */
diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h
index 3ad3da9a7d97..3a168483bc8e 100644
--- a/arch/x86/include/asm/asm.h
+++ b/arch/x86/include/asm/asm.h
@@ -6,11 +6,13 @@
# define __ASM_FORM(x, ...) x,## __VA_ARGS__
# define __ASM_FORM_RAW(x, ...) x,## __VA_ARGS__
# define __ASM_FORM_COMMA(x, ...) x,## __VA_ARGS__,
+# define __ASM_REGPFX %
#else
#include <linux/stringify.h>
# define __ASM_FORM(x, ...) " " __stringify(x,##__VA_ARGS__) " "
# define __ASM_FORM_RAW(x, ...) __stringify(x,##__VA_ARGS__)
# define __ASM_FORM_COMMA(x, ...) " " __stringify(x,##__VA_ARGS__) ","
+# define __ASM_REGPFX %%
#endif
#define _ASM_BYTES(x, ...) __ASM_FORM(.byte x,##__VA_ARGS__ ;)
@@ -49,6 +51,9 @@
#define _ASM_SI __ASM_REG(si)
#define _ASM_DI __ASM_REG(di)
+/* Adds a (%rip) suffix on 64 bits only; for immediate memory references */
+#define _ASM_RIP(x) __ASM_SEL_RAW(x, x (__ASM_REGPFX rip))
+
#ifndef __x86_64__
/* 32 bit */
@@ -122,28 +127,19 @@
#ifdef __KERNEL__
+# include <asm/extable_fixup_types.h>
+
/* Exception table entry */
#ifdef __ASSEMBLY__
-# define _ASM_EXTABLE_HANDLE(from, to, handler) \
+
+# define _ASM_EXTABLE_TYPE(from, to, type) \
.pushsection "__ex_table","a" ; \
.balign 4 ; \
.long (from) - . ; \
.long (to) - . ; \
- .long (handler) - . ; \
+ .long type ; \
.popsection
-# define _ASM_EXTABLE(from, to) \
- _ASM_EXTABLE_HANDLE(from, to, ex_handler_default)
-
-# define _ASM_EXTABLE_UA(from, to) \
- _ASM_EXTABLE_HANDLE(from, to, ex_handler_uaccess)
-
-# define _ASM_EXTABLE_CPY(from, to) \
- _ASM_EXTABLE_HANDLE(from, to, ex_handler_copy)
-
-# define _ASM_EXTABLE_FAULT(from, to) \
- _ASM_EXTABLE_HANDLE(from, to, ex_handler_fault)
-
# ifdef CONFIG_KPROBES
# define _ASM_NOKPROBE(entry) \
.pushsection "_kprobe_blacklist","aw" ; \
@@ -155,27 +151,15 @@
# endif
#else /* ! __ASSEMBLY__ */
-# define _EXPAND_EXTABLE_HANDLE(x) #x
-# define _ASM_EXTABLE_HANDLE(from, to, handler) \
+
+# define _ASM_EXTABLE_TYPE(from, to, type) \
" .pushsection \"__ex_table\",\"a\"\n" \
" .balign 4\n" \
" .long (" #from ") - .\n" \
" .long (" #to ") - .\n" \
- " .long (" _EXPAND_EXTABLE_HANDLE(handler) ") - .\n" \
+ " .long " __stringify(type) " \n" \
" .popsection\n"
-# define _ASM_EXTABLE(from, to) \
- _ASM_EXTABLE_HANDLE(from, to, ex_handler_default)
-
-# define _ASM_EXTABLE_UA(from, to) \
- _ASM_EXTABLE_HANDLE(from, to, ex_handler_uaccess)
-
-# define _ASM_EXTABLE_CPY(from, to) \
- _ASM_EXTABLE_HANDLE(from, to, ex_handler_copy)
-
-# define _ASM_EXTABLE_FAULT(from, to) \
- _ASM_EXTABLE_HANDLE(from, to, ex_handler_fault)
-
/* For C file, we already have NOKPROBE_SYMBOL macro */
/*
@@ -188,6 +172,17 @@ register unsigned long current_stack_pointer asm(_ASM_SP);
#define ASM_CALL_CONSTRAINT "+r" (current_stack_pointer)
#endif /* __ASSEMBLY__ */
-#endif /* __KERNEL__ */
+#define _ASM_EXTABLE(from, to) \
+ _ASM_EXTABLE_TYPE(from, to, EX_TYPE_DEFAULT)
+#define _ASM_EXTABLE_UA(from, to) \
+ _ASM_EXTABLE_TYPE(from, to, EX_TYPE_UACCESS)
+
+#define _ASM_EXTABLE_CPY(from, to) \
+ _ASM_EXTABLE_TYPE(from, to, EX_TYPE_COPY)
+
+#define _ASM_EXTABLE_FAULT(from, to) \
+ _ASM_EXTABLE_TYPE(from, to, EX_TYPE_FAULT)
+
+#endif /* __KERNEL__ */
#endif /* _ASM_X86_ASM_H */
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 16a51e7288d5..1261842d006c 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -173,20 +173,25 @@ extern void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int bit);
* means that the boot_cpu_has() variant is already fast enough for the
* majority of cases and you should stick to using it as it is generally
* only two instructions: a RIP-relative MOV and a TEST.
+ *
+ * Do not use an "m" constraint for [cap_byte] here: gcc doesn't know
+ * that this is only used on a fallback path and will sometimes cause
+ * it to manifest the address of boot_cpu_data in a register, fouling
+ * the mainline (post-initialization) code.
*/
static __always_inline bool _static_cpu_has(u16 bit)
{
asm_volatile_goto(
ALTERNATIVE_TERNARY("jmp 6f", %P[feature], "", "jmp %l[t_no]")
- ".section .altinstr_aux,\"ax\"\n"
+ ".pushsection .altinstr_aux,\"ax\"\n"
"6:\n"
- " testb %[bitnum],%[cap_byte]\n"
+ " testb %[bitnum]," _ASM_RIP(%P[cap_byte]) "\n"
" jnz %l[t_yes]\n"
" jmp %l[t_no]\n"
- ".previous\n"
+ ".popsection\n"
: : [feature] "i" (bit),
[bitnum] "i" (1 << (bit & 7)),
- [cap_byte] "m" (((const char *)boot_cpu_data.x86_capability)[bit >> 3])
+ [cap_byte] "i" (&((const char *)boot_cpu_data.x86_capability)[bit >> 3])
: : t_yes, t_no);
t_yes:
return true;
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index d0ce5cfd3ac1..d5b5f2ab87a0 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -277,6 +277,7 @@
#define X86_FEATURE_XSAVEC (10*32+ 1) /* XSAVEC instruction */
#define X86_FEATURE_XGETBV1 (10*32+ 2) /* XGETBV with ECX = 1 instruction */
#define X86_FEATURE_XSAVES (10*32+ 3) /* XSAVES/XRSTORS instructions */
+#define X86_FEATURE_XFD (10*32+ 4) /* "" eXtended Feature Disabling */
/*
* Extended auxiliary flags: Linux defined - for features scattered in various
@@ -298,6 +299,7 @@
/* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */
#define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */
#define X86_FEATURE_AVX512_BF16 (12*32+ 5) /* AVX512 BFLOAT16 instructions */
+#define X86_FEATURE_AMX_TILE (18*32+24) /* AMX tile Support */
/* AMD-defined CPU features, CPUID level 0x80000008 (EBX), word 13 */
#define X86_FEATURE_CLZERO (13*32+ 0) /* CLZERO instruction */
diff --git a/arch/x86/include/asm/extable.h b/arch/x86/include/asm/extable.h
index 1f0cbc52937c..93f400eb728f 100644
--- a/arch/x86/include/asm/extable.h
+++ b/arch/x86/include/asm/extable.h
@@ -1,12 +1,18 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_X86_EXTABLE_H
#define _ASM_X86_EXTABLE_H
+
+#include <asm/extable_fixup_types.h>
+
/*
- * The exception table consists of triples of addresses relative to the
- * exception table entry itself. The first address is of an instruction
- * that is allowed to fault, the second is the target at which the program
- * should continue. The third is a handler function to deal with the fault
- * caused by the instruction in the first field.
+ * The exception table consists of two addresses relative to the
+ * exception table entry itself and a type selector field.
+ *
+ * The first address is of an instruction that is allowed to fault, the
+ * second is the target at which the program should continue.
+ *
+ * The type entry is used by fixup_exception() to select the handler to
+ * deal with the fault caused by the instruction in the first field.
*
* All the routines below use bits of fixup code that are out of line
* with the main instruction path. This means when everything is well,
@@ -15,7 +21,7 @@
*/
struct exception_table_entry {
- int insn, fixup, handler;
+ int insn, fixup, type;
};
struct pt_regs;
@@ -25,21 +31,27 @@ struct pt_regs;
do { \
(a)->fixup = (b)->fixup + (delta); \
(b)->fixup = (tmp).fixup - (delta); \
- (a)->handler = (b)->handler + (delta); \
- (b)->handler = (tmp).handler - (delta); \
+ (a)->type = (b)->type; \
+ (b)->type = (tmp).type; \
} while (0)
-enum handler_type {
- EX_HANDLER_NONE,
- EX_HANDLER_FAULT,
- EX_HANDLER_UACCESS,
- EX_HANDLER_OTHER
-};
-
extern int fixup_exception(struct pt_regs *regs, int trapnr,
unsigned long error_code, unsigned long fault_addr);
extern int fixup_bug(struct pt_regs *regs, int trapnr);
-extern enum handler_type ex_get_fault_handler_type(unsigned long ip);
+extern int ex_get_fixup_type(unsigned long ip);
extern void early_fixup_exception(struct pt_regs *regs, int trapnr);
+#ifdef CONFIG_X86_MCE
+extern void ex_handler_msr_mce(struct pt_regs *regs, bool wrmsr);
+#else
+static inline void ex_handler_msr_mce(struct pt_regs *regs, bool wrmsr) { }
+#endif
+
+#if defined(CONFIG_BPF_JIT) && defined(CONFIG_X86_64)
+bool ex_handler_bpf(const struct exception_table_entry *x, struct pt_regs *regs);
+#else
+static inline bool ex_handler_bpf(const struct exception_table_entry *x,
+ struct pt_regs *regs) { return false; }
+#endif
+
#endif
diff --git a/arch/x86/include/asm/extable_fixup_types.h b/arch/x86/include/asm/extable_fixup_types.h
new file mode 100644
index 000000000000..409524d5d2eb
--- /dev/null
+++ b/arch/x86/include/asm/extable_fixup_types.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_EXTABLE_FIXUP_TYPES_H
+#define _ASM_X86_EXTABLE_FIXUP_TYPES_H
+
+#define EX_TYPE_NONE 0
+#define EX_TYPE_DEFAULT 1
+#define EX_TYPE_FAULT 2
+#define EX_TYPE_UACCESS 3
+#define EX_TYPE_COPY 4
+#define EX_TYPE_CLEAR_FS 5
+#define EX_TYPE_FPU_RESTORE 6
+#define EX_TYPE_WRMSR 7
+#define EX_TYPE_RDMSR 8
+#define EX_TYPE_BPF 9
+
+#define EX_TYPE_WRMSR_IN_MCE 10
+#define EX_TYPE_RDMSR_IN_MCE 11
+
+#define EX_TYPE_DEFAULT_MCE_SAFE 12
+#define EX_TYPE_FAULT_MCE_SAFE 13
+
+#endif
diff --git a/arch/x86/include/asm/fpu/api.h b/arch/x86/include/asm/fpu/api.h
index 23bef08a8388..b7267b9e452f 100644
--- a/arch/x86/include/asm/fpu/api.h
+++ b/arch/x86/include/asm/fpu/api.h
@@ -12,6 +12,8 @@
#define _ASM_X86_FPU_API_H
#include <linux/bottom_half.h>
+#include <asm/fpu/types.h>
+
/*
* Use kernel_fpu_begin/end() if you intend to use FPU in kernel context. It
* disables preemption so be careful if you intend to use it for long periods
@@ -48,9 +50,9 @@ static inline void kernel_fpu_begin(void)
}
/*
- * Use fpregs_lock() while editing CPU's FPU registers or fpu->state.
+ * Use fpregs_lock() while editing CPU's FPU registers or fpu->fpstate.
* A context switch will (and softirq might) save CPU's FPU registers to
- * fpu->state and set TIF_NEED_FPU_LOAD leaving CPU's FPU registers in
+ * fpu->fpstate.regs and set TIF_NEED_FPU_LOAD leaving CPU's FPU registers in
* a random state.
*
* local_bh_disable() protects against both preemption and soft interrupts
@@ -108,4 +110,56 @@ extern int cpu_has_xfeatures(u64 xfeatures_mask, const char **feature_name);
static inline void update_pasid(void) { }
+/* Trap handling */
+extern int fpu__exception_code(struct fpu *fpu, int trap_nr);
+extern void fpu_sync_fpstate(struct fpu *fpu);
+extern void fpu_reset_from_exception_fixup(void);
+
+/* Boot, hotplug and resume */
+extern void fpu__init_cpu(void);
+extern void fpu__init_system(struct cpuinfo_x86 *c);
+extern void fpu__init_check_bugs(void);
+extern void fpu__resume_cpu(void);
+
+#ifdef CONFIG_MATH_EMULATION
+extern void fpstate_init_soft(struct swregs_state *soft);
+#else
+static inline void fpstate_init_soft(struct swregs_state *soft) {}
+#endif
+
+/* State tracking */
+DECLARE_PER_CPU(struct fpu *, fpu_fpregs_owner_ctx);
+
+/* Process cleanup */
+#ifdef CONFIG_X86_64
+extern void fpstate_free(struct fpu *fpu);
+#else
+static inline void fpstate_free(struct fpu *fpu) { }
+#endif
+
+/* fpstate-related functions which are exported to KVM */
+extern void fpstate_clear_xstate_component(struct fpstate *fps, unsigned int xfeature);
+
+/* KVM specific functions */
+extern bool fpu_alloc_guest_fpstate(struct fpu_guest *gfpu);
+extern void fpu_free_guest_fpstate(struct fpu_guest *gfpu);
+extern int fpu_swap_kvm_fpstate(struct fpu_guest *gfpu, bool enter_guest);
+
+extern void fpu_copy_guest_fpstate_to_uabi(struct fpu_guest *gfpu, void *buf, unsigned int size, u32 pkru);
+extern int fpu_copy_uabi_to_guest_fpstate(struct fpu_guest *gfpu, const void *buf, u64 xcr0, u32 *vpkru);
+
+static inline void fpstate_set_confidential(struct fpu_guest *gfpu)
+{
+ gfpu->fpstate->is_confidential = true;
+}
+
+static inline bool fpstate_is_confidential(struct fpu_guest *gfpu)
+{
+ return gfpu->fpstate->is_confidential;
+}
+
+/* prctl */
+struct task_struct;
+extern long fpu_xstate_prctl(struct task_struct *tsk, int option, unsigned long arg2);
+
#endif /* _ASM_X86_FPU_API_H */
diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h
index 5a18694a89b2..e69de29bb2d1 100644
--- a/arch/x86/include/asm/fpu/internal.h
+++ b/arch/x86/include/asm/fpu/internal.h
@@ -1,540 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Copyright (C) 1994 Linus Torvalds
- *
- * Pentium III FXSR, SSE support
- * General FPU state handling cleanups
- * Gareth Hughes <gareth@valinux.com>, May 2000
- * x86-64 work by Andi Kleen 2002
- */
-
-#ifndef _ASM_X86_FPU_INTERNAL_H
-#define _ASM_X86_FPU_INTERNAL_H
-
-#include <linux/compat.h>
-#include <linux/sched.h>
-#include <linux/slab.h>
-#include <linux/mm.h>
-
-#include <asm/user.h>
-#include <asm/fpu/api.h>
-#include <asm/fpu/xstate.h>
-#include <asm/fpu/xcr.h>
-#include <asm/cpufeature.h>
-#include <asm/trace/fpu.h>
-
-/*
- * High level FPU state handling functions:
- */
-extern int fpu__restore_sig(void __user *buf, int ia32_frame);
-extern void fpu__drop(struct fpu *fpu);
-extern void fpu__clear_user_states(struct fpu *fpu);
-extern int fpu__exception_code(struct fpu *fpu, int trap_nr);
-
-extern void fpu_sync_fpstate(struct fpu *fpu);
-
-/* Clone and exit operations */
-extern int fpu_clone(struct task_struct *dst);
-extern void fpu_flush_thread(void);
-
-/*
- * Boot time FPU initialization functions:
- */
-extern void fpu__init_cpu(void);
-extern void fpu__init_system_xstate(void);
-extern void fpu__init_cpu_xstate(void);
-extern void fpu__init_system(struct cpuinfo_x86 *c);
-extern void fpu__init_check_bugs(void);
-extern void fpu__resume_cpu(void);
-
-/*
- * Debugging facility:
- */
-#ifdef CONFIG_X86_DEBUG_FPU
-# define WARN_ON_FPU(x) WARN_ON_ONCE(x)
-#else
-# define WARN_ON_FPU(x) ({ (void)(x); 0; })
-#endif
-
-/*
- * FPU related CPU feature flag helper routines:
- */
-static __always_inline __pure bool use_xsaveopt(void)
-{
- return static_cpu_has(X86_FEATURE_XSAVEOPT);
-}
-
-static __always_inline __pure bool use_xsave(void)
-{
- return static_cpu_has(X86_FEATURE_XSAVE);
-}
-
-static __always_inline __pure bool use_fxsr(void)
-{
- return static_cpu_has(X86_FEATURE_FXSR);
-}
-
-/*
- * fpstate handling functions:
- */
-
-extern union fpregs_state init_fpstate;
-
-extern void fpstate_init(union fpregs_state *state);
-#ifdef CONFIG_MATH_EMULATION
-extern void fpstate_init_soft(struct swregs_state *soft);
-#else
-static inline void fpstate_init_soft(struct swregs_state *soft) {}
-#endif
-extern void save_fpregs_to_fpstate(struct fpu *fpu);
-
-/* Returns 0 or the negated trap number, which results in -EFAULT for #PF */
-#define user_insn(insn, output, input...) \
-({ \
- int err; \
- \
- might_fault(); \
- \
- asm volatile(ASM_STAC "\n" \
- "1: " #insn "\n" \
- "2: " ASM_CLAC "\n" \
- ".section .fixup,\"ax\"\n" \
- "3: negl %%eax\n" \
- " jmp 2b\n" \
- ".previous\n" \
- _ASM_EXTABLE_FAULT(1b, 3b) \
- : [err] "=a" (err), output \
- : "0"(0), input); \
- err; \
-})
-
-#define kernel_insn_err(insn, output, input...) \
-({ \
- int err; \
- asm volatile("1:" #insn "\n\t" \
- "2:\n" \
- ".section .fixup,\"ax\"\n" \
- "3: movl $-1,%[err]\n" \
- " jmp 2b\n" \
- ".previous\n" \
- _ASM_EXTABLE(1b, 3b) \
- : [err] "=r" (err), output \
- : "0"(0), input); \
- err; \
-})
-
-#define kernel_insn(insn, output, input...) \
- asm volatile("1:" #insn "\n\t" \
- "2:\n" \
- _ASM_EXTABLE_HANDLE(1b, 2b, ex_handler_fprestore) \
- : output : input)
-
-static inline int fnsave_to_user_sigframe(struct fregs_state __user *fx)
-{
- return user_insn(fnsave %[fx]; fwait, [fx] "=m" (*fx), "m" (*fx));
-}
-
-static inline int fxsave_to_user_sigframe(struct fxregs_state __user *fx)
-{
- if (IS_ENABLED(CONFIG_X86_32))
- return user_insn(fxsave %[fx], [fx] "=m" (*fx), "m" (*fx));
- else
- return user_insn(fxsaveq %[fx], [fx] "=m" (*fx), "m" (*fx));
-
-}
-
-static inline void fxrstor(struct fxregs_state *fx)
-{
- if (IS_ENABLED(CONFIG_X86_32))
- kernel_insn(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx));
- else
- kernel_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx));
-}
-
-static inline int fxrstor_safe(struct fxregs_state *fx)
-{
- if (IS_ENABLED(CONFIG_X86_32))
- return kernel_insn_err(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx));
- else
- return kernel_insn_err(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx));
-}
-
-static inline int fxrstor_from_user_sigframe(struct fxregs_state __user *fx)
-{
- if (IS_ENABLED(CONFIG_X86_32))
- return user_insn(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx));
- else
- return user_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx));
-}
-
-static inline void frstor(struct fregs_state *fx)
-{
- kernel_insn(frstor %[fx], "=m" (*fx), [fx] "m" (*fx));
-}
-
-static inline int frstor_safe(struct fregs_state *fx)
-{
- return kernel_insn_err(frstor %[fx], "=m" (*fx), [fx] "m" (*fx));
-}
-
-static inline int frstor_from_user_sigframe(struct fregs_state __user *fx)
-{
- return user_insn(frstor %[fx], "=m" (*fx), [fx] "m" (*fx));
-}
-
-static inline void fxsave(struct fxregs_state *fx)
-{
- if (IS_ENABLED(CONFIG_X86_32))
- asm volatile( "fxsave %[fx]" : [fx] "=m" (*fx));
- else
- asm volatile("fxsaveq %[fx]" : [fx] "=m" (*fx));
-}
-
-/* These macros all use (%edi)/(%rdi) as the single memory argument. */
-#define XSAVE ".byte " REX_PREFIX "0x0f,0xae,0x27"
-#define XSAVEOPT ".byte " REX_PREFIX "0x0f,0xae,0x37"
-#define XSAVES ".byte " REX_PREFIX "0x0f,0xc7,0x2f"
-#define XRSTOR ".byte " REX_PREFIX "0x0f,0xae,0x2f"
-#define XRSTORS ".byte " REX_PREFIX "0x0f,0xc7,0x1f"
-
-/*
- * After this @err contains 0 on success or the negated trap number when
- * the operation raises an exception. For faults this results in -EFAULT.
- */
-#define XSTATE_OP(op, st, lmask, hmask, err) \
- asm volatile("1:" op "\n\t" \
- "xor %[err], %[err]\n" \
- "2:\n\t" \
- ".pushsection .fixup,\"ax\"\n\t" \
- "3: negl %%eax\n\t" \
- "jmp 2b\n\t" \
- ".popsection\n\t" \
- _ASM_EXTABLE_FAULT(1b, 3b) \
- : [err] "=a" (err) \
- : "D" (st), "m" (*st), "a" (lmask), "d" (hmask) \
- : "memory")
-
-/*
- * If XSAVES is enabled, it replaces XSAVEOPT because it supports a compact
- * format and supervisor states in addition to modified optimization in
- * XSAVEOPT.
- *
- * Otherwise, if XSAVEOPT is enabled, XSAVEOPT replaces XSAVE because XSAVEOPT
- * supports modified optimization which is not supported by XSAVE.
- *
- * We use XSAVE as a fallback.
- *
- * The 661 label is defined in the ALTERNATIVE* macros as the address of the
- * original instruction which gets replaced. We need to use it here as the
- * address of the instruction where we might get an exception at.
- */
-#define XSTATE_XSAVE(st, lmask, hmask, err) \
- asm volatile(ALTERNATIVE_2(XSAVE, \
- XSAVEOPT, X86_FEATURE_XSAVEOPT, \
- XSAVES, X86_FEATURE_XSAVES) \
- "\n" \
- "xor %[err], %[err]\n" \
- "3:\n" \
- ".pushsection .fixup,\"ax\"\n" \
- "4: movl $-2, %[err]\n" \
- "jmp 3b\n" \
- ".popsection\n" \
- _ASM_EXTABLE(661b, 4b) \
- : [err] "=r" (err) \
- : "D" (st), "m" (*st), "a" (lmask), "d" (hmask) \
- : "memory")
-
-/*
- * Use XRSTORS to restore context if it is enabled. XRSTORS supports compact
- * XSAVE area format.
- */
-#define XSTATE_XRESTORE(st, lmask, hmask) \
- asm volatile(ALTERNATIVE(XRSTOR, \
- XRSTORS, X86_FEATURE_XSAVES) \
- "\n" \
- "3:\n" \
- _ASM_EXTABLE_HANDLE(661b, 3b, ex_handler_fprestore)\
- : \
- : "D" (st), "m" (*st), "a" (lmask), "d" (hmask) \
- : "memory")
-
-/*
- * This function is called only during boot time when x86 caps are not set
- * up and alternative can not be used yet.
- */
-static inline void os_xrstor_booting(struct xregs_state *xstate)
-{
- u64 mask = xfeatures_mask_fpstate();
- u32 lmask = mask;
- u32 hmask = mask >> 32;
- int err;
-
- WARN_ON(system_state != SYSTEM_BOOTING);
-
- if (boot_cpu_has(X86_FEATURE_XSAVES))
- XSTATE_OP(XRSTORS, xstate, lmask, hmask, err);
- else
- XSTATE_OP(XRSTOR, xstate, lmask, hmask, err);
-
- /*
- * We should never fault when copying from a kernel buffer, and the FPU
- * state we set at boot time should be valid.
- */
- WARN_ON_FPU(err);
-}
-
-/*
- * Save processor xstate to xsave area.
- *
- * Uses either XSAVE or XSAVEOPT or XSAVES depending on the CPU features
- * and command line options. The choice is permanent until the next reboot.
- */
-static inline void os_xsave(struct xregs_state *xstate)
-{
- u64 mask = xfeatures_mask_all;
- u32 lmask = mask;
- u32 hmask = mask >> 32;
- int err;
-
- WARN_ON_FPU(!alternatives_patched);
-
- XSTATE_XSAVE(xstate, lmask, hmask, err);
-
- /* We should never fault when copying to a kernel buffer: */
- WARN_ON_FPU(err);
-}
-
-/*
- * Restore processor xstate from xsave area.
- *
- * Uses XRSTORS when XSAVES is used, XRSTOR otherwise.
- */
-static inline void os_xrstor(struct xregs_state *xstate, u64 mask)
-{
- u32 lmask = mask;
- u32 hmask = mask >> 32;
-
- XSTATE_XRESTORE(xstate, lmask, hmask);
-}
-
-/*
- * Save xstate to user space xsave area.
- *
- * We don't use modified optimization because xrstor/xrstors might track
- * a different application.
- *
- * We don't use compacted format xsave area for
- * backward compatibility for old applications which don't understand
- * compacted format of xsave area.
- */
-static inline int xsave_to_user_sigframe(struct xregs_state __user *buf)
-{
- /*
- * Include the features which are not xsaved/rstored by the kernel
- * internally, e.g. PKRU. That's user space ABI and also required
- * to allow the signal handler to modify PKRU.
- */
- u64 mask = xfeatures_mask_uabi();
- u32 lmask = mask;
- u32 hmask = mask >> 32;
- int err;
-
- /*
- * Clear the xsave header first, so that reserved fields are
- * initialized to zero.
- */
- err = __clear_user(&buf->header, sizeof(buf->header));
- if (unlikely(err))
- return -EFAULT;
-
- stac();
- XSTATE_OP(XSAVE, buf, lmask, hmask, err);
- clac();
-
- return err;
-}
-
-/*
- * Restore xstate from user space xsave area.
- */
-static inline int xrstor_from_user_sigframe(struct xregs_state __user *buf, u64 mask)
-{
- struct xregs_state *xstate = ((__force struct xregs_state *)buf);
- u32 lmask = mask;
- u32 hmask = mask >> 32;
- int err;
-
- stac();
- XSTATE_OP(XRSTOR, xstate, lmask, hmask, err);
- clac();
-
- return err;
-}
-
-/*
- * Restore xstate from kernel space xsave area, return an error code instead of
- * an exception.
- */
-static inline int os_xrstor_safe(struct xregs_state *xstate, u64 mask)
-{
- u32 lmask = mask;
- u32 hmask = mask >> 32;
- int err;
-
- if (cpu_feature_enabled(X86_FEATURE_XSAVES))
- XSTATE_OP(XRSTORS, xstate, lmask, hmask, err);
- else
- XSTATE_OP(XRSTOR, xstate, lmask, hmask, err);
-
- return err;
-}
-
-extern void __restore_fpregs_from_fpstate(union fpregs_state *fpstate, u64 mask);
-
-static inline void restore_fpregs_from_fpstate(union fpregs_state *fpstate)
-{
- __restore_fpregs_from_fpstate(fpstate, xfeatures_mask_fpstate());
-}
-
-extern int copy_fpstate_to_sigframe(void __user *buf, void __user *fp, int size);
-
-/*
- * FPU context switch related helper methods:
- */
-
-DECLARE_PER_CPU(struct fpu *, fpu_fpregs_owner_ctx);
-
-/*
- * The in-register FPU state for an FPU context on a CPU is assumed to be
- * valid if the fpu->last_cpu matches the CPU, and the fpu_fpregs_owner_ctx
- * matches the FPU.
- *
- * If the FPU register state is valid, the kernel can skip restoring the
- * FPU state from memory.
- *
- * Any code that clobbers the FPU registers or updates the in-memory
- * FPU state for a task MUST let the rest of the kernel know that the
- * FPU registers are no longer valid for this task.
- *
- * Either one of these invalidation functions is enough. Invalidate
- * a resource you control: CPU if using the CPU for something else
- * (with preemption disabled), FPU for the current task, or a task that
- * is prevented from running by the current task.
- */
-static inline void __cpu_invalidate_fpregs_state(void)
-{
- __this_cpu_write(fpu_fpregs_owner_ctx, NULL);
-}
-
-static inline void __fpu_invalidate_fpregs_state(struct fpu *fpu)
-{
- fpu->last_cpu = -1;
-}
-
-static inline int fpregs_state_valid(struct fpu *fpu, unsigned int cpu)
-{
- return fpu == this_cpu_read(fpu_fpregs_owner_ctx) && cpu == fpu->last_cpu;
-}
-
-/*
- * These generally need preemption protection to work,
- * do try to avoid using these on their own:
- */
-static inline void fpregs_deactivate(struct fpu *fpu)
-{
- this_cpu_write(fpu_fpregs_owner_ctx, NULL);
- trace_x86_fpu_regs_deactivated(fpu);
-}
-
-static inline void fpregs_activate(struct fpu *fpu)
-{
- this_cpu_write(fpu_fpregs_owner_ctx, fpu);
- trace_x86_fpu_regs_activated(fpu);
-}
-
-/* Internal helper for switch_fpu_return() and signal frame setup */
-static inline void fpregs_restore_userregs(void)
-{
- struct fpu *fpu = &current->thread.fpu;
- int cpu = smp_processor_id();
-
- if (WARN_ON_ONCE(current->flags & PF_KTHREAD))
- return;
-
- if (!fpregs_state_valid(fpu, cpu)) {
- u64 mask;
-
- /*
- * This restores _all_ xstate which has not been
- * established yet.
- *
- * If PKRU is enabled, then the PKRU value is already
- * correct because it was either set in switch_to() or in
- * flush_thread(). So it is excluded because it might be
- * not up to date in current->thread.fpu.xsave state.
- */
- mask = xfeatures_mask_restore_user() |
- xfeatures_mask_supervisor();
- __restore_fpregs_from_fpstate(&fpu->state, mask);
-
- fpregs_activate(fpu);
- fpu->last_cpu = cpu;
- }
- clear_thread_flag(TIF_NEED_FPU_LOAD);
-}
-
-/*
- * FPU state switching for scheduling.
- *
- * This is a two-stage process:
- *
- * - switch_fpu_prepare() saves the old state.
- * This is done within the context of the old process.
- *
- * - switch_fpu_finish() sets TIF_NEED_FPU_LOAD; the floating point state
- * will get loaded on return to userspace, or when the kernel needs it.
- *
- * If TIF_NEED_FPU_LOAD is cleared then the CPU's FPU registers
- * are saved in the current thread's FPU register state.
- *
- * If TIF_NEED_FPU_LOAD is set then CPU's FPU registers may not
- * hold current()'s FPU registers. It is required to load the
- * registers before returning to userland or using the content
- * otherwise.
- *
- * The FPU context is only stored/restored for a user task and
- * PF_KTHREAD is used to distinguish between kernel and user threads.
- */
-static inline void switch_fpu_prepare(struct fpu *old_fpu, int cpu)
-{
- if (static_cpu_has(X86_FEATURE_FPU) && !(current->flags & PF_KTHREAD)) {
- save_fpregs_to_fpstate(old_fpu);
- /*
- * The save operation preserved register state, so the
- * fpu_fpregs_owner_ctx is still @old_fpu. Store the
- * current CPU number in @old_fpu, so the next return
- * to user space can avoid the FPU register restore
- * when is returns on the same CPU and still owns the
- * context.
- */
- old_fpu->last_cpu = cpu;
-
- trace_x86_fpu_regs_deactivated(old_fpu);
- }
-}
-
-/*
- * Misc helper functions:
- */
-
-/*
- * Delay loading of the complete FPU state until the return to userland.
- * PKRU is handled separately.
- */
-static inline void switch_fpu_finish(struct fpu *new_fpu)
-{
- if (cpu_feature_enabled(X86_FEATURE_FPU))
- set_thread_flag(TIF_NEED_FPU_LOAD);
-}
-
-#endif /* _ASM_X86_FPU_INTERNAL_H */
diff --git a/arch/x86/include/asm/fpu/sched.h b/arch/x86/include/asm/fpu/sched.h
new file mode 100644
index 000000000000..99a8820e8cc4
--- /dev/null
+++ b/arch/x86/include/asm/fpu/sched.h
@@ -0,0 +1,68 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_FPU_SCHED_H
+#define _ASM_X86_FPU_SCHED_H
+
+#include <linux/sched.h>
+
+#include <asm/cpufeature.h>
+#include <asm/fpu/types.h>
+
+#include <asm/trace/fpu.h>
+
+extern void save_fpregs_to_fpstate(struct fpu *fpu);
+extern void fpu__drop(struct fpu *fpu);
+extern int fpu_clone(struct task_struct *dst, unsigned long clone_flags);
+extern void fpu_flush_thread(void);
+
+/*
+ * FPU state switching for scheduling.
+ *
+ * This is a two-stage process:
+ *
+ * - switch_fpu_prepare() saves the old state.
+ * This is done within the context of the old process.
+ *
+ * - switch_fpu_finish() sets TIF_NEED_FPU_LOAD; the floating point state
+ * will get loaded on return to userspace, or when the kernel needs it.
+ *
+ * If TIF_NEED_FPU_LOAD is cleared then the CPU's FPU registers
+ * are saved in the current thread's FPU register state.
+ *
+ * If TIF_NEED_FPU_LOAD is set then CPU's FPU registers may not
+ * hold current()'s FPU registers. It is required to load the
+ * registers before returning to userland or using the content
+ * otherwise.
+ *
+ * The FPU context is only stored/restored for a user task and
+ * PF_KTHREAD is used to distinguish between kernel and user threads.
+ */
+static inline void switch_fpu_prepare(struct fpu *old_fpu, int cpu)
+{
+ if (cpu_feature_enabled(X86_FEATURE_FPU) &&
+ !(current->flags & PF_KTHREAD)) {
+ save_fpregs_to_fpstate(old_fpu);
+ /*
+ * The save operation preserved register state, so the
+ * fpu_fpregs_owner_ctx is still @old_fpu. Store the
+ * current CPU number in @old_fpu, so the next return
+ * to user space can avoid the FPU register restore
+ * when is returns on the same CPU and still owns the
+ * context.
+ */
+ old_fpu->last_cpu = cpu;
+
+ trace_x86_fpu_regs_deactivated(old_fpu);
+ }
+}
+
+/*
+ * Delay loading of the complete FPU state until the return to userland.
+ * PKRU is handled separately.
+ */
+static inline void switch_fpu_finish(void)
+{
+ if (cpu_feature_enabled(X86_FEATURE_FPU))
+ set_thread_flag(TIF_NEED_FPU_LOAD);
+}
+
+#endif /* _ASM_X86_FPU_SCHED_H */
diff --git a/arch/x86/include/asm/fpu/signal.h b/arch/x86/include/asm/fpu/signal.h
index 8b6631dffefd..22b0273a8bf1 100644
--- a/arch/x86/include/asm/fpu/signal.h
+++ b/arch/x86/include/asm/fpu/signal.h
@@ -5,6 +5,11 @@
#ifndef _ASM_X86_FPU_SIGNAL_H
#define _ASM_X86_FPU_SIGNAL_H
+#include <linux/compat.h>
+#include <linux/user.h>
+
+#include <asm/fpu/types.h>
+
#ifdef CONFIG_X86_64
# include <uapi/asm/sigcontext.h>
# include <asm/user32.h>
@@ -31,6 +36,12 @@ fpu__alloc_mathframe(unsigned long sp, int ia32_frame,
unsigned long fpu__get_fpstate_size(void);
-extern void fpu__init_prepare_fx_sw_frame(void);
+extern bool copy_fpstate_to_sigframe(void __user *buf, void __user *fp, int size);
+extern void fpu__clear_user_states(struct fpu *fpu);
+extern bool fpu__restore_sig(void __user *buf, int ia32_frame);
+
+extern void restore_fpregs_from_fpstate(struct fpstate *fpstate, u64 mask);
+
+extern bool copy_fpstate_to_sigframe(void __user *buf, void __user *fp, int size);
#endif /* _ASM_X86_FPU_SIGNAL_H */
diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h
index f5a38a5f3ae1..3c06c82ab355 100644
--- a/arch/x86/include/asm/fpu/types.h
+++ b/arch/x86/include/asm/fpu/types.h
@@ -120,6 +120,9 @@ enum xfeature {
XFEATURE_RSRVD_COMP_13,
XFEATURE_RSRVD_COMP_14,
XFEATURE_LBR,
+ XFEATURE_RSRVD_COMP_16,
+ XFEATURE_XTILE_CFG,
+ XFEATURE_XTILE_DATA,
XFEATURE_MAX,
};
@@ -136,12 +139,21 @@ enum xfeature {
#define XFEATURE_MASK_PKRU (1 << XFEATURE_PKRU)
#define XFEATURE_MASK_PASID (1 << XFEATURE_PASID)
#define XFEATURE_MASK_LBR (1 << XFEATURE_LBR)
+#define XFEATURE_MASK_XTILE_CFG (1 << XFEATURE_XTILE_CFG)
+#define XFEATURE_MASK_XTILE_DATA (1 << XFEATURE_XTILE_DATA)
#define XFEATURE_MASK_FPSSE (XFEATURE_MASK_FP | XFEATURE_MASK_SSE)
#define XFEATURE_MASK_AVX512 (XFEATURE_MASK_OPMASK \
| XFEATURE_MASK_ZMM_Hi256 \
| XFEATURE_MASK_Hi16_ZMM)
+#ifdef CONFIG_X86_64
+# define XFEATURE_MASK_XTILE (XFEATURE_MASK_XTILE_DATA \
+ | XFEATURE_MASK_XTILE_CFG)
+#else
+# define XFEATURE_MASK_XTILE (0)
+#endif
+
#define FIRST_EXTENDED_XFEATURE XFEATURE_YMM
struct reg_128_bit {
@@ -153,6 +165,9 @@ struct reg_256_bit {
struct reg_512_bit {
u8 regbytes[512/8];
};
+struct reg_1024_byte {
+ u8 regbytes[1024];
+};
/*
* State component 2:
@@ -255,6 +270,23 @@ struct arch_lbr_state {
u64 ler_to;
u64 ler_info;
struct lbr_entry entries[];
+};
+
+/*
+ * State component 17: 64-byte tile configuration register.
+ */
+struct xtile_cfg {
+ u64 tcfg[8];
+} __packed;
+
+/*
+ * State component 18: 1KB tile data register.
+ * Each register represents 16 64-byte rows of the matrix
+ * data. But the number of registers depends on the actual
+ * implementation.
+ */
+struct xtile_data {
+ struct reg_1024_byte tmm;
} __packed;
/*
@@ -309,6 +341,91 @@ union fpregs_state {
u8 __padding[PAGE_SIZE];
};
+struct fpstate {
+ /* @kernel_size: The size of the kernel register image */
+ unsigned int size;
+
+ /* @user_size: The size in non-compacted UABI format */
+ unsigned int user_size;
+
+ /* @xfeatures: xfeatures for which the storage is sized */
+ u64 xfeatures;
+
+ /* @user_xfeatures: xfeatures valid in UABI buffers */
+ u64 user_xfeatures;
+
+ /* @xfd: xfeatures disabled to trap userspace use. */
+ u64 xfd;
+
+ /* @is_valloc: Indicator for dynamically allocated state */
+ unsigned int is_valloc : 1;
+
+ /* @is_guest: Indicator for guest state (KVM) */
+ unsigned int is_guest : 1;
+
+ /*
+ * @is_confidential: Indicator for KVM confidential mode.
+ * The FPU registers are restored by the
+ * vmentry firmware from encrypted guest
+ * memory. On vmexit the FPU registers are
+ * saved by firmware to encrypted guest memory
+ * and the registers are scrubbed before
+ * returning to the host. So there is no
+ * content which is worth saving and restoring.
+ * The fpstate has to be there so that
+ * preemption and softirq FPU usage works
+ * without special casing.
+ */
+ unsigned int is_confidential : 1;
+
+ /* @in_use: State is in use */
+ unsigned int in_use : 1;
+
+ /* @regs: The register state union for all supported formats */
+ union fpregs_state regs;
+
+ /* @regs is dynamically sized! Don't add anything after @regs! */
+} __aligned(64);
+
+struct fpu_state_perm {
+ /*
+ * @__state_perm:
+ *
+ * This bitmap indicates the permission for state components, which
+ * are available to a thread group. The permission prctl() sets the
+ * enabled state bits in thread_group_leader()->thread.fpu.
+ *
+ * All run time operations use the per thread information in the
+ * currently active fpu.fpstate which contains the xfeature masks
+ * and sizes for kernel and user space.
+ *
+ * This master permission field is only to be used when
+ * task.fpu.fpstate based checks fail to validate whether the task
+ * is allowed to expand it's xfeatures set which requires to
+ * allocate a larger sized fpstate buffer.
+ *
+ * Do not access this field directly. Use the provided helper
+ * function. Unlocked access is possible for quick checks.
+ */
+ u64 __state_perm;
+
+ /*
+ * @__state_size:
+ *
+ * The size required for @__state_perm. Only valid to access
+ * with sighand locked.
+ */
+ unsigned int __state_size;
+
+ /*
+ * @__user_state_size:
+ *
+ * The size required for @__state_perm user part. Only valid to
+ * access with sighand locked.
+ */
+ unsigned int __user_state_size;
+};
+
/*
* Highest level per task FPU state data structure that
* contains the FPU register state plus various FPU
@@ -337,19 +454,100 @@ struct fpu {
unsigned long avx512_timestamp;
/*
- * @state:
+ * @fpstate:
+ *
+ * Pointer to the active struct fpstate. Initialized to
+ * point at @__fpstate below.
+ */
+ struct fpstate *fpstate;
+
+ /*
+ * @__task_fpstate:
+ *
+ * Pointer to an inactive struct fpstate. Initialized to NULL. Is
+ * used only for KVM support to swap out the regular task fpstate.
+ */
+ struct fpstate *__task_fpstate;
+
+ /*
+ * @perm:
+ *
+ * Permission related information
+ */
+ struct fpu_state_perm perm;
+
+ /*
+ * @__fpstate:
*
- * In-memory copy of all FPU registers that we save/restore
- * over context switches. If the task is using the FPU then
- * the registers in the FPU are more recent than this state
- * copy. If the task context-switches away then they get
- * saved here and represent the FPU state.
+ * Initial in-memory storage for FPU registers which are saved in
+ * context switch and when the kernel uses the FPU. The registers
+ * are restored from this storage on return to user space if they
+ * are not longer containing the tasks FPU register state.
*/
- union fpregs_state state;
+ struct fpstate __fpstate;
/*
- * WARNING: 'state' is dynamically-sized. Do not put
+ * WARNING: '__fpstate' is dynamically-sized. Do not put
* anything after it here.
*/
};
+/*
+ * Guest pseudo FPU container
+ */
+struct fpu_guest {
+ /*
+ * @fpstate: Pointer to the allocated guest fpstate
+ */
+ struct fpstate *fpstate;
+};
+
+/*
+ * FPU state configuration data. Initialized at boot time. Read only after init.
+ */
+struct fpu_state_config {
+ /*
+ * @max_size:
+ *
+ * The maximum size of the register state buffer. Includes all
+ * supported features except independent managed features.
+ */
+ unsigned int max_size;
+
+ /*
+ * @default_size:
+ *
+ * The default size of the register state buffer. Includes all
+ * supported features except independent managed features and
+ * features which have to be requested by user space before usage.
+ */
+ unsigned int default_size;
+
+ /*
+ * @max_features:
+ *
+ * The maximum supported features bitmap. Does not include
+ * independent managed features.
+ */
+ u64 max_features;
+
+ /*
+ * @default_features:
+ *
+ * The default supported features bitmap. Does not include
+ * independent managed features and features which have to
+ * be requested by user space before usage.
+ */
+ u64 default_features;
+ /*
+ * @legacy_features:
+ *
+ * Features which can be reported back to user space
+ * even without XSAVE support, i.e. legacy features FP + SSE
+ */
+ u64 legacy_features;
+};
+
+/* FPU state configuration information */
+extern struct fpu_state_config fpu_kernel_cfg, fpu_user_cfg;
+
#endif /* _ASM_X86_FPU_H */
diff --git a/arch/x86/include/asm/fpu/xcr.h b/arch/x86/include/asm/fpu/xcr.h
index 1c7ab8d95da5..79f95d3787e2 100644
--- a/arch/x86/include/asm/fpu/xcr.h
+++ b/arch/x86/include/asm/fpu/xcr.h
@@ -2,17 +2,6 @@
#ifndef _ASM_X86_FPU_XCR_H
#define _ASM_X86_FPU_XCR_H
-/*
- * MXCSR and XCR definitions:
- */
-
-static inline void ldmxcsr(u32 mxcsr)
-{
- asm volatile("ldmxcsr %0" :: "m" (mxcsr));
-}
-
-extern unsigned int mxcsr_feature_mask;
-
#define XCR_XFEATURE_ENABLED_MASK 0x00000000
static inline u64 xgetbv(u32 index)
diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h
index 109dfcc75299..0f8b90ab18c9 100644
--- a/arch/x86/include/asm/fpu/xstate.h
+++ b/arch/x86/include/asm/fpu/xstate.h
@@ -14,6 +14,8 @@
#define XSTATE_CPUID 0x0000000d
+#define TILE_CPUID 0x0000001d
+
#define FXSAVE_SIZE 512
#define XSAVE_HDR_SIZE 64
@@ -33,7 +35,8 @@
XFEATURE_MASK_Hi16_ZMM | \
XFEATURE_MASK_PKRU | \
XFEATURE_MASK_BNDREGS | \
- XFEATURE_MASK_BNDCSR)
+ XFEATURE_MASK_BNDCSR | \
+ XFEATURE_MASK_XTILE)
/*
* Features which are restored when returning to user space.
@@ -43,6 +46,9 @@
#define XFEATURE_MASK_USER_RESTORE \
(XFEATURE_MASK_USER_SUPPORTED & ~XFEATURE_MASK_PKRU)
+/* Features which are dynamically enabled for a process on request */
+#define XFEATURE_MASK_USER_DYNAMIC XFEATURE_MASK_XTILE_DATA
+
/* All currently supported supervisor features */
#define XFEATURE_MASK_SUPERVISOR_SUPPORTED (XFEATURE_MASK_PASID)
@@ -78,78 +84,42 @@
XFEATURE_MASK_INDEPENDENT | \
XFEATURE_MASK_SUPERVISOR_UNSUPPORTED)
-#ifdef CONFIG_X86_64
-#define REX_PREFIX "0x48, "
-#else
-#define REX_PREFIX
-#endif
-
-extern u64 xfeatures_mask_all;
-
-static inline u64 xfeatures_mask_supervisor(void)
-{
- return xfeatures_mask_all & XFEATURE_MASK_SUPERVISOR_SUPPORTED;
-}
-
/*
- * The xfeatures which are enabled in XCR0 and expected to be in ptrace
- * buffers and signal frames.
+ * The feature mask required to restore FPU state:
+ * - All user states which are not eagerly switched in switch_to()/exec()
+ * - The suporvisor states
*/
-static inline u64 xfeatures_mask_uabi(void)
-{
- return xfeatures_mask_all & XFEATURE_MASK_USER_SUPPORTED;
-}
-
-/*
- * The xfeatures which are restored by the kernel when returning to user
- * mode. This is not necessarily the same as xfeatures_mask_uabi() as the
- * kernel does not manage all XCR0 enabled features via xsave/xrstor as
- * some of them have to be switched eagerly on context switch and exec().
- */
-static inline u64 xfeatures_mask_restore_user(void)
-{
- return xfeatures_mask_all & XFEATURE_MASK_USER_RESTORE;
-}
-
-/*
- * Like xfeatures_mask_restore_user() but additionally restors the
- * supported supervisor states.
- */
-static inline u64 xfeatures_mask_fpstate(void)
-{
- return xfeatures_mask_all & \
- (XFEATURE_MASK_USER_RESTORE | XFEATURE_MASK_SUPERVISOR_SUPPORTED);
-}
-
-static inline u64 xfeatures_mask_independent(void)
-{
- if (!boot_cpu_has(X86_FEATURE_ARCH_LBR))
- return XFEATURE_MASK_INDEPENDENT & ~XFEATURE_MASK_LBR;
-
- return XFEATURE_MASK_INDEPENDENT;
-}
+#define XFEATURE_MASK_FPSTATE (XFEATURE_MASK_USER_RESTORE | \
+ XFEATURE_MASK_SUPERVISOR_SUPPORTED)
extern u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS];
extern void __init update_regset_xstate_info(unsigned int size,
u64 xstate_mask);
-void *get_xsave_addr(struct xregs_state *xsave, int xfeature_nr);
int xfeature_size(int xfeature_nr);
-int copy_uabi_from_kernel_to_xstate(struct xregs_state *xsave, const void *kbuf);
-int copy_sigframe_from_user_to_xstate(struct xregs_state *xsave, const void __user *ubuf);
void xsaves(struct xregs_state *xsave, u64 mask);
void xrstors(struct xregs_state *xsave, u64 mask);
-enum xstate_copy_mode {
- XSTATE_COPY_FP,
- XSTATE_COPY_FX,
- XSTATE_COPY_XSAVE,
-};
+int xfd_enable_feature(u64 xfd_err);
+
+#ifdef CONFIG_X86_64
+DECLARE_STATIC_KEY_FALSE(__fpu_state_size_dynamic);
+#endif
+
+#ifdef CONFIG_X86_64
+DECLARE_STATIC_KEY_FALSE(__fpu_state_size_dynamic);
-struct membuf;
-void copy_xstate_to_uabi_buf(struct membuf to, struct task_struct *tsk,
- enum xstate_copy_mode mode);
+static __always_inline __pure bool fpu_state_size_dynamic(void)
+{
+ return static_branch_unlikely(&__fpu_state_size_dynamic);
+}
+#else
+static __always_inline __pure bool fpu_state_size_dynamic(void)
+{
+ return false;
+}
+#endif
#endif
diff --git a/arch/x86/include/asm/ia32.h b/arch/x86/include/asm/ia32.h
index 2c5f7861d373..fada857f0a1e 100644
--- a/arch/x86/include/asm/ia32.h
+++ b/arch/x86/include/asm/ia32.h
@@ -68,6 +68,6 @@ extern void ia32_pick_mmap_layout(struct mm_struct *mm);
#endif
-#endif /* !CONFIG_IA32_SUPPORT */
+#endif /* CONFIG_IA32_EMULATION */
#endif /* _ASM_X86_IA32_H */
diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h
index 841a5d104afa..5c6a4af0b911 100644
--- a/arch/x86/include/asm/io.h
+++ b/arch/x86/include/asm/io.h
@@ -391,6 +391,7 @@ extern void arch_io_free_memtype_wc(resource_size_t start, resource_size_t size)
#define arch_io_reserve_memtype_wc arch_io_reserve_memtype_wc
#endif
+#ifdef CONFIG_AMD_MEM_ENCRYPT
extern bool arch_memremap_can_ram_remap(resource_size_t offset,
unsigned long size,
unsigned long flags);
@@ -398,6 +399,13 @@ extern bool arch_memremap_can_ram_remap(resource_size_t offset,
extern bool phys_mem_access_encrypted(unsigned long phys_addr,
unsigned long size);
+#else
+static inline bool phys_mem_access_encrypted(unsigned long phys_addr,
+ unsigned long size)
+{
+ return true;
+}
+#endif
/**
* iosubmit_cmds512 - copy data to single MMIO location, in 512-bit units
diff --git a/arch/x86/include/asm/irq_stack.h b/arch/x86/include/asm/irq_stack.h
index 562854c60808..7dcc2b0a4f09 100644
--- a/arch/x86/include/asm/irq_stack.h
+++ b/arch/x86/include/asm/irq_stack.h
@@ -58,7 +58,7 @@
* the output constraints to make the compiler aware that R11 cannot be
* reused after the asm() statement.
*
- * For builds with CONFIG_UNWIND_FRAME_POINTER ASM_CALL_CONSTRAINT is
+ * For builds with CONFIG_UNWINDER_FRAME_POINTER, ASM_CALL_CONSTRAINT is
* required as well as this prevents certain creative GCC variants from
* misplacing the ASM code.
*
@@ -185,6 +185,7 @@
IRQ_CONSTRAINTS, regs, vector); \
}
+#ifndef CONFIG_PREEMPT_RT
#define ASM_CALL_SOFTIRQ \
"call %P[__func] \n"
@@ -201,6 +202,8 @@
__this_cpu_write(hardirq_stack_inuse, false); \
}
+#endif
+
#else /* CONFIG_X86_64 */
/* System vector handlers always run on the stack they interrupted. */
#define run_sysvec_on_irqstack_cond(func, regs) \
diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h
index 0a6e34b07017..11b7c06e2828 100644
--- a/arch/x86/include/asm/kexec.h
+++ b/arch/x86/include/asm/kexec.h
@@ -129,7 +129,7 @@ relocate_kernel(unsigned long indirection_page,
unsigned long page_list,
unsigned long start_address,
unsigned int preserve_context,
- unsigned int sme_active);
+ unsigned int host_mem_enc_active);
#endif
#define ARCH_HAS_KIMAGE_ARCH
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index f8f48a7ec577..32f300dade5e 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -691,18 +691,18 @@ struct kvm_vcpu_arch {
*
* Note that while the PKRU state lives inside the fpu registers,
* it is switched out separately at VMENTER and VMEXIT time. The
- * "guest_fpu" state here contains the guest FPU context, with the
+ * "guest_fpstate" state here contains the guest FPU context, with the
* host PRKU bits.
*/
- struct fpu *user_fpu;
- struct fpu *guest_fpu;
+ struct fpu_guest guest_fpu;
u64 xcr0;
u64 guest_supported_xcr0;
struct kvm_pio_request pio;
void *pio_data;
- void *guest_ins_data;
+ void *sev_pio_data;
+ unsigned sev_pio_count;
u8 event_exit_inst_len;
@@ -1097,7 +1097,7 @@ struct kvm_arch {
u64 cur_tsc_generation;
int nr_vcpus_matched_tsc;
- spinlock_t pvclock_gtod_sync_lock;
+ raw_spinlock_t pvclock_gtod_sync_lock;
bool use_master_clock;
u64 master_kernel_ns;
u64 master_cycle_now;
@@ -1685,8 +1685,6 @@ void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector);
int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index,
int reason, bool has_error_code, u32 error_code);
-void kvm_free_guest_fpu(struct kvm_vcpu *vcpu);
-
void kvm_post_set_cr0(struct kvm_vcpu *vcpu, unsigned long old_cr0, unsigned long cr0);
void kvm_post_set_cr4(struct kvm_vcpu *vcpu, unsigned long old_cr4, unsigned long cr4);
int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0);
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index da9321548f6f..813b4f5b0dd6 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -205,28 +205,16 @@ struct cper_ia_proc_ctx;
int mcheck_init(void);
void mcheck_cpu_init(struct cpuinfo_x86 *c);
void mcheck_cpu_clear(struct cpuinfo_x86 *c);
-void mcheck_vendor_init_severity(void);
int apei_smca_report_x86_error(struct cper_ia_proc_ctx *ctx_info,
u64 lapic_id);
#else
static inline int mcheck_init(void) { return 0; }
static inline void mcheck_cpu_init(struct cpuinfo_x86 *c) {}
static inline void mcheck_cpu_clear(struct cpuinfo_x86 *c) {}
-static inline void mcheck_vendor_init_severity(void) {}
static inline int apei_smca_report_x86_error(struct cper_ia_proc_ctx *ctx_info,
u64 lapic_id) { return -EINVAL; }
#endif
-#ifdef CONFIG_X86_ANCIENT_MCE
-void intel_p5_mcheck_init(struct cpuinfo_x86 *c);
-void winchip_mcheck_init(struct cpuinfo_x86 *c);
-static inline void enable_p5_mce(void) { mce_p5_enabled = 1; }
-#else
-static inline void intel_p5_mcheck_init(struct cpuinfo_x86 *c) {}
-static inline void winchip_mcheck_init(struct cpuinfo_x86 *c) {}
-static inline void enable_p5_mce(void) {}
-#endif
-
void mce_setup(struct mce *m);
void mce_log(struct mce *m);
DECLARE_PER_CPU(struct device *, mce_device);
diff --git a/arch/x86/include/asm/mem_encrypt.h b/arch/x86/include/asm/mem_encrypt.h
index 9c80c68d75b5..2d4f5c17d79c 100644
--- a/arch/x86/include/asm/mem_encrypt.h
+++ b/arch/x86/include/asm/mem_encrypt.h
@@ -13,6 +13,7 @@
#ifndef __ASSEMBLY__
#include <linux/init.h>
+#include <linux/cc_platform.h>
#include <asm/bootparam.h>
@@ -50,9 +51,6 @@ void __init mem_encrypt_free_decrypted_mem(void);
void __init mem_encrypt_init(void);
void __init sev_es_init_vc_handling(void);
-bool sme_active(void);
-bool sev_active(void);
-bool sev_es_active(void);
#define __bss_decrypted __section(".bss..decrypted")
@@ -75,9 +73,6 @@ static inline void __init sme_encrypt_kernel(struct boot_params *bp) { }
static inline void __init sme_enable(struct boot_params *bp) { }
static inline void sev_es_init_vc_handling(void) { }
-static inline bool sme_active(void) { return false; }
-static inline bool sev_active(void) { return false; }
-static inline bool sev_es_active(void) { return false; }
static inline int __init
early_set_memory_decrypted(unsigned long vaddr, unsigned long size) { return 0; }
@@ -101,11 +96,6 @@ static inline void mem_encrypt_free_decrypted_mem(void) { }
extern char __start_bss_decrypted[], __end_bss_decrypted[], __start_bss_decrypted_unused[];
-static inline bool mem_encrypt_active(void)
-{
- return sme_me_mask;
-}
-
static inline u64 sme_get_me_mask(void)
{
return sme_me_mask;
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index a7c413432b33..01e2650b9585 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -625,6 +625,8 @@
#define MSR_IA32_BNDCFGS_RSVD 0x00000ffc
+#define MSR_IA32_XFD 0x000001c4
+#define MSR_IA32_XFD_ERR 0x000001c5
#define MSR_IA32_XSS 0x00000da0
#define MSR_IA32_APICBASE 0x0000001b
diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h
index a3f87f1015d3..6b52182e178a 100644
--- a/arch/x86/include/asm/msr.h
+++ b/arch/x86/include/asm/msr.h
@@ -92,7 +92,7 @@ static __always_inline unsigned long long __rdmsr(unsigned int msr)
asm volatile("1: rdmsr\n"
"2:\n"
- _ASM_EXTABLE_HANDLE(1b, 2b, ex_handler_rdmsr_unsafe)
+ _ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_RDMSR)
: EAX_EDX_RET(val, low, high) : "c" (msr));
return EAX_EDX_VAL(val, low, high);
@@ -102,7 +102,7 @@ static __always_inline void __wrmsr(unsigned int msr, u32 low, u32 high)
{
asm volatile("1: wrmsr\n"
"2:\n"
- _ASM_EXTABLE_HANDLE(1b, 2b, ex_handler_wrmsr_unsafe)
+ _ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_WRMSR)
: : "c" (msr), "a"(low), "d" (high) : "memory");
}
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index ec2d5c8c6694..cc74dc584836 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -5,12 +5,15 @@
#include <linux/static_key.h>
#include <linux/objtool.h>
+#include <linux/linkage.h>
#include <asm/alternative.h>
#include <asm/cpufeatures.h>
#include <asm/msr-index.h>
#include <asm/unwind_hints.h>
+#define RETPOLINE_THUNK_SIZE 32
+
/*
* Fill the CPU return stack buffer.
*
@@ -118,6 +121,16 @@
".popsection\n\t"
#ifdef CONFIG_RETPOLINE
+
+typedef u8 retpoline_thunk_t[RETPOLINE_THUNK_SIZE];
+
+#define GEN(reg) \
+ extern retpoline_thunk_t __x86_indirect_thunk_ ## reg;
+#include <asm/GEN-for-each-reg.h>
+#undef GEN
+
+extern retpoline_thunk_t __x86_indirect_thunk_array[];
+
#ifdef CONFIG_X86_64
/*
@@ -303,63 +316,4 @@ static inline void mds_idle_clear_cpu_buffers(void)
#endif /* __ASSEMBLY__ */
-/*
- * Below is used in the eBPF JIT compiler and emits the byte sequence
- * for the following assembly:
- *
- * With retpolines configured:
- *
- * callq do_rop
- * spec_trap:
- * pause
- * lfence
- * jmp spec_trap
- * do_rop:
- * mov %rcx,(%rsp) for x86_64
- * mov %edx,(%esp) for x86_32
- * retq
- *
- * Without retpolines configured:
- *
- * jmp *%rcx for x86_64
- * jmp *%edx for x86_32
- */
-#ifdef CONFIG_RETPOLINE
-# ifdef CONFIG_X86_64
-# define RETPOLINE_RCX_BPF_JIT_SIZE 17
-# define RETPOLINE_RCX_BPF_JIT() \
-do { \
- EMIT1_off32(0xE8, 7); /* callq do_rop */ \
- /* spec_trap: */ \
- EMIT2(0xF3, 0x90); /* pause */ \
- EMIT3(0x0F, 0xAE, 0xE8); /* lfence */ \
- EMIT2(0xEB, 0xF9); /* jmp spec_trap */ \
- /* do_rop: */ \
- EMIT4(0x48, 0x89, 0x0C, 0x24); /* mov %rcx,(%rsp) */ \
- EMIT1(0xC3); /* retq */ \
-} while (0)
-# else /* !CONFIG_X86_64 */
-# define RETPOLINE_EDX_BPF_JIT() \
-do { \
- EMIT1_off32(0xE8, 7); /* call do_rop */ \
- /* spec_trap: */ \
- EMIT2(0xF3, 0x90); /* pause */ \
- EMIT3(0x0F, 0xAE, 0xE8); /* lfence */ \
- EMIT2(0xEB, 0xF9); /* jmp spec_trap */ \
- /* do_rop: */ \
- EMIT3(0x89, 0x14, 0x24); /* mov %edx,(%esp) */ \
- EMIT1(0xC3); /* ret */ \
-} while (0)
-# endif
-#else /* !CONFIG_RETPOLINE */
-# ifdef CONFIG_X86_64
-# define RETPOLINE_RCX_BPF_JIT_SIZE 2
-# define RETPOLINE_RCX_BPF_JIT() \
- EMIT2(0xFF, 0xE1); /* jmp *%rcx */
-# else /* !CONFIG_X86_64 */
-# define RETPOLINE_EDX_BPF_JIT() \
- EMIT2(0xFF, 0xE2) /* jmp *%edx */
-# endif
-#endif
-
#endif /* _ASM_X86_NOSPEC_BRANCH_H_ */
diff --git a/arch/x86/include/asm/page_32.h b/arch/x86/include/asm/page_32.h
index 94dbd51df58f..b13f8488ac85 100644
--- a/arch/x86/include/asm/page_32.h
+++ b/arch/x86/include/asm/page_32.h
@@ -43,7 +43,7 @@ static inline void copy_page(void *to, void *from)
{
memcpy(to, from, PAGE_SIZE);
}
-#endif /* CONFIG_X86_3DNOW */
+#endif /* CONFIG_X86_USE_3DNOW */
#endif /* !__ASSEMBLY__ */
#endif /* _ASM_X86_PAGE_32_H */
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index da3a1ac82be5..cebec95a7124 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -52,11 +52,11 @@ void __init paravirt_set_cap(void);
/* The paravirtualized I/O functions */
static inline void slow_down_io(void)
{
- pv_ops.cpu.io_delay();
+ PVOP_VCALL0(cpu.io_delay);
#ifdef REALLY_SLOW_IO
- pv_ops.cpu.io_delay();
- pv_ops.cpu.io_delay();
- pv_ops.cpu.io_delay();
+ PVOP_VCALL0(cpu.io_delay);
+ PVOP_VCALL0(cpu.io_delay);
+ PVOP_VCALL0(cpu.io_delay);
#endif
}
@@ -113,12 +113,12 @@ static inline void __cpuid(unsigned int *eax, unsigned int *ebx,
/*
* These special macros can be used to get or set a debugging register
*/
-static inline unsigned long paravirt_get_debugreg(int reg)
+static __always_inline unsigned long paravirt_get_debugreg(int reg)
{
return PVOP_CALL1(unsigned long, cpu.get_debugreg, reg);
}
#define get_debugreg(var, reg) var = paravirt_get_debugreg(reg)
-static inline void set_debugreg(unsigned long val, int reg)
+static __always_inline void set_debugreg(unsigned long val, int reg)
{
PVOP_VCALL2(cpu.set_debugreg, reg, val);
}
@@ -133,14 +133,14 @@ static inline void write_cr0(unsigned long x)
PVOP_VCALL1(cpu.write_cr0, x);
}
-static inline unsigned long read_cr2(void)
+static __always_inline unsigned long read_cr2(void)
{
return PVOP_ALT_CALLEE0(unsigned long, mmu.read_cr2,
"mov %%cr2, %%rax;",
ALT_NOT(X86_FEATURE_XENPV));
}
-static inline void write_cr2(unsigned long x)
+static __always_inline void write_cr2(unsigned long x)
{
PVOP_VCALL1(mmu.write_cr2, x);
}
@@ -653,10 +653,10 @@ bool __raw_callee_save___native_vcpu_is_preempted(long cpu);
* functions.
*/
#define PV_THUNK_NAME(func) "__raw_callee_save_" #func
-#define PV_CALLEE_SAVE_REGS_THUNK(func) \
+#define __PV_CALLEE_SAVE_REGS_THUNK(func, section) \
extern typeof(func) __raw_callee_save_##func; \
\
- asm(".pushsection .text;" \
+ asm(".pushsection " section ", \"ax\";" \
".globl " PV_THUNK_NAME(func) ";" \
".type " PV_THUNK_NAME(func) ", @function;" \
PV_THUNK_NAME(func) ":" \
@@ -669,6 +669,9 @@ bool __raw_callee_save___native_vcpu_is_preempted(long cpu);
".size " PV_THUNK_NAME(func) ", .-" PV_THUNK_NAME(func) ";" \
".popsection")
+#define PV_CALLEE_SAVE_REGS_THUNK(func) \
+ __PV_CALLEE_SAVE_REGS_THUNK(func, ".text")
+
/* Get a reference to a callee-save function */
#define PV_CALLEE_SAVE(func) \
((struct paravirt_callee_save) { __raw_callee_save_##func })
@@ -678,23 +681,23 @@ bool __raw_callee_save___native_vcpu_is_preempted(long cpu);
((struct paravirt_callee_save) { func })
#ifdef CONFIG_PARAVIRT_XXL
-static inline notrace unsigned long arch_local_save_flags(void)
+static __always_inline unsigned long arch_local_save_flags(void)
{
return PVOP_ALT_CALLEE0(unsigned long, irq.save_fl, "pushf; pop %%rax;",
ALT_NOT(X86_FEATURE_XENPV));
}
-static inline notrace void arch_local_irq_disable(void)
+static __always_inline void arch_local_irq_disable(void)
{
PVOP_ALT_VCALLEE0(irq.irq_disable, "cli;", ALT_NOT(X86_FEATURE_XENPV));
}
-static inline notrace void arch_local_irq_enable(void)
+static __always_inline void arch_local_irq_enable(void)
{
PVOP_ALT_VCALLEE0(irq.irq_enable, "sti;", ALT_NOT(X86_FEATURE_XENPV));
}
-static inline notrace unsigned long arch_local_irq_save(void)
+static __always_inline unsigned long arch_local_irq_save(void)
{
unsigned long f;
diff --git a/arch/x86/include/asm/pkru.h b/arch/x86/include/asm/pkru.h
index ccc539faa5bb..4cd49afa0ca4 100644
--- a/arch/x86/include/asm/pkru.h
+++ b/arch/x86/include/asm/pkru.h
@@ -2,7 +2,7 @@
#ifndef _ASM_X86_PKRU_H
#define _ASM_X86_PKRU_H
-#include <asm/fpu/xstate.h>
+#include <asm/cpufeature.h>
#define PKRU_AD_BIT 0x1
#define PKRU_WD_BIT 0x2
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 9ad2acaaae9b..887380fc9dfe 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -164,7 +164,8 @@ enum cpuid_regs_idx {
#define X86_VENDOR_NSC 8
#define X86_VENDOR_HYGON 9
#define X86_VENDOR_ZHAOXIN 10
-#define X86_VENDOR_NUM 11
+#define X86_VENDOR_VORTEX 11
+#define X86_VENDOR_NUM 12
#define X86_VENDOR_UNKNOWN 0xff
@@ -461,9 +462,6 @@ DECLARE_PER_CPU(struct irq_stack *, hardirq_stack_ptr);
DECLARE_PER_CPU(struct irq_stack *, softirq_stack_ptr);
#endif /* !X86_64 */
-extern unsigned int fpu_kernel_xstate_size;
-extern unsigned int fpu_user_xstate_size;
-
struct perf_event;
struct thread_struct {
@@ -537,12 +535,12 @@ struct thread_struct {
*/
};
-/* Whitelist the FPU state from the task_struct for hardened usercopy. */
+extern void fpu_thread_struct_whitelist(unsigned long *offset, unsigned long *size);
+
static inline void arch_thread_struct_whitelist(unsigned long *offset,
unsigned long *size)
{
- *offset = offsetof(struct thread_struct, fpu.state);
- *size = fpu_kernel_xstate_size;
+ fpu_thread_struct_whitelist(offset, size);
}
static inline void
@@ -589,7 +587,7 @@ static inline void load_sp0(unsigned long sp0)
/* Free all resources held by a thread. */
extern void release_thread(struct task_struct *);
-unsigned long get_wchan(struct task_struct *p);
+unsigned long __get_wchan(struct task_struct *p);
/*
* Generic CPUID function
diff --git a/arch/x86/include/asm/proto.h b/arch/x86/include/asm/proto.h
index 8c5d1910a848..feed36d44d04 100644
--- a/arch/x86/include/asm/proto.h
+++ b/arch/x86/include/asm/proto.h
@@ -40,6 +40,6 @@ void x86_report_nx(void);
extern int reboot_force;
long do_arch_prctl_common(struct task_struct *task, int option,
- unsigned long cpuid_enabled);
+ unsigned long arg2);
#endif /* _ASM_X86_PROTO_H */
diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h
index b94f615600d5..703663175a5a 100644
--- a/arch/x86/include/asm/ptrace.h
+++ b/arch/x86/include/asm/ptrace.h
@@ -181,7 +181,7 @@ static inline bool any_64bit_mode(struct pt_regs *regs)
#define current_user_stack_pointer() current_pt_regs()->sp
#define compat_user_stack_pointer() current_pt_regs()->sp
-static inline bool ip_within_syscall_gap(struct pt_regs *regs)
+static __always_inline bool ip_within_syscall_gap(struct pt_regs *regs)
{
bool ret = (regs->ip >= (unsigned long)entry_SYSCALL_64 &&
regs->ip < (unsigned long)entry_SYSCALL_64_safe_stack);
diff --git a/arch/x86/include/asm/segment.h b/arch/x86/include/asm/segment.h
index 72044026eb3c..8dd8e8ec9fa5 100644
--- a/arch/x86/include/asm/segment.h
+++ b/arch/x86/include/asm/segment.h
@@ -339,7 +339,7 @@ static inline void __loadsegment_fs(unsigned short value)
"1: movw %0, %%fs \n"
"2: \n"
- _ASM_EXTABLE_HANDLE(1b, 2b, ex_handler_clear_fs)
+ _ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_CLEAR_FS)
: : "rm" (value) : "memory");
}
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index 630ff08532be..08b0e90623ad 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -16,7 +16,9 @@ DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_core_map);
DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_die_map);
/* cpus sharing the last level cache: */
DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_llc_shared_map);
+DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_l2c_shared_map);
DECLARE_PER_CPU_READ_MOSTLY(u16, cpu_llc_id);
+DECLARE_PER_CPU_READ_MOSTLY(u16, cpu_l2c_id);
DECLARE_PER_CPU_READ_MOSTLY(int, cpu_number);
static inline struct cpumask *cpu_llc_shared_mask(int cpu)
@@ -24,6 +26,11 @@ static inline struct cpumask *cpu_llc_shared_mask(int cpu)
return per_cpu(cpu_llc_shared_map, cpu);
}
+static inline struct cpumask *cpu_l2c_shared_mask(int cpu)
+{
+ return per_cpu(cpu_l2c_shared_map, cpu);
+}
+
DECLARE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_cpu_to_apicid);
DECLARE_EARLY_PER_CPU_READ_MOSTLY(u32, x86_cpu_to_acpiid);
DECLARE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_bios_cpu_apicid);
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index 9239399e5491..cc164777e661 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -103,6 +103,7 @@ static inline void setup_node_to_cpumask_map(void) { }
#include <asm-generic/topology.h>
extern const struct cpumask *cpu_coregroup_mask(int cpu);
+extern const struct cpumask *cpu_clustergroup_mask(int cpu);
#define topology_logical_package_id(cpu) (cpu_data(cpu).logical_proc_id)
#define topology_physical_package_id(cpu) (cpu_data(cpu).phys_proc_id)
@@ -113,7 +114,9 @@ extern const struct cpumask *cpu_coregroup_mask(int cpu);
extern unsigned int __max_die_per_package;
#ifdef CONFIG_SMP
+#define topology_cluster_id(cpu) (per_cpu(cpu_l2c_id, cpu))
#define topology_die_cpumask(cpu) (per_cpu(cpu_die_map, cpu))
+#define topology_cluster_cpumask(cpu) (cpu_clustergroup_mask(cpu))
#define topology_core_cpumask(cpu) (per_cpu(cpu_core_map, cpu))
#define topology_sibling_cpumask(cpu) (per_cpu(cpu_sibling_map, cpu))
diff --git a/arch/x86/include/asm/trace/fpu.h b/arch/x86/include/asm/trace/fpu.h
index 879b77792f94..4645a6334063 100644
--- a/arch/x86/include/asm/trace/fpu.h
+++ b/arch/x86/include/asm/trace/fpu.h
@@ -22,8 +22,8 @@ DECLARE_EVENT_CLASS(x86_fpu,
__entry->fpu = fpu;
__entry->load_fpu = test_thread_flag(TIF_NEED_FPU_LOAD);
if (boot_cpu_has(X86_FEATURE_OSXSAVE)) {
- __entry->xfeatures = fpu->state.xsave.header.xfeatures;
- __entry->xcomp_bv = fpu->state.xsave.header.xcomp_bv;
+ __entry->xfeatures = fpu->fpstate->regs.xsave.header.xfeatures;
+ __entry->xcomp_bv = fpu->fpstate->regs.xsave.header.xcomp_bv;
}
),
TP_printk("x86/fpu: %p load: %d xfeatures: %llx xcomp_bv: %llx",
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index 5c95d242f38d..33a68407def3 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -411,7 +411,7 @@ do { \
: [umem] "m" (__m(addr)), \
[efault] "i" (-EFAULT), "0" (err))
-#endif // CONFIG_CC_ASM_GOTO_OUTPUT
+#endif // CONFIG_CC_HAS_ASM_GOTO_OUTPUT
/* FIXME: this hack is definitely wrong -AK */
struct __large_struct { unsigned long buf[100]; };
diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h
index 454b20815f35..4a7ff8b0db20 100644
--- a/arch/x86/include/asm/xen/hypercall.h
+++ b/arch/x86/include/asm/xen/hypercall.h
@@ -308,13 +308,13 @@ HYPERVISOR_platform_op(struct xen_platform_op *op)
return _hypercall1(int, platform_op, op);
}
-static inline int
+static __always_inline int
HYPERVISOR_set_debugreg(int reg, unsigned long value)
{
return _hypercall2(int, set_debugreg, reg, value);
}
-static inline unsigned long
+static __always_inline unsigned long
HYPERVISOR_get_debugreg(int reg)
{
return _hypercall1(unsigned long, get_debugreg, reg);
@@ -358,7 +358,7 @@ HYPERVISOR_event_channel_op(int cmd, void *arg)
return _hypercall2(int, event_channel_op, cmd, arg);
}
-static inline int
+static __always_inline int
HYPERVISOR_xen_version(int cmd, void *arg)
{
return _hypercall2(int, xen_version, cmd, arg);
diff --git a/arch/x86/include/uapi/asm/prctl.h b/arch/x86/include/uapi/asm/prctl.h
index 5a6aac9fa41f..754a07856817 100644
--- a/arch/x86/include/uapi/asm/prctl.h
+++ b/arch/x86/include/uapi/asm/prctl.h
@@ -10,6 +10,10 @@
#define ARCH_GET_CPUID 0x1011
#define ARCH_SET_CPUID 0x1012
+#define ARCH_GET_XCOMP_SUPP 0x1021
+#define ARCH_GET_XCOMP_PERM 0x1022
+#define ARCH_REQ_XCOMP_PERM 0x1023
+
#define ARCH_MAP_VDSO_X32 0x2001
#define ARCH_MAP_VDSO_32 0x2002
#define ARCH_MAP_VDSO_64 0x2003
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 8f4e8fa6ed75..2ff3e600f426 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -21,6 +21,7 @@ CFLAGS_REMOVE_ftrace.o = -pg
CFLAGS_REMOVE_early_printk.o = -pg
CFLAGS_REMOVE_head64.o = -pg
CFLAGS_REMOVE_sev.o = -pg
+CFLAGS_REMOVE_cc_platform.o = -pg
endif
KASAN_SANITIZE_head$(BITS).o := n
@@ -29,6 +30,7 @@ KASAN_SANITIZE_dumpstack_$(BITS).o := n
KASAN_SANITIZE_stacktrace.o := n
KASAN_SANITIZE_paravirt.o := n
KASAN_SANITIZE_sev.o := n
+KASAN_SANITIZE_cc_platform.o := n
# With some compiler versions the generated code results in boot hangs, caused
# by several compilation units. To be safe, disable all instrumentation.
@@ -47,6 +49,7 @@ endif
KCOV_INSTRUMENT := n
CFLAGS_head$(BITS).o += -fno-stack-protector
+CFLAGS_cc_platform.o += -fno-stack-protector
CFLAGS_irq.o := -I $(srctree)/$(src)/../include/asm/trace
@@ -147,6 +150,9 @@ obj-$(CONFIG_UNWINDER_FRAME_POINTER) += unwind_frame.o
obj-$(CONFIG_UNWINDER_GUESS) += unwind_guess.o
obj-$(CONFIG_AMD_MEM_ENCRYPT) += sev.o
+
+obj-$(CONFIG_ARCH_HAS_CC_PLATFORM) += cc_platform.o
+
###
# 64 bit specific files
ifeq ($(CONFIG_X86_64),y)
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index e9da3dc71254..23fb4d51a5da 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -29,6 +29,7 @@
#include <asm/io.h>
#include <asm/fixmap.h>
#include <asm/paravirt.h>
+#include <asm/asm-prototypes.h>
int __read_mostly alternatives_patched;
@@ -113,6 +114,7 @@ static void __init_or_module add_nops(void *insns, unsigned int len)
}
}
+extern s32 __retpoline_sites[], __retpoline_sites_end[];
extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
extern s32 __smp_locks[], __smp_locks_end[];
void text_poke_early(void *addr, const void *opcode, size_t len);
@@ -221,7 +223,7 @@ static __always_inline int optimize_nops_range(u8 *instr, u8 instrlen, int off)
* "noinline" to cause control flow change and thus invalidate I$ and
* cause refetch after modification.
*/
-static void __init_or_module noinline optimize_nops(struct alt_instr *a, u8 *instr)
+static void __init_or_module noinline optimize_nops(u8 *instr, size_t len)
{
struct insn insn;
int i = 0;
@@ -239,11 +241,11 @@ static void __init_or_module noinline optimize_nops(struct alt_instr *a, u8 *ins
* optimized.
*/
if (insn.length == 1 && insn.opcode.bytes[0] == 0x90)
- i += optimize_nops_range(instr, a->instrlen, i);
+ i += optimize_nops_range(instr, len, i);
else
i += insn.length;
- if (i >= a->instrlen)
+ if (i >= len)
return;
}
}
@@ -331,10 +333,185 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start,
text_poke_early(instr, insn_buff, insn_buff_sz);
next:
- optimize_nops(a, instr);
+ optimize_nops(instr, a->instrlen);
}
}
+#if defined(CONFIG_RETPOLINE) && defined(CONFIG_STACK_VALIDATION)
+
+/*
+ * CALL/JMP *%\reg
+ */
+static int emit_indirect(int op, int reg, u8 *bytes)
+{
+ int i = 0;
+ u8 modrm;
+
+ switch (op) {
+ case CALL_INSN_OPCODE:
+ modrm = 0x10; /* Reg = 2; CALL r/m */
+ break;
+
+ case JMP32_INSN_OPCODE:
+ modrm = 0x20; /* Reg = 4; JMP r/m */
+ break;
+
+ default:
+ WARN_ON_ONCE(1);
+ return -1;
+ }
+
+ if (reg >= 8) {
+ bytes[i++] = 0x41; /* REX.B prefix */
+ reg -= 8;
+ }
+
+ modrm |= 0xc0; /* Mod = 3 */
+ modrm += reg;
+
+ bytes[i++] = 0xff; /* opcode */
+ bytes[i++] = modrm;
+
+ return i;
+}
+
+/*
+ * Rewrite the compiler generated retpoline thunk calls.
+ *
+ * For spectre_v2=off (!X86_FEATURE_RETPOLINE), rewrite them into immediate
+ * indirect instructions, avoiding the extra indirection.
+ *
+ * For example, convert:
+ *
+ * CALL __x86_indirect_thunk_\reg
+ *
+ * into:
+ *
+ * CALL *%\reg
+ *
+ * It also tries to inline spectre_v2=retpoline,amd when size permits.
+ */
+static int patch_retpoline(void *addr, struct insn *insn, u8 *bytes)
+{
+ retpoline_thunk_t *target;
+ int reg, ret, i = 0;
+ u8 op, cc;
+
+ target = addr + insn->length + insn->immediate.value;
+ reg = target - __x86_indirect_thunk_array;
+
+ if (WARN_ON_ONCE(reg & ~0xf))
+ return -1;
+
+ /* If anyone ever does: CALL/JMP *%rsp, we're in deep trouble. */
+ BUG_ON(reg == 4);
+
+ if (cpu_feature_enabled(X86_FEATURE_RETPOLINE) &&
+ !cpu_feature_enabled(X86_FEATURE_RETPOLINE_AMD))
+ return -1;
+
+ op = insn->opcode.bytes[0];
+
+ /*
+ * Convert:
+ *
+ * Jcc.d32 __x86_indirect_thunk_\reg
+ *
+ * into:
+ *
+ * Jncc.d8 1f
+ * [ LFENCE ]
+ * JMP *%\reg
+ * [ NOP ]
+ * 1:
+ */
+ /* Jcc.d32 second opcode byte is in the range: 0x80-0x8f */
+ if (op == 0x0f && (insn->opcode.bytes[1] & 0xf0) == 0x80) {
+ cc = insn->opcode.bytes[1] & 0xf;
+ cc ^= 1; /* invert condition */
+
+ bytes[i++] = 0x70 + cc; /* Jcc.d8 */
+ bytes[i++] = insn->length - 2; /* sizeof(Jcc.d8) == 2 */
+
+ /* Continue as if: JMP.d32 __x86_indirect_thunk_\reg */
+ op = JMP32_INSN_OPCODE;
+ }
+
+ /*
+ * For RETPOLINE_AMD: prepend the indirect CALL/JMP with an LFENCE.
+ */
+ if (cpu_feature_enabled(X86_FEATURE_RETPOLINE_AMD)) {
+ bytes[i++] = 0x0f;
+ bytes[i++] = 0xae;
+ bytes[i++] = 0xe8; /* LFENCE */
+ }
+
+ ret = emit_indirect(op, reg, bytes + i);
+ if (ret < 0)
+ return ret;
+ i += ret;
+
+ for (; i < insn->length;)
+ bytes[i++] = BYTES_NOP1;
+
+ return i;
+}
+
+/*
+ * Generated by 'objtool --retpoline'.
+ */
+void __init_or_module noinline apply_retpolines(s32 *start, s32 *end)
+{
+ s32 *s;
+
+ for (s = start; s < end; s++) {
+ void *addr = (void *)s + *s;
+ struct insn insn;
+ int len, ret;
+ u8 bytes[16];
+ u8 op1, op2;
+
+ ret = insn_decode_kernel(&insn, addr);
+ if (WARN_ON_ONCE(ret < 0))
+ continue;
+
+ op1 = insn.opcode.bytes[0];
+ op2 = insn.opcode.bytes[1];
+
+ switch (op1) {
+ case CALL_INSN_OPCODE:
+ case JMP32_INSN_OPCODE:
+ break;
+
+ case 0x0f: /* escape */
+ if (op2 >= 0x80 && op2 <= 0x8f)
+ break;
+ fallthrough;
+ default:
+ WARN_ON_ONCE(1);
+ continue;
+ }
+
+ DPRINTK("retpoline at: %pS (%px) len: %d to: %pS",
+ addr, addr, insn.length,
+ addr + insn.length + insn.immediate.value);
+
+ len = patch_retpoline(addr, &insn, bytes);
+ if (len == insn.length) {
+ optimize_nops(bytes, len);
+ DUMP_BYTES(((u8*)addr), len, "%px: orig: ", addr);
+ DUMP_BYTES(((u8*)bytes), len, "%px: repl: ", addr);
+ text_poke_early(addr, bytes, len);
+ }
+ }
+}
+
+#else /* !RETPOLINES || !CONFIG_STACK_VALIDATION */
+
+void __init_or_module noinline apply_retpolines(s32 *start, s32 *end) { }
+
+#endif /* CONFIG_RETPOLINE && CONFIG_STACK_VALIDATION */
+
#ifdef CONFIG_SMP
static void alternatives_smp_lock(const s32 *start, const s32 *end,
u8 *text, u8 *text_end)
@@ -643,6 +820,12 @@ void __init alternative_instructions(void)
apply_paravirt(__parainstructions, __parainstructions_end);
/*
+ * Rewrite the retpolines, must be done before alternatives since
+ * those can rewrite the retpoline thunks.
+ */
+ apply_retpolines(__retpoline_sites, __retpoline_sites_end);
+
+ /*
* Then patch alternatives, such that those paravirt calls that are in
* alternatives can be overwritten by their immediate fragments.
*/
diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c
index f4da9bb69a88..e696e22d0531 100644
--- a/arch/x86/kernel/apic/x2apic_cluster.c
+++ b/arch/x86/kernel/apic/x2apic_cluster.c
@@ -15,9 +15,15 @@ struct cluster_mask {
struct cpumask mask;
};
-static DEFINE_PER_CPU(u32, x86_cpu_to_logical_apicid);
+/*
+ * __x2apic_send_IPI_mask() possibly needs to read
+ * x86_cpu_to_logical_apicid for all online cpus in a sequential way.
+ * Using per cpu variable would cost one cache line per cpu.
+ */
+static u32 *x86_cpu_to_logical_apicid __read_mostly;
+
static DEFINE_PER_CPU(cpumask_var_t, ipi_mask);
-static DEFINE_PER_CPU(struct cluster_mask *, cluster_masks);
+static DEFINE_PER_CPU_READ_MOSTLY(struct cluster_mask *, cluster_masks);
static struct cluster_mask *cluster_hotplug_mask;
static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
@@ -27,7 +33,7 @@ static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
static void x2apic_send_IPI(int cpu, int vector)
{
- u32 dest = per_cpu(x86_cpu_to_logical_apicid, cpu);
+ u32 dest = x86_cpu_to_logical_apicid[cpu];
/* x2apic MSRs are special and need a special fence: */
weak_wrmsr_fence();
@@ -58,7 +64,7 @@ __x2apic_send_IPI_mask(const struct cpumask *mask, int vector, int apic_dest)
dest = 0;
for_each_cpu_and(clustercpu, tmpmsk, &cmsk->mask)
- dest |= per_cpu(x86_cpu_to_logical_apicid, clustercpu);
+ dest |= x86_cpu_to_logical_apicid[clustercpu];
if (!dest)
continue;
@@ -94,7 +100,7 @@ static void x2apic_send_IPI_all(int vector)
static u32 x2apic_calc_apicid(unsigned int cpu)
{
- return per_cpu(x86_cpu_to_logical_apicid, cpu);
+ return x86_cpu_to_logical_apicid[cpu];
}
static void init_x2apic_ldr(void)
@@ -103,7 +109,7 @@ static void init_x2apic_ldr(void)
u32 cluster, apicid = apic_read(APIC_LDR);
unsigned int cpu;
- this_cpu_write(x86_cpu_to_logical_apicid, apicid);
+ x86_cpu_to_logical_apicid[smp_processor_id()] = apicid;
if (cmsk)
goto update;
@@ -166,12 +172,21 @@ static int x2apic_dead_cpu(unsigned int dead_cpu)
static int x2apic_cluster_probe(void)
{
+ u32 slots;
+
if (!x2apic_mode)
return 0;
+ slots = max_t(u32, L1_CACHE_BYTES/sizeof(u32), nr_cpu_ids);
+ x86_cpu_to_logical_apicid = kcalloc(slots, sizeof(u32), GFP_KERNEL);
+ if (!x86_cpu_to_logical_apicid)
+ return 0;
+
if (cpuhp_setup_state(CPUHP_X2APIC_PREPARE, "x86/x2apic:prepare",
x2apic_prepare_cpu, x2apic_dead_cpu) < 0) {
pr_err("Failed to register X2APIC_PREPARE\n");
+ kfree(x86_cpu_to_logical_apicid);
+ x86_cpu_to_logical_apicid = NULL;
return 0;
}
init_x2apic_ldr();
diff --git a/arch/x86/kernel/cc_platform.c b/arch/x86/kernel/cc_platform.c
new file mode 100644
index 000000000000..03bb2f343ddb
--- /dev/null
+++ b/arch/x86/kernel/cc_platform.c
@@ -0,0 +1,69 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Confidential Computing Platform Capability checks
+ *
+ * Copyright (C) 2021 Advanced Micro Devices, Inc.
+ *
+ * Author: Tom Lendacky <thomas.lendacky@amd.com>
+ */
+
+#include <linux/export.h>
+#include <linux/cc_platform.h>
+#include <linux/mem_encrypt.h>
+
+#include <asm/processor.h>
+
+static bool __maybe_unused intel_cc_platform_has(enum cc_attr attr)
+{
+#ifdef CONFIG_INTEL_TDX_GUEST
+ return false;
+#else
+ return false;
+#endif
+}
+
+/*
+ * SME and SEV are very similar but they are not the same, so there are
+ * times that the kernel will need to distinguish between SME and SEV. The
+ * cc_platform_has() function is used for this. When a distinction isn't
+ * needed, the CC_ATTR_MEM_ENCRYPT attribute can be used.
+ *
+ * The trampoline code is a good example for this requirement. Before
+ * paging is activated, SME will access all memory as decrypted, but SEV
+ * will access all memory as encrypted. So, when APs are being brought
+ * up under SME the trampoline area cannot be encrypted, whereas under SEV
+ * the trampoline area must be encrypted.
+ */
+static bool amd_cc_platform_has(enum cc_attr attr)
+{
+#ifdef CONFIG_AMD_MEM_ENCRYPT
+ switch (attr) {
+ case CC_ATTR_MEM_ENCRYPT:
+ return sme_me_mask;
+
+ case CC_ATTR_HOST_MEM_ENCRYPT:
+ return sme_me_mask && !(sev_status & MSR_AMD64_SEV_ENABLED);
+
+ case CC_ATTR_GUEST_MEM_ENCRYPT:
+ return sev_status & MSR_AMD64_SEV_ENABLED;
+
+ case CC_ATTR_GUEST_STATE_ENCRYPT:
+ return sev_status & MSR_AMD64_SEV_ES_ENABLED;
+
+ default:
+ return false;
+ }
+#else
+ return false;
+#endif
+}
+
+
+bool cc_platform_has(enum cc_attr attr)
+{
+ if (sme_me_mask)
+ return amd_cc_platform_has(attr);
+
+ return false;
+}
+EXPORT_SYMBOL_GPL(cc_platform_has);
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index 637b499450d1..9661e3e802be 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -43,6 +43,7 @@ obj-$(CONFIG_CPU_SUP_CENTAUR) += centaur.o
obj-$(CONFIG_CPU_SUP_TRANSMETA_32) += transmeta.o
obj-$(CONFIG_CPU_SUP_UMC_32) += umc.o
obj-$(CONFIG_CPU_SUP_ZHAOXIN) += zhaoxin.o
+obj-$(CONFIG_CPU_SUP_VORTEX_32) += vortex.o
obj-$(CONFIG_X86_MCE) += mce/
obj-$(CONFIG_MTRR) += mtrr/
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 2131af9f2fa2..4edb6f0f628c 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -989,6 +989,8 @@ static void init_amd(struct cpuinfo_x86 *c)
if (cpu_has(c, X86_FEATURE_IRPERF) &&
!cpu_has_amd_erratum(c, amd_erratum_1054))
msr_set_bit(MSR_K7_HWCR, MSR_K7_HWCR_IRPERF_EN_BIT);
+
+ check_null_seg_clears_base(c);
}
#ifdef CONFIG_X86_32
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index ecfca3bbcd96..31e2412b3f17 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -22,7 +22,7 @@
#include <asm/bugs.h>
#include <asm/processor.h>
#include <asm/processor-flags.h>
-#include <asm/fpu/internal.h>
+#include <asm/fpu/api.h>
#include <asm/msr.h>
#include <asm/vmx.h>
#include <asm/paravirt.h>
@@ -882,13 +882,6 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void)
return SPECTRE_V2_CMD_AUTO;
}
- if (cmd == SPECTRE_V2_CMD_RETPOLINE_AMD &&
- boot_cpu_data.x86_vendor != X86_VENDOR_HYGON &&
- boot_cpu_data.x86_vendor != X86_VENDOR_AMD) {
- pr_err("retpoline,amd selected but CPU is not AMD. Switching to AUTO select\n");
- return SPECTRE_V2_CMD_AUTO;
- }
-
spec_v2_print_cond(mitigation_options[i].option,
mitigation_options[i].secure);
return cmd;
diff --git a/arch/x86/kernel/cpu/cacheinfo.c b/arch/x86/kernel/cpu/cacheinfo.c
index b5e36bd0425b..fe98a1465be6 100644
--- a/arch/x86/kernel/cpu/cacheinfo.c
+++ b/arch/x86/kernel/cpu/cacheinfo.c
@@ -846,6 +846,7 @@ void init_intel_cacheinfo(struct cpuinfo_x86 *c)
l2 = new_l2;
#ifdef CONFIG_SMP
per_cpu(cpu_llc_id, cpu) = l2_id;
+ per_cpu(cpu_l2c_id, cpu) = l2_id;
#endif
}
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index b3410f1ac217..0083464de5e3 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -42,7 +42,7 @@
#include <asm/setup.h>
#include <asm/apic.h>
#include <asm/desc.h>
-#include <asm/fpu/internal.h>
+#include <asm/fpu/api.h>
#include <asm/mtrr.h>
#include <asm/hwcap2.h>
#include <linux/numa.h>
@@ -85,6 +85,9 @@ u16 get_llc_id(unsigned int cpu)
}
EXPORT_SYMBOL_GPL(get_llc_id);
+/* L2 cache ID of each logical CPU */
+DEFINE_PER_CPU_READ_MOSTLY(u16, cpu_l2c_id) = BAD_APICID;
+
/* correctly size the local cpu masks */
void __init setup_cpu_local_masks(void)
{
@@ -1045,6 +1048,8 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = {
VULNWL(CENTAUR, 5, X86_MODEL_ANY, NO_SPECULATION),
VULNWL(INTEL, 5, X86_MODEL_ANY, NO_SPECULATION),
VULNWL(NSC, 5, X86_MODEL_ANY, NO_SPECULATION),
+ VULNWL(VORTEX, 5, X86_MODEL_ANY, NO_SPECULATION),
+ VULNWL(VORTEX, 6, X86_MODEL_ANY, NO_SPECULATION),
/* Intel Family 6 */
VULNWL_INTEL(ATOM_SALTWELL, NO_SPECULATION | NO_ITLB_MULTIHIT),
@@ -1396,9 +1401,8 @@ void __init early_cpu_init(void)
early_identify_cpu(&boot_cpu_data);
}
-static void detect_null_seg_behavior(struct cpuinfo_x86 *c)
+static bool detect_null_seg_behavior(void)
{
-#ifdef CONFIG_X86_64
/*
* Empirically, writing zero to a segment selector on AMD does
* not clear the base, whereas writing zero to a segment
@@ -1419,10 +1423,43 @@ static void detect_null_seg_behavior(struct cpuinfo_x86 *c)
wrmsrl(MSR_FS_BASE, 1);
loadsegment(fs, 0);
rdmsrl(MSR_FS_BASE, tmp);
- if (tmp != 0)
- set_cpu_bug(c, X86_BUG_NULL_SEG);
wrmsrl(MSR_FS_BASE, old_base);
-#endif
+ return tmp == 0;
+}
+
+void check_null_seg_clears_base(struct cpuinfo_x86 *c)
+{
+ /* BUG_NULL_SEG is only relevant with 64bit userspace */
+ if (!IS_ENABLED(CONFIG_X86_64))
+ return;
+
+ /* Zen3 CPUs advertise Null Selector Clears Base in CPUID. */
+ if (c->extended_cpuid_level >= 0x80000021 &&
+ cpuid_eax(0x80000021) & BIT(6))
+ return;
+
+ /*
+ * CPUID bit above wasn't set. If this kernel is still running
+ * as a HV guest, then the HV has decided not to advertize
+ * that CPUID bit for whatever reason. For example, one
+ * member of the migration pool might be vulnerable. Which
+ * means, the bug is present: set the BUG flag and return.
+ */
+ if (cpu_has(c, X86_FEATURE_HYPERVISOR)) {
+ set_cpu_bug(c, X86_BUG_NULL_SEG);
+ return;
+ }
+
+ /*
+ * Zen2 CPUs also have this behaviour, but no CPUID bit.
+ * 0x18 is the respective family for Hygon.
+ */
+ if ((c->x86 == 0x17 || c->x86 == 0x18) &&
+ detect_null_seg_behavior())
+ return;
+
+ /* All the remaining ones are affected */
+ set_cpu_bug(c, X86_BUG_NULL_SEG);
}
static void generic_identify(struct cpuinfo_x86 *c)
@@ -1458,8 +1495,6 @@ static void generic_identify(struct cpuinfo_x86 *c)
get_model_name(c); /* Default name */
- detect_null_seg_behavior(c);
-
/*
* ESPFIX is a strange bug. All real CPUs have it. Paravirt
* systems that run Linux at CPL > 0 may or may not have the
diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h
index 95521302630d..ee6f23f7587d 100644
--- a/arch/x86/kernel/cpu/cpu.h
+++ b/arch/x86/kernel/cpu/cpu.h
@@ -75,6 +75,7 @@ extern int detect_extended_topology_early(struct cpuinfo_x86 *c);
extern int detect_extended_topology(struct cpuinfo_x86 *c);
extern int detect_ht_early(struct cpuinfo_x86 *c);
extern void detect_ht(struct cpuinfo_x86 *c);
+extern void check_null_seg_clears_base(struct cpuinfo_x86 *c);
unsigned int aperfmperf_get_khz(int cpu);
diff --git a/arch/x86/kernel/cpu/cpuid-deps.c b/arch/x86/kernel/cpu/cpuid-deps.c
index defda61f372d..cb2fdd130aae 100644
--- a/arch/x86/kernel/cpu/cpuid-deps.c
+++ b/arch/x86/kernel/cpu/cpuid-deps.c
@@ -75,6 +75,8 @@ static const struct cpuid_dep cpuid_deps[] = {
{ X86_FEATURE_SGX_LC, X86_FEATURE_SGX },
{ X86_FEATURE_SGX1, X86_FEATURE_SGX },
{ X86_FEATURE_SGX2, X86_FEATURE_SGX1 },
+ { X86_FEATURE_XFD, X86_FEATURE_XSAVES },
+ { X86_FEATURE_AMX_TILE, X86_FEATURE_XFD },
{}
};
diff --git a/arch/x86/kernel/cpu/hygon.c b/arch/x86/kernel/cpu/hygon.c
index 6d50136f7ab9..3fcdda4c1e11 100644
--- a/arch/x86/kernel/cpu/hygon.c
+++ b/arch/x86/kernel/cpu/hygon.c
@@ -335,6 +335,8 @@ static void init_hygon(struct cpuinfo_x86 *c)
/* Hygon CPUs don't reset SS attributes on SYSRET, Xen does. */
if (!cpu_has(c, X86_FEATURE_XENPV))
set_cpu_bug(c, X86_BUG_SYSRET_SS_ATTRS);
+
+ check_null_seg_clears_base(c);
}
static void cpu_detect_tlb_hygon(struct cpuinfo_x86 *c)
diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c
index 08831acc1d03..27cacf504663 100644
--- a/arch/x86/kernel/cpu/mce/amd.c
+++ b/arch/x86/kernel/cpu/mce/amd.c
@@ -526,7 +526,7 @@ static u32 get_block_address(u32 current_addr, u32 low, u32 high,
/* Fall back to method we used for older processors: */
switch (block) {
case 0:
- addr = msr_ops.misc(bank);
+ addr = mca_msr_reg(bank, MCA_MISC);
break;
case 1:
offset = ((low & MASK_BLKPTR_LO) >> 21);
@@ -978,8 +978,8 @@ static void log_error_deferred(unsigned int bank)
{
bool defrd;
- defrd = _log_error_bank(bank, msr_ops.status(bank),
- msr_ops.addr(bank), 0);
+ defrd = _log_error_bank(bank, mca_msr_reg(bank, MCA_STATUS),
+ mca_msr_reg(bank, MCA_ADDR), 0);
if (!mce_flags.smca)
return;
@@ -1009,7 +1009,7 @@ static void amd_deferred_error_interrupt(void)
static void log_error_thresholding(unsigned int bank, u64 misc)
{
- _log_error_bank(bank, msr_ops.status(bank), msr_ops.addr(bank), misc);
+ _log_error_bank(bank, mca_msr_reg(bank, MCA_STATUS), mca_msr_reg(bank, MCA_ADDR), misc);
}
static void log_and_reset_block(struct threshold_block *block)
@@ -1397,7 +1397,7 @@ static int threshold_create_bank(struct threshold_bank **bp, unsigned int cpu,
}
}
- err = allocate_threshold_blocks(cpu, b, bank, 0, msr_ops.misc(bank));
+ err = allocate_threshold_blocks(cpu, b, bank, 0, mca_msr_reg(bank, MCA_MISC));
if (err)
goto out_kobj;
diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index 193204aee880..6ed365337a3b 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -121,8 +121,6 @@ mce_banks_t mce_banks_ce_disabled;
static struct work_struct mce_work;
static struct irq_work mce_irq_work;
-static void (*quirk_no_way_out)(int bank, struct mce *m, struct pt_regs *regs);
-
/*
* CPU/chipset specific EDAC code can register a notifier call here to print
* MCE errors in a human-readable form.
@@ -176,53 +174,27 @@ void mce_unregister_decode_chain(struct notifier_block *nb)
}
EXPORT_SYMBOL_GPL(mce_unregister_decode_chain);
-static inline u32 ctl_reg(int bank)
-{
- return MSR_IA32_MCx_CTL(bank);
-}
-
-static inline u32 status_reg(int bank)
-{
- return MSR_IA32_MCx_STATUS(bank);
-}
-
-static inline u32 addr_reg(int bank)
-{
- return MSR_IA32_MCx_ADDR(bank);
-}
-
-static inline u32 misc_reg(int bank)
+u32 mca_msr_reg(int bank, enum mca_msr reg)
{
- return MSR_IA32_MCx_MISC(bank);
-}
-
-static inline u32 smca_ctl_reg(int bank)
-{
- return MSR_AMD64_SMCA_MCx_CTL(bank);
-}
-
-static inline u32 smca_status_reg(int bank)
-{
- return MSR_AMD64_SMCA_MCx_STATUS(bank);
-}
+ if (mce_flags.smca) {
+ switch (reg) {
+ case MCA_CTL: return MSR_AMD64_SMCA_MCx_CTL(bank);
+ case MCA_ADDR: return MSR_AMD64_SMCA_MCx_ADDR(bank);
+ case MCA_MISC: return MSR_AMD64_SMCA_MCx_MISC(bank);
+ case MCA_STATUS: return MSR_AMD64_SMCA_MCx_STATUS(bank);
+ }
+ }
-static inline u32 smca_addr_reg(int bank)
-{
- return MSR_AMD64_SMCA_MCx_ADDR(bank);
-}
+ switch (reg) {
+ case MCA_CTL: return MSR_IA32_MCx_CTL(bank);
+ case MCA_ADDR: return MSR_IA32_MCx_ADDR(bank);
+ case MCA_MISC: return MSR_IA32_MCx_MISC(bank);
+ case MCA_STATUS: return MSR_IA32_MCx_STATUS(bank);
+ }
-static inline u32 smca_misc_reg(int bank)
-{
- return MSR_AMD64_SMCA_MCx_MISC(bank);
+ return 0;
}
-struct mca_msr_regs msr_ops = {
- .ctl = ctl_reg,
- .status = status_reg,
- .addr = addr_reg,
- .misc = misc_reg
-};
-
static void __print_mce(struct mce *m)
{
pr_emerg(HW_ERR "CPU %d: Machine Check%s: %Lx Bank %d: %016Lx\n",
@@ -362,24 +334,27 @@ static int msr_to_offset(u32 msr)
if (msr == mca_cfg.rip_msr)
return offsetof(struct mce, ip);
- if (msr == msr_ops.status(bank))
+ if (msr == mca_msr_reg(bank, MCA_STATUS))
return offsetof(struct mce, status);
- if (msr == msr_ops.addr(bank))
+ if (msr == mca_msr_reg(bank, MCA_ADDR))
return offsetof(struct mce, addr);
- if (msr == msr_ops.misc(bank))
+ if (msr == mca_msr_reg(bank, MCA_MISC))
return offsetof(struct mce, misc);
if (msr == MSR_IA32_MCG_STATUS)
return offsetof(struct mce, mcgstatus);
return -1;
}
-__visible bool ex_handler_rdmsr_fault(const struct exception_table_entry *fixup,
- struct pt_regs *regs, int trapnr,
- unsigned long error_code,
- unsigned long fault_addr)
+void ex_handler_msr_mce(struct pt_regs *regs, bool wrmsr)
{
- pr_emerg("MSR access error: RDMSR from 0x%x at rIP: 0x%lx (%pS)\n",
- (unsigned int)regs->cx, regs->ip, (void *)regs->ip);
+ if (wrmsr) {
+ pr_emerg("MSR access error: WRMSR to 0x%x (tried to write 0x%08x%08x) at rIP: 0x%lx (%pS)\n",
+ (unsigned int)regs->cx, (unsigned int)regs->dx, (unsigned int)regs->ax,
+ regs->ip, (void *)regs->ip);
+ } else {
+ pr_emerg("MSR access error: RDMSR from 0x%x at rIP: 0x%lx (%pS)\n",
+ (unsigned int)regs->cx, regs->ip, (void *)regs->ip);
+ }
show_stack_regs(regs);
@@ -387,8 +362,6 @@ __visible bool ex_handler_rdmsr_fault(const struct exception_table_entry *fixup,
while (true)
cpu_relax();
-
- return true;
}
/* MSR access wrappers used for error injection */
@@ -420,32 +393,13 @@ static noinstr u64 mce_rdmsrl(u32 msr)
*/
asm volatile("1: rdmsr\n"
"2:\n"
- _ASM_EXTABLE_HANDLE(1b, 2b, ex_handler_rdmsr_fault)
+ _ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_RDMSR_IN_MCE)
: EAX_EDX_RET(val, low, high) : "c" (msr));
return EAX_EDX_VAL(val, low, high);
}
-__visible bool ex_handler_wrmsr_fault(const struct exception_table_entry *fixup,
- struct pt_regs *regs, int trapnr,
- unsigned long error_code,
- unsigned long fault_addr)
-{
- pr_emerg("MSR access error: WRMSR to 0x%x (tried to write 0x%08x%08x) at rIP: 0x%lx (%pS)\n",
- (unsigned int)regs->cx, (unsigned int)regs->dx, (unsigned int)regs->ax,
- regs->ip, (void *)regs->ip);
-
- show_stack_regs(regs);
-
- panic("MCA architectural violation!\n");
-
- while (true)
- cpu_relax();
-
- return true;
-}
-
static noinstr void mce_wrmsrl(u32 msr, u64 v)
{
u32 low, high;
@@ -470,7 +424,7 @@ static noinstr void mce_wrmsrl(u32 msr, u64 v)
/* See comment in mce_rdmsrl() */
asm volatile("1: wrmsr\n"
"2:\n"
- _ASM_EXTABLE_HANDLE(1b, 2b, ex_handler_wrmsr_fault)
+ _ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_WRMSR_IN_MCE)
: : "c" (msr), "a"(low), "d" (high) : "memory");
}
@@ -685,10 +639,10 @@ static struct notifier_block mce_default_nb = {
static void mce_read_aux(struct mce *m, int i)
{
if (m->status & MCI_STATUS_MISCV)
- m->misc = mce_rdmsrl(msr_ops.misc(i));
+ m->misc = mce_rdmsrl(mca_msr_reg(i, MCA_MISC));
if (m->status & MCI_STATUS_ADDRV) {
- m->addr = mce_rdmsrl(msr_ops.addr(i));
+ m->addr = mce_rdmsrl(mca_msr_reg(i, MCA_ADDR));
/*
* Mask the reported address by the reported granularity.
@@ -758,7 +712,7 @@ bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
m.bank = i;
barrier();
- m.status = mce_rdmsrl(msr_ops.status(i));
+ m.status = mce_rdmsrl(mca_msr_reg(i, MCA_STATUS));
/* If this entry is not valid, ignore it */
if (!(m.status & MCI_STATUS_VAL))
@@ -826,7 +780,7 @@ clear_it:
/*
* Clear state for this bank.
*/
- mce_wrmsrl(msr_ops.status(i), 0);
+ mce_wrmsrl(mca_msr_reg(i, MCA_STATUS), 0);
}
/*
@@ -841,6 +795,34 @@ clear_it:
EXPORT_SYMBOL_GPL(machine_check_poll);
/*
+ * During IFU recovery Sandy Bridge -EP4S processors set the RIPV and
+ * EIPV bits in MCG_STATUS to zero on the affected logical processor (SDM
+ * Vol 3B Table 15-20). But this confuses both the code that determines
+ * whether the machine check occurred in kernel or user mode, and also
+ * the severity assessment code. Pretend that EIPV was set, and take the
+ * ip/cs values from the pt_regs that mce_gather_info() ignored earlier.
+ */
+static void quirk_sandybridge_ifu(int bank, struct mce *m, struct pt_regs *regs)
+{
+ if (bank != 0)
+ return;
+ if ((m->mcgstatus & (MCG_STATUS_EIPV|MCG_STATUS_RIPV)) != 0)
+ return;
+ if ((m->status & (MCI_STATUS_OVER|MCI_STATUS_UC|
+ MCI_STATUS_EN|MCI_STATUS_MISCV|MCI_STATUS_ADDRV|
+ MCI_STATUS_PCC|MCI_STATUS_S|MCI_STATUS_AR|
+ MCACOD)) !=
+ (MCI_STATUS_UC|MCI_STATUS_EN|
+ MCI_STATUS_MISCV|MCI_STATUS_ADDRV|MCI_STATUS_S|
+ MCI_STATUS_AR|MCACOD_INSTR))
+ return;
+
+ m->mcgstatus |= MCG_STATUS_EIPV;
+ m->ip = regs->ip;
+ m->cs = regs->cs;
+}
+
+/*
* Do a quick check if any of the events requires a panic.
* This decides if we keep the events around or clear them.
*/
@@ -851,13 +833,13 @@ static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp,
int i;
for (i = 0; i < this_cpu_read(mce_num_banks); i++) {
- m->status = mce_rdmsrl(msr_ops.status(i));
+ m->status = mce_rdmsrl(mca_msr_reg(i, MCA_STATUS));
if (!(m->status & MCI_STATUS_VAL))
continue;
__set_bit(i, validp);
- if (quirk_no_way_out)
- quirk_no_way_out(i, m, regs);
+ if (mce_flags.snb_ifu_quirk)
+ quirk_sandybridge_ifu(i, m, regs);
m->bank = i;
if (mce_severity(m, regs, mca_cfg.tolerant, &tmp, true) >= MCE_PANIC_SEVERITY) {
@@ -1144,7 +1126,7 @@ static void mce_clear_state(unsigned long *toclear)
for (i = 0; i < this_cpu_read(mce_num_banks); i++) {
if (test_bit(i, toclear))
- mce_wrmsrl(msr_ops.status(i), 0);
+ mce_wrmsrl(mca_msr_reg(i, MCA_STATUS), 0);
}
}
@@ -1203,7 +1185,7 @@ static void __mc_scan_banks(struct mce *m, struct pt_regs *regs, struct mce *fin
m->addr = 0;
m->bank = i;
- m->status = mce_rdmsrl(msr_ops.status(i));
+ m->status = mce_rdmsrl(mca_msr_reg(i, MCA_STATUS));
if (!(m->status & MCI_STATUS_VAL))
continue;
@@ -1272,7 +1254,7 @@ static void kill_me_maybe(struct callback_head *cb)
flags |= MF_MUST_KILL;
ret = memory_failure(p->mce_addr >> PAGE_SHIFT, flags);
- if (!ret && !(p->mce_kflags & MCE_IN_KERNEL_COPYIN)) {
+ if (!ret) {
set_mce_nospec(p->mce_addr >> PAGE_SHIFT, p->mce_whole_page);
sync_core();
return;
@@ -1286,15 +1268,21 @@ static void kill_me_maybe(struct callback_head *cb)
if (ret == -EHWPOISON)
return;
- if (p->mce_vaddr != (void __user *)-1l) {
- force_sig_mceerr(BUS_MCEERR_AR, p->mce_vaddr, PAGE_SHIFT);
- } else {
- pr_err("Memory error not recovered");
- kill_me_now(cb);
- }
+ pr_err("Memory error not recovered");
+ kill_me_now(cb);
+}
+
+static void kill_me_never(struct callback_head *cb)
+{
+ struct task_struct *p = container_of(cb, struct task_struct, mce_kill_me);
+
+ p->mce_count = 0;
+ pr_err("Kernel accessed poison in user space at %llx\n", p->mce_addr);
+ if (!memory_failure(p->mce_addr >> PAGE_SHIFT, 0))
+ set_mce_nospec(p->mce_addr >> PAGE_SHIFT, p->mce_whole_page);
}
-static void queue_task_work(struct mce *m, char *msg, int kill_current_task)
+static void queue_task_work(struct mce *m, char *msg, void (*func)(struct callback_head *))
{
int count = ++current->mce_count;
@@ -1304,11 +1292,7 @@ static void queue_task_work(struct mce *m, char *msg, int kill_current_task)
current->mce_kflags = m->kflags;
current->mce_ripv = !!(m->mcgstatus & MCG_STATUS_RIPV);
current->mce_whole_page = whole_page(m);
-
- if (kill_current_task)
- current->mce_kill_me.func = kill_me_now;
- else
- current->mce_kill_me.func = kill_me_maybe;
+ current->mce_kill_me.func = func;
}
/* Ten is likely overkill. Don't expect more than two faults before task_work() */
@@ -1326,6 +1310,15 @@ static void queue_task_work(struct mce *m, char *msg, int kill_current_task)
task_work_add(current, &current->mce_kill_me, TWA_RESUME);
}
+/* Handle unconfigured int18 (should never happen) */
+static noinstr void unexpected_machine_check(struct pt_regs *regs)
+{
+ instrumentation_begin();
+ pr_err("CPU#%d: Unexpected int18 (Machine Check)\n",
+ smp_processor_id());
+ instrumentation_end();
+}
+
/*
* The actual machine check handler. This only handles real
* exceptions when something got corrupted coming in through int 18.
@@ -1346,36 +1339,43 @@ static void queue_task_work(struct mce *m, char *msg, int kill_current_task)
*/
noinstr void do_machine_check(struct pt_regs *regs)
{
+ int worst = 0, order, no_way_out, kill_current_task, lmce;
DECLARE_BITMAP(valid_banks, MAX_NR_BANKS);
DECLARE_BITMAP(toclear, MAX_NR_BANKS);
struct mca_config *cfg = &mca_cfg;
struct mce m, *final;
char *msg = NULL;
- int worst = 0;
+
+ if (unlikely(mce_flags.p5))
+ return pentium_machine_check(regs);
+ else if (unlikely(mce_flags.winchip))
+ return winchip_machine_check(regs);
+ else if (unlikely(!mca_cfg.initialized))
+ return unexpected_machine_check(regs);
/*
* Establish sequential order between the CPUs entering the machine
* check handler.
*/
- int order = -1;
+ order = -1;
/*
* If no_way_out gets set, there is no safe way to recover from this
* MCE. If mca_cfg.tolerant is cranked up, we'll try anyway.
*/
- int no_way_out = 0;
+ no_way_out = 0;
/*
* If kill_current_task is not set, there might be a way to recover from this
* error.
*/
- int kill_current_task = 0;
+ kill_current_task = 0;
/*
* MCEs are always local on AMD. Same is determined by MCG_STATUS_LMCES
* on Intel.
*/
- int lmce = 1;
+ lmce = 1;
this_cpu_inc(mce_exception_count);
@@ -1459,7 +1459,10 @@ noinstr void do_machine_check(struct pt_regs *regs)
/* If this triggers there is no way to recover. Die hard. */
BUG_ON(!on_thread_stack() || !user_mode(regs));
- queue_task_work(&m, msg, kill_current_task);
+ if (kill_current_task)
+ queue_task_work(&m, msg, kill_me_now);
+ else
+ queue_task_work(&m, msg, kill_me_maybe);
} else {
/*
@@ -1477,7 +1480,7 @@ noinstr void do_machine_check(struct pt_regs *regs)
}
if (m.kflags & MCE_IN_KERNEL_COPYIN)
- queue_task_work(&m, msg, kill_current_task);
+ queue_task_work(&m, msg, kill_me_never);
}
out:
mce_wrmsrl(MSR_IA32_MCG_STATUS, 0);
@@ -1687,8 +1690,8 @@ static void __mcheck_cpu_init_clear_banks(void)
if (!b->init)
continue;
- wrmsrl(msr_ops.ctl(i), b->ctl);
- wrmsrl(msr_ops.status(i), 0);
+ wrmsrl(mca_msr_reg(i, MCA_CTL), b->ctl);
+ wrmsrl(mca_msr_reg(i, MCA_STATUS), 0);
}
}
@@ -1714,39 +1717,11 @@ static void __mcheck_cpu_check_banks(void)
if (!b->init)
continue;
- rdmsrl(msr_ops.ctl(i), msrval);
+ rdmsrl(mca_msr_reg(i, MCA_CTL), msrval);
b->init = !!msrval;
}
}
-/*
- * During IFU recovery Sandy Bridge -EP4S processors set the RIPV and
- * EIPV bits in MCG_STATUS to zero on the affected logical processor (SDM
- * Vol 3B Table 15-20). But this confuses both the code that determines
- * whether the machine check occurred in kernel or user mode, and also
- * the severity assessment code. Pretend that EIPV was set, and take the
- * ip/cs values from the pt_regs that mce_gather_info() ignored earlier.
- */
-static void quirk_sandybridge_ifu(int bank, struct mce *m, struct pt_regs *regs)
-{
- if (bank != 0)
- return;
- if ((m->mcgstatus & (MCG_STATUS_EIPV|MCG_STATUS_RIPV)) != 0)
- return;
- if ((m->status & (MCI_STATUS_OVER|MCI_STATUS_UC|
- MCI_STATUS_EN|MCI_STATUS_MISCV|MCI_STATUS_ADDRV|
- MCI_STATUS_PCC|MCI_STATUS_S|MCI_STATUS_AR|
- MCACOD)) !=
- (MCI_STATUS_UC|MCI_STATUS_EN|
- MCI_STATUS_MISCV|MCI_STATUS_ADDRV|MCI_STATUS_S|
- MCI_STATUS_AR|MCACOD_INSTR))
- return;
-
- m->mcgstatus |= MCG_STATUS_EIPV;
- m->ip = regs->ip;
- m->cs = regs->cs;
-}
-
/* Add per CPU specific workarounds here */
static int __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
{
@@ -1820,7 +1795,7 @@ static int __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
cfg->bootlog = 0;
if (c->x86 == 6 && c->x86_model == 45)
- quirk_no_way_out = quirk_sandybridge_ifu;
+ mce_flags.snb_ifu_quirk = 1;
}
if (c->x86_vendor == X86_VENDOR_ZHAOXIN) {
@@ -1850,9 +1825,11 @@ static int __mcheck_cpu_ancient_init(struct cpuinfo_x86 *c)
switch (c->x86_vendor) {
case X86_VENDOR_INTEL:
intel_p5_mcheck_init(c);
+ mce_flags.p5 = 1;
return 1;
case X86_VENDOR_CENTAUR:
winchip_mcheck_init(c);
+ mce_flags.winchip = 1;
return 1;
default:
return 0;
@@ -1871,13 +1848,6 @@ static void __mcheck_cpu_init_early(struct cpuinfo_x86 *c)
mce_flags.succor = !!cpu_has(c, X86_FEATURE_SUCCOR);
mce_flags.smca = !!cpu_has(c, X86_FEATURE_SMCA);
mce_flags.amd_threshold = 1;
-
- if (mce_flags.smca) {
- msr_ops.ctl = smca_ctl_reg;
- msr_ops.status = smca_status_reg;
- msr_ops.addr = smca_addr_reg;
- msr_ops.misc = smca_misc_reg;
- }
}
}
@@ -2007,18 +1977,6 @@ bool filter_mce(struct mce *m)
return false;
}
-/* Handle unconfigured int18 (should never happen) */
-static noinstr void unexpected_machine_check(struct pt_regs *regs)
-{
- instrumentation_begin();
- pr_err("CPU#%d: Unexpected int18 (Machine Check)\n",
- smp_processor_id());
- instrumentation_end();
-}
-
-/* Call the installed machine check handler for this CPU setup. */
-void (*machine_check_vector)(struct pt_regs *) = unexpected_machine_check;
-
static __always_inline void exc_machine_check_kernel(struct pt_regs *regs)
{
irqentry_state_t irq_state;
@@ -2029,31 +1987,22 @@ static __always_inline void exc_machine_check_kernel(struct pt_regs *regs)
* Only required when from kernel mode. See
* mce_check_crashing_cpu() for details.
*/
- if (machine_check_vector == do_machine_check &&
- mce_check_crashing_cpu())
+ if (mca_cfg.initialized && mce_check_crashing_cpu())
return;
irq_state = irqentry_nmi_enter(regs);
- /*
- * The call targets are marked noinstr, but objtool can't figure
- * that out because it's an indirect call. Annotate it.
- */
- instrumentation_begin();
- machine_check_vector(regs);
+ do_machine_check(regs);
- instrumentation_end();
irqentry_nmi_exit(regs, irq_state);
}
static __always_inline void exc_machine_check_user(struct pt_regs *regs)
{
irqentry_enter_from_user_mode(regs);
- instrumentation_begin();
- machine_check_vector(regs);
+ do_machine_check(regs);
- instrumentation_end();
irqentry_exit_to_user_mode(regs);
}
@@ -2120,7 +2069,7 @@ void mcheck_cpu_init(struct cpuinfo_x86 *c)
return;
}
- machine_check_vector = do_machine_check;
+ mca_cfg.initialized = 1;
__mcheck_cpu_init_early(c);
__mcheck_cpu_init_generic();
@@ -2228,7 +2177,6 @@ int __init mcheck_init(void)
mce_register_decode_chain(&early_nb);
mce_register_decode_chain(&mce_uc_nb);
mce_register_decode_chain(&mce_default_nb);
- mcheck_vendor_init_severity();
INIT_WORK(&mce_work, mce_gen_pool_process);
init_irq_work(&mce_irq_work, mce_irq_work_cb);
@@ -2253,7 +2201,7 @@ static void mce_disable_error_reporting(void)
struct mce_bank *b = &mce_banks[i];
if (b->init)
- wrmsrl(msr_ops.ctl(i), 0);
+ wrmsrl(mca_msr_reg(i, MCA_CTL), 0);
}
return;
}
@@ -2605,7 +2553,7 @@ static void mce_reenable_cpu(void)
struct mce_bank *b = &mce_banks[i];
if (b->init)
- wrmsrl(msr_ops.ctl(i), b->ctl);
+ wrmsrl(mca_msr_reg(i, MCA_CTL), b->ctl);
}
}
diff --git a/arch/x86/kernel/cpu/mce/internal.h b/arch/x86/kernel/cpu/mce/internal.h
index 88dcc79cfb07..acd61c41846c 100644
--- a/arch/x86/kernel/cpu/mce/internal.h
+++ b/arch/x86/kernel/cpu/mce/internal.h
@@ -8,9 +8,6 @@
#include <linux/device.h>
#include <asm/mce.h>
-/* Pointer to the installed machine check handler for this CPU setup. */
-extern void (*machine_check_vector)(struct pt_regs *);
-
enum severity_level {
MCE_NO_SEVERITY,
MCE_DEFERRED_SEVERITY,
@@ -38,8 +35,7 @@ int mce_gen_pool_add(struct mce *mce);
int mce_gen_pool_init(void);
struct llist_node *mce_gen_pool_prepare_records(void);
-extern int (*mce_severity)(struct mce *a, struct pt_regs *regs,
- int tolerant, char **msg, bool is_excp);
+int mce_severity(struct mce *a, struct pt_regs *regs, int tolerant, char **msg, bool is_excp);
struct dentry *mce_get_debugfs_dir(void);
extern mce_banks_t mce_banks_ce_disabled;
@@ -61,7 +57,7 @@ static inline void cmci_disable_bank(int bank) { }
static inline void intel_init_cmci(void) { }
static inline void intel_init_lmce(void) { }
static inline void intel_clear_lmce(void) { }
-static inline bool intel_filter_mce(struct mce *m) { return false; };
+static inline bool intel_filter_mce(struct mce *m) { return false; }
#endif
void mce_timer_kick(unsigned long interval);
@@ -117,23 +113,25 @@ static inline void mce_unregister_injector_chain(struct notifier_block *nb) { }
#endif
struct mca_config {
- bool dont_log_ce;
- bool cmci_disabled;
- bool ignore_ce;
- bool print_all;
-
__u64 lmce_disabled : 1,
disabled : 1,
ser : 1,
recovery : 1,
bios_cmci_threshold : 1,
- __reserved : 59;
+ /* Proper #MC exception handler is set */
+ initialized : 1,
+ __reserved : 58;
+
+ bool dont_log_ce;
+ bool cmci_disabled;
+ bool ignore_ce;
+ bool print_all;
- s8 bootlog;
int tolerant;
int monarch_timeout;
int panic_timeout;
u32 rip_msr;
+ s8 bootlog;
};
extern struct mca_config mca_cfg;
@@ -163,19 +161,28 @@ struct mce_vendor_flags {
/* AMD-style error thresholding banks present. */
amd_threshold : 1,
- __reserved_0 : 60;
+ /* Pentium, family 5-style MCA */
+ p5 : 1,
+
+ /* Centaur Winchip C6-style MCA */
+ winchip : 1,
+
+ /* SandyBridge IFU quirk */
+ snb_ifu_quirk : 1,
+
+ __reserved_0 : 57;
};
extern struct mce_vendor_flags mce_flags;
-struct mca_msr_regs {
- u32 (*ctl) (int bank);
- u32 (*status) (int bank);
- u32 (*addr) (int bank);
- u32 (*misc) (int bank);
+enum mca_msr {
+ MCA_CTL,
+ MCA_STATUS,
+ MCA_ADDR,
+ MCA_MISC,
};
-extern struct mca_msr_regs msr_ops;
+u32 mca_msr_reg(int bank, enum mca_msr reg);
/* Decide whether to add MCE record to MCE event pool or filter it out. */
extern bool filter_mce(struct mce *m);
@@ -183,17 +190,21 @@ extern bool filter_mce(struct mce *m);
#ifdef CONFIG_X86_MCE_AMD
extern bool amd_filter_mce(struct mce *m);
#else
-static inline bool amd_filter_mce(struct mce *m) { return false; };
+static inline bool amd_filter_mce(struct mce *m) { return false; }
#endif
-__visible bool ex_handler_rdmsr_fault(const struct exception_table_entry *fixup,
- struct pt_regs *regs, int trapnr,
- unsigned long error_code,
- unsigned long fault_addr);
-
-__visible bool ex_handler_wrmsr_fault(const struct exception_table_entry *fixup,
- struct pt_regs *regs, int trapnr,
- unsigned long error_code,
- unsigned long fault_addr);
+#ifdef CONFIG_X86_ANCIENT_MCE
+void intel_p5_mcheck_init(struct cpuinfo_x86 *c);
+void winchip_mcheck_init(struct cpuinfo_x86 *c);
+noinstr void pentium_machine_check(struct pt_regs *regs);
+noinstr void winchip_machine_check(struct pt_regs *regs);
+static inline void enable_p5_mce(void) { mce_p5_enabled = 1; }
+#else
+static inline void intel_p5_mcheck_init(struct cpuinfo_x86 *c) {}
+static inline void winchip_mcheck_init(struct cpuinfo_x86 *c) {}
+static inline void enable_p5_mce(void) {}
+static inline void pentium_machine_check(struct pt_regs *regs) {}
+static inline void winchip_machine_check(struct pt_regs *regs) {}
+#endif
#endif /* __X86_MCE_INTERNAL_H__ */
diff --git a/arch/x86/kernel/cpu/mce/p5.c b/arch/x86/kernel/cpu/mce/p5.c
index 19e90cae8e97..2272ad53fc33 100644
--- a/arch/x86/kernel/cpu/mce/p5.c
+++ b/arch/x86/kernel/cpu/mce/p5.c
@@ -21,7 +21,7 @@
int mce_p5_enabled __read_mostly;
/* Machine check handler for Pentium class Intel CPUs: */
-static noinstr void pentium_machine_check(struct pt_regs *regs)
+noinstr void pentium_machine_check(struct pt_regs *regs)
{
u32 loaddr, hi, lotype;
@@ -54,10 +54,6 @@ void intel_p5_mcheck_init(struct cpuinfo_x86 *c)
if (!cpu_has(c, X86_FEATURE_MCE))
return;
- machine_check_vector = pentium_machine_check;
- /* Make sure the vector pointer is visible before we enable MCEs: */
- wmb();
-
/* Read registers before enabling: */
rdmsr(MSR_IA32_P5_MC_ADDR, l, h);
rdmsr(MSR_IA32_P5_MC_TYPE, l, h);
diff --git a/arch/x86/kernel/cpu/mce/severity.c b/arch/x86/kernel/cpu/mce/severity.c
index 17e631443116..bb019a594a2c 100644
--- a/arch/x86/kernel/cpu/mce/severity.c
+++ b/arch/x86/kernel/cpu/mce/severity.c
@@ -265,25 +265,25 @@ static bool is_copy_from_user(struct pt_regs *regs)
*/
static int error_context(struct mce *m, struct pt_regs *regs)
{
- enum handler_type t;
-
if ((m->cs & 3) == 3)
return IN_USER;
if (!mc_recoverable(m->mcgstatus))
return IN_KERNEL;
- t = ex_get_fault_handler_type(m->ip);
- if (t == EX_HANDLER_FAULT) {
- m->kflags |= MCE_IN_KERNEL_RECOV;
- return IN_KERNEL_RECOV;
- }
- if (t == EX_HANDLER_UACCESS && regs && is_copy_from_user(regs)) {
- m->kflags |= MCE_IN_KERNEL_RECOV;
+ switch (ex_get_fixup_type(m->ip)) {
+ case EX_TYPE_UACCESS:
+ case EX_TYPE_COPY:
+ if (!regs || !is_copy_from_user(regs))
+ return IN_KERNEL;
m->kflags |= MCE_IN_KERNEL_COPYIN;
+ fallthrough;
+ case EX_TYPE_FAULT_MCE_SAFE:
+ case EX_TYPE_DEFAULT_MCE_SAFE:
+ m->kflags |= MCE_IN_KERNEL_RECOV;
return IN_KERNEL_RECOV;
+ default:
+ return IN_KERNEL;
}
-
- return IN_KERNEL;
}
static int mce_severity_amd_smca(struct mce *m, enum context err_ctx)
@@ -407,15 +407,14 @@ static int mce_severity_intel(struct mce *m, struct pt_regs *regs,
}
}
-/* Default to mce_severity_intel */
-int (*mce_severity)(struct mce *m, struct pt_regs *regs, int tolerant, char **msg, bool is_excp) =
- mce_severity_intel;
-
-void __init mcheck_vendor_init_severity(void)
+int mce_severity(struct mce *m, struct pt_regs *regs, int tolerant, char **msg,
+ bool is_excp)
{
if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD ||
boot_cpu_data.x86_vendor == X86_VENDOR_HYGON)
- mce_severity = mce_severity_amd;
+ return mce_severity_amd(m, regs, tolerant, msg, is_excp);
+ else
+ return mce_severity_intel(m, regs, tolerant, msg, is_excp);
}
#ifdef CONFIG_DEBUG_FS
diff --git a/arch/x86/kernel/cpu/mce/winchip.c b/arch/x86/kernel/cpu/mce/winchip.c
index 9c9f0abd2d7f..6c99f2941909 100644
--- a/arch/x86/kernel/cpu/mce/winchip.c
+++ b/arch/x86/kernel/cpu/mce/winchip.c
@@ -17,7 +17,7 @@
#include "internal.h"
/* Machine check handler for WinChip C6: */
-static noinstr void winchip_machine_check(struct pt_regs *regs)
+noinstr void winchip_machine_check(struct pt_regs *regs)
{
instrumentation_begin();
pr_emerg("CPU0: Machine Check Exception.\n");
@@ -30,10 +30,6 @@ void winchip_mcheck_init(struct cpuinfo_x86 *c)
{
u32 lo, hi;
- machine_check_vector = winchip_machine_check;
- /* Make sure the vector pointer is visible before we enable MCEs: */
- wmb();
-
rdmsr(MSR_IDT_FCR1, lo, hi);
lo |= (1<<2); /* Enable EIERRINT (int 18 MCE) */
lo &= ~(1<<4); /* Enable MCE */
diff --git a/arch/x86/kernel/cpu/vortex.c b/arch/x86/kernel/cpu/vortex.c
new file mode 100644
index 000000000000..e2685470ba94
--- /dev/null
+++ b/arch/x86/kernel/cpu/vortex.c
@@ -0,0 +1,39 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/kernel.h>
+#include <asm/processor.h>
+#include "cpu.h"
+
+/*
+ * No special init required for Vortex processors.
+ */
+
+static const struct cpu_dev vortex_cpu_dev = {
+ .c_vendor = "Vortex",
+ .c_ident = { "Vortex86 SoC" },
+ .legacy_models = {
+ {
+ .family = 5,
+ .model_names = {
+ [2] = "Vortex86DX",
+ [8] = "Vortex86MX",
+ },
+ },
+ {
+ .family = 6,
+ .model_names = {
+ /*
+ * Both the Vortex86EX and the Vortex86EX2
+ * have the same family and model id.
+ *
+ * However, the -EX2 supports the product name
+ * CPUID call, so this name will only be used
+ * for the -EX, which does not.
+ */
+ [0] = "Vortex86EX",
+ },
+ },
+ },
+ .c_x86_vendor = X86_VENDOR_VORTEX,
+};
+
+cpu_dev_register(vortex_cpu_dev);
diff --git a/arch/x86/kernel/crash_dump_64.c b/arch/x86/kernel/crash_dump_64.c
index 045e82e8945b..a7f617a3981d 100644
--- a/arch/x86/kernel/crash_dump_64.c
+++ b/arch/x86/kernel/crash_dump_64.c
@@ -10,6 +10,7 @@
#include <linux/crash_dump.h>
#include <linux/uaccess.h>
#include <linux/io.h>
+#include <linux/cc_platform.h>
static ssize_t __copy_oldmem_page(unsigned long pfn, char *buf, size_t csize,
unsigned long offset, int userbuf,
@@ -73,5 +74,6 @@ ssize_t copy_oldmem_page_encrypted(unsigned long pfn, char *buf, size_t csize,
ssize_t elfcorehdr_read(char *buf, size_t count, u64 *ppos)
{
- return read_from_oldmem(buf, count, ppos, 0, sev_active());
+ return read_from_oldmem(buf, count, ppos, 0,
+ cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT));
}
diff --git a/arch/x86/kernel/devicetree.c b/arch/x86/kernel/devicetree.c
index 6a4cb71c2498..78b2311b3b8b 100644
--- a/arch/x86/kernel/devicetree.c
+++ b/arch/x86/kernel/devicetree.c
@@ -31,11 +31,6 @@ char __initdata cmd_line[COMMAND_LINE_SIZE];
int __initdata of_ioapic;
-void __init early_init_dt_scan_chosen_arch(unsigned long node)
-{
- BUG();
-}
-
void __init early_init_dt_add_memory_arch(u64 base, u64 size)
{
BUG();
diff --git a/arch/x86/kernel/fpu/bugs.c b/arch/x86/kernel/fpu/bugs.c
index 2954fab15e51..794e70151203 100644
--- a/arch/x86/kernel/fpu/bugs.c
+++ b/arch/x86/kernel/fpu/bugs.c
@@ -2,7 +2,7 @@
/*
* x86 FPU bug checks:
*/
-#include <asm/fpu/internal.h>
+#include <asm/fpu/api.h>
/*
* Boot time CPU/FPU FDIV bug detection code:
diff --git a/arch/x86/kernel/fpu/context.h b/arch/x86/kernel/fpu/context.h
new file mode 100644
index 000000000000..958accf2ccf0
--- /dev/null
+++ b/arch/x86/kernel/fpu/context.h
@@ -0,0 +1,83 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __X86_KERNEL_FPU_CONTEXT_H
+#define __X86_KERNEL_FPU_CONTEXT_H
+
+#include <asm/fpu/xstate.h>
+#include <asm/trace/fpu.h>
+
+/* Functions related to FPU context tracking */
+
+/*
+ * The in-register FPU state for an FPU context on a CPU is assumed to be
+ * valid if the fpu->last_cpu matches the CPU, and the fpu_fpregs_owner_ctx
+ * matches the FPU.
+ *
+ * If the FPU register state is valid, the kernel can skip restoring the
+ * FPU state from memory.
+ *
+ * Any code that clobbers the FPU registers or updates the in-memory
+ * FPU state for a task MUST let the rest of the kernel know that the
+ * FPU registers are no longer valid for this task.
+ *
+ * Either one of these invalidation functions is enough. Invalidate
+ * a resource you control: CPU if using the CPU for something else
+ * (with preemption disabled), FPU for the current task, or a task that
+ * is prevented from running by the current task.
+ */
+static inline void __cpu_invalidate_fpregs_state(void)
+{
+ __this_cpu_write(fpu_fpregs_owner_ctx, NULL);
+}
+
+static inline void __fpu_invalidate_fpregs_state(struct fpu *fpu)
+{
+ fpu->last_cpu = -1;
+}
+
+static inline int fpregs_state_valid(struct fpu *fpu, unsigned int cpu)
+{
+ return fpu == this_cpu_read(fpu_fpregs_owner_ctx) && cpu == fpu->last_cpu;
+}
+
+static inline void fpregs_deactivate(struct fpu *fpu)
+{
+ __this_cpu_write(fpu_fpregs_owner_ctx, NULL);
+ trace_x86_fpu_regs_deactivated(fpu);
+}
+
+static inline void fpregs_activate(struct fpu *fpu)
+{
+ __this_cpu_write(fpu_fpregs_owner_ctx, fpu);
+ trace_x86_fpu_regs_activated(fpu);
+}
+
+/* Internal helper for switch_fpu_return() and signal frame setup */
+static inline void fpregs_restore_userregs(void)
+{
+ struct fpu *fpu = &current->thread.fpu;
+ int cpu = smp_processor_id();
+
+ if (WARN_ON_ONCE(current->flags & PF_KTHREAD))
+ return;
+
+ if (!fpregs_state_valid(fpu, cpu)) {
+ /*
+ * This restores _all_ xstate which has not been
+ * established yet.
+ *
+ * If PKRU is enabled, then the PKRU value is already
+ * correct because it was either set in switch_to() or in
+ * flush_thread(). So it is excluded because it might be
+ * not up to date in current->thread.fpu.xsave state.
+ *
+ * XFD state is handled in restore_fpregs_from_fpstate().
+ */
+ restore_fpregs_from_fpstate(fpu->fpstate, XFEATURE_MASK_FPSTATE);
+
+ fpregs_activate(fpu);
+ fpu->last_cpu = cpu;
+ }
+ clear_thread_flag(TIF_NEED_FPU_LOAD);
+}
+
+#endif
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index 7ada7bd03a32..8ea306b1bf8e 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -6,8 +6,9 @@
* General FPU state handling cleanups
* Gareth Hughes <gareth@valinux.com>, May 2000
*/
-#include <asm/fpu/internal.h>
+#include <asm/fpu/api.h>
#include <asm/fpu/regset.h>
+#include <asm/fpu/sched.h>
#include <asm/fpu/signal.h>
#include <asm/fpu/types.h>
#include <asm/traps.h>
@@ -15,15 +16,30 @@
#include <linux/hardirq.h>
#include <linux/pkeys.h>
+#include <linux/vmalloc.h>
+
+#include "context.h"
+#include "internal.h"
+#include "legacy.h"
+#include "xstate.h"
#define CREATE_TRACE_POINTS
#include <asm/trace/fpu.h>
+#ifdef CONFIG_X86_64
+DEFINE_STATIC_KEY_FALSE(__fpu_state_size_dynamic);
+DEFINE_PER_CPU(u64, xfd_state);
+#endif
+
+/* The FPU state configuration data for kernel and user space */
+struct fpu_state_config fpu_kernel_cfg __ro_after_init;
+struct fpu_state_config fpu_user_cfg __ro_after_init;
+
/*
* Represents the initial FPU state. It's mostly (but not completely) zeroes,
* depending on the FPU hardware format:
*/
-union fpregs_state init_fpstate __ro_after_init;
+struct fpstate init_fpstate __ro_after_init;
/*
* Track whether the kernel is using the FPU state
@@ -83,7 +99,7 @@ bool irq_fpu_usable(void)
EXPORT_SYMBOL(irq_fpu_usable);
/*
- * Save the FPU register state in fpu->state. The register state is
+ * Save the FPU register state in fpu->fpstate->regs. The register state is
* preserved.
*
* Must be called with fpregs_lock() held.
@@ -99,19 +115,19 @@ EXPORT_SYMBOL(irq_fpu_usable);
void save_fpregs_to_fpstate(struct fpu *fpu)
{
if (likely(use_xsave())) {
- os_xsave(&fpu->state.xsave);
+ os_xsave(fpu->fpstate);
/*
* AVX512 state is tracked here because its use is
* known to slow the max clock speed of the core.
*/
- if (fpu->state.xsave.header.xfeatures & XFEATURE_MASK_AVX512)
+ if (fpu->fpstate->regs.xsave.header.xfeatures & XFEATURE_MASK_AVX512)
fpu->avx512_timestamp = jiffies;
return;
}
if (likely(use_fxsr())) {
- fxsave(&fpu->state.fxsave);
+ fxsave(&fpu->fpstate->regs.fxsave);
return;
}
@@ -119,12 +135,11 @@ void save_fpregs_to_fpstate(struct fpu *fpu)
* Legacy FPU register saving, FNSAVE always clears FPU registers,
* so we have to reload them from the memory state.
*/
- asm volatile("fnsave %[fp]; fwait" : [fp] "=m" (fpu->state.fsave));
- frstor(&fpu->state.fsave);
+ asm volatile("fnsave %[fp]; fwait" : [fp] "=m" (fpu->fpstate->regs.fsave));
+ frstor(&fpu->fpstate->regs.fsave);
}
-EXPORT_SYMBOL(save_fpregs_to_fpstate);
-void __restore_fpregs_from_fpstate(union fpregs_state *fpstate, u64 mask)
+void restore_fpregs_from_fpstate(struct fpstate *fpstate, u64 mask)
{
/*
* AMD K7/K8 and later CPUs up to Zen don't save/restore
@@ -141,15 +156,181 @@ void __restore_fpregs_from_fpstate(union fpregs_state *fpstate, u64 mask)
}
if (use_xsave()) {
- os_xrstor(&fpstate->xsave, mask);
+ /*
+ * Dynamically enabled features are enabled in XCR0, but
+ * usage requires also that the corresponding bits in XFD
+ * are cleared. If the bits are set then using a related
+ * instruction will raise #NM. This allows to do the
+ * allocation of the larger FPU buffer lazy from #NM or if
+ * the task has no permission to kill it which would happen
+ * via #UD if the feature is disabled in XCR0.
+ *
+ * XFD state is following the same life time rules as
+ * XSTATE and to restore state correctly XFD has to be
+ * updated before XRSTORS otherwise the component would
+ * stay in or go into init state even if the bits are set
+ * in fpstate::regs::xsave::xfeatures.
+ */
+ xfd_update_state(fpstate);
+
+ /*
+ * Restoring state always needs to modify all features
+ * which are in @mask even if the current task cannot use
+ * extended features.
+ *
+ * So fpstate->xfeatures cannot be used here, because then
+ * a feature for which the task has no permission but was
+ * used by the previous task would not go into init state.
+ */
+ mask = fpu_kernel_cfg.max_features & mask;
+
+ os_xrstor(fpstate, mask);
} else {
if (use_fxsr())
- fxrstor(&fpstate->fxsave);
+ fxrstor(&fpstate->regs.fxsave);
else
- frstor(&fpstate->fsave);
+ frstor(&fpstate->regs.fsave);
}
}
-EXPORT_SYMBOL_GPL(__restore_fpregs_from_fpstate);
+
+void fpu_reset_from_exception_fixup(void)
+{
+ restore_fpregs_from_fpstate(&init_fpstate, XFEATURE_MASK_FPSTATE);
+}
+
+#if IS_ENABLED(CONFIG_KVM)
+static void __fpstate_reset(struct fpstate *fpstate);
+
+bool fpu_alloc_guest_fpstate(struct fpu_guest *gfpu)
+{
+ struct fpstate *fpstate;
+ unsigned int size;
+
+ size = fpu_user_cfg.default_size + ALIGN(offsetof(struct fpstate, regs), 64);
+ fpstate = vzalloc(size);
+ if (!fpstate)
+ return false;
+
+ __fpstate_reset(fpstate);
+ fpstate_init_user(fpstate);
+ fpstate->is_valloc = true;
+ fpstate->is_guest = true;
+
+ gfpu->fpstate = fpstate;
+ return true;
+}
+EXPORT_SYMBOL_GPL(fpu_alloc_guest_fpstate);
+
+void fpu_free_guest_fpstate(struct fpu_guest *gfpu)
+{
+ struct fpstate *fps = gfpu->fpstate;
+
+ if (!fps)
+ return;
+
+ if (WARN_ON_ONCE(!fps->is_valloc || !fps->is_guest || fps->in_use))
+ return;
+
+ gfpu->fpstate = NULL;
+ vfree(fps);
+}
+EXPORT_SYMBOL_GPL(fpu_free_guest_fpstate);
+
+int fpu_swap_kvm_fpstate(struct fpu_guest *guest_fpu, bool enter_guest)
+{
+ struct fpstate *guest_fps = guest_fpu->fpstate;
+ struct fpu *fpu = &current->thread.fpu;
+ struct fpstate *cur_fps = fpu->fpstate;
+
+ fpregs_lock();
+ if (!cur_fps->is_confidential && !test_thread_flag(TIF_NEED_FPU_LOAD))
+ save_fpregs_to_fpstate(fpu);
+
+ /* Swap fpstate */
+ if (enter_guest) {
+ fpu->__task_fpstate = cur_fps;
+ fpu->fpstate = guest_fps;
+ guest_fps->in_use = true;
+ } else {
+ guest_fps->in_use = false;
+ fpu->fpstate = fpu->__task_fpstate;
+ fpu->__task_fpstate = NULL;
+ }
+
+ cur_fps = fpu->fpstate;
+
+ if (!cur_fps->is_confidential) {
+ /* Includes XFD update */
+ restore_fpregs_from_fpstate(cur_fps, XFEATURE_MASK_FPSTATE);
+ } else {
+ /*
+ * XSTATE is restored by firmware from encrypted
+ * memory. Make sure XFD state is correct while
+ * running with guest fpstate
+ */
+ xfd_update_state(cur_fps);
+ }
+
+ fpregs_mark_activate();
+ fpregs_unlock();
+ return 0;
+}
+EXPORT_SYMBOL_GPL(fpu_swap_kvm_fpstate);
+
+void fpu_copy_guest_fpstate_to_uabi(struct fpu_guest *gfpu, void *buf,
+ unsigned int size, u32 pkru)
+{
+ struct fpstate *kstate = gfpu->fpstate;
+ union fpregs_state *ustate = buf;
+ struct membuf mb = { .p = buf, .left = size };
+
+ if (cpu_feature_enabled(X86_FEATURE_XSAVE)) {
+ __copy_xstate_to_uabi_buf(mb, kstate, pkru, XSTATE_COPY_XSAVE);
+ } else {
+ memcpy(&ustate->fxsave, &kstate->regs.fxsave,
+ sizeof(ustate->fxsave));
+ /* Make it restorable on a XSAVE enabled host */
+ ustate->xsave.header.xfeatures = XFEATURE_MASK_FPSSE;
+ }
+}
+EXPORT_SYMBOL_GPL(fpu_copy_guest_fpstate_to_uabi);
+
+int fpu_copy_uabi_to_guest_fpstate(struct fpu_guest *gfpu, const void *buf,
+ u64 xcr0, u32 *vpkru)
+{
+ struct fpstate *kstate = gfpu->fpstate;
+ const union fpregs_state *ustate = buf;
+ struct pkru_state *xpkru;
+ int ret;
+
+ if (!cpu_feature_enabled(X86_FEATURE_XSAVE)) {
+ if (ustate->xsave.header.xfeatures & ~XFEATURE_MASK_FPSSE)
+ return -EINVAL;
+ if (ustate->fxsave.mxcsr & ~mxcsr_feature_mask)
+ return -EINVAL;
+ memcpy(&kstate->regs.fxsave, &ustate->fxsave, sizeof(ustate->fxsave));
+ return 0;
+ }
+
+ if (ustate->xsave.header.xfeatures & ~xcr0)
+ return -EINVAL;
+
+ ret = copy_uabi_from_kernel_to_xstate(kstate, ustate);
+ if (ret)
+ return ret;
+
+ /* Retrieve PKRU if not in init state */
+ if (kstate->regs.xsave.header.xfeatures & XFEATURE_MASK_PKRU) {
+ xpkru = get_xsave_addr(&kstate->regs.xsave, XFEATURE_PKRU);
+ *vpkru = xpkru->pkru;
+ }
+
+ /* Ensure that XCOMP_BV is set up for XSAVES */
+ xstate_init_xcomp_bv(&kstate->regs.xsave, kstate->xfeatures);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(fpu_copy_uabi_to_guest_fpstate);
+#endif /* CONFIG_KVM */
void kernel_fpu_begin_mask(unsigned int kfpu_mask)
{
@@ -203,52 +384,88 @@ void fpu_sync_fpstate(struct fpu *fpu)
fpregs_unlock();
}
-static inline void fpstate_init_xstate(struct xregs_state *xsave)
+static inline unsigned int init_fpstate_copy_size(void)
{
- /*
- * XRSTORS requires these bits set in xcomp_bv, or it will
- * trigger #GP:
- */
- xsave->header.xcomp_bv = XCOMP_BV_COMPACTED_FORMAT | xfeatures_mask_all;
+ if (!use_xsave())
+ return fpu_kernel_cfg.default_size;
+
+ /* XSAVE(S) just needs the legacy and the xstate header part */
+ return sizeof(init_fpstate.regs.xsave);
}
-static inline void fpstate_init_fxstate(struct fxregs_state *fx)
+static inline void fpstate_init_fxstate(struct fpstate *fpstate)
{
- fx->cwd = 0x37f;
- fx->mxcsr = MXCSR_DEFAULT;
+ fpstate->regs.fxsave.cwd = 0x37f;
+ fpstate->regs.fxsave.mxcsr = MXCSR_DEFAULT;
}
/*
* Legacy x87 fpstate state init:
*/
-static inline void fpstate_init_fstate(struct fregs_state *fp)
+static inline void fpstate_init_fstate(struct fpstate *fpstate)
{
- fp->cwd = 0xffff037fu;
- fp->swd = 0xffff0000u;
- fp->twd = 0xffffffffu;
- fp->fos = 0xffff0000u;
+ fpstate->regs.fsave.cwd = 0xffff037fu;
+ fpstate->regs.fsave.swd = 0xffff0000u;
+ fpstate->regs.fsave.twd = 0xffffffffu;
+ fpstate->regs.fsave.fos = 0xffff0000u;
}
-void fpstate_init(union fpregs_state *state)
+/*
+ * Used in two places:
+ * 1) Early boot to setup init_fpstate for non XSAVE systems
+ * 2) fpu_init_fpstate_user() which is invoked from KVM
+ */
+void fpstate_init_user(struct fpstate *fpstate)
{
- if (!static_cpu_has(X86_FEATURE_FPU)) {
- fpstate_init_soft(&state->soft);
+ if (!cpu_feature_enabled(X86_FEATURE_FPU)) {
+ fpstate_init_soft(&fpstate->regs.soft);
return;
}
- memset(state, 0, fpu_kernel_xstate_size);
+ xstate_init_xcomp_bv(&fpstate->regs.xsave, fpstate->xfeatures);
- if (static_cpu_has(X86_FEATURE_XSAVES))
- fpstate_init_xstate(&state->xsave);
- if (static_cpu_has(X86_FEATURE_FXSR))
- fpstate_init_fxstate(&state->fxsave);
+ if (cpu_feature_enabled(X86_FEATURE_FXSR))
+ fpstate_init_fxstate(fpstate);
else
- fpstate_init_fstate(&state->fsave);
+ fpstate_init_fstate(fpstate);
+}
+
+static void __fpstate_reset(struct fpstate *fpstate)
+{
+ /* Initialize sizes and feature masks */
+ fpstate->size = fpu_kernel_cfg.default_size;
+ fpstate->user_size = fpu_user_cfg.default_size;
+ fpstate->xfeatures = fpu_kernel_cfg.default_features;
+ fpstate->user_xfeatures = fpu_user_cfg.default_features;
+ fpstate->xfd = init_fpstate.xfd;
+}
+
+void fpstate_reset(struct fpu *fpu)
+{
+ /* Set the fpstate pointer to the default fpstate */
+ fpu->fpstate = &fpu->__fpstate;
+ __fpstate_reset(fpu->fpstate);
+
+ /* Initialize the permission related info in fpu */
+ fpu->perm.__state_perm = fpu_kernel_cfg.default_features;
+ fpu->perm.__state_size = fpu_kernel_cfg.default_size;
+ fpu->perm.__user_state_size = fpu_user_cfg.default_size;
+}
+
+static inline void fpu_inherit_perms(struct fpu *dst_fpu)
+{
+ if (fpu_state_size_dynamic()) {
+ struct fpu *src_fpu = &current->group_leader->thread.fpu;
+
+ spin_lock_irq(&current->sighand->siglock);
+ /* Fork also inherits the permissions of the parent */
+ dst_fpu->perm = src_fpu->perm;
+ spin_unlock_irq(&current->sighand->siglock);
+ }
}
-EXPORT_SYMBOL_GPL(fpstate_init);
/* Clone current's FPU state on fork */
-int fpu_clone(struct task_struct *dst)
+int fpu_clone(struct task_struct *dst, unsigned long clone_flags)
{
struct fpu *src_fpu = &current->thread.fpu;
struct fpu *dst_fpu = &dst->thread.fpu;
@@ -256,30 +473,51 @@ int fpu_clone(struct task_struct *dst)
/* The new task's FPU state cannot be valid in the hardware. */
dst_fpu->last_cpu = -1;
+ fpstate_reset(dst_fpu);
+
if (!cpu_feature_enabled(X86_FEATURE_FPU))
return 0;
/*
- * Don't let 'init optimized' areas of the XSAVE area
- * leak into the child task:
+ * Enforce reload for user space tasks and prevent kernel threads
+ * from trying to save the FPU registers on context switch.
+ */
+ set_tsk_thread_flag(dst, TIF_NEED_FPU_LOAD);
+
+ /*
+ * No FPU state inheritance for kernel threads and IO
+ * worker threads.
+ */
+ if (dst->flags & (PF_KTHREAD | PF_IO_WORKER)) {
+ /* Clear out the minimal state */
+ memcpy(&dst_fpu->fpstate->regs, &init_fpstate.regs,
+ init_fpstate_copy_size());
+ return 0;
+ }
+
+ /*
+ * If a new feature is added, ensure all dynamic features are
+ * caller-saved from here!
*/
- memset(&dst_fpu->state.xsave, 0, fpu_kernel_xstate_size);
+ BUILD_BUG_ON(XFEATURE_MASK_USER_DYNAMIC != XFEATURE_MASK_XTILE_DATA);
/*
- * If the FPU registers are not owned by current just memcpy() the
- * state. Otherwise save the FPU registers directly into the
- * child's FPU context, without any memory-to-memory copying.
+ * Save the default portion of the current FPU state into the
+ * clone. Assume all dynamic features to be defined as caller-
+ * saved, which enables skipping both the expansion of fpstate
+ * and the copying of any dynamic state.
+ *
+ * Do not use memcpy() when TIF_NEED_FPU_LOAD is set because
+ * copying is not valid when current uses non-default states.
*/
fpregs_lock();
if (test_thread_flag(TIF_NEED_FPU_LOAD))
- memcpy(&dst_fpu->state, &src_fpu->state, fpu_kernel_xstate_size);
-
- else
- save_fpregs_to_fpstate(dst_fpu);
+ fpregs_restore_userregs();
+ save_fpregs_to_fpstate(dst_fpu);
+ if (!(clone_flags & CLONE_THREAD))
+ fpu_inherit_perms(dst_fpu);
fpregs_unlock();
- set_tsk_thread_flag(dst, TIF_NEED_FPU_LOAD);
-
trace_x86_fpu_copy_src(src_fpu);
trace_x86_fpu_copy_dst(dst_fpu);
@@ -287,6 +525,16 @@ int fpu_clone(struct task_struct *dst)
}
/*
+ * Whitelist the FPU register state embedded into task_struct for hardened
+ * usercopy.
+ */
+void fpu_thread_struct_whitelist(unsigned long *offset, unsigned long *size)
+{
+ *offset = offsetof(struct thread_struct, fpu.__fpstate.regs);
+ *size = fpu_kernel_cfg.default_size;
+}
+
+/*
* Drops current FPU state: deactivates the fpregs and
* the fpstate. NOTE: it still leaves previous contents
* in the fpregs in the eager-FPU case.
@@ -319,28 +567,19 @@ void fpu__drop(struct fpu *fpu)
static inline void restore_fpregs_from_init_fpstate(u64 features_mask)
{
if (use_xsave())
- os_xrstor(&init_fpstate.xsave, features_mask);
+ os_xrstor(&init_fpstate, features_mask);
else if (use_fxsr())
- fxrstor(&init_fpstate.fxsave);
+ fxrstor(&init_fpstate.regs.fxsave);
else
- frstor(&init_fpstate.fsave);
+ frstor(&init_fpstate.regs.fsave);
pkru_write_default();
}
-static inline unsigned int init_fpstate_copy_size(void)
-{
- if (!use_xsave())
- return fpu_kernel_xstate_size;
-
- /* XSAVE(S) just needs the legacy and the xstate header part */
- return sizeof(init_fpstate.xsave);
-}
-
/*
* Reset current->fpu memory state to the init values.
*/
-static void fpu_reset_fpstate(void)
+static void fpu_reset_fpregs(void)
{
struct fpu *fpu = &current->thread.fpu;
@@ -359,7 +598,7 @@ static void fpu_reset_fpstate(void)
* user space as PKRU is eagerly written in switch_to() and
* flush_thread().
*/
- memcpy(&fpu->state, &init_fpstate, init_fpstate_copy_size());
+ memcpy(&fpu->fpstate->regs, &init_fpstate.regs, init_fpstate_copy_size());
set_thread_flag(TIF_NEED_FPU_LOAD);
fpregs_unlock();
}
@@ -375,7 +614,7 @@ void fpu__clear_user_states(struct fpu *fpu)
fpregs_lock();
if (!cpu_feature_enabled(X86_FEATURE_FPU)) {
- fpu_reset_fpstate();
+ fpu_reset_fpregs();
fpregs_unlock();
return;
}
@@ -385,12 +624,11 @@ void fpu__clear_user_states(struct fpu *fpu)
* corresponding registers.
*/
if (xfeatures_mask_supervisor() &&
- !fpregs_state_valid(fpu, smp_processor_id())) {
- os_xrstor(&fpu->state.xsave, xfeatures_mask_supervisor());
- }
+ !fpregs_state_valid(fpu, smp_processor_id()))
+ os_xrstor_supervisor(fpu->fpstate);
/* Reset user states in registers. */
- restore_fpregs_from_init_fpstate(xfeatures_mask_restore_user());
+ restore_fpregs_from_init_fpstate(XFEATURE_MASK_USER_RESTORE);
/*
* Now all FPU registers have their desired values. Inform the FPU
@@ -405,7 +643,8 @@ void fpu__clear_user_states(struct fpu *fpu)
void fpu_flush_thread(void)
{
- fpu_reset_fpstate();
+ fpstate_reset(&current->thread.fpu);
+ fpu_reset_fpregs();
}
/*
* Load FPU context before returning to userspace.
@@ -445,7 +684,6 @@ void fpregs_mark_activate(void)
fpu->last_cpu = smp_processor_id();
clear_thread_flag(TIF_NEED_FPU_LOAD);
}
-EXPORT_SYMBOL_GPL(fpregs_mark_activate);
/*
* x87 math exception handling:
@@ -468,11 +706,11 @@ int fpu__exception_code(struct fpu *fpu, int trap_nr)
* fully reproduce the context of the exception.
*/
if (boot_cpu_has(X86_FEATURE_FXSR)) {
- cwd = fpu->state.fxsave.cwd;
- swd = fpu->state.fxsave.swd;
+ cwd = fpu->fpstate->regs.fxsave.cwd;
+ swd = fpu->fpstate->regs.fxsave.swd;
} else {
- cwd = (unsigned short)fpu->state.fsave.cwd;
- swd = (unsigned short)fpu->state.fsave.swd;
+ cwd = (unsigned short)fpu->fpstate->regs.fsave.cwd;
+ swd = (unsigned short)fpu->fpstate->regs.fsave.swd;
}
err = swd & ~cwd;
@@ -486,7 +724,7 @@ int fpu__exception_code(struct fpu *fpu, int trap_nr)
unsigned short mxcsr = MXCSR_DEFAULT;
if (boot_cpu_has(X86_FEATURE_XMM))
- mxcsr = fpu->state.fxsave.mxcsr;
+ mxcsr = fpu->fpstate->regs.fxsave.mxcsr;
err = ~(mxcsr >> 7) & mxcsr;
}
diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c
index 64e29927cc32..621f4b6cac4a 100644
--- a/arch/x86/kernel/fpu/init.c
+++ b/arch/x86/kernel/fpu/init.c
@@ -2,7 +2,7 @@
/*
* x86 FPU boot time init code:
*/
-#include <asm/fpu/internal.h>
+#include <asm/fpu/api.h>
#include <asm/tlbflush.h>
#include <asm/setup.h>
@@ -10,6 +10,10 @@
#include <linux/sched/task.h>
#include <linux/init.h>
+#include "internal.h"
+#include "legacy.h"
+#include "xstate.h"
+
/*
* Initialize the registers found in all CPUs, CR0 and CR4:
*/
@@ -34,7 +38,7 @@ static void fpu__init_cpu_generic(void)
/* Flush out any pending x87 state: */
#ifdef CONFIG_MATH_EMULATION
if (!boot_cpu_has(X86_FEATURE_FPU))
- fpstate_init_soft(&current->thread.fpu.state.soft);
+ fpstate_init_soft(&current->thread.fpu.fpstate->regs.soft);
else
#endif
asm volatile ("fninit");
@@ -121,23 +125,14 @@ static void __init fpu__init_system_mxcsr(void)
static void __init fpu__init_system_generic(void)
{
/*
- * Set up the legacy init FPU context. (xstate init might overwrite this
- * with a more modern format, if the CPU supports it.)
+ * Set up the legacy init FPU context. Will be updated when the
+ * CPU supports XSAVE[S].
*/
- fpstate_init(&init_fpstate);
+ fpstate_init_user(&init_fpstate);
fpu__init_system_mxcsr();
}
-/*
- * Size of the FPU context state. All tasks in the system use the
- * same context size, regardless of what portion they use.
- * This is inherent to the XSAVE architecture which puts all state
- * components into a single, continuous memory block:
- */
-unsigned int fpu_kernel_xstate_size __ro_after_init;
-EXPORT_SYMBOL_GPL(fpu_kernel_xstate_size);
-
/* Get alignment of the TYPE. */
#define TYPE_ALIGN(TYPE) offsetof(struct { char x; TYPE test; }, test)
@@ -162,13 +157,13 @@ static void __init fpu__init_task_struct_size(void)
* Subtract off the static size of the register state.
* It potentially has a bunch of padding.
*/
- task_size -= sizeof(((struct task_struct *)0)->thread.fpu.state);
+ task_size -= sizeof(current->thread.fpu.__fpstate.regs);
/*
* Add back the dynamically-calculated register state
* size.
*/
- task_size += fpu_kernel_xstate_size;
+ task_size += fpu_kernel_cfg.default_size;
/*
* We dynamically size 'struct fpu', so we require that
@@ -177,7 +172,7 @@ static void __init fpu__init_task_struct_size(void)
* you hit a compile error here, check the structure to
* see if something got added to the end.
*/
- CHECK_MEMBER_AT_END_OF(struct fpu, state);
+ CHECK_MEMBER_AT_END_OF(struct fpu, __fpstate);
CHECK_MEMBER_AT_END_OF(struct thread_struct, fpu);
CHECK_MEMBER_AT_END_OF(struct task_struct, thread);
@@ -192,37 +187,34 @@ static void __init fpu__init_task_struct_size(void)
*/
static void __init fpu__init_system_xstate_size_legacy(void)
{
- static int on_boot_cpu __initdata = 1;
-
- WARN_ON_FPU(!on_boot_cpu);
- on_boot_cpu = 0;
+ unsigned int size;
/*
- * Note that xstate sizes might be overwritten later during
- * fpu__init_system_xstate().
+ * Note that the size configuration might be overwritten later
+ * during fpu__init_system_xstate().
*/
-
- if (!boot_cpu_has(X86_FEATURE_FPU)) {
- fpu_kernel_xstate_size = sizeof(struct swregs_state);
+ if (!cpu_feature_enabled(X86_FEATURE_FPU)) {
+ size = sizeof(struct swregs_state);
+ } else if (cpu_feature_enabled(X86_FEATURE_FXSR)) {
+ size = sizeof(struct fxregs_state);
+ fpu_user_cfg.legacy_features = XFEATURE_MASK_FPSSE;
} else {
- if (boot_cpu_has(X86_FEATURE_FXSR))
- fpu_kernel_xstate_size =
- sizeof(struct fxregs_state);
- else
- fpu_kernel_xstate_size =
- sizeof(struct fregs_state);
+ size = sizeof(struct fregs_state);
+ fpu_user_cfg.legacy_features = XFEATURE_MASK_FP;
}
- fpu_user_xstate_size = fpu_kernel_xstate_size;
+ fpu_kernel_cfg.max_size = size;
+ fpu_kernel_cfg.default_size = size;
+ fpu_user_cfg.max_size = size;
+ fpu_user_cfg.default_size = size;
+ fpstate_reset(&current->thread.fpu);
}
-/* Legacy code to initialize eager fpu mode. */
-static void __init fpu__init_system_ctx_switch(void)
+static void __init fpu__init_init_fpstate(void)
{
- static bool on_boot_cpu __initdata = 1;
-
- WARN_ON_FPU(!on_boot_cpu);
- on_boot_cpu = 0;
+ /* Bring init_fpstate size and features up to date */
+ init_fpstate.size = fpu_kernel_cfg.max_size;
+ init_fpstate.xfeatures = fpu_kernel_cfg.max_features;
}
/*
@@ -231,6 +223,7 @@ static void __init fpu__init_system_ctx_switch(void)
*/
void __init fpu__init_system(struct cpuinfo_x86 *c)
{
+ fpstate_reset(&current->thread.fpu);
fpu__init_system_early_generic(c);
/*
@@ -241,8 +234,7 @@ void __init fpu__init_system(struct cpuinfo_x86 *c)
fpu__init_system_generic();
fpu__init_system_xstate_size_legacy();
- fpu__init_system_xstate();
+ fpu__init_system_xstate(fpu_kernel_cfg.max_size);
fpu__init_task_struct_size();
-
- fpu__init_system_ctx_switch();
+ fpu__init_init_fpstate();
}
diff --git a/arch/x86/kernel/fpu/internal.h b/arch/x86/kernel/fpu/internal.h
new file mode 100644
index 000000000000..dbdb31f55fc7
--- /dev/null
+++ b/arch/x86/kernel/fpu/internal.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __X86_KERNEL_FPU_INTERNAL_H
+#define __X86_KERNEL_FPU_INTERNAL_H
+
+extern struct fpstate init_fpstate;
+
+/* CPU feature check wrappers */
+static __always_inline __pure bool use_xsave(void)
+{
+ return cpu_feature_enabled(X86_FEATURE_XSAVE);
+}
+
+static __always_inline __pure bool use_fxsr(void)
+{
+ return cpu_feature_enabled(X86_FEATURE_FXSR);
+}
+
+#ifdef CONFIG_X86_DEBUG_FPU
+# define WARN_ON_FPU(x) WARN_ON_ONCE(x)
+#else
+# define WARN_ON_FPU(x) ({ (void)(x); 0; })
+#endif
+
+/* Used in init.c */
+extern void fpstate_init_user(struct fpstate *fpstate);
+extern void fpstate_reset(struct fpu *fpu);
+
+#endif
diff --git a/arch/x86/kernel/fpu/legacy.h b/arch/x86/kernel/fpu/legacy.h
new file mode 100644
index 000000000000..17c26b164c63
--- /dev/null
+++ b/arch/x86/kernel/fpu/legacy.h
@@ -0,0 +1,115 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __X86_KERNEL_FPU_LEGACY_H
+#define __X86_KERNEL_FPU_LEGACY_H
+
+#include <asm/fpu/types.h>
+
+extern unsigned int mxcsr_feature_mask;
+
+static inline void ldmxcsr(u32 mxcsr)
+{
+ asm volatile("ldmxcsr %0" :: "m" (mxcsr));
+}
+
+/*
+ * Returns 0 on success or the trap number when the operation raises an
+ * exception.
+ */
+#define user_insn(insn, output, input...) \
+({ \
+ int err; \
+ \
+ might_fault(); \
+ \
+ asm volatile(ASM_STAC "\n" \
+ "1: " #insn "\n" \
+ "2: " ASM_CLAC "\n" \
+ _ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_FAULT_MCE_SAFE) \
+ : [err] "=a" (err), output \
+ : "0"(0), input); \
+ err; \
+})
+
+#define kernel_insn_err(insn, output, input...) \
+({ \
+ int err; \
+ asm volatile("1:" #insn "\n\t" \
+ "2:\n" \
+ ".section .fixup,\"ax\"\n" \
+ "3: movl $-1,%[err]\n" \
+ " jmp 2b\n" \
+ ".previous\n" \
+ _ASM_EXTABLE(1b, 3b) \
+ : [err] "=r" (err), output \
+ : "0"(0), input); \
+ err; \
+})
+
+#define kernel_insn(insn, output, input...) \
+ asm volatile("1:" #insn "\n\t" \
+ "2:\n" \
+ _ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_FPU_RESTORE) \
+ : output : input)
+
+static inline int fnsave_to_user_sigframe(struct fregs_state __user *fx)
+{
+ return user_insn(fnsave %[fx]; fwait, [fx] "=m" (*fx), "m" (*fx));
+}
+
+static inline int fxsave_to_user_sigframe(struct fxregs_state __user *fx)
+{
+ if (IS_ENABLED(CONFIG_X86_32))
+ return user_insn(fxsave %[fx], [fx] "=m" (*fx), "m" (*fx));
+ else
+ return user_insn(fxsaveq %[fx], [fx] "=m" (*fx), "m" (*fx));
+
+}
+
+static inline void fxrstor(struct fxregs_state *fx)
+{
+ if (IS_ENABLED(CONFIG_X86_32))
+ kernel_insn(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx));
+ else
+ kernel_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx));
+}
+
+static inline int fxrstor_safe(struct fxregs_state *fx)
+{
+ if (IS_ENABLED(CONFIG_X86_32))
+ return kernel_insn_err(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx));
+ else
+ return kernel_insn_err(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx));
+}
+
+static inline int fxrstor_from_user_sigframe(struct fxregs_state __user *fx)
+{
+ if (IS_ENABLED(CONFIG_X86_32))
+ return user_insn(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx));
+ else
+ return user_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx));
+}
+
+static inline void frstor(struct fregs_state *fx)
+{
+ kernel_insn(frstor %[fx], "=m" (*fx), [fx] "m" (*fx));
+}
+
+static inline int frstor_safe(struct fregs_state *fx)
+{
+ return kernel_insn_err(frstor %[fx], "=m" (*fx), [fx] "m" (*fx));
+}
+
+static inline int frstor_from_user_sigframe(struct fregs_state __user *fx)
+{
+ return user_insn(frstor %[fx], "=m" (*fx), [fx] "m" (*fx));
+}
+
+static inline void fxsave(struct fxregs_state *fx)
+{
+ if (IS_ENABLED(CONFIG_X86_32))
+ asm volatile( "fxsave %[fx]" : [fx] "=m" (*fx));
+ else
+ asm volatile("fxsaveq %[fx]" : [fx] "=m" (*fx));
+}
+
+#endif
diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c
index 66ed317ebc0d..437d7c930c0b 100644
--- a/arch/x86/kernel/fpu/regset.c
+++ b/arch/x86/kernel/fpu/regset.c
@@ -5,10 +5,14 @@
#include <linux/sched/task_stack.h>
#include <linux/vmalloc.h>
-#include <asm/fpu/internal.h>
+#include <asm/fpu/api.h>
#include <asm/fpu/signal.h>
#include <asm/fpu/regset.h>
-#include <asm/fpu/xstate.h>
+
+#include "context.h"
+#include "internal.h"
+#include "legacy.h"
+#include "xstate.h"
/*
* The xstateregs_active() routine is the same as the regset_fpregs_active() routine,
@@ -74,8 +78,8 @@ int xfpregs_get(struct task_struct *target, const struct user_regset *regset,
sync_fpstate(fpu);
if (!use_xsave()) {
- return membuf_write(&to, &fpu->state.fxsave,
- sizeof(fpu->state.fxsave));
+ return membuf_write(&to, &fpu->fpstate->regs.fxsave,
+ sizeof(fpu->fpstate->regs.fxsave));
}
copy_xstate_to_uabi_buf(to, target, XSTATE_COPY_FX);
@@ -110,15 +114,15 @@ int xfpregs_set(struct task_struct *target, const struct user_regset *regset,
fpu_force_restore(fpu);
/* Copy the state */
- memcpy(&fpu->state.fxsave, &newstate, sizeof(newstate));
+ memcpy(&fpu->fpstate->regs.fxsave, &newstate, sizeof(newstate));
/* Clear xmm8..15 */
- BUILD_BUG_ON(sizeof(fpu->state.fxsave.xmm_space) != 16 * 16);
- memset(&fpu->state.fxsave.xmm_space[8], 0, 8 * 16);
+ BUILD_BUG_ON(sizeof(fpu->__fpstate.regs.fxsave.xmm_space) != 16 * 16);
+ memset(&fpu->fpstate->regs.fxsave.xmm_space[8], 0, 8 * 16);
/* Mark FP and SSE as in use when XSAVE is enabled */
if (use_xsave())
- fpu->state.xsave.header.xfeatures |= XFEATURE_MASK_FPSSE;
+ fpu->fpstate->regs.xsave.header.xfeatures |= XFEATURE_MASK_FPSSE;
return 0;
}
@@ -149,7 +153,7 @@ int xstateregs_set(struct task_struct *target, const struct user_regset *regset,
/*
* A whole standard-format XSAVE buffer is needed:
*/
- if (pos != 0 || count != fpu_user_xstate_size)
+ if (pos != 0 || count != fpu_user_cfg.max_size)
return -EFAULT;
if (!kbuf) {
@@ -164,7 +168,7 @@ int xstateregs_set(struct task_struct *target, const struct user_regset *regset,
}
fpu_force_restore(fpu);
- ret = copy_uabi_from_kernel_to_xstate(&fpu->state.xsave, kbuf ?: tmpbuf);
+ ret = copy_uabi_from_kernel_to_xstate(fpu->fpstate, kbuf ?: tmpbuf);
out:
vfree(tmpbuf);
@@ -283,7 +287,7 @@ static void __convert_from_fxsr(struct user_i387_ia32_struct *env,
void
convert_from_fxsr(struct user_i387_ia32_struct *env, struct task_struct *tsk)
{
- __convert_from_fxsr(env, tsk, &tsk->thread.fpu.state.fxsave);
+ __convert_from_fxsr(env, tsk, &tsk->thread.fpu.fpstate->regs.fxsave);
}
void convert_to_fxsr(struct fxregs_state *fxsave,
@@ -326,7 +330,7 @@ int fpregs_get(struct task_struct *target, const struct user_regset *regset,
return fpregs_soft_get(target, regset, to);
if (!cpu_feature_enabled(X86_FEATURE_FXSR)) {
- return membuf_write(&to, &fpu->state.fsave,
+ return membuf_write(&to, &fpu->fpstate->regs.fsave,
sizeof(struct fregs_state));
}
@@ -337,7 +341,7 @@ int fpregs_get(struct task_struct *target, const struct user_regset *regset,
copy_xstate_to_uabi_buf(mb, target, XSTATE_COPY_FP);
fx = &fxsave;
} else {
- fx = &fpu->state.fxsave;
+ fx = &fpu->fpstate->regs.fxsave;
}
__convert_from_fxsr(&env, target, fx);
@@ -366,16 +370,16 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset,
fpu_force_restore(fpu);
if (cpu_feature_enabled(X86_FEATURE_FXSR))
- convert_to_fxsr(&fpu->state.fxsave, &env);
+ convert_to_fxsr(&fpu->fpstate->regs.fxsave, &env);
else
- memcpy(&fpu->state.fsave, &env, sizeof(env));
+ memcpy(&fpu->fpstate->regs.fsave, &env, sizeof(env));
/*
* Update the header bit in the xsave header, indicating the
* presence of FP.
*/
if (cpu_feature_enabled(X86_FEATURE_XSAVE))
- fpu->state.xsave.header.xfeatures |= XFEATURE_MASK_FP;
+ fpu->fpstate->regs.xsave.header.xfeatures |= XFEATURE_MASK_FP;
return 0;
}
diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c
index fa17a27390ab..cc977da6e128 100644
--- a/arch/x86/kernel/fpu/signal.c
+++ b/arch/x86/kernel/fpu/signal.c
@@ -7,23 +7,25 @@
#include <linux/cpu.h>
#include <linux/pagemap.h>
-#include <asm/fpu/internal.h>
#include <asm/fpu/signal.h>
#include <asm/fpu/regset.h>
#include <asm/fpu/xstate.h>
#include <asm/sigframe.h>
+#include <asm/trapnr.h>
#include <asm/trace/fpu.h>
-static struct _fpx_sw_bytes fx_sw_reserved __ro_after_init;
-static struct _fpx_sw_bytes fx_sw_reserved_ia32 __ro_after_init;
+#include "context.h"
+#include "internal.h"
+#include "legacy.h"
+#include "xstate.h"
/*
* Check for the presence of extended state information in the
* user fpstate pointer in the sigcontext.
*/
-static inline int check_xstate_in_sigframe(struct fxregs_state __user *fxbuf,
- struct _fpx_sw_bytes *fx_sw)
+static inline bool check_xstate_in_sigframe(struct fxregs_state __user *fxbuf,
+ struct _fpx_sw_bytes *fx_sw)
{
int min_xstate_size = sizeof(struct fxregs_state) +
sizeof(struct xstate_header);
@@ -31,12 +33,12 @@ static inline int check_xstate_in_sigframe(struct fxregs_state __user *fxbuf,
unsigned int magic2;
if (__copy_from_user(fx_sw, &fxbuf->sw_reserved[0], sizeof(*fx_sw)))
- return -EFAULT;
+ return false;
/* Check for the first magic field and other error scenarios. */
if (fx_sw->magic1 != FP_XSTATE_MAGIC1 ||
fx_sw->xstate_size < min_xstate_size ||
- fx_sw->xstate_size > fpu_user_xstate_size ||
+ fx_sw->xstate_size > current->thread.fpu.fpstate->user_size ||
fx_sw->xstate_size > fx_sw->extended_size)
goto setfx;
@@ -47,10 +49,10 @@ static inline int check_xstate_in_sigframe(struct fxregs_state __user *fxbuf,
* in the memory layout.
*/
if (__get_user(magic2, (__u32 __user *)(fpstate + fx_sw->xstate_size)))
- return -EFAULT;
+ return false;
if (likely(magic2 == FP_XSTATE_MAGIC2))
- return 0;
+ return true;
setfx:
trace_x86_fpu_xstate_check_failed(&current->thread.fpu);
@@ -58,22 +60,22 @@ setfx:
fx_sw->magic1 = 0;
fx_sw->xstate_size = sizeof(struct fxregs_state);
fx_sw->xfeatures = XFEATURE_MASK_FPSSE;
- return 0;
+ return true;
}
/*
* Signal frame handlers.
*/
-static inline int save_fsave_header(struct task_struct *tsk, void __user *buf)
+static inline bool save_fsave_header(struct task_struct *tsk, void __user *buf)
{
if (use_fxsr()) {
- struct xregs_state *xsave = &tsk->thread.fpu.state.xsave;
+ struct xregs_state *xsave = &tsk->thread.fpu.fpstate->regs.xsave;
struct user_i387_ia32_struct env;
struct _fpstate_32 __user *fp = buf;
fpregs_lock();
if (!test_thread_flag(TIF_NEED_FPU_LOAD))
- fxsave(&tsk->thread.fpu.state.fxsave);
+ fxsave(&tsk->thread.fpu.fpstate->regs.fxsave);
fpregs_unlock();
convert_from_fxsr(&env, tsk);
@@ -81,33 +83,54 @@ static inline int save_fsave_header(struct task_struct *tsk, void __user *buf)
if (__copy_to_user(buf, &env, sizeof(env)) ||
__put_user(xsave->i387.swd, &fp->status) ||
__put_user(X86_FXSR_MAGIC, &fp->magic))
- return -1;
+ return false;
} else {
struct fregs_state __user *fp = buf;
u32 swd;
+
if (__get_user(swd, &fp->swd) || __put_user(swd, &fp->status))
- return -1;
+ return false;
}
- return 0;
+ return true;
+}
+
+/*
+ * Prepare the SW reserved portion of the fxsave memory layout, indicating
+ * the presence of the extended state information in the memory layout
+ * pointed to by the fpstate pointer in the sigcontext.
+ * This is saved when ever the FP and extended state context is
+ * saved on the user stack during the signal handler delivery to the user.
+ */
+static inline void save_sw_bytes(struct _fpx_sw_bytes *sw_bytes, bool ia32_frame,
+ struct fpstate *fpstate)
+{
+ sw_bytes->magic1 = FP_XSTATE_MAGIC1;
+ sw_bytes->extended_size = fpstate->user_size + FP_XSTATE_MAGIC2_SIZE;
+ sw_bytes->xfeatures = fpstate->user_xfeatures;
+ sw_bytes->xstate_size = fpstate->user_size;
+
+ if (ia32_frame)
+ sw_bytes->extended_size += sizeof(struct fregs_state);
}
-static inline int save_xstate_epilog(void __user *buf, int ia32_frame)
+static inline bool save_xstate_epilog(void __user *buf, int ia32_frame,
+ struct fpstate *fpstate)
{
struct xregs_state __user *x = buf;
- struct _fpx_sw_bytes *sw_bytes;
+ struct _fpx_sw_bytes sw_bytes;
u32 xfeatures;
int err;
/* Setup the bytes not touched by the [f]xsave and reserved for SW. */
- sw_bytes = ia32_frame ? &fx_sw_reserved_ia32 : &fx_sw_reserved;
- err = __copy_to_user(&x->i387.sw_reserved, sw_bytes, sizeof(*sw_bytes));
+ save_sw_bytes(&sw_bytes, ia32_frame, fpstate);
+ err = __copy_to_user(&x->i387.sw_reserved, &sw_bytes, sizeof(sw_bytes));
if (!use_xsave())
- return err;
+ return !err;
err |= __put_user(FP_XSTATE_MAGIC2,
- (__u32 __user *)(buf + fpu_user_xstate_size));
+ (__u32 __user *)(buf + fpstate->user_size));
/*
* Read the xfeatures which we copied (directly from the cpu or
@@ -130,23 +153,17 @@ static inline int save_xstate_epilog(void __user *buf, int ia32_frame)
err |= __put_user(xfeatures, (__u32 __user *)&x->header.xfeatures);
- return err;
+ return !err;
}
static inline int copy_fpregs_to_sigframe(struct xregs_state __user *buf)
{
- int err;
-
if (use_xsave())
- err = xsave_to_user_sigframe(buf);
- else if (use_fxsr())
- err = fxsave_to_user_sigframe((struct fxregs_state __user *) buf);
+ return xsave_to_user_sigframe(buf);
+ if (use_fxsr())
+ return fxsave_to_user_sigframe((struct fxregs_state __user *) buf);
else
- err = fnsave_to_user_sigframe((struct fregs_state __user *) buf);
-
- if (unlikely(err) && __clear_user(buf, fpu_user_xstate_size))
- err = -EFAULT;
- return err;
+ return fnsave_to_user_sigframe((struct fregs_state __user *) buf);
}
/*
@@ -159,10 +176,8 @@ static inline int copy_fpregs_to_sigframe(struct xregs_state __user *buf)
* buf == buf_fx for 64-bit frames and 32-bit fsave frame.
* buf != buf_fx for 32-bit frames with fxstate.
*
- * Try to save it directly to the user frame with disabled page fault handler.
- * If this fails then do the slow path where the FPU state is first saved to
- * task's fpu->state and then copy it to the user frame pointed to by the
- * aligned pointer 'buf_fx'.
+ * Save it directly to the user frame with disabled page fault handler. If
+ * that faults, try to clear the frame which handles the page fault.
*
* If this is a 32-bit frame with fxstate, put a fsave header before
* the aligned state at 'buf_fx'.
@@ -170,10 +185,11 @@ static inline int copy_fpregs_to_sigframe(struct xregs_state __user *buf)
* For [f]xsave state, update the SW reserved fields in the [f]xsave frame
* indicating the absence/presence of the extended state to the user.
*/
-int copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size)
+bool copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size)
{
struct task_struct *tsk = current;
- int ia32_fxstate = (buf != buf_fx);
+ struct fpstate *fpstate = tsk->thread.fpu.fpstate;
+ bool ia32_fxstate = (buf != buf_fx);
int ret;
ia32_fxstate &= (IS_ENABLED(CONFIG_X86_32) ||
@@ -181,13 +197,25 @@ int copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size)
if (!static_cpu_has(X86_FEATURE_FPU)) {
struct user_i387_ia32_struct fp;
+
fpregs_soft_get(current, NULL, (struct membuf){.p = &fp,
.left = sizeof(fp)});
- return copy_to_user(buf, &fp, sizeof(fp)) ? -EFAULT : 0;
+ return !copy_to_user(buf, &fp, sizeof(fp));
}
if (!access_ok(buf, size))
- return -EACCES;
+ return false;
+
+ if (use_xsave()) {
+ struct xregs_state __user *xbuf = buf_fx;
+
+ /*
+ * Clear the xsave header first, so that reserved fields are
+ * initialized to zero.
+ */
+ if (__clear_user(&xbuf->header, sizeof(xbuf->header)))
+ return false;
+ }
retry:
/*
* Load the FPU registers if they are not valid for the current task.
@@ -205,26 +233,26 @@ retry:
fpregs_unlock();
if (ret) {
- if (!fault_in_pages_writeable(buf_fx, fpu_user_xstate_size))
+ if (!__clear_user(buf_fx, fpstate->user_size))
goto retry;
- return -EFAULT;
+ return false;
}
/* Save the fsave header for the 32-bit frames. */
- if ((ia32_fxstate || !use_fxsr()) && save_fsave_header(tsk, buf))
- return -1;
+ if ((ia32_fxstate || !use_fxsr()) && !save_fsave_header(tsk, buf))
+ return false;
- if (use_fxsr() && save_xstate_epilog(buf_fx, ia32_fxstate))
- return -1;
+ if (use_fxsr() && !save_xstate_epilog(buf_fx, ia32_fxstate, fpstate))
+ return false;
- return 0;
+ return true;
}
-static int __restore_fpregs_from_user(void __user *buf, u64 xrestore,
- bool fx_only)
+static int __restore_fpregs_from_user(void __user *buf, u64 ufeatures,
+ u64 xrestore, bool fx_only)
{
if (use_xsave()) {
- u64 init_bv = xfeatures_mask_uabi() & ~xrestore;
+ u64 init_bv = ufeatures & ~xrestore;
int ret;
if (likely(!fx_only))
@@ -233,7 +261,7 @@ static int __restore_fpregs_from_user(void __user *buf, u64 xrestore,
ret = fxrstor_from_user_sigframe(buf);
if (!ret && unlikely(init_bv))
- os_xrstor(&init_fpstate.xsave, init_bv);
+ os_xrstor(&init_fpstate, init_bv);
return ret;
} else if (use_fxsr()) {
return fxrstor_from_user_sigframe(buf);
@@ -246,16 +274,19 @@ static int __restore_fpregs_from_user(void __user *buf, u64 xrestore,
* Attempt to restore the FPU registers directly from user memory.
* Pagefaults are handled and any errors returned are fatal.
*/
-static int restore_fpregs_from_user(void __user *buf, u64 xrestore,
- bool fx_only, unsigned int size)
+static bool restore_fpregs_from_user(void __user *buf, u64 xrestore,
+ bool fx_only, unsigned int size)
{
struct fpu *fpu = &current->thread.fpu;
int ret;
retry:
fpregs_lock();
+ /* Ensure that XFD is up to date */
+ xfd_update_state(fpu->fpstate);
pagefault_disable();
- ret = __restore_fpregs_from_user(buf, xrestore, fx_only);
+ ret = __restore_fpregs_from_user(buf, fpu->fpstate->user_xfeatures,
+ xrestore, fx_only);
pagefault_enable();
if (unlikely(ret)) {
@@ -275,13 +306,12 @@ retry:
fpregs_unlock();
/* Try to handle #PF, but anything else is fatal. */
- if (ret != -EFAULT)
- return -EINVAL;
+ if (ret != X86_TRAP_PF)
+ return false;
- ret = fault_in_pages_readable(buf, size);
- if (!ret)
+ if (!fault_in_pages_readable(buf, size))
goto retry;
- return ret;
+ return false;
}
/*
@@ -294,45 +324,40 @@ retry:
* been restored from a user buffer directly.
*/
if (test_thread_flag(TIF_NEED_FPU_LOAD) && xfeatures_mask_supervisor())
- os_xrstor(&fpu->state.xsave, xfeatures_mask_supervisor());
+ os_xrstor_supervisor(fpu->fpstate);
fpregs_mark_activate();
fpregs_unlock();
- return 0;
+ return true;
}
-static int __fpu_restore_sig(void __user *buf, void __user *buf_fx,
- bool ia32_fxstate)
+static bool __fpu_restore_sig(void __user *buf, void __user *buf_fx,
+ bool ia32_fxstate)
{
- int state_size = fpu_kernel_xstate_size;
struct task_struct *tsk = current;
struct fpu *fpu = &tsk->thread.fpu;
struct user_i387_ia32_struct env;
+ bool success, fx_only = false;
+ union fpregs_state *fpregs;
+ unsigned int state_size;
u64 user_xfeatures = 0;
- bool fx_only = false;
- int ret;
if (use_xsave()) {
struct _fpx_sw_bytes fx_sw_user;
- ret = check_xstate_in_sigframe(buf_fx, &fx_sw_user);
- if (unlikely(ret))
- return ret;
+ if (!check_xstate_in_sigframe(buf_fx, &fx_sw_user))
+ return false;
fx_only = !fx_sw_user.magic1;
state_size = fx_sw_user.xstate_size;
user_xfeatures = fx_sw_user.xfeatures;
} else {
user_xfeatures = XFEATURE_MASK_FPSSE;
+ state_size = fpu->fpstate->user_size;
}
if (likely(!ia32_fxstate)) {
- /*
- * Attempt to restore the FPU registers directly from user
- * memory. For that to succeed, the user access cannot cause page
- * faults. If it does, fall back to the slow path below, going
- * through the kernel buffer with the enabled pagefault handler.
- */
+ /* Restore the FPU registers directly from user memory. */
return restore_fpregs_from_user(buf_fx, user_xfeatures, fx_only,
state_size);
}
@@ -342,9 +367,8 @@ static int __fpu_restore_sig(void __user *buf, void __user *buf_fx,
* to be ignored for histerical raisins. The legacy state is folded
* in once the larger state has been copied.
*/
- ret = __copy_from_user(&env, buf, sizeof(env));
- if (ret)
- return ret;
+ if (__copy_from_user(&env, buf, sizeof(env)))
+ return false;
/*
* By setting TIF_NEED_FPU_LOAD it is ensured that our xstate is
@@ -363,38 +387,38 @@ static int __fpu_restore_sig(void __user *buf, void __user *buf_fx,
* the right place in memory. It's ia32 mode. Shrug.
*/
if (xfeatures_mask_supervisor())
- os_xsave(&fpu->state.xsave);
+ os_xsave(fpu->fpstate);
set_thread_flag(TIF_NEED_FPU_LOAD);
}
__fpu_invalidate_fpregs_state(fpu);
__cpu_invalidate_fpregs_state();
fpregs_unlock();
+ fpregs = &fpu->fpstate->regs;
if (use_xsave() && !fx_only) {
- ret = copy_sigframe_from_user_to_xstate(&fpu->state.xsave, buf_fx);
- if (ret)
- return ret;
+ if (copy_sigframe_from_user_to_xstate(fpu->fpstate, buf_fx))
+ return false;
} else {
- if (__copy_from_user(&fpu->state.fxsave, buf_fx,
- sizeof(fpu->state.fxsave)))
- return -EFAULT;
+ if (__copy_from_user(&fpregs->fxsave, buf_fx,
+ sizeof(fpregs->fxsave)))
+ return false;
if (IS_ENABLED(CONFIG_X86_64)) {
/* Reject invalid MXCSR values. */
- if (fpu->state.fxsave.mxcsr & ~mxcsr_feature_mask)
- return -EINVAL;
+ if (fpregs->fxsave.mxcsr & ~mxcsr_feature_mask)
+ return false;
} else {
/* Mask invalid bits out for historical reasons (broken hardware). */
- fpu->state.fxsave.mxcsr &= ~mxcsr_feature_mask;
+ fpregs->fxsave.mxcsr &= mxcsr_feature_mask;
}
/* Enforce XFEATURE_MASK_FPSSE when XSAVE is enabled */
if (use_xsave())
- fpu->state.xsave.header.xfeatures |= XFEATURE_MASK_FPSSE;
+ fpregs->xsave.header.xfeatures |= XFEATURE_MASK_FPSSE;
}
/* Fold the legacy FP storage */
- convert_to_fxsr(&fpu->state.fxsave, &env);
+ convert_to_fxsr(&fpregs->fxsave, &env);
fpregs_lock();
if (use_xsave()) {
@@ -409,40 +433,45 @@ static int __fpu_restore_sig(void __user *buf, void __user *buf_fx,
*/
u64 mask = user_xfeatures | xfeatures_mask_supervisor();
- fpu->state.xsave.header.xfeatures &= mask;
- ret = os_xrstor_safe(&fpu->state.xsave, xfeatures_mask_all);
+ fpregs->xsave.header.xfeatures &= mask;
+ success = !os_xrstor_safe(fpu->fpstate,
+ fpu_kernel_cfg.max_features);
} else {
- ret = fxrstor_safe(&fpu->state.fxsave);
+ success = !fxrstor_safe(&fpregs->fxsave);
}
- if (likely(!ret))
+ if (likely(success))
fpregs_mark_activate();
fpregs_unlock();
- return ret;
+ return success;
}
-static inline int xstate_sigframe_size(void)
+
+static inline unsigned int xstate_sigframe_size(struct fpstate *fpstate)
{
- return use_xsave() ? fpu_user_xstate_size + FP_XSTATE_MAGIC2_SIZE :
- fpu_user_xstate_size;
+ unsigned int size = fpstate->user_size;
+
+ return use_xsave() ? size + FP_XSTATE_MAGIC2_SIZE : size;
}
/*
* Restore FPU state from a sigframe:
*/
-int fpu__restore_sig(void __user *buf, int ia32_frame)
+bool fpu__restore_sig(void __user *buf, int ia32_frame)
{
- unsigned int size = xstate_sigframe_size();
struct fpu *fpu = &current->thread.fpu;
void __user *buf_fx = buf;
bool ia32_fxstate = false;
- int ret;
+ bool success = false;
+ unsigned int size;
if (unlikely(!buf)) {
fpu__clear_user_states(fpu);
- return 0;
+ return true;
}
+ size = xstate_sigframe_size(fpu->fpstate);
+
ia32_frame &= (IS_ENABLED(CONFIG_X86_32) ||
IS_ENABLED(CONFIG_IA32_EMULATION));
@@ -456,30 +485,28 @@ int fpu__restore_sig(void __user *buf, int ia32_frame)
ia32_fxstate = true;
}
- if (!access_ok(buf, size)) {
- ret = -EACCES;
+ if (!access_ok(buf, size))
goto out;
- }
if (!IS_ENABLED(CONFIG_X86_64) && !cpu_feature_enabled(X86_FEATURE_FPU)) {
- ret = fpregs_soft_set(current, NULL, 0,
- sizeof(struct user_i387_ia32_struct),
- NULL, buf);
+ success = !fpregs_soft_set(current, NULL, 0,
+ sizeof(struct user_i387_ia32_struct),
+ NULL, buf);
} else {
- ret = __fpu_restore_sig(buf, buf_fx, ia32_fxstate);
+ success = __fpu_restore_sig(buf, buf_fx, ia32_fxstate);
}
out:
- if (unlikely(ret))
+ if (unlikely(!success))
fpu__clear_user_states(fpu);
- return ret;
+ return success;
}
unsigned long
fpu__alloc_mathframe(unsigned long sp, int ia32_frame,
unsigned long *buf_fx, unsigned long *size)
{
- unsigned long frame_size = xstate_sigframe_size();
+ unsigned long frame_size = xstate_sigframe_size(current->thread.fpu.fpstate);
*buf_fx = sp = round_down(sp - frame_size, 64);
if (ia32_frame && use_fxsr()) {
@@ -492,9 +519,12 @@ fpu__alloc_mathframe(unsigned long sp, int ia32_frame,
return sp;
}
-unsigned long fpu__get_fpstate_size(void)
+unsigned long __init fpu__get_fpstate_size(void)
{
- unsigned long ret = xstate_sigframe_size();
+ unsigned long ret = fpu_user_cfg.max_size;
+
+ if (use_xsave())
+ ret += FP_XSTATE_MAGIC2_SIZE;
/*
* This space is needed on (most) 32-bit kernels, or when a 32-bit
@@ -510,28 +540,3 @@ unsigned long fpu__get_fpstate_size(void)
return ret;
}
-/*
- * Prepare the SW reserved portion of the fxsave memory layout, indicating
- * the presence of the extended state information in the memory layout
- * pointed by the fpstate pointer in the sigcontext.
- * This will be saved when ever the FP and extended state context is
- * saved on the user stack during the signal handler delivery to the user.
- */
-void fpu__init_prepare_fx_sw_frame(void)
-{
- int size = fpu_user_xstate_size + FP_XSTATE_MAGIC2_SIZE;
-
- fx_sw_reserved.magic1 = FP_XSTATE_MAGIC1;
- fx_sw_reserved.extended_size = size;
- fx_sw_reserved.xfeatures = xfeatures_mask_uabi();
- fx_sw_reserved.xstate_size = fpu_user_xstate_size;
-
- if (IS_ENABLED(CONFIG_IA32_EMULATION) ||
- IS_ENABLED(CONFIG_X86_32)) {
- int fsave_header_size = sizeof(struct fregs_state);
-
- fx_sw_reserved_ia32 = fx_sw_reserved;
- fx_sw_reserved_ia32.extended_size = size + fsave_header_size;
- }
-}
-
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index c8def1b7f8fb..d28829403ed0 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -4,21 +4,33 @@
*
* Author: Suresh Siddha <suresh.b.siddha@intel.com>
*/
+#include <linux/bitops.h>
#include <linux/compat.h>
#include <linux/cpu.h>
#include <linux/mman.h>
+#include <linux/nospec.h>
#include <linux/pkeys.h>
#include <linux/seq_file.h>
#include <linux/proc_fs.h>
+#include <linux/vmalloc.h>
#include <asm/fpu/api.h>
-#include <asm/fpu/internal.h>
-#include <asm/fpu/signal.h>
#include <asm/fpu/regset.h>
-#include <asm/fpu/xstate.h>
+#include <asm/fpu/signal.h>
+#include <asm/fpu/xcr.h>
#include <asm/tlbflush.h>
-#include <asm/cpufeature.h>
+#include <asm/prctl.h>
+#include <asm/elf.h>
+
+#include "context.h"
+#include "internal.h"
+#include "legacy.h"
+#include "xstate.h"
+
+#define for_each_extended_xfeature(bit, mask) \
+ (bit) = FIRST_EXTENDED_XFEATURE; \
+ for_each_set_bit_from(bit, (unsigned long *)&(mask), 8 * sizeof(mask))
/*
* Although we spell it out in here, the Processor Trace
@@ -39,29 +51,32 @@ static const char *xfeature_names[] =
"Protection Keys User registers",
"PASID state",
"unknown xstate feature" ,
+ "unknown xstate feature" ,
+ "unknown xstate feature" ,
+ "unknown xstate feature" ,
+ "unknown xstate feature" ,
+ "unknown xstate feature" ,
+ "AMX Tile config" ,
+ "AMX Tile data" ,
+ "unknown xstate feature" ,
};
-static short xsave_cpuid_features[] __initdata = {
- X86_FEATURE_FPU,
- X86_FEATURE_XMM,
- X86_FEATURE_AVX,
- X86_FEATURE_MPX,
- X86_FEATURE_MPX,
- X86_FEATURE_AVX512F,
- X86_FEATURE_AVX512F,
- X86_FEATURE_AVX512F,
- X86_FEATURE_INTEL_PT,
- X86_FEATURE_PKU,
- X86_FEATURE_ENQCMD,
+static unsigned short xsave_cpuid_features[] __initdata = {
+ [XFEATURE_FP] = X86_FEATURE_FPU,
+ [XFEATURE_SSE] = X86_FEATURE_XMM,
+ [XFEATURE_YMM] = X86_FEATURE_AVX,
+ [XFEATURE_BNDREGS] = X86_FEATURE_MPX,
+ [XFEATURE_BNDCSR] = X86_FEATURE_MPX,
+ [XFEATURE_OPMASK] = X86_FEATURE_AVX512F,
+ [XFEATURE_ZMM_Hi256] = X86_FEATURE_AVX512F,
+ [XFEATURE_Hi16_ZMM] = X86_FEATURE_AVX512F,
+ [XFEATURE_PT_UNIMPLEMENTED_SO_FAR] = X86_FEATURE_INTEL_PT,
+ [XFEATURE_PKRU] = X86_FEATURE_PKU,
+ [XFEATURE_PASID] = X86_FEATURE_ENQCMD,
+ [XFEATURE_XTILE_CFG] = X86_FEATURE_AMX_TILE,
+ [XFEATURE_XTILE_DATA] = X86_FEATURE_AMX_TILE,
};
-/*
- * This represents the full set of bits that should ever be set in a kernel
- * XSAVE buffer, both supervisor and user xstates.
- */
-u64 xfeatures_mask_all __ro_after_init;
-EXPORT_SYMBOL_GPL(xfeatures_mask_all);
-
static unsigned int xstate_offsets[XFEATURE_MAX] __ro_after_init =
{ [ 0 ... XFEATURE_MAX - 1] = -1};
static unsigned int xstate_sizes[XFEATURE_MAX] __ro_after_init =
@@ -72,20 +87,13 @@ static unsigned int xstate_supervisor_only_offsets[XFEATURE_MAX] __ro_after_init
{ [ 0 ... XFEATURE_MAX - 1] = -1};
/*
- * The XSAVE area of kernel can be in standard or compacted format;
- * it is always in standard format for user mode. This is the user
- * mode standard format size used for signal and ptrace frames.
- */
-unsigned int fpu_user_xstate_size __ro_after_init;
-
-/*
* Return whether the system supports a given xfeature.
*
* Also return the name of the (most advanced) feature that the caller requested:
*/
int cpu_has_xfeatures(u64 xfeatures_needed, const char **feature_name)
{
- u64 xfeatures_missing = xfeatures_needed & ~xfeatures_mask_all;
+ u64 xfeatures_missing = xfeatures_needed & ~fpu_kernel_cfg.max_features;
if (unlikely(feature_name)) {
long xfeature_idx, max_idx;
@@ -135,17 +143,26 @@ static bool xfeature_is_supervisor(int xfeature_nr)
*/
void fpu__init_cpu_xstate(void)
{
- if (!boot_cpu_has(X86_FEATURE_XSAVE) || !xfeatures_mask_all)
+ if (!boot_cpu_has(X86_FEATURE_XSAVE) || !fpu_kernel_cfg.max_features)
return;
cr4_set_bits(X86_CR4_OSXSAVE);
/*
+ * Must happen after CR4 setup and before xsetbv() to allow KVM
+ * lazy passthrough. Write independent of the dynamic state static
+ * key as that does not work on the boot CPU. This also ensures
+ * that any stale state is wiped out from XFD.
+ */
+ if (cpu_feature_enabled(X86_FEATURE_XFD))
+ wrmsrl(MSR_IA32_XFD, init_fpstate.xfd);
+
+ /*
* XCR_XFEATURE_ENABLED_MASK (aka. XCR0) sets user features
* managed by XSAVE{C, OPT, S} and XRSTOR{S}. Only XSAVE user
* states can be set here.
*/
- xsetbv(XCR_XFEATURE_ENABLED_MASK, xfeatures_mask_uabi());
+ xsetbv(XCR_XFEATURE_ENABLED_MASK, fpu_user_cfg.max_features);
/*
* MSR_IA32_XSS sets supervisor states managed by XSAVES.
@@ -158,7 +175,7 @@ void fpu__init_cpu_xstate(void)
static bool xfeature_enabled(enum xfeature xfeature)
{
- return xfeatures_mask_all & BIT_ULL(xfeature);
+ return fpu_kernel_cfg.max_features & BIT_ULL(xfeature);
}
/*
@@ -184,10 +201,7 @@ static void __init setup_xstate_features(void)
xstate_sizes[XFEATURE_SSE] = sizeof_field(struct fxregs_state,
xmm_space);
- for (i = FIRST_EXTENDED_XFEATURE; i < XFEATURE_MAX; i++) {
- if (!xfeature_enabled(i))
- continue;
-
+ for_each_extended_xfeature(i, fpu_kernel_cfg.max_features) {
cpuid_count(XSTATE_CPUID, i, &eax, &ebx, &ecx, &edx);
xstate_sizes[i] = eax;
@@ -236,6 +250,8 @@ static void __init print_xstate_features(void)
print_xstate_feature(XFEATURE_MASK_Hi16_ZMM);
print_xstate_feature(XFEATURE_MASK_PKRU);
print_xstate_feature(XFEATURE_MASK_PASID);
+ print_xstate_feature(XFEATURE_MASK_XTILE_CFG);
+ print_xstate_feature(XFEATURE_MASK_XTILE_DATA);
}
/*
@@ -291,20 +307,15 @@ static void __init setup_xstate_comp_offsets(void)
xstate_comp_offsets[XFEATURE_SSE] = offsetof(struct fxregs_state,
xmm_space);
- if (!boot_cpu_has(X86_FEATURE_XSAVES)) {
- for (i = FIRST_EXTENDED_XFEATURE; i < XFEATURE_MAX; i++) {
- if (xfeature_enabled(i))
- xstate_comp_offsets[i] = xstate_offsets[i];
- }
+ if (!cpu_feature_enabled(X86_FEATURE_XSAVES)) {
+ for_each_extended_xfeature(i, fpu_kernel_cfg.max_features)
+ xstate_comp_offsets[i] = xstate_offsets[i];
return;
}
next_offset = FXSAVE_SIZE + XSAVE_HDR_SIZE;
- for (i = FIRST_EXTENDED_XFEATURE; i < XFEATURE_MAX; i++) {
- if (!xfeature_enabled(i))
- continue;
-
+ for_each_extended_xfeature(i, fpu_kernel_cfg.max_features) {
if (xfeature_is_aligned(i))
next_offset = ALIGN(next_offset, 64);
@@ -328,8 +339,8 @@ static void __init setup_supervisor_only_offsets(void)
next_offset = FXSAVE_SIZE + XSAVE_HDR_SIZE;
- for (i = FIRST_EXTENDED_XFEATURE; i < XFEATURE_MAX; i++) {
- if (!xfeature_enabled(i) || !xfeature_is_supervisor(i))
+ for_each_extended_xfeature(i, fpu_kernel_cfg.max_features) {
+ if (!xfeature_is_supervisor(i))
continue;
if (xfeature_is_aligned(i))
@@ -347,15 +358,36 @@ static void __init print_xstate_offset_size(void)
{
int i;
- for (i = FIRST_EXTENDED_XFEATURE; i < XFEATURE_MAX; i++) {
- if (!xfeature_enabled(i))
- continue;
+ for_each_extended_xfeature(i, fpu_kernel_cfg.max_features) {
pr_info("x86/fpu: xstate_offset[%d]: %4d, xstate_sizes[%d]: %4d\n",
i, xstate_comp_offsets[i], i, xstate_sizes[i]);
}
}
/*
+ * This function is called only during boot time when x86 caps are not set
+ * up and alternative can not be used yet.
+ */
+static __init void os_xrstor_booting(struct xregs_state *xstate)
+{
+ u64 mask = fpu_kernel_cfg.max_features & XFEATURE_MASK_FPSTATE;
+ u32 lmask = mask;
+ u32 hmask = mask >> 32;
+ int err;
+
+ if (cpu_feature_enabled(X86_FEATURE_XSAVES))
+ XSTATE_OP(XRSTORS, xstate, lmask, hmask, err);
+ else
+ XSTATE_OP(XRSTOR, xstate, lmask, hmask, err);
+
+ /*
+ * We should never fault when copying from a kernel buffer, and the FPU
+ * state we set at boot time should be valid.
+ */
+ WARN_ON_FPU(err);
+}
+
+/*
* All supported features have either init state all zeros or are
* handled in setup_init_fpu() individually. This is an explicit
* feature list and does not use XFEATURE_MASK*SUPPORTED to catch
@@ -372,36 +404,30 @@ static void __init print_xstate_offset_size(void)
XFEATURE_MASK_PKRU | \
XFEATURE_MASK_BNDREGS | \
XFEATURE_MASK_BNDCSR | \
- XFEATURE_MASK_PASID)
+ XFEATURE_MASK_PASID | \
+ XFEATURE_MASK_XTILE)
/*
* setup the xstate image representing the init state
*/
static void __init setup_init_fpu_buf(void)
{
- static int on_boot_cpu __initdata = 1;
-
BUILD_BUG_ON((XFEATURE_MASK_USER_SUPPORTED |
XFEATURE_MASK_SUPERVISOR_SUPPORTED) !=
XFEATURES_INIT_FPSTATE_HANDLED);
- WARN_ON_FPU(!on_boot_cpu);
- on_boot_cpu = 0;
-
if (!boot_cpu_has(X86_FEATURE_XSAVE))
return;
setup_xstate_features();
print_xstate_features();
- if (boot_cpu_has(X86_FEATURE_XSAVES))
- init_fpstate.xsave.header.xcomp_bv = XCOMP_BV_COMPACTED_FORMAT |
- xfeatures_mask_all;
+ xstate_init_xcomp_bv(&init_fpstate.regs.xsave, fpu_kernel_cfg.max_features);
/*
* Init all the features state with header.xfeatures being 0x0
*/
- os_xrstor_booting(&init_fpstate.xsave);
+ os_xrstor_booting(&init_fpstate.regs.xsave);
/*
* All components are now in init state. Read the state back so
@@ -419,7 +445,7 @@ static void __init setup_init_fpu_buf(void)
* state is all zeroes or if not to add the necessary handling
* here.
*/
- fxsave(&init_fpstate.fxsave);
+ fxsave(&init_fpstate.regs.fxsave);
}
static int xfeature_uncompacted_offset(int xfeature_nr)
@@ -451,10 +477,11 @@ int xfeature_size(int xfeature_nr)
}
/* Validate an xstate header supplied by userspace (ptrace or sigreturn) */
-static int validate_user_xstate_header(const struct xstate_header *hdr)
+static int validate_user_xstate_header(const struct xstate_header *hdr,
+ struct fpstate *fpstate)
{
/* No unknown or supervisor features may be set */
- if (hdr->xfeatures & ~xfeatures_mask_uabi())
+ if (hdr->xfeatures & ~fpstate->user_xfeatures)
return -EINVAL;
/* Userspace must use the uncompacted format */
@@ -474,7 +501,7 @@ static int validate_user_xstate_header(const struct xstate_header *hdr)
return 0;
}
-static void __xstate_dump_leaves(void)
+static void __init __xstate_dump_leaves(void)
{
int i;
u32 eax, ebx, ecx, edx;
@@ -509,12 +536,73 @@ static void __xstate_dump_leaves(void)
} \
} while (0)
+/**
+ * check_xtile_data_against_struct - Check tile data state size.
+ *
+ * Calculate the state size by multiplying the single tile size which is
+ * recorded in a C struct, and the number of tiles that the CPU informs.
+ * Compare the provided size with the calculation.
+ *
+ * @size: The tile data state size
+ *
+ * Returns: 0 on success, -EINVAL on mismatch.
+ */
+static int __init check_xtile_data_against_struct(int size)
+{
+ u32 max_palid, palid, state_size;
+ u32 eax, ebx, ecx, edx;
+ u16 max_tile;
+
+ /*
+ * Check the maximum palette id:
+ * eax: the highest numbered palette subleaf.
+ */
+ cpuid_count(TILE_CPUID, 0, &max_palid, &ebx, &ecx, &edx);
+
+ /*
+ * Cross-check each tile size and find the maximum number of
+ * supported tiles.
+ */
+ for (palid = 1, max_tile = 0; palid <= max_palid; palid++) {
+ u16 tile_size, max;
+
+ /*
+ * Check the tile size info:
+ * eax[31:16]: bytes per title
+ * ebx[31:16]: the max names (or max number of tiles)
+ */
+ cpuid_count(TILE_CPUID, palid, &eax, &ebx, &edx, &edx);
+ tile_size = eax >> 16;
+ max = ebx >> 16;
+
+ if (tile_size != sizeof(struct xtile_data)) {
+ pr_err("%s: struct is %zu bytes, cpu xtile %d bytes\n",
+ __stringify(XFEATURE_XTILE_DATA),
+ sizeof(struct xtile_data), tile_size);
+ __xstate_dump_leaves();
+ return -EINVAL;
+ }
+
+ if (max > max_tile)
+ max_tile = max;
+ }
+
+ state_size = sizeof(struct xtile_data) * max_tile;
+ if (size != state_size) {
+ pr_err("%s: calculated size is %u bytes, cpu state %d bytes\n",
+ __stringify(XFEATURE_XTILE_DATA), state_size, size);
+ __xstate_dump_leaves();
+ return -EINVAL;
+ }
+ return 0;
+}
+
/*
* We have a C struct for each 'xstate'. We need to ensure
* that our software representation matches what the CPU
* tells us about the state's size.
*/
-static void check_xstate_against_struct(int nr)
+static bool __init check_xstate_against_struct(int nr)
{
/*
* Ask the CPU for the size of the state.
@@ -532,6 +620,11 @@ static void check_xstate_against_struct(int nr)
XCHECK_SZ(sz, nr, XFEATURE_Hi16_ZMM, struct avx_512_hi16_state);
XCHECK_SZ(sz, nr, XFEATURE_PKRU, struct pkru_state);
XCHECK_SZ(sz, nr, XFEATURE_PASID, struct ia32_pasid_state);
+ XCHECK_SZ(sz, nr, XFEATURE_XTILE_CFG, struct xtile_cfg);
+
+ /* The tile data size varies between implementations. */
+ if (nr == XFEATURE_XTILE_DATA)
+ check_xtile_data_against_struct(sz);
/*
* Make *SURE* to add any feature numbers in below if
@@ -541,10 +634,39 @@ static void check_xstate_against_struct(int nr)
if ((nr < XFEATURE_YMM) ||
(nr >= XFEATURE_MAX) ||
(nr == XFEATURE_PT_UNIMPLEMENTED_SO_FAR) ||
- ((nr >= XFEATURE_RSRVD_COMP_11) && (nr <= XFEATURE_LBR))) {
+ ((nr >= XFEATURE_RSRVD_COMP_11) && (nr <= XFEATURE_RSRVD_COMP_16))) {
WARN_ONCE(1, "no structure for xstate: %d\n", nr);
XSTATE_WARN_ON(1);
+ return false;
+ }
+ return true;
+}
+
+static unsigned int xstate_calculate_size(u64 xfeatures, bool compacted)
+{
+ unsigned int size = FXSAVE_SIZE + XSAVE_HDR_SIZE;
+ int i;
+
+ for_each_extended_xfeature(i, xfeatures) {
+ /* Align from the end of the previous feature */
+ if (xfeature_is_aligned(i))
+ size = ALIGN(size, 64);
+ /*
+ * In compacted format the enabled features are packed,
+ * i.e. disabled features do not occupy space.
+ *
+ * In non-compacted format the offsets are fixed and
+ * disabled states still occupy space in the memory buffer.
+ */
+ if (!compacted)
+ size = xfeature_uncompacted_offset(i);
+ /*
+ * Add the feature size even for non-compacted format
+ * to make the end result correct
+ */
+ size += xfeature_size(i);
}
+ return size;
}
/*
@@ -556,44 +678,29 @@ static void check_xstate_against_struct(int nr)
* covered by these checks. Only the size of the buffer for task->fpu
* is checked here.
*/
-static void do_extra_xstate_size_checks(void)
+static bool __init paranoid_xstate_size_valid(unsigned int kernel_size)
{
- int paranoid_xstate_size = FXSAVE_SIZE + XSAVE_HDR_SIZE;
+ bool compacted = cpu_feature_enabled(X86_FEATURE_XSAVES);
+ unsigned int size = FXSAVE_SIZE + XSAVE_HDR_SIZE;
int i;
- for (i = FIRST_EXTENDED_XFEATURE; i < XFEATURE_MAX; i++) {
- if (!xfeature_enabled(i))
- continue;
-
- check_xstate_against_struct(i);
+ for_each_extended_xfeature(i, fpu_kernel_cfg.max_features) {
+ if (!check_xstate_against_struct(i))
+ return false;
/*
* Supervisor state components can be managed only by
* XSAVES.
*/
- if (!cpu_feature_enabled(X86_FEATURE_XSAVES))
- XSTATE_WARN_ON(xfeature_is_supervisor(i));
-
- /* Align from the end of the previous feature */
- if (xfeature_is_aligned(i))
- paranoid_xstate_size = ALIGN(paranoid_xstate_size, 64);
- /*
- * The offset of a given state in the non-compacted
- * format is given to us in a CPUID leaf. We check
- * them for being ordered (increasing offsets) in
- * setup_xstate_features(). XSAVES uses compacted format.
- */
- if (!cpu_feature_enabled(X86_FEATURE_XSAVES))
- paranoid_xstate_size = xfeature_uncompacted_offset(i);
- /*
- * The compacted-format offset always depends on where
- * the previous state ended.
- */
- paranoid_xstate_size += xfeature_size(i);
+ if (!compacted && xfeature_is_supervisor(i)) {
+ XSTATE_WARN_ON(1);
+ return false;
+ }
}
- XSTATE_WARN_ON(paranoid_xstate_size != fpu_kernel_xstate_size);
+ size = xstate_calculate_size(fpu_kernel_cfg.max_features, compacted);
+ XSTATE_WARN_ON(size != kernel_size);
+ return size == kernel_size;
}
-
/*
* Get total size of enabled xstates in XCR0 | IA32_XSS.
*
@@ -644,7 +751,7 @@ static unsigned int __init get_xsaves_size_no_independent(void)
return size;
}
-static unsigned int __init get_xsave_size(void)
+static unsigned int __init get_xsave_size_user(void)
{
unsigned int eax, ebx, ecx, edx;
/*
@@ -662,44 +769,54 @@ static unsigned int __init get_xsave_size(void)
* Will the runtime-enumerated 'xstate_size' fit in the init
* task's statically-allocated buffer?
*/
-static bool is_supported_xstate_size(unsigned int test_xstate_size)
+static bool __init is_supported_xstate_size(unsigned int test_xstate_size)
{
- if (test_xstate_size <= sizeof(union fpregs_state))
+ if (test_xstate_size <= sizeof(init_fpstate.regs))
return true;
pr_warn("x86/fpu: xstate buffer too small (%zu < %d), disabling xsave\n",
- sizeof(union fpregs_state), test_xstate_size);
+ sizeof(init_fpstate.regs), test_xstate_size);
return false;
}
static int __init init_xstate_size(void)
{
/* Recompute the context size for enabled features: */
- unsigned int possible_xstate_size;
- unsigned int xsave_size;
+ unsigned int user_size, kernel_size, kernel_default_size;
+ bool compacted = cpu_feature_enabled(X86_FEATURE_XSAVES);
- xsave_size = get_xsave_size();
+ /* Uncompacted user space size */
+ user_size = get_xsave_size_user();
- if (boot_cpu_has(X86_FEATURE_XSAVES))
- possible_xstate_size = get_xsaves_size_no_independent();
+ /*
+ * XSAVES kernel size includes supervisor states and
+ * uses compacted format when available.
+ *
+ * XSAVE does not support supervisor states so
+ * kernel and user size is identical.
+ */
+ if (compacted)
+ kernel_size = get_xsaves_size_no_independent();
else
- possible_xstate_size = xsave_size;
+ kernel_size = user_size;
+
+ kernel_default_size =
+ xstate_calculate_size(fpu_kernel_cfg.default_features, compacted);
- /* Ensure we have the space to store all enabled: */
- if (!is_supported_xstate_size(possible_xstate_size))
+ /* Ensure we have the space to store all default enabled features. */
+ if (!is_supported_xstate_size(kernel_default_size))
return -EINVAL;
- /*
- * The size is OK, we are definitely going to use xsave,
- * make it known to the world that we need more space.
- */
- fpu_kernel_xstate_size = possible_xstate_size;
- do_extra_xstate_size_checks();
+ if (!paranoid_xstate_size_valid(kernel_size))
+ return -EINVAL;
+
+ fpu_kernel_cfg.max_size = kernel_size;
+ fpu_user_cfg.max_size = user_size;
+
+ fpu_kernel_cfg.default_size = kernel_default_size;
+ fpu_user_cfg.default_size =
+ xstate_calculate_size(fpu_user_cfg.default_features, false);
- /*
- * User space is always in standard format.
- */
- fpu_user_xstate_size = xsave_size;
return 0;
}
@@ -707,28 +824,38 @@ static int __init init_xstate_size(void)
* We enabled the XSAVE hardware, but something went wrong and
* we can not use it. Disable it.
*/
-static void fpu__init_disable_system_xstate(void)
+static void __init fpu__init_disable_system_xstate(unsigned int legacy_size)
{
- xfeatures_mask_all = 0;
+ fpu_kernel_cfg.max_features = 0;
cr4_clear_bits(X86_CR4_OSXSAVE);
setup_clear_cpu_cap(X86_FEATURE_XSAVE);
+
+ /* Restore the legacy size.*/
+ fpu_kernel_cfg.max_size = legacy_size;
+ fpu_kernel_cfg.default_size = legacy_size;
+ fpu_user_cfg.max_size = legacy_size;
+ fpu_user_cfg.default_size = legacy_size;
+
+ /*
+ * Prevent enabling the static branch which enables writes to the
+ * XFD MSR.
+ */
+ init_fpstate.xfd = 0;
+
+ fpstate_reset(&current->thread.fpu);
}
/*
* Enable and initialize the xsave feature.
* Called once per system bootup.
*/
-void __init fpu__init_system_xstate(void)
+void __init fpu__init_system_xstate(unsigned int legacy_size)
{
unsigned int eax, ebx, ecx, edx;
- static int on_boot_cpu __initdata = 1;
u64 xfeatures;
int err;
int i;
- WARN_ON_FPU(!on_boot_cpu);
- on_boot_cpu = 0;
-
if (!boot_cpu_has(X86_FEATURE_FPU)) {
pr_info("x86/fpu: No FPU detected\n");
return;
@@ -749,22 +876,22 @@ void __init fpu__init_system_xstate(void)
* Find user xstates supported by the processor.
*/
cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx);
- xfeatures_mask_all = eax + ((u64)edx << 32);
+ fpu_kernel_cfg.max_features = eax + ((u64)edx << 32);
/*
* Find supervisor xstates supported by the processor.
*/
cpuid_count(XSTATE_CPUID, 1, &eax, &ebx, &ecx, &edx);
- xfeatures_mask_all |= ecx + ((u64)edx << 32);
+ fpu_kernel_cfg.max_features |= ecx + ((u64)edx << 32);
- if ((xfeatures_mask_uabi() & XFEATURE_MASK_FPSSE) != XFEATURE_MASK_FPSSE) {
+ if ((fpu_kernel_cfg.max_features & XFEATURE_MASK_FPSSE) != XFEATURE_MASK_FPSSE) {
/*
* This indicates that something really unexpected happened
* with the enumeration. Disable XSAVE and try to continue
* booting without it. This is too early to BUG().
*/
pr_err("x86/fpu: FP/SSE not present amongst the CPU's xstate features: 0x%llx.\n",
- xfeatures_mask_all);
+ fpu_kernel_cfg.max_features);
goto out_disable;
}
@@ -772,15 +899,39 @@ void __init fpu__init_system_xstate(void)
* Clear XSAVE features that are disabled in the normal CPUID.
*/
for (i = 0; i < ARRAY_SIZE(xsave_cpuid_features); i++) {
- if (!boot_cpu_has(xsave_cpuid_features[i]))
- xfeatures_mask_all &= ~BIT_ULL(i);
+ unsigned short cid = xsave_cpuid_features[i];
+
+ /* Careful: X86_FEATURE_FPU is 0! */
+ if ((i != XFEATURE_FP && !cid) || !boot_cpu_has(cid))
+ fpu_kernel_cfg.max_features &= ~BIT_ULL(i);
}
- xfeatures_mask_all &= XFEATURE_MASK_USER_SUPPORTED |
+ if (!cpu_feature_enabled(X86_FEATURE_XFD))
+ fpu_kernel_cfg.max_features &= ~XFEATURE_MASK_USER_DYNAMIC;
+
+ fpu_kernel_cfg.max_features &= XFEATURE_MASK_USER_SUPPORTED |
XFEATURE_MASK_SUPERVISOR_SUPPORTED;
+ fpu_user_cfg.max_features = fpu_kernel_cfg.max_features;
+ fpu_user_cfg.max_features &= XFEATURE_MASK_USER_SUPPORTED;
+
+ /* Clean out dynamic features from default */
+ fpu_kernel_cfg.default_features = fpu_kernel_cfg.max_features;
+ fpu_kernel_cfg.default_features &= ~XFEATURE_MASK_USER_DYNAMIC;
+
+ fpu_user_cfg.default_features = fpu_user_cfg.max_features;
+ fpu_user_cfg.default_features &= ~XFEATURE_MASK_USER_DYNAMIC;
+
/* Store it for paranoia check at the end */
- xfeatures = xfeatures_mask_all;
+ xfeatures = fpu_kernel_cfg.max_features;
+
+ /*
+ * Initialize the default XFD state in initfp_state and enable the
+ * dynamic sizing mechanism if dynamic states are available. The
+ * static key cannot be enabled here because this runs before
+ * jump_label_init(). This is delayed to an initcall.
+ */
+ init_fpstate.xfd = fpu_user_cfg.max_features & XFEATURE_MASK_USER_DYNAMIC;
/* Enable xstate instructions to be able to continue with initialization: */
fpu__init_cpu_xstate();
@@ -788,13 +939,16 @@ void __init fpu__init_system_xstate(void)
if (err)
goto out_disable;
+ /* Reset the state for the current task */
+ fpstate_reset(&current->thread.fpu);
+
/*
* Update info used for ptrace frames; use standard-format size and no
* supervisor xstates:
*/
- update_regset_xstate_info(fpu_user_xstate_size, xfeatures_mask_uabi());
+ update_regset_xstate_info(fpu_user_cfg.max_size,
+ fpu_user_cfg.max_features);
- fpu__init_prepare_fx_sw_frame();
setup_init_fpu_buf();
setup_xstate_comp_offsets();
setup_supervisor_only_offsets();
@@ -803,22 +957,22 @@ void __init fpu__init_system_xstate(void)
* Paranoia check whether something in the setup modified the
* xfeatures mask.
*/
- if (xfeatures != xfeatures_mask_all) {
+ if (xfeatures != fpu_kernel_cfg.max_features) {
pr_err("x86/fpu: xfeatures modified from 0x%016llx to 0x%016llx during init, disabling XSAVE\n",
- xfeatures, xfeatures_mask_all);
+ xfeatures, fpu_kernel_cfg.max_features);
goto out_disable;
}
print_xstate_offset_size();
pr_info("x86/fpu: Enabled xstate features 0x%llx, context size is %d bytes, using '%s' format.\n",
- xfeatures_mask_all,
- fpu_kernel_xstate_size,
+ fpu_kernel_cfg.max_features,
+ fpu_kernel_cfg.max_size,
boot_cpu_has(X86_FEATURE_XSAVES) ? "compacted" : "standard");
return;
out_disable:
/* something went wrong, try to boot without any XSAVE support */
- fpu__init_disable_system_xstate();
+ fpu__init_disable_system_xstate(legacy_size);
}
/*
@@ -830,7 +984,7 @@ void fpu__resume_cpu(void)
* Restore XCR0 on xsave capable CPUs:
*/
if (cpu_feature_enabled(X86_FEATURE_XSAVE))
- xsetbv(XCR_XFEATURE_ENABLED_MASK, xfeatures_mask_uabi());
+ xsetbv(XCR_XFEATURE_ENABLED_MASK, fpu_user_cfg.max_features);
/*
* Restore IA32_XSS. The same CPUID bit enumerates support
@@ -840,6 +994,9 @@ void fpu__resume_cpu(void)
wrmsrl(MSR_IA32_XSS, xfeatures_mask_supervisor() |
xfeatures_mask_independent());
}
+
+ if (fpu_state_size_dynamic())
+ wrmsrl(MSR_IA32_XFD, current->thread.fpu.fpstate->xfd);
}
/*
@@ -886,7 +1043,7 @@ void *get_xsave_addr(struct xregs_state *xsave, int xfeature_nr)
* We should not ever be requesting features that we
* have not enabled.
*/
- WARN_ONCE(!(xfeatures_mask_all & BIT_ULL(xfeature_nr)),
+ WARN_ONCE(!(fpu_kernel_cfg.max_features & BIT_ULL(xfeature_nr)),
"get of unsupported state");
/*
* This assumes the last 'xsave*' instruction to
@@ -904,7 +1061,6 @@ void *get_xsave_addr(struct xregs_state *xsave, int xfeature_nr)
return __raw_xsave_addr(xsave, xfeature_nr);
}
-EXPORT_SYMBOL_GPL(get_xsave_addr);
#ifdef CONFIG_ARCH_HAS_PKEYS
@@ -961,9 +1117,10 @@ static void copy_feature(bool from_xstate, struct membuf *to, void *xstate,
}
/**
- * copy_xstate_to_uabi_buf - Copy kernel saved xstate to a UABI buffer
+ * __copy_xstate_to_uabi_buf - Copy kernel saved xstate to a UABI buffer
* @to: membuf descriptor
- * @tsk: The task from which to copy the saved xstate
+ * @fpstate: The fpstate buffer from which to copy
+ * @pkru_val: The PKRU value to store in the PKRU component
* @copy_mode: The requested copy mode
*
* Converts from kernel XSAVE or XSAVES compacted format to UABI conforming
@@ -972,14 +1129,15 @@ static void copy_feature(bool from_xstate, struct membuf *to, void *xstate,
*
* It supports partial copy but @to.pos always starts from zero.
*/
-void copy_xstate_to_uabi_buf(struct membuf to, struct task_struct *tsk,
- enum xstate_copy_mode copy_mode)
+void __copy_xstate_to_uabi_buf(struct membuf to, struct fpstate *fpstate,
+ u32 pkru_val, enum xstate_copy_mode copy_mode)
{
const unsigned int off_mxcsr = offsetof(struct fxregs_state, mxcsr);
- struct xregs_state *xsave = &tsk->thread.fpu.state.xsave;
- struct xregs_state *xinit = &init_fpstate.xsave;
+ struct xregs_state *xinit = &init_fpstate.regs.xsave;
+ struct xregs_state *xsave = &fpstate->regs.xsave;
struct xstate_header header;
unsigned int zerofrom;
+ u64 mask;
int i;
memset(&header, 0, sizeof(header));
@@ -996,7 +1154,7 @@ void copy_xstate_to_uabi_buf(struct membuf to, struct task_struct *tsk,
break;
case XSTATE_COPY_XSAVE:
- header.xfeatures &= xfeatures_mask_uabi();
+ header.xfeatures &= fpstate->user_xfeatures;
break;
}
@@ -1033,17 +1191,15 @@ void copy_xstate_to_uabi_buf(struct membuf to, struct task_struct *tsk,
zerofrom = offsetof(struct xregs_state, extended_state_area);
- for (i = FIRST_EXTENDED_XFEATURE; i < XFEATURE_MAX; i++) {
- /*
- * The ptrace buffer is in non-compacted XSAVE format.
- * In non-compacted format disabled features still occupy
- * state space, but there is no state to copy from in the
- * compacted init_fpstate. The gap tracking will zero this
- * later.
- */
- if (!(xfeatures_mask_uabi() & BIT_ULL(i)))
- continue;
+ /*
+ * The ptrace buffer is in non-compacted XSAVE format. In
+ * non-compacted format disabled features still occupy state space,
+ * but there is no state to copy from in the compacted
+ * init_fpstate. The gap tracking will zero these states.
+ */
+ mask = fpstate->user_xfeatures;
+ for_each_extended_xfeature(i, mask) {
/*
* If there was a feature or alignment gap, zero the space
* in the destination buffer.
@@ -1055,10 +1211,9 @@ void copy_xstate_to_uabi_buf(struct membuf to, struct task_struct *tsk,
struct pkru_state pkru = {0};
/*
* PKRU is not necessarily up to date in the
- * thread's XSAVE buffer. Fill this part from the
- * per-thread storage.
+ * XSAVE buffer. Use the provided value.
*/
- pkru.pkru = tsk->thread.pkru;
+ pkru.pkru = pkru_val;
membuf_write(&to, &pkru, sizeof(pkru));
} else {
copy_feature(header.xfeatures & BIT_ULL(i), &to,
@@ -1078,6 +1233,25 @@ out:
membuf_zero(&to, to.left);
}
+/**
+ * copy_xstate_to_uabi_buf - Copy kernel saved xstate to a UABI buffer
+ * @to: membuf descriptor
+ * @tsk: The task from which to copy the saved xstate
+ * @copy_mode: The requested copy mode
+ *
+ * Converts from kernel XSAVE or XSAVES compacted format to UABI conforming
+ * format, i.e. from the kernel internal hardware dependent storage format
+ * to the requested @mode. UABI XSTATE is always uncompacted!
+ *
+ * It supports partial copy but @to.pos always starts from zero.
+ */
+void copy_xstate_to_uabi_buf(struct membuf to, struct task_struct *tsk,
+ enum xstate_copy_mode copy_mode)
+{
+ __copy_xstate_to_uabi_buf(to, tsk->thread.fpu.fpstate,
+ tsk->thread.pkru, copy_mode);
+}
+
static int copy_from_buffer(void *dst, unsigned int offset, unsigned int size,
const void *kbuf, const void __user *ubuf)
{
@@ -1091,9 +1265,10 @@ static int copy_from_buffer(void *dst, unsigned int offset, unsigned int size,
}
-static int copy_uabi_to_xstate(struct xregs_state *xsave, const void *kbuf,
+static int copy_uabi_to_xstate(struct fpstate *fpstate, const void *kbuf,
const void __user *ubuf)
{
+ struct xregs_state *xsave = &fpstate->regs.xsave;
unsigned int offset, size;
struct xstate_header hdr;
u64 mask;
@@ -1103,7 +1278,7 @@ static int copy_uabi_to_xstate(struct xregs_state *xsave, const void *kbuf,
if (copy_from_buffer(&hdr, offset, sizeof(hdr), kbuf, ubuf))
return -EFAULT;
- if (validate_user_xstate_header(&hdr))
+ if (validate_user_xstate_header(&hdr, fpstate))
return -EINVAL;
/* Validate MXCSR when any of the related features is in use */
@@ -1156,12 +1331,11 @@ static int copy_uabi_to_xstate(struct xregs_state *xsave, const void *kbuf,
/*
* Convert from a ptrace standard-format kernel buffer to kernel XSAVE[S]
- * format and copy to the target thread. This is called from
- * xstateregs_set().
+ * format and copy to the target thread. Used by ptrace and KVM.
*/
-int copy_uabi_from_kernel_to_xstate(struct xregs_state *xsave, const void *kbuf)
+int copy_uabi_from_kernel_to_xstate(struct fpstate *fpstate, const void *kbuf)
{
- return copy_uabi_to_xstate(xsave, kbuf, NULL);
+ return copy_uabi_to_xstate(fpstate, kbuf, NULL);
}
/*
@@ -1169,26 +1343,20 @@ int copy_uabi_from_kernel_to_xstate(struct xregs_state *xsave, const void *kbuf)
* XSAVE[S] format and copy to the target thread. This is called from the
* sigreturn() and rt_sigreturn() system calls.
*/
-int copy_sigframe_from_user_to_xstate(struct xregs_state *xsave,
+int copy_sigframe_from_user_to_xstate(struct fpstate *fpstate,
const void __user *ubuf)
{
- return copy_uabi_to_xstate(xsave, NULL, ubuf);
+ return copy_uabi_to_xstate(fpstate, NULL, ubuf);
}
-static bool validate_xsaves_xrstors(u64 mask)
+static bool validate_independent_components(u64 mask)
{
u64 xchk;
if (WARN_ON_FPU(!cpu_feature_enabled(X86_FEATURE_XSAVES)))
return false;
- /*
- * Validate that this is either a task->fpstate related component
- * subset or an independent one.
- */
- if (mask & xfeatures_mask_independent())
- xchk = ~xfeatures_mask_independent();
- else
- xchk = ~xfeatures_mask_all;
+
+ xchk = ~xfeatures_mask_independent();
if (WARN_ON_ONCE(!mask || mask & xchk))
return false;
@@ -1206,14 +1374,13 @@ static bool validate_xsaves_xrstors(u64 mask)
* buffer should be zeroed otherwise a consecutive XRSTORS from that buffer
* can #GP.
*
- * The feature mask must either be a subset of the independent features or
- * a subset of the task->fpstate related features.
+ * The feature mask must be a subset of the independent features.
*/
void xsaves(struct xregs_state *xstate, u64 mask)
{
int err;
- if (!validate_xsaves_xrstors(mask))
+ if (!validate_independent_components(mask))
return;
XSTATE_OP(XSAVES, xstate, (u32)mask, (u32)(mask >> 32), err);
@@ -1231,20 +1398,379 @@ void xsaves(struct xregs_state *xstate, u64 mask)
* Proper usage is to restore the state which was saved with
* xsaves() into @xstate.
*
- * The feature mask must either be a subset of the independent features or
- * a subset of the task->fpstate related features.
+ * The feature mask must be a subset of the independent features.
*/
void xrstors(struct xregs_state *xstate, u64 mask)
{
int err;
- if (!validate_xsaves_xrstors(mask))
+ if (!validate_independent_components(mask))
return;
XSTATE_OP(XRSTORS, xstate, (u32)mask, (u32)(mask >> 32), err);
WARN_ON_ONCE(err);
}
+#if IS_ENABLED(CONFIG_KVM)
+void fpstate_clear_xstate_component(struct fpstate *fps, unsigned int xfeature)
+{
+ void *addr = get_xsave_addr(&fps->regs.xsave, xfeature);
+
+ if (addr)
+ memset(addr, 0, xstate_sizes[xfeature]);
+}
+EXPORT_SYMBOL_GPL(fpstate_clear_xstate_component);
+#endif
+
+#ifdef CONFIG_X86_64
+
+#ifdef CONFIG_X86_DEBUG_FPU
+/*
+ * Ensure that a subsequent XSAVE* or XRSTOR* instruction with RFBM=@mask
+ * can safely operate on the @fpstate buffer.
+ */
+static bool xstate_op_valid(struct fpstate *fpstate, u64 mask, bool rstor)
+{
+ u64 xfd = __this_cpu_read(xfd_state);
+
+ if (fpstate->xfd == xfd)
+ return true;
+
+ /*
+ * The XFD MSR does not match fpstate->xfd. That's invalid when
+ * the passed in fpstate is current's fpstate.
+ */
+ if (fpstate->xfd == current->thread.fpu.fpstate->xfd)
+ return false;
+
+ /*
+ * XRSTOR(S) from init_fpstate are always correct as it will just
+ * bring all components into init state and not read from the
+ * buffer. XSAVE(S) raises #PF after init.
+ */
+ if (fpstate == &init_fpstate)
+ return rstor;
+
+ /*
+ * XSAVE(S): clone(), fpu_swap_kvm_fpu()
+ * XRSTORS(S): fpu_swap_kvm_fpu()
+ */
+
+ /*
+ * No XSAVE/XRSTOR instructions (except XSAVE itself) touch
+ * the buffer area for XFD-disabled state components.
+ */
+ mask &= ~xfd;
+
+ /*
+ * Remove features which are valid in fpstate. They
+ * have space allocated in fpstate.
+ */
+ mask &= ~fpstate->xfeatures;
+
+ /*
+ * Any remaining state components in 'mask' might be written
+ * by XSAVE/XRSTOR. Fail validation it found.
+ */
+ return !mask;
+}
+
+void xfd_validate_state(struct fpstate *fpstate, u64 mask, bool rstor)
+{
+ WARN_ON_ONCE(!xstate_op_valid(fpstate, mask, rstor));
+}
+#endif /* CONFIG_X86_DEBUG_FPU */
+
+static int __init xfd_update_static_branch(void)
+{
+ /*
+ * If init_fpstate.xfd has bits set then dynamic features are
+ * available and the dynamic sizing must be enabled.
+ */
+ if (init_fpstate.xfd)
+ static_branch_enable(&__fpu_state_size_dynamic);
+ return 0;
+}
+arch_initcall(xfd_update_static_branch)
+
+void fpstate_free(struct fpu *fpu)
+{
+ if (fpu->fpstate && fpu->fpstate != &fpu->__fpstate)
+ vfree(fpu->fpstate);
+}
+
+/**
+ * fpu_install_fpstate - Update the active fpstate in the FPU
+ *
+ * @fpu: A struct fpu * pointer
+ * @newfps: A struct fpstate * pointer
+ *
+ * Returns: A null pointer if the last active fpstate is the embedded
+ * one or the new fpstate is already installed;
+ * otherwise, a pointer to the old fpstate which has to
+ * be freed by the caller.
+ */
+static struct fpstate *fpu_install_fpstate(struct fpu *fpu,
+ struct fpstate *newfps)
+{
+ struct fpstate *oldfps = fpu->fpstate;
+
+ if (fpu->fpstate == newfps)
+ return NULL;
+
+ fpu->fpstate = newfps;
+ return oldfps != &fpu->__fpstate ? oldfps : NULL;
+}
+
+/**
+ * fpstate_realloc - Reallocate struct fpstate for the requested new features
+ *
+ * @xfeatures: A bitmap of xstate features which extend the enabled features
+ * of that task
+ * @ksize: The required size for the kernel buffer
+ * @usize: The required size for user space buffers
+ *
+ * Note vs. vmalloc(): If the task with a vzalloc()-allocated buffer
+ * terminates quickly, vfree()-induced IPIs may be a concern, but tasks
+ * with large states are likely to live longer.
+ *
+ * Returns: 0 on success, -ENOMEM on allocation error.
+ */
+static int fpstate_realloc(u64 xfeatures, unsigned int ksize,
+ unsigned int usize)
+{
+ struct fpu *fpu = &current->thread.fpu;
+ struct fpstate *curfps, *newfps = NULL;
+ unsigned int fpsize;
+
+ curfps = fpu->fpstate;
+ fpsize = ksize + ALIGN(offsetof(struct fpstate, regs), 64);
+
+ newfps = vzalloc(fpsize);
+ if (!newfps)
+ return -ENOMEM;
+ newfps->size = ksize;
+ newfps->user_size = usize;
+ newfps->is_valloc = true;
+
+ fpregs_lock();
+ /*
+ * Ensure that the current state is in the registers before
+ * swapping fpstate as that might invalidate it due to layout
+ * changes.
+ */
+ if (test_thread_flag(TIF_NEED_FPU_LOAD))
+ fpregs_restore_userregs();
+
+ newfps->xfeatures = curfps->xfeatures | xfeatures;
+ newfps->user_xfeatures = curfps->user_xfeatures | xfeatures;
+ newfps->xfd = curfps->xfd & ~xfeatures;
+
+ curfps = fpu_install_fpstate(fpu, newfps);
+
+ /* Do the final updates within the locked region */
+ xstate_init_xcomp_bv(&newfps->regs.xsave, newfps->xfeatures);
+ xfd_update_state(newfps);
+
+ fpregs_unlock();
+
+ vfree(curfps);
+ return 0;
+}
+
+static int validate_sigaltstack(unsigned int usize)
+{
+ struct task_struct *thread, *leader = current->group_leader;
+ unsigned long framesize = get_sigframe_size();
+
+ lockdep_assert_held(&current->sighand->siglock);
+
+ /* get_sigframe_size() is based on fpu_user_cfg.max_size */
+ framesize -= fpu_user_cfg.max_size;
+ framesize += usize;
+ for_each_thread(leader, thread) {
+ if (thread->sas_ss_size && thread->sas_ss_size < framesize)
+ return -ENOSPC;
+ }
+ return 0;
+}
+
+static int __xstate_request_perm(u64 permitted, u64 requested)
+{
+ /*
+ * This deliberately does not exclude !XSAVES as we still might
+ * decide to optionally context switch XCR0 or talk the silicon
+ * vendors into extending XFD for the pre AMX states, especially
+ * AVX512.
+ */
+ bool compacted = cpu_feature_enabled(X86_FEATURE_XSAVES);
+ struct fpu *fpu = &current->group_leader->thread.fpu;
+ unsigned int ksize, usize;
+ u64 mask;
+ int ret;
+
+ /* Check whether fully enabled */
+ if ((permitted & requested) == requested)
+ return 0;
+
+ /* Calculate the resulting kernel state size */
+ mask = permitted | requested;
+ ksize = xstate_calculate_size(mask, compacted);
+
+ /* Calculate the resulting user state size */
+ mask &= XFEATURE_MASK_USER_SUPPORTED;
+ usize = xstate_calculate_size(mask, false);
+
+ ret = validate_sigaltstack(usize);
+ if (ret)
+ return ret;
+
+ /* Pairs with the READ_ONCE() in xstate_get_group_perm() */
+ WRITE_ONCE(fpu->perm.__state_perm, requested);
+ /* Protected by sighand lock */
+ fpu->perm.__state_size = ksize;
+ fpu->perm.__user_state_size = usize;
+ return ret;
+}
+
+/*
+ * Permissions array to map facilities with more than one component
+ */
+static const u64 xstate_prctl_req[XFEATURE_MAX] = {
+ [XFEATURE_XTILE_DATA] = XFEATURE_MASK_XTILE_DATA,
+};
+
+static int xstate_request_perm(unsigned long idx)
+{
+ u64 permitted, requested;
+ int ret;
+
+ if (idx >= XFEATURE_MAX)
+ return -EINVAL;
+
+ /*
+ * Look up the facility mask which can require more than
+ * one xstate component.
+ */
+ idx = array_index_nospec(idx, ARRAY_SIZE(xstate_prctl_req));
+ requested = xstate_prctl_req[idx];
+ if (!requested)
+ return -EOPNOTSUPP;
+
+ if ((fpu_user_cfg.max_features & requested) != requested)
+ return -EOPNOTSUPP;
+
+ /* Lockless quick check */
+ permitted = xstate_get_host_group_perm();
+ if ((permitted & requested) == requested)
+ return 0;
+
+ /* Protect against concurrent modifications */
+ spin_lock_irq(&current->sighand->siglock);
+ permitted = xstate_get_host_group_perm();
+ ret = __xstate_request_perm(permitted, requested);
+ spin_unlock_irq(&current->sighand->siglock);
+ return ret;
+}
+
+int xfd_enable_feature(u64 xfd_err)
+{
+ u64 xfd_event = xfd_err & XFEATURE_MASK_USER_DYNAMIC;
+ unsigned int ksize, usize;
+ struct fpu *fpu;
+
+ if (!xfd_event) {
+ pr_err_once("XFD: Invalid xfd error: %016llx\n", xfd_err);
+ return 0;
+ }
+
+ /* Protect against concurrent modifications */
+ spin_lock_irq(&current->sighand->siglock);
+
+ /* If not permitted let it die */
+ if ((xstate_get_host_group_perm() & xfd_event) != xfd_event) {
+ spin_unlock_irq(&current->sighand->siglock);
+ return -EPERM;
+ }
+
+ fpu = &current->group_leader->thread.fpu;
+ ksize = fpu->perm.__state_size;
+ usize = fpu->perm.__user_state_size;
+ /*
+ * The feature is permitted. State size is sufficient. Dropping
+ * the lock is safe here even if more features are added from
+ * another task, the retrieved buffer sizes are valid for the
+ * currently requested feature(s).
+ */
+ spin_unlock_irq(&current->sighand->siglock);
+
+ /*
+ * Try to allocate a new fpstate. If that fails there is no way
+ * out.
+ */
+ if (fpstate_realloc(xfd_event, ksize, usize))
+ return -EFAULT;
+ return 0;
+}
+#else /* CONFIG_X86_64 */
+static inline int xstate_request_perm(unsigned long idx)
+{
+ return -EPERM;
+}
+#endif /* !CONFIG_X86_64 */
+
+/**
+ * fpu_xstate_prctl - xstate permission operations
+ * @tsk: Redundant pointer to current
+ * @option: A subfunction of arch_prctl()
+ * @arg2: option argument
+ * Return: 0 if successful; otherwise, an error code
+ *
+ * Option arguments:
+ *
+ * ARCH_GET_XCOMP_SUPP: Pointer to user space u64 to store the info
+ * ARCH_GET_XCOMP_PERM: Pointer to user space u64 to store the info
+ * ARCH_REQ_XCOMP_PERM: Facility number requested
+ *
+ * For facilities which require more than one XSTATE component, the request
+ * must be the highest state component number related to that facility,
+ * e.g. for AMX which requires XFEATURE_XTILE_CFG(17) and
+ * XFEATURE_XTILE_DATA(18) this would be XFEATURE_XTILE_DATA(18).
+ */
+long fpu_xstate_prctl(struct task_struct *tsk, int option, unsigned long arg2)
+{
+ u64 __user *uptr = (u64 __user *)arg2;
+ u64 permitted, supported;
+ unsigned long idx = arg2;
+
+ if (tsk != current)
+ return -EPERM;
+
+ switch (option) {
+ case ARCH_GET_XCOMP_SUPP:
+ supported = fpu_user_cfg.max_features | fpu_user_cfg.legacy_features;
+ return put_user(supported, uptr);
+
+ case ARCH_GET_XCOMP_PERM:
+ /*
+ * Lockless snapshot as it can also change right after the
+ * dropping the lock.
+ */
+ permitted = xstate_get_host_group_perm();
+ permitted &= XFEATURE_MASK_USER_SUPPORTED;
+ return put_user(permitted, uptr);
+
+ case ARCH_REQ_XCOMP_PERM:
+ if (!IS_ENABLED(CONFIG_X86_64))
+ return -EOPNOTSUPP;
+
+ return xstate_request_perm(idx);
+
+ default:
+ return -EINVAL;
+ }
+}
+
#ifdef CONFIG_PROC_PID_ARCH_STATUS
/*
* Report the amount of time elapsed in millisecond since last AVX512
diff --git a/arch/x86/kernel/fpu/xstate.h b/arch/x86/kernel/fpu/xstate.h
new file mode 100644
index 000000000000..e18210dff88c
--- /dev/null
+++ b/arch/x86/kernel/fpu/xstate.h
@@ -0,0 +1,278 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __X86_KERNEL_FPU_XSTATE_H
+#define __X86_KERNEL_FPU_XSTATE_H
+
+#include <asm/cpufeature.h>
+#include <asm/fpu/xstate.h>
+
+#ifdef CONFIG_X86_64
+DECLARE_PER_CPU(u64, xfd_state);
+#endif
+
+static inline void xstate_init_xcomp_bv(struct xregs_state *xsave, u64 mask)
+{
+ /*
+ * XRSTORS requires these bits set in xcomp_bv, or it will
+ * trigger #GP:
+ */
+ if (cpu_feature_enabled(X86_FEATURE_XSAVES))
+ xsave->header.xcomp_bv = mask | XCOMP_BV_COMPACTED_FORMAT;
+}
+
+static inline u64 xstate_get_host_group_perm(void)
+{
+ /* Pairs with WRITE_ONCE() in xstate_request_perm() */
+ return READ_ONCE(current->group_leader->thread.fpu.perm.__state_perm);
+}
+
+enum xstate_copy_mode {
+ XSTATE_COPY_FP,
+ XSTATE_COPY_FX,
+ XSTATE_COPY_XSAVE,
+};
+
+struct membuf;
+extern void __copy_xstate_to_uabi_buf(struct membuf to, struct fpstate *fpstate,
+ u32 pkru_val, enum xstate_copy_mode copy_mode);
+extern void copy_xstate_to_uabi_buf(struct membuf to, struct task_struct *tsk,
+ enum xstate_copy_mode mode);
+extern int copy_uabi_from_kernel_to_xstate(struct fpstate *fpstate, const void *kbuf);
+extern int copy_sigframe_from_user_to_xstate(struct fpstate *fpstate, const void __user *ubuf);
+
+
+extern void fpu__init_cpu_xstate(void);
+extern void fpu__init_system_xstate(unsigned int legacy_size);
+
+extern void *get_xsave_addr(struct xregs_state *xsave, int xfeature_nr);
+
+static inline u64 xfeatures_mask_supervisor(void)
+{
+ return fpu_kernel_cfg.max_features & XFEATURE_MASK_SUPERVISOR_SUPPORTED;
+}
+
+static inline u64 xfeatures_mask_independent(void)
+{
+ if (!cpu_feature_enabled(X86_FEATURE_ARCH_LBR))
+ return XFEATURE_MASK_INDEPENDENT & ~XFEATURE_MASK_LBR;
+
+ return XFEATURE_MASK_INDEPENDENT;
+}
+
+/* XSAVE/XRSTOR wrapper functions */
+
+#ifdef CONFIG_X86_64
+#define REX_PREFIX "0x48, "
+#else
+#define REX_PREFIX
+#endif
+
+/* These macros all use (%edi)/(%rdi) as the single memory argument. */
+#define XSAVE ".byte " REX_PREFIX "0x0f,0xae,0x27"
+#define XSAVEOPT ".byte " REX_PREFIX "0x0f,0xae,0x37"
+#define XSAVES ".byte " REX_PREFIX "0x0f,0xc7,0x2f"
+#define XRSTOR ".byte " REX_PREFIX "0x0f,0xae,0x2f"
+#define XRSTORS ".byte " REX_PREFIX "0x0f,0xc7,0x1f"
+
+/*
+ * After this @err contains 0 on success or the trap number when the
+ * operation raises an exception.
+ */
+#define XSTATE_OP(op, st, lmask, hmask, err) \
+ asm volatile("1:" op "\n\t" \
+ "xor %[err], %[err]\n" \
+ "2:\n\t" \
+ _ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_FAULT_MCE_SAFE) \
+ : [err] "=a" (err) \
+ : "D" (st), "m" (*st), "a" (lmask), "d" (hmask) \
+ : "memory")
+
+/*
+ * If XSAVES is enabled, it replaces XSAVEOPT because it supports a compact
+ * format and supervisor states in addition to modified optimization in
+ * XSAVEOPT.
+ *
+ * Otherwise, if XSAVEOPT is enabled, XSAVEOPT replaces XSAVE because XSAVEOPT
+ * supports modified optimization which is not supported by XSAVE.
+ *
+ * We use XSAVE as a fallback.
+ *
+ * The 661 label is defined in the ALTERNATIVE* macros as the address of the
+ * original instruction which gets replaced. We need to use it here as the
+ * address of the instruction where we might get an exception at.
+ */
+#define XSTATE_XSAVE(st, lmask, hmask, err) \
+ asm volatile(ALTERNATIVE_2(XSAVE, \
+ XSAVEOPT, X86_FEATURE_XSAVEOPT, \
+ XSAVES, X86_FEATURE_XSAVES) \
+ "\n" \
+ "xor %[err], %[err]\n" \
+ "3:\n" \
+ ".pushsection .fixup,\"ax\"\n" \
+ "4: movl $-2, %[err]\n" \
+ "jmp 3b\n" \
+ ".popsection\n" \
+ _ASM_EXTABLE(661b, 4b) \
+ : [err] "=r" (err) \
+ : "D" (st), "m" (*st), "a" (lmask), "d" (hmask) \
+ : "memory")
+
+/*
+ * Use XRSTORS to restore context if it is enabled. XRSTORS supports compact
+ * XSAVE area format.
+ */
+#define XSTATE_XRESTORE(st, lmask, hmask) \
+ asm volatile(ALTERNATIVE(XRSTOR, \
+ XRSTORS, X86_FEATURE_XSAVES) \
+ "\n" \
+ "3:\n" \
+ _ASM_EXTABLE_TYPE(661b, 3b, EX_TYPE_FPU_RESTORE) \
+ : \
+ : "D" (st), "m" (*st), "a" (lmask), "d" (hmask) \
+ : "memory")
+
+#if defined(CONFIG_X86_64) && defined(CONFIG_X86_DEBUG_FPU)
+extern void xfd_validate_state(struct fpstate *fpstate, u64 mask, bool rstor);
+#else
+static inline void xfd_validate_state(struct fpstate *fpstate, u64 mask, bool rstor) { }
+#endif
+
+#ifdef CONFIG_X86_64
+static inline void xfd_update_state(struct fpstate *fpstate)
+{
+ if (fpu_state_size_dynamic()) {
+ u64 xfd = fpstate->xfd;
+
+ if (__this_cpu_read(xfd_state) != xfd) {
+ wrmsrl(MSR_IA32_XFD, xfd);
+ __this_cpu_write(xfd_state, xfd);
+ }
+ }
+}
+#else
+static inline void xfd_update_state(struct fpstate *fpstate) { }
+#endif
+
+/*
+ * Save processor xstate to xsave area.
+ *
+ * Uses either XSAVE or XSAVEOPT or XSAVES depending on the CPU features
+ * and command line options. The choice is permanent until the next reboot.
+ */
+static inline void os_xsave(struct fpstate *fpstate)
+{
+ u64 mask = fpstate->xfeatures;
+ u32 lmask = mask;
+ u32 hmask = mask >> 32;
+ int err;
+
+ WARN_ON_FPU(!alternatives_patched);
+ xfd_validate_state(fpstate, mask, false);
+
+ XSTATE_XSAVE(&fpstate->regs.xsave, lmask, hmask, err);
+
+ /* We should never fault when copying to a kernel buffer: */
+ WARN_ON_FPU(err);
+}
+
+/*
+ * Restore processor xstate from xsave area.
+ *
+ * Uses XRSTORS when XSAVES is used, XRSTOR otherwise.
+ */
+static inline void os_xrstor(struct fpstate *fpstate, u64 mask)
+{
+ u32 lmask = mask;
+ u32 hmask = mask >> 32;
+
+ xfd_validate_state(fpstate, mask, true);
+ XSTATE_XRESTORE(&fpstate->regs.xsave, lmask, hmask);
+}
+
+/* Restore of supervisor state. Does not require XFD */
+static inline void os_xrstor_supervisor(struct fpstate *fpstate)
+{
+ u64 mask = xfeatures_mask_supervisor();
+ u32 lmask = mask;
+ u32 hmask = mask >> 32;
+
+ XSTATE_XRESTORE(&fpstate->regs.xsave, lmask, hmask);
+}
+
+/*
+ * Save xstate to user space xsave area.
+ *
+ * We don't use modified optimization because xrstor/xrstors might track
+ * a different application.
+ *
+ * We don't use compacted format xsave area for backward compatibility for
+ * old applications which don't understand the compacted format of the
+ * xsave area.
+ *
+ * The caller has to zero buf::header before calling this because XSAVE*
+ * does not touch the reserved fields in the header.
+ */
+static inline int xsave_to_user_sigframe(struct xregs_state __user *buf)
+{
+ /*
+ * Include the features which are not xsaved/rstored by the kernel
+ * internally, e.g. PKRU. That's user space ABI and also required
+ * to allow the signal handler to modify PKRU.
+ */
+ struct fpstate *fpstate = current->thread.fpu.fpstate;
+ u64 mask = fpstate->user_xfeatures;
+ u32 lmask = mask;
+ u32 hmask = mask >> 32;
+ int err;
+
+ xfd_validate_state(fpstate, mask, false);
+
+ stac();
+ XSTATE_OP(XSAVE, buf, lmask, hmask, err);
+ clac();
+
+ return err;
+}
+
+/*
+ * Restore xstate from user space xsave area.
+ */
+static inline int xrstor_from_user_sigframe(struct xregs_state __user *buf, u64 mask)
+{
+ struct xregs_state *xstate = ((__force struct xregs_state *)buf);
+ u32 lmask = mask;
+ u32 hmask = mask >> 32;
+ int err;
+
+ xfd_validate_state(current->thread.fpu.fpstate, mask, true);
+
+ stac();
+ XSTATE_OP(XRSTOR, xstate, lmask, hmask, err);
+ clac();
+
+ return err;
+}
+
+/*
+ * Restore xstate from kernel space xsave area, return an error code instead of
+ * an exception.
+ */
+static inline int os_xrstor_safe(struct fpstate *fpstate, u64 mask)
+{
+ struct xregs_state *xstate = &fpstate->regs.xsave;
+ u32 lmask = mask;
+ u32 hmask = mask >> 32;
+ int err;
+
+ /* Ensure that XFD is up to date */
+ xfd_update_state(fpstate);
+
+ if (cpu_feature_enabled(X86_FEATURE_XSAVES))
+ XSTATE_OP(XRSTORS, xstate, lmask, hmask, err);
+ else
+ XSTATE_OP(XRSTOR, xstate, lmask, hmask, err);
+
+ return err;
+}
+
+
+#endif
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index de01903c3735..fc5371a7e9d1 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -19,7 +19,7 @@
#include <linux/start_kernel.h>
#include <linux/io.h>
#include <linux/memblock.h>
-#include <linux/mem_encrypt.h>
+#include <linux/cc_platform.h>
#include <linux/pgtable.h>
#include <asm/processor.h>
@@ -284,8 +284,13 @@ unsigned long __head __startup_64(unsigned long physaddr,
* The bss section will be memset to zero later in the initialization so
* there is no need to zero it after changing the memory encryption
* attribute.
+ *
+ * This is early code, use an open coded check for SME instead of
+ * using cc_platform_has(). This eliminates worries about removing
+ * instrumentation or checking boot_cpu_data in the cc_platform_has()
+ * function.
*/
- if (mem_encrypt_active()) {
+ if (sme_get_me_mask()) {
vaddr = (unsigned long)__start_bss_decrypted;
vaddr_end = (unsigned long)__end_bss_decrypted;
for (; vaddr < vaddr_end; vaddr += PMD_SIZE) {
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index 044902d5a3c4..e5dd6da78713 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -132,6 +132,7 @@ int irq_init_percpu_irqstack(unsigned int cpu)
return 0;
}
+#ifndef CONFIG_PREEMPT_RT
void do_softirq_own_stack(void)
{
struct irq_stack *irqstk;
@@ -148,6 +149,7 @@ void do_softirq_own_stack(void)
call_on_stack(__do_softirq, isp);
}
+#endif
void __handle_irq(struct irq_desc *desc, struct pt_regs *regs)
{
diff --git a/arch/x86/kernel/irqflags.S b/arch/x86/kernel/irqflags.S
index 8ef35063964b..760e1f293093 100644
--- a/arch/x86/kernel/irqflags.S
+++ b/arch/x86/kernel/irqflags.S
@@ -7,9 +7,11 @@
/*
* unsigned long native_save_fl(void)
*/
+.pushsection .noinstr.text, "ax"
SYM_FUNC_START(native_save_fl)
pushf
pop %_ASM_AX
ret
SYM_FUNC_END(native_save_fl)
+.popsection
EXPORT_SYMBOL(native_save_fl)
diff --git a/arch/x86/kernel/itmt.c b/arch/x86/kernel/itmt.c
index 1afbdd1dd777..9ff480e94511 100644
--- a/arch/x86/kernel/itmt.c
+++ b/arch/x86/kernel/itmt.c
@@ -198,7 +198,7 @@ void sched_set_itmt_core_prio(int prio, int core_cpu)
* of the priority chain and only used when
* all other high priority cpus are out of capacity.
*/
- smt_prio = prio * smp_num_siblings / i;
+ smt_prio = prio * smp_num_siblings / (i * i);
per_cpu(sched_core_priority, cpu) = smt_prio;
i++;
}
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index b656456c3a94..8863d1941f1b 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -27,6 +27,7 @@
#include <linux/nmi.h>
#include <linux/swait.h>
#include <linux/syscore_ops.h>
+#include <linux/cc_platform.h>
#include <asm/timer.h>
#include <asm/cpu.h>
#include <asm/traps.h>
@@ -418,7 +419,7 @@ static void __init sev_map_percpu_data(void)
{
int cpu;
- if (!sev_active())
+ if (!cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT))
return;
for_each_possible_cpu(cpu) {
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index 73c74b961d0f..462dd8e9b03d 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -16,9 +16,9 @@
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/set_memory.h>
+#include <linux/cc_platform.h>
#include <asm/hypervisor.h>
-#include <asm/mem_encrypt.h>
#include <asm/x86_init.h>
#include <asm/kvmclock.h>
@@ -223,7 +223,7 @@ static void __init kvmclock_init_mem(void)
* hvclock is shared between the guest and the hypervisor, must
* be mapped decrypted.
*/
- if (sev_active()) {
+ if (cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT)) {
r = set_memory_decrypted((unsigned long) hvclock_mem,
1UL << order);
if (r) {
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index 131f30fdcfbd..f5da4a18070a 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -17,6 +17,7 @@
#include <linux/suspend.h>
#include <linux/vmalloc.h>
#include <linux/efi.h>
+#include <linux/cc_platform.h>
#include <asm/init.h>
#include <asm/tlbflush.h>
@@ -166,7 +167,7 @@ static int init_transition_pgtable(struct kimage *image, pgd_t *pgd)
}
pte = pte_offset_kernel(pmd, vaddr);
- if (sev_active())
+ if (cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT))
prot = PAGE_KERNEL_EXEC;
set_pte(pte, pfn_pte(paddr >> PAGE_SHIFT, prot));
@@ -206,7 +207,7 @@ static int init_pgtable(struct kimage *image, unsigned long start_pgtable)
level4p = (pgd_t *)__va(start_pgtable);
clear_page(level4p);
- if (sev_active()) {
+ if (cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT)) {
info.page_flag |= _PAGE_ENC;
info.kernpg_flag |= _PAGE_ENC;
}
@@ -358,7 +359,7 @@ void machine_kexec(struct kimage *image)
(unsigned long)page_list,
image->start,
image->preserve_context,
- sme_active());
+ cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT));
#ifdef CONFIG_KEXEC_JUMP
if (image->preserve_context)
@@ -569,12 +570,12 @@ void arch_kexec_unprotect_crashkres(void)
*/
int arch_kexec_post_alloc_pages(void *vaddr, unsigned int pages, gfp_t gfp)
{
- if (sev_active())
+ if (!cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT))
return 0;
/*
- * If SME is active we need to be sure that kexec pages are
- * not encrypted because when we boot to the new kernel the
+ * If host memory encryption is active we need to be sure that kexec
+ * pages are not encrypted because when we boot to the new kernel the
* pages won't be accessed encrypted (initially).
*/
return set_memory_decrypted((unsigned long)vaddr, pages);
@@ -582,12 +583,12 @@ int arch_kexec_post_alloc_pages(void *vaddr, unsigned int pages, gfp_t gfp)
void arch_kexec_pre_free_pages(void *vaddr, unsigned int pages)
{
- if (sev_active())
+ if (!cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT))
return;
/*
- * If SME is active we need to reset the pages back to being
- * an encrypted mapping before freeing them.
+ * If host memory encryption is active we need to reset the pages back
+ * to being an encrypted mapping before freeing them.
*/
set_memory_encrypted((unsigned long)vaddr, pages);
}
diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c
index 5e9a34b5bd74..169fb6f4cd2e 100644
--- a/arch/x86/kernel/module.c
+++ b/arch/x86/kernel/module.c
@@ -251,7 +251,8 @@ int module_finalize(const Elf_Ehdr *hdr,
struct module *me)
{
const Elf_Shdr *s, *text = NULL, *alt = NULL, *locks = NULL,
- *para = NULL, *orc = NULL, *orc_ip = NULL;
+ *para = NULL, *orc = NULL, *orc_ip = NULL,
+ *retpolines = NULL;
char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) {
@@ -267,8 +268,14 @@ int module_finalize(const Elf_Ehdr *hdr,
orc = s;
if (!strcmp(".orc_unwind_ip", secstrings + s->sh_name))
orc_ip = s;
+ if (!strcmp(".retpoline_sites", secstrings + s->sh_name))
+ retpolines = s;
}
+ if (retpolines) {
+ void *rseg = (void *)retpolines->sh_addr;
+ apply_retpolines(rseg, rseg + retpolines->sh_size);
+ }
if (alt) {
/* patch .altinstructions */
void *aseg = (void *)alt->sh_addr;
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 04cafc057bed..ebc45360ffd4 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -218,6 +218,36 @@ void paravirt_end_context_switch(struct task_struct *next)
if (test_and_clear_ti_thread_flag(task_thread_info(next), TIF_LAZY_MMU_UPDATES))
arch_enter_lazy_mmu_mode();
}
+
+static noinstr unsigned long pv_native_read_cr2(void)
+{
+ return native_read_cr2();
+}
+
+static noinstr void pv_native_write_cr2(unsigned long val)
+{
+ native_write_cr2(val);
+}
+
+static noinstr unsigned long pv_native_get_debugreg(int regno)
+{
+ return native_get_debugreg(regno);
+}
+
+static noinstr void pv_native_set_debugreg(int regno, unsigned long val)
+{
+ native_set_debugreg(regno, val);
+}
+
+static noinstr void pv_native_irq_enable(void)
+{
+ native_irq_enable();
+}
+
+static noinstr void pv_native_irq_disable(void)
+{
+ native_irq_disable();
+}
#endif
enum paravirt_lazy_mode paravirt_get_lazy_mode(void)
@@ -244,8 +274,8 @@ struct paravirt_patch_template pv_ops = {
#ifdef CONFIG_PARAVIRT_XXL
.cpu.cpuid = native_cpuid,
- .cpu.get_debugreg = native_get_debugreg,
- .cpu.set_debugreg = native_set_debugreg,
+ .cpu.get_debugreg = pv_native_get_debugreg,
+ .cpu.set_debugreg = pv_native_set_debugreg,
.cpu.read_cr0 = native_read_cr0,
.cpu.write_cr0 = native_write_cr0,
.cpu.write_cr4 = native_write_cr4,
@@ -281,8 +311,8 @@ struct paravirt_patch_template pv_ops = {
/* Irq ops. */
.irq.save_fl = __PV_IS_CALLEE_SAVE(native_save_fl),
- .irq.irq_disable = __PV_IS_CALLEE_SAVE(native_irq_disable),
- .irq.irq_enable = __PV_IS_CALLEE_SAVE(native_irq_enable),
+ .irq.irq_disable = __PV_IS_CALLEE_SAVE(pv_native_irq_disable),
+ .irq.irq_enable = __PV_IS_CALLEE_SAVE(pv_native_irq_enable),
.irq.safe_halt = native_safe_halt,
.irq.halt = native_halt,
#endif /* CONFIG_PARAVIRT_XXL */
@@ -298,8 +328,8 @@ struct paravirt_patch_template pv_ops = {
.mmu.exit_mmap = paravirt_nop,
#ifdef CONFIG_PARAVIRT_XXL
- .mmu.read_cr2 = __PV_IS_CALLEE_SAVE(native_read_cr2),
- .mmu.write_cr2 = native_write_cr2,
+ .mmu.read_cr2 = __PV_IS_CALLEE_SAVE(pv_native_read_cr2),
+ .mmu.write_cr2 = pv_native_write_cr2,
.mmu.read_cr3 = __native_read_cr3,
.mmu.write_cr3 = native_write_cr3,
@@ -371,9 +401,6 @@ struct paravirt_patch_template pv_ops = {
};
#ifdef CONFIG_PARAVIRT_XXL
-/* At this point, native_get/set_debugreg has real function entries */
-NOKPROBE_SYMBOL(native_get_debugreg);
-NOKPROBE_SYMBOL(native_set_debugreg);
NOKPROBE_SYMBOL(native_load_idt);
void (*paravirt_iret)(void) = native_iret;
diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c
index c2cfa5e7c152..814ab46a0dad 100644
--- a/arch/x86/kernel/pci-swiotlb.c
+++ b/arch/x86/kernel/pci-swiotlb.c
@@ -6,7 +6,7 @@
#include <linux/swiotlb.h>
#include <linux/memblock.h>
#include <linux/dma-direct.h>
-#include <linux/mem_encrypt.h>
+#include <linux/cc_platform.h>
#include <asm/iommu.h>
#include <asm/swiotlb.h>
@@ -45,11 +45,10 @@ int __init pci_swiotlb_detect_4gb(void)
swiotlb = 1;
/*
- * If SME is active then swiotlb will be set to 1 so that bounce
- * buffers are allocated and used for devices that do not support
- * the addressing range required for the encryption mask.
+ * Set swiotlb to 1 so that bounce buffers are allocated and used for
+ * devices that can't support DMA to encrypted memory.
*/
- if (sme_active())
+ if (cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT))
swiotlb = 1;
return swiotlb;
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 1d9463e3096b..eb470be0e5ae 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -30,7 +30,9 @@
#include <asm/apic.h>
#include <linux/uaccess.h>
#include <asm/mwait.h>
-#include <asm/fpu/internal.h>
+#include <asm/fpu/api.h>
+#include <asm/fpu/sched.h>
+#include <asm/fpu/xstate.h>
#include <asm/debugreg.h>
#include <asm/nmi.h>
#include <asm/tlbflush.h>
@@ -43,6 +45,7 @@
#include <asm/io_bitmap.h>
#include <asm/proto.h>
#include <asm/frame.h>
+#include <asm/unwind.h>
#include "process.h"
@@ -87,9 +90,20 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
#ifdef CONFIG_VM86
dst->thread.vm86 = NULL;
#endif
- return fpu_clone(dst);
+ /* Drop the copied pointer to current's fpstate */
+ dst->thread.fpu.fpstate = NULL;
+
+ return 0;
}
+#ifdef CONFIG_X86_64
+void arch_release_task_struct(struct task_struct *tsk)
+{
+ if (fpu_state_size_dynamic())
+ fpstate_free(&tsk->thread.fpu);
+}
+#endif
+
/*
* Free thread data structures etc..
*/
@@ -154,6 +168,8 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, unsigned long arg,
frame->flags = X86_EFLAGS_FIXED;
#endif
+ fpu_clone(p, clone_flags);
+
/* Kernel thread ? */
if (unlikely(p->flags & PF_KTHREAD)) {
p->thread.pkru = pkru_get_init_value();
@@ -942,70 +958,36 @@ unsigned long arch_randomize_brk(struct mm_struct *mm)
* because the task might wake up and we might look at a stack
* changing under us.
*/
-unsigned long get_wchan(struct task_struct *p)
+unsigned long __get_wchan(struct task_struct *p)
{
- unsigned long start, bottom, top, sp, fp, ip, ret = 0;
- int count = 0;
-
- if (p == current || task_is_running(p))
- return 0;
-
- if (!try_get_task_stack(p))
- return 0;
-
- start = (unsigned long)task_stack_page(p);
- if (!start)
- goto out;
+ struct unwind_state state;
+ unsigned long addr = 0;
- /*
- * Layout of the stack page:
- *
- * ----------- topmax = start + THREAD_SIZE - sizeof(unsigned long)
- * PADDING
- * ----------- top = topmax - TOP_OF_KERNEL_STACK_PADDING
- * stack
- * ----------- bottom = start
- *
- * The tasks stack pointer points at the location where the
- * framepointer is stored. The data on the stack is:
- * ... IP FP ... IP FP
- *
- * We need to read FP and IP, so we need to adjust the upper
- * bound by another unsigned long.
- */
- top = start + THREAD_SIZE - TOP_OF_KERNEL_STACK_PADDING;
- top -= 2 * sizeof(unsigned long);
- bottom = start;
-
- sp = READ_ONCE(p->thread.sp);
- if (sp < bottom || sp > top)
- goto out;
-
- fp = READ_ONCE_NOCHECK(((struct inactive_task_frame *)sp)->bp);
- do {
- if (fp < bottom || fp > top)
- goto out;
- ip = READ_ONCE_NOCHECK(*(unsigned long *)(fp + sizeof(unsigned long)));
- if (!in_sched_functions(ip)) {
- ret = ip;
- goto out;
- }
- fp = READ_ONCE_NOCHECK(*(unsigned long *)fp);
- } while (count++ < 16 && !task_is_running(p));
+ for (unwind_start(&state, p, NULL, NULL); !unwind_done(&state);
+ unwind_next_frame(&state)) {
+ addr = unwind_get_return_address(&state);
+ if (!addr)
+ break;
+ if (in_sched_functions(addr))
+ continue;
+ break;
+ }
-out:
- put_task_stack(p);
- return ret;
+ return addr;
}
long do_arch_prctl_common(struct task_struct *task, int option,
- unsigned long cpuid_enabled)
+ unsigned long arg2)
{
switch (option) {
case ARCH_GET_CPUID:
return get_cpuid_mode();
case ARCH_SET_CPUID:
- return set_cpuid_mode(task, cpuid_enabled);
+ return set_cpuid_mode(task, arg2);
+ case ARCH_GET_XCOMP_SUPP:
+ case ARCH_GET_XCOMP_PERM:
+ case ARCH_REQ_XCOMP_PERM:
+ return fpu_xstate_prctl(task, option, arg2);
}
return -EINVAL;
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 4f2f54e1281c..26edb1cd07a4 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -41,7 +41,7 @@
#include <asm/ldt.h>
#include <asm/processor.h>
-#include <asm/fpu/internal.h>
+#include <asm/fpu/sched.h>
#include <asm/desc.h>
#include <linux/err.h>
@@ -160,7 +160,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
struct thread_struct *prev = &prev_p->thread,
*next = &next_p->thread;
struct fpu *prev_fpu = &prev->fpu;
- struct fpu *next_fpu = &next->fpu;
int cpu = smp_processor_id();
/* never put a printk in __switch_to... printk() calls wake_up*() indirectly */
@@ -213,7 +212,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
this_cpu_write(current_task, next_p);
- switch_fpu_finish(next_fpu);
+ switch_fpu_finish();
/* Load the Intel cache allocation PQR MSR. */
resctrl_sched_in();
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index ec0d836a13b1..3402edec236c 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -42,7 +42,7 @@
#include <asm/processor.h>
#include <asm/pkru.h>
-#include <asm/fpu/internal.h>
+#include <asm/fpu/sched.h>
#include <asm/mmu_context.h>
#include <asm/prctl.h>
#include <asm/desc.h>
@@ -559,7 +559,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
struct thread_struct *prev = &prev_p->thread;
struct thread_struct *next = &next_p->thread;
struct fpu *prev_fpu = &prev->fpu;
- struct fpu *next_fpu = &next->fpu;
int cpu = smp_processor_id();
WARN_ON_ONCE(IS_ENABLED(CONFIG_DEBUG_ENTRY) &&
@@ -620,7 +619,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
this_cpu_write(current_task, next_p);
this_cpu_write(cpu_current_top_of_stack, task_top_of_stack(next_p));
- switch_fpu_finish(next_fpu);
+ switch_fpu_finish();
/* Reload sp0. */
update_task_stack(next_p);
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 4c208ea3bd9f..6d2244c94799 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -29,9 +29,9 @@
#include <linux/uaccess.h>
#include <asm/processor.h>
-#include <asm/fpu/internal.h>
#include <asm/fpu/signal.h>
#include <asm/fpu/regset.h>
+#include <asm/fpu/xstate.h>
#include <asm/debugreg.h>
#include <asm/ldt.h>
#include <asm/desc.h>
diff --git a/arch/x86/kernel/relocate_kernel_64.S b/arch/x86/kernel/relocate_kernel_64.S
index c53271aebb64..c8fe74a28143 100644
--- a/arch/x86/kernel/relocate_kernel_64.S
+++ b/arch/x86/kernel/relocate_kernel_64.S
@@ -47,7 +47,7 @@ SYM_CODE_START_NOALIGN(relocate_kernel)
* %rsi page_list
* %rdx start address
* %rcx preserve_context
- * %r8 sme_active
+ * %r8 host_mem_enc_active
*/
/* Save the CPU context, used for jumping back */
diff --git a/arch/x86/kernel/sev-shared.c b/arch/x86/kernel/sev-shared.c
index 0aacd6047ef2..787dc5f568b5 100644
--- a/arch/x86/kernel/sev-shared.c
+++ b/arch/x86/kernel/sev-shared.c
@@ -64,7 +64,7 @@ static bool sev_es_negotiate_protocol(void)
static __always_inline void vc_ghcb_invalidate(struct ghcb *ghcb)
{
ghcb->save.sw_exit_code = 0;
- memset(ghcb->save.valid_bitmap, 0, sizeof(ghcb->save.valid_bitmap));
+ __builtin_memset(ghcb->save.valid_bitmap, 0, sizeof(ghcb->save.valid_bitmap));
}
static bool vc_decoding_needed(unsigned long exit_code)
diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c
index 113d3ae51297..c195ffe58049 100644
--- a/arch/x86/kernel/sev.c
+++ b/arch/x86/kernel/sev.c
@@ -11,7 +11,7 @@
#include <linux/sched/debug.h> /* For show_regs() */
#include <linux/percpu-defs.h>
-#include <linux/mem_encrypt.h>
+#include <linux/cc_platform.h>
#include <linux/printk.h>
#include <linux/mm_types.h>
#include <linux/set_memory.h>
@@ -23,7 +23,7 @@
#include <asm/stacktrace.h>
#include <asm/sev.h>
#include <asm/insn-eval.h>
-#include <asm/fpu/internal.h>
+#include <asm/fpu/xcr.h>
#include <asm/processor.h>
#include <asm/realmode.h>
#include <asm/traps.h>
@@ -615,7 +615,7 @@ int __init sev_es_efi_map_ghcbs(pgd_t *pgd)
int cpu;
u64 pfn;
- if (!sev_es_active())
+ if (!cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT))
return 0;
pflags = _PAGE_NX | _PAGE_RW;
@@ -775,7 +775,7 @@ void __init sev_es_init_vc_handling(void)
BUILD_BUG_ON(offsetof(struct sev_es_runtime_data, ghcb_page) % PAGE_SIZE);
- if (!sev_es_active())
+ if (!cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT))
return;
if (!sev_es_check_cpu_features())
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index f4d21e470083..ec71e06ae364 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -15,6 +15,7 @@
#include <linux/mm.h>
#include <linux/smp.h>
#include <linux/kernel.h>
+#include <linux/kstrtox.h>
#include <linux/errno.h>
#include <linux/wait.h>
#include <linux/tracehook.h>
@@ -30,8 +31,8 @@
#include <asm/processor.h>
#include <asm/ucontext.h>
-#include <asm/fpu/internal.h>
#include <asm/fpu/signal.h>
+#include <asm/fpu/xstate.h>
#include <asm/vdso.h>
#include <asm/mce.h>
#include <asm/sighandling.h>
@@ -41,6 +42,7 @@
#include <linux/compat.h>
#include <asm/proto.h>
#include <asm/ia32_unistd.h>
+#include <asm/fpu/xstate.h>
#endif /* CONFIG_X86_64 */
#include <asm/syscall.h>
@@ -79,9 +81,9 @@ static void force_valid_ss(struct pt_regs *regs)
# define CONTEXT_COPY_SIZE sizeof(struct sigcontext)
#endif
-static int restore_sigcontext(struct pt_regs *regs,
- struct sigcontext __user *usc,
- unsigned long uc_flags)
+static bool restore_sigcontext(struct pt_regs *regs,
+ struct sigcontext __user *usc,
+ unsigned long uc_flags)
{
struct sigcontext sc;
@@ -89,7 +91,7 @@ static int restore_sigcontext(struct pt_regs *regs,
current->restart_block.fn = do_no_restart_syscall;
if (copy_from_user(&sc, usc, CONTEXT_COPY_SIZE))
- return -EFAULT;
+ return false;
#ifdef CONFIG_X86_32
set_user_gs(regs, sc.gs);
@@ -244,7 +246,6 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size,
unsigned long math_size = 0;
unsigned long sp = regs->sp;
unsigned long buf_fx = 0;
- int ret;
/* redzone */
if (IS_ENABLED(CONFIG_X86_64))
@@ -292,8 +293,7 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size,
}
/* save i387 and extended state */
- ret = copy_fpstate_to_sigframe(*fpstate, (void __user *)buf_fx, math_size);
- if (ret < 0)
+ if (!copy_fpstate_to_sigframe(*fpstate, (void __user *)buf_fx, math_size))
return (void __user *)-1L;
return (void __user *)sp;
@@ -643,7 +643,7 @@ SYSCALL_DEFINE0(sigreturn)
* x86_32 has no uc_flags bits relevant to restore_sigcontext.
* Save a few cycles by skipping the __get_user.
*/
- if (restore_sigcontext(regs, &frame->sc, 0))
+ if (!restore_sigcontext(regs, &frame->sc, 0))
goto badframe;
return regs->ax;
@@ -671,7 +671,7 @@ SYSCALL_DEFINE0(rt_sigreturn)
set_current_blocked(&set);
- if (restore_sigcontext(regs, &frame->uc.uc_mcontext, uc_flags))
+ if (!restore_sigcontext(regs, &frame->uc.uc_mcontext, uc_flags))
goto badframe;
if (restore_altstack(&frame->uc.uc_stack))
@@ -721,12 +721,15 @@ badframe:
/* max_frame_size tells userspace the worst case signal stack size. */
static unsigned long __ro_after_init max_frame_size;
+static unsigned int __ro_after_init fpu_default_state_size;
void __init init_sigframe_size(void)
{
+ fpu_default_state_size = fpu__get_fpstate_size();
+
max_frame_size = MAX_FRAME_SIGINFO_UCTXT_SIZE + MAX_FRAME_PADDING;
- max_frame_size += fpu__get_fpstate_size() + MAX_XSAVE_PADDING;
+ max_frame_size += fpu_default_state_size + MAX_XSAVE_PADDING;
/* Userspace expects an aligned size. */
max_frame_size = round_up(max_frame_size, FRAME_ALIGNMENT);
@@ -910,6 +913,62 @@ void signal_fault(struct pt_regs *regs, void __user *frame, char *where)
force_sig(SIGSEGV);
}
+#ifdef CONFIG_DYNAMIC_SIGFRAME
+#ifdef CONFIG_STRICT_SIGALTSTACK_SIZE
+static bool strict_sigaltstack_size __ro_after_init = true;
+#else
+static bool strict_sigaltstack_size __ro_after_init = false;
+#endif
+
+static int __init strict_sas_size(char *arg)
+{
+ return kstrtobool(arg, &strict_sigaltstack_size);
+}
+__setup("strict_sas_size", strict_sas_size);
+
+/*
+ * MINSIGSTKSZ is 2048 and can't be changed despite the fact that AVX512
+ * exceeds that size already. As such programs might never use the
+ * sigaltstack they just continued to work. While always checking against
+ * the real size would be correct, this might be considered a regression.
+ *
+ * Therefore avoid the sanity check, unless enforced by kernel
+ * configuration or command line option.
+ *
+ * When dynamic FPU features are supported, the check is also enforced when
+ * the task has permissions to use dynamic features. Tasks which have no
+ * permission are checked against the size of the non-dynamic feature set
+ * if strict checking is enabled. This avoids forcing all tasks on the
+ * system to allocate large sigaltstacks even if they are never going
+ * to use a dynamic feature. As this is serialized via sighand::siglock
+ * any permission request for a dynamic feature either happened already
+ * or will see the newly install sigaltstack size in the permission checks.
+ */
+bool sigaltstack_size_valid(size_t ss_size)
+{
+ unsigned long fsize = max_frame_size - fpu_default_state_size;
+ u64 mask;
+
+ lockdep_assert_held(&current->sighand->siglock);
+
+ if (!fpu_state_size_dynamic() && !strict_sigaltstack_size)
+ return true;
+
+ fsize += current->group_leader->thread.fpu.perm.__user_state_size;
+ if (likely(ss_size > fsize))
+ return true;
+
+ if (strict_sigaltstack_size)
+ return ss_size > fsize;
+
+ mask = current->group_leader->thread.fpu.perm.__state_perm;
+ if (mask & XFEATURE_MASK_USER_DYNAMIC)
+ return ss_size > fsize;
+
+ return true;
+}
+#endif /* CONFIG_DYNAMIC_SIGFRAME */
+
#ifdef CONFIG_X86_X32_ABI
COMPAT_SYSCALL_DEFINE0(x32_rt_sigreturn)
{
@@ -929,7 +988,7 @@ COMPAT_SYSCALL_DEFINE0(x32_rt_sigreturn)
set_current_blocked(&set);
- if (restore_sigcontext(regs, &frame->uc.uc_mcontext, uc_flags))
+ if (!restore_sigcontext(regs, &frame->uc.uc_mcontext, uc_flags))
goto badframe;
if (compat_restore_altstack(&frame->uc.uc_stack))
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 85f6e242b6b4..8241927addff 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -70,7 +70,7 @@
#include <asm/mwait.h>
#include <asm/apic.h>
#include <asm/io_apic.h>
-#include <asm/fpu/internal.h>
+#include <asm/fpu/api.h>
#include <asm/setup.h>
#include <asm/uv/uv.h>
#include <linux/mc146818rtc.h>
@@ -101,6 +101,8 @@ EXPORT_PER_CPU_SYMBOL(cpu_die_map);
DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_llc_shared_map);
+DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_l2c_shared_map);
+
/* Per CPU bogomips and other parameters */
DEFINE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info);
EXPORT_PER_CPU_SYMBOL(cpu_info);
@@ -464,6 +466,21 @@ static bool match_die(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
return false;
}
+static bool match_l2c(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
+{
+ int cpu1 = c->cpu_index, cpu2 = o->cpu_index;
+
+ /* If the arch didn't set up l2c_id, fall back to SMT */
+ if (per_cpu(cpu_l2c_id, cpu1) == BAD_APICID)
+ return match_smt(c, o);
+
+ /* Do not match if L2 cache id does not match: */
+ if (per_cpu(cpu_l2c_id, cpu1) != per_cpu(cpu_l2c_id, cpu2))
+ return false;
+
+ return topology_sane(c, o, "l2c");
+}
+
/*
* Unlike the other levels, we do not enforce keeping a
* multicore group inside a NUMA node. If this happens, we will
@@ -523,7 +540,7 @@ static bool match_llc(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
}
-#if defined(CONFIG_SCHED_SMT) || defined(CONFIG_SCHED_MC)
+#if defined(CONFIG_SCHED_SMT) || defined(CONFIG_SCHED_CLUSTER) || defined(CONFIG_SCHED_MC)
static inline int x86_sched_itmt_flags(void)
{
return sysctl_sched_itmt_enabled ? SD_ASYM_PACKING : 0;
@@ -541,12 +558,21 @@ static int x86_smt_flags(void)
return cpu_smt_flags() | x86_sched_itmt_flags();
}
#endif
+#ifdef CONFIG_SCHED_CLUSTER
+static int x86_cluster_flags(void)
+{
+ return cpu_cluster_flags() | x86_sched_itmt_flags();
+}
+#endif
#endif
static struct sched_domain_topology_level x86_numa_in_package_topology[] = {
#ifdef CONFIG_SCHED_SMT
{ cpu_smt_mask, x86_smt_flags, SD_INIT_NAME(SMT) },
#endif
+#ifdef CONFIG_SCHED_CLUSTER
+ { cpu_clustergroup_mask, x86_cluster_flags, SD_INIT_NAME(CLS) },
+#endif
#ifdef CONFIG_SCHED_MC
{ cpu_coregroup_mask, x86_core_flags, SD_INIT_NAME(MC) },
#endif
@@ -557,6 +583,9 @@ static struct sched_domain_topology_level x86_topology[] = {
#ifdef CONFIG_SCHED_SMT
{ cpu_smt_mask, x86_smt_flags, SD_INIT_NAME(SMT) },
#endif
+#ifdef CONFIG_SCHED_CLUSTER
+ { cpu_clustergroup_mask, x86_cluster_flags, SD_INIT_NAME(CLS) },
+#endif
#ifdef CONFIG_SCHED_MC
{ cpu_coregroup_mask, x86_core_flags, SD_INIT_NAME(MC) },
#endif
@@ -584,6 +613,7 @@ void set_cpu_sibling_map(int cpu)
if (!has_mp) {
cpumask_set_cpu(cpu, topology_sibling_cpumask(cpu));
cpumask_set_cpu(cpu, cpu_llc_shared_mask(cpu));
+ cpumask_set_cpu(cpu, cpu_l2c_shared_mask(cpu));
cpumask_set_cpu(cpu, topology_core_cpumask(cpu));
cpumask_set_cpu(cpu, topology_die_cpumask(cpu));
c->booted_cores = 1;
@@ -602,6 +632,9 @@ void set_cpu_sibling_map(int cpu)
if ((i == cpu) || (has_mp && match_llc(c, o)))
link_mask(cpu_llc_shared_mask, cpu, i);
+ if ((i == cpu) || (has_mp && match_l2c(c, o)))
+ link_mask(cpu_l2c_shared_mask, cpu, i);
+
if ((i == cpu) || (has_mp && match_die(c, o)))
link_mask(topology_die_cpumask, cpu, i);
}
@@ -652,6 +685,11 @@ const struct cpumask *cpu_coregroup_mask(int cpu)
return cpu_llc_shared_mask(cpu);
}
+const struct cpumask *cpu_clustergroup_mask(int cpu)
+{
+ return cpu_l2c_shared_mask(cpu);
+}
+
static void impress_friends(void)
{
int cpu;
@@ -1335,6 +1373,7 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
zalloc_cpumask_var(&per_cpu(cpu_core_map, i), GFP_KERNEL);
zalloc_cpumask_var(&per_cpu(cpu_die_map, i), GFP_KERNEL);
zalloc_cpumask_var(&per_cpu(cpu_llc_shared_map, i), GFP_KERNEL);
+ zalloc_cpumask_var(&per_cpu(cpu_l2c_shared_map, i), GFP_KERNEL);
}
/*
@@ -1564,7 +1603,10 @@ static void remove_siblinginfo(int cpu)
for_each_cpu(sibling, cpu_llc_shared_mask(cpu))
cpumask_clear_cpu(cpu, cpu_llc_shared_mask(sibling));
+ for_each_cpu(sibling, cpu_l2c_shared_mask(cpu))
+ cpumask_clear_cpu(cpu, cpu_l2c_shared_mask(sibling));
cpumask_clear(cpu_llc_shared_mask(cpu));
+ cpumask_clear(cpu_l2c_shared_mask(cpu));
cpumask_clear(topology_sibling_cpumask(cpu));
cpumask_clear(topology_core_cpumask(cpu));
cpumask_clear(topology_die_cpumask(cpu));
@@ -2166,7 +2208,7 @@ DEFINE_PER_CPU(unsigned long, arch_freq_scale) = SCHED_CAPACITY_SCALE;
void arch_scale_freq_tick(void)
{
- u64 freq_scale = SCHED_CAPACITY_SCALE;
+ u64 freq_scale;
u64 aperf, mperf;
u64 acnt, mcnt;
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index f516f2b4797e..43f70bc01762 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -48,7 +48,7 @@
#include <asm/ftrace.h>
#include <asm/traps.h>
#include <asm/desc.h>
-#include <asm/fpu/internal.h>
+#include <asm/fpu/api.h>
#include <asm/cpu.h>
#include <asm/cpu_entry_area.h>
#include <asm/mce.h>
@@ -1108,10 +1108,48 @@ DEFINE_IDTENTRY(exc_spurious_interrupt_bug)
*/
}
+static bool handle_xfd_event(struct pt_regs *regs)
+{
+ u64 xfd_err;
+ int err;
+
+ if (!IS_ENABLED(CONFIG_X86_64) || !cpu_feature_enabled(X86_FEATURE_XFD))
+ return false;
+
+ rdmsrl(MSR_IA32_XFD_ERR, xfd_err);
+ if (!xfd_err)
+ return false;
+
+ wrmsrl(MSR_IA32_XFD_ERR, 0);
+
+ /* Die if that happens in kernel space */
+ if (WARN_ON(!user_mode(regs)))
+ return false;
+
+ local_irq_enable();
+
+ err = xfd_enable_feature(xfd_err);
+
+ switch (err) {
+ case -EPERM:
+ force_sig_fault(SIGILL, ILL_ILLOPC, error_get_trap_addr(regs));
+ break;
+ case -EFAULT:
+ force_sig(SIGSEGV);
+ break;
+ }
+
+ local_irq_disable();
+ return true;
+}
+
DEFINE_IDTENTRY(exc_device_not_available)
{
unsigned long cr0 = read_cr0();
+ if (handle_xfd_event(regs))
+ return;
+
#ifdef CONFIG_MATH_EMULATION
if (!boot_cpu_has(X86_FEATURE_FPU) && (cr0 & X86_CR0_EM)) {
struct math_emu_info info = { };
diff --git a/arch/x86/kernel/umip.c b/arch/x86/kernel/umip.c
index 576b47e7523d..5a4b21389b1d 100644
--- a/arch/x86/kernel/umip.c
+++ b/arch/x86/kernel/umip.c
@@ -92,8 +92,8 @@ static const char * const umip_insns[5] = {
#define umip_pr_err(regs, fmt, ...) \
umip_printk(regs, KERN_ERR, fmt, ##__VA_ARGS__)
-#define umip_pr_warn(regs, fmt, ...) \
- umip_printk(regs, KERN_WARNING, fmt, ##__VA_ARGS__)
+#define umip_pr_debug(regs, fmt, ...) \
+ umip_printk(regs, KERN_DEBUG, fmt, ##__VA_ARGS__)
/**
* umip_printk() - Print a rate-limited message
@@ -361,10 +361,10 @@ bool fixup_umip_exception(struct pt_regs *regs)
if (umip_inst < 0)
return false;
- umip_pr_warn(regs, "%s instruction cannot be used by applications.\n",
+ umip_pr_debug(regs, "%s instruction cannot be used by applications.\n",
umip_insns[umip_inst]);
- umip_pr_warn(regs, "For now, expensive software emulation returns the result.\n");
+ umip_pr_debug(regs, "For now, expensive software emulation returns the result.\n");
if (emulate_umip_insn(&insn, umip_inst, dummy_data, &dummy_data_size,
user_64bit_mode(regs)))
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index efd9e9ea17f2..3d6dc12d198f 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -272,6 +272,20 @@ SECTIONS
__parainstructions_end = .;
}
+#ifdef CONFIG_RETPOLINE
+ /*
+ * List of instructions that call/jmp/jcc to retpoline thunks
+ * __x86_indirect_thunk_*(). These instructions can be patched along
+ * with alternatives, after which the section can be freed.
+ */
+ . = ALIGN(8);
+ .retpoline_sites : AT(ADDR(.retpoline_sites) - LOAD_OFFSET) {
+ __retpoline_sites = .;
+ *(.retpoline_sites)
+ __retpoline_sites_end = .;
+ }
+#endif
+
/*
* struct alt_inst entries. From the header (alternative.h):
* "Alternative instructions for different CPU types or capabilities"
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 76fb00921203..d6ac32f3f650 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -2321,13 +2321,14 @@ EXPORT_SYMBOL_GPL(kvm_apic_update_apicv);
void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event)
{
struct kvm_lapic *apic = vcpu->arch.apic;
+ u64 msr_val;
int i;
if (!init_event) {
- vcpu->arch.apic_base = APIC_DEFAULT_PHYS_BASE |
- MSR_IA32_APICBASE_ENABLE;
+ msr_val = APIC_DEFAULT_PHYS_BASE | MSR_IA32_APICBASE_ENABLE;
if (kvm_vcpu_is_reset_bsp(vcpu))
- vcpu->arch.apic_base |= MSR_IA32_APICBASE_BSP;
+ msr_val |= MSR_IA32_APICBASE_BSP;
+ kvm_lapic_set_base(vcpu, msr_val);
}
if (!apic)
@@ -2336,11 +2337,9 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event)
/* Stop the timer in case it's a reset to an active apic */
hrtimer_cancel(&apic->lapic_timer.timer);
- if (!init_event) {
- apic->base_address = APIC_DEFAULT_PHYS_BASE;
-
+ /* The xAPIC ID is set at RESET even if the APIC was already enabled. */
+ if (!init_event)
kvm_apic_set_xapic_id(apic, vcpu->vcpu_id);
- }
kvm_apic_set_version(apic->vcpu);
for (i = 0; i < KVM_APIC_LVT_NUM; i++)
@@ -2481,6 +2480,11 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu, int timer_advance_ns)
lapic_timer_advance_dynamic = false;
}
+ /*
+ * Stuff the APIC ENABLE bit in lieu of temporarily incrementing
+ * apic_hw_disabled; the full RESET value is set by kvm_lapic_reset().
+ */
+ vcpu->arch.apic_base = MSR_IA32_APICBASE_ENABLE;
static_branch_inc(&apic_sw_disabled.key); /* sw disabled at reset */
kvm_iodevice_init(&apic->dev, &apic_mmio_ops);
@@ -2942,5 +2946,7 @@ int kvm_apic_accept_events(struct kvm_vcpu *vcpu)
void kvm_lapic_exit(void)
{
static_key_deferred_flush(&apic_hw_disabled);
+ WARN_ON(static_branch_unlikely(&apic_hw_disabled.key));
static_key_deferred_flush(&apic_sw_disabled);
+ WARN_ON(static_branch_unlikely(&apic_sw_disabled.key));
}
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 1a64ba5b9437..0cc58901bf7a 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -4596,10 +4596,10 @@ static void update_pkru_bitmask(struct kvm_mmu *mmu)
unsigned bit;
bool wp;
- if (!is_cr4_pke(mmu)) {
- mmu->pkru_mask = 0;
+ mmu->pkru_mask = 0;
+
+ if (!is_cr4_pke(mmu))
return;
- }
wp = is_cr0_wp(mmu);
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index c36b5fe4c27c..5847b05d29da 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -17,10 +17,10 @@
#include <linux/misc_cgroup.h>
#include <linux/processor.h>
#include <linux/trace_events.h>
-#include <asm/fpu/internal.h>
#include <asm/pkru.h>
#include <asm/trapnr.h>
+#include <asm/fpu/xcr.h>
#include "x86.h"
#include "svm.h"
@@ -618,7 +618,12 @@ static int __sev_launch_update_vmsa(struct kvm *kvm, struct kvm_vcpu *vcpu,
vmsa.handle = to_kvm_svm(kvm)->sev_info.handle;
vmsa.address = __sme_pa(svm->vmsa);
vmsa.len = PAGE_SIZE;
- return sev_issue_cmd(kvm, SEV_CMD_LAUNCH_UPDATE_VMSA, &vmsa, error);
+ ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_UPDATE_VMSA, &vmsa, error);
+ if (ret)
+ return ret;
+
+ vcpu->arch.guest_state_protected = true;
+ return 0;
}
static int sev_launch_update_vmsa(struct kvm *kvm, struct kvm_sev_cmd *argp)
@@ -1479,6 +1484,13 @@ static int sev_receive_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp)
goto e_free_trans;
}
+ /*
+ * Flush (on non-coherent CPUs) before RECEIVE_UPDATE_DATA, the PSP
+ * encrypts the written data with the guest's key, and the cache may
+ * contain dirty, unencrypted data.
+ */
+ sev_clflush_pages(guest_page, n);
+
/* The RECEIVE_UPDATE_DATA command requires C-bit to be always set. */
data.guest_address = (page_to_pfn(guest_page[0]) << PAGE_SHIFT) + offset;
data.guest_address |= sev_me_mask;
@@ -2579,11 +2591,20 @@ int sev_handle_vmgexit(struct kvm_vcpu *vcpu)
int sev_es_string_io(struct vcpu_svm *svm, int size, unsigned int port, int in)
{
- if (!setup_vmgexit_scratch(svm, in, svm->vmcb->control.exit_info_2))
+ int count;
+ int bytes;
+
+ if (svm->vmcb->control.exit_info_2 > INT_MAX)
+ return -EINVAL;
+
+ count = svm->vmcb->control.exit_info_2;
+ if (unlikely(check_mul_overflow(count, size, &bytes)))
+ return -EINVAL;
+
+ if (!setup_vmgexit_scratch(svm, in, bytes))
return -EINVAL;
- return kvm_sev_es_string_io(&svm->vcpu, size, port,
- svm->ghcb_sa, svm->ghcb_sa_len, in);
+ return kvm_sev_es_string_io(&svm->vcpu, size, port, svm->ghcb_sa, count, in);
}
void sev_es_init_vmcb(struct vcpu_svm *svm)
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 989685098b3e..226482daa6eb 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -25,6 +25,7 @@
#include <linux/pagemap.h>
#include <linux/swap.h>
#include <linux/rwsem.h>
+#include <linux/cc_platform.h>
#include <asm/apic.h>
#include <asm/perf_event.h>
@@ -36,6 +37,7 @@
#include <asm/spec-ctrl.h>
#include <asm/cpu_device_id.h>
#include <asm/traps.h>
+#include <asm/fpu/api.h>
#include <asm/virtext.h>
#include "trace.h"
@@ -455,7 +457,7 @@ static int has_svm(void)
return 0;
}
- if (sev_active()) {
+ if (cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT)) {
pr_info("KVM is unsupported when running as an SEV guest\n");
return 0;
}
@@ -1346,10 +1348,10 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu)
/*
* SEV-ES guests maintain an encrypted version of their FPU
* state which is restored and saved on VMRUN and VMEXIT.
- * Free the fpu structure to prevent KVM from attempting to
- * access the FPU state.
+ * Mark vcpu->arch.guest_fpu->fpstate as scratch so it won't
+ * do xsave/xrstor on it.
*/
- kvm_free_guest_fpu(vcpu);
+ fpstate_set_confidential(&vcpu->arch.guest_fpu);
}
err = avic_init_vcpu(svm);
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index 128a54b1fbf1..e63ac08115cf 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -191,7 +191,7 @@ struct vcpu_svm {
/* SEV-ES scratch area support */
void *ghcb_sa;
- u64 ghcb_sa_len;
+ u32 ghcb_sa_len;
bool ghcb_sa_sync;
bool ghcb_sa_free;
@@ -218,12 +218,12 @@ DECLARE_PER_CPU(struct svm_cpu_data *, svm_data);
void recalc_intercepts(struct vcpu_svm *svm);
-static inline struct kvm_svm *to_kvm_svm(struct kvm *kvm)
+static __always_inline struct kvm_svm *to_kvm_svm(struct kvm *kvm)
{
return container_of(kvm, struct kvm_svm, kvm);
}
-static inline bool sev_guest(struct kvm *kvm)
+static __always_inline bool sev_guest(struct kvm *kvm)
{
#ifdef CONFIG_KVM_AMD_SEV
struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
@@ -234,7 +234,7 @@ static inline bool sev_guest(struct kvm *kvm)
#endif
}
-static inline bool sev_es_guest(struct kvm *kvm)
+static __always_inline bool sev_es_guest(struct kvm *kvm)
{
#ifdef CONFIG_KVM_AMD_SEV
struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
@@ -271,7 +271,7 @@ static inline bool vmcb_is_dirty(struct vmcb *vmcb, int bit)
return !test_bit(bit, (unsigned long *)&vmcb->control.clean);
}
-static inline struct vcpu_svm *to_svm(struct kvm_vcpu *vcpu)
+static __always_inline struct vcpu_svm *to_svm(struct kvm_vcpu *vcpu)
{
return container_of(vcpu, struct vcpu_svm, vcpu);
}
diff --git a/arch/x86/kvm/svm/svm_ops.h b/arch/x86/kvm/svm/svm_ops.h
index 22e2b019de37..9430d6437c9f 100644
--- a/arch/x86/kvm/svm/svm_ops.h
+++ b/arch/x86/kvm/svm/svm_ops.h
@@ -56,12 +56,12 @@ static inline void invlpga(unsigned long addr, u32 asid)
* VMSAVE, VMLOAD, etc... is still controlled by the effective address size,
* hence 'unsigned long' instead of 'hpa_t'.
*/
-static inline void vmsave(unsigned long pa)
+static __always_inline void vmsave(unsigned long pa)
{
svm_asm1(vmsave, "a" (pa), "memory");
}
-static inline void vmload(unsigned long pa)
+static __always_inline void vmload(unsigned long pa)
{
svm_asm1(vmload, "a" (pa), "memory");
}
diff --git a/arch/x86/kvm/vmx/evmcs.h b/arch/x86/kvm/vmx/evmcs.h
index 152ab0aa82cf..16731d2cf231 100644
--- a/arch/x86/kvm/vmx/evmcs.h
+++ b/arch/x86/kvm/vmx/evmcs.h
@@ -93,7 +93,7 @@ static __always_inline int get_evmcs_offset(unsigned long field,
return evmcs_field->offset;
}
-static inline void evmcs_write64(unsigned long field, u64 value)
+static __always_inline void evmcs_write64(unsigned long field, u64 value)
{
u16 clean_field;
int offset = get_evmcs_offset(field, &clean_field);
@@ -183,7 +183,7 @@ static inline void evmcs_load(u64 phys_addr)
__init void evmcs_sanitize_exec_ctrls(struct vmcs_config *vmcs_conf);
#else /* !IS_ENABLED(CONFIG_HYPERV) */
-static inline void evmcs_write64(unsigned long field, u64 value) {}
+static __always_inline void evmcs_write64(unsigned long field, u64 value) {}
static inline void evmcs_write32(unsigned long field, u32 value) {}
static inline void evmcs_write16(unsigned long field, u16 value) {}
static inline u64 evmcs_read64(unsigned long field) { return 0; }
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 116b08904ac3..fb9e4ac3df22 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -35,7 +35,7 @@
#include <asm/cpu_device_id.h>
#include <asm/debugreg.h>
#include <asm/desc.h>
-#include <asm/fpu/internal.h>
+#include <asm/fpu/api.h>
#include <asm/idtentry.h>
#include <asm/io.h>
#include <asm/irq_remapping.h>
@@ -5562,9 +5562,13 @@ static int handle_encls(struct kvm_vcpu *vcpu)
static int handle_bus_lock_vmexit(struct kvm_vcpu *vcpu)
{
- vcpu->run->exit_reason = KVM_EXIT_X86_BUS_LOCK;
- vcpu->run->flags |= KVM_RUN_X86_BUS_LOCK;
- return 0;
+ /*
+ * Hardware may or may not set the BUS_LOCK_DETECTED flag on BUS_LOCK
+ * VM-Exits. Unconditionally set the flag here and leave the handling to
+ * vmx_handle_exit().
+ */
+ to_vmx(vcpu)->exit_reason.bus_lock_detected = true;
+ return 1;
}
/*
@@ -6051,9 +6055,8 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath)
int ret = __vmx_handle_exit(vcpu, exit_fastpath);
/*
- * Even when current exit reason is handled by KVM internally, we
- * still need to exit to user space when bus lock detected to inform
- * that there is a bus lock in guest.
+ * Exit to user space when bus lock detected to inform that there is
+ * a bus lock in guest.
*/
if (to_vmx(vcpu)->exit_reason.bus_lock_detected) {
if (ret > 0)
@@ -6302,18 +6305,13 @@ static int vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu)
/*
* If we are running L2 and L1 has a new pending interrupt
- * which can be injected, we should re-evaluate
- * what should be done with this new L1 interrupt.
- * If L1 intercepts external-interrupts, we should
- * exit from L2 to L1. Otherwise, interrupt should be
- * delivered directly to L2.
+ * which can be injected, this may cause a vmexit or it may
+ * be injected into L2. Either way, this interrupt will be
+ * processed via KVM_REQ_EVENT, not RVI, because we do not use
+ * virtual interrupt delivery to inject L1 interrupts into L2.
*/
- if (is_guest_mode(vcpu) && max_irr_updated) {
- if (nested_exit_on_intr(vcpu))
- kvm_vcpu_exiting_guest_mode(vcpu);
- else
- kvm_make_request(KVM_REQ_EVENT, vcpu);
- }
+ if (is_guest_mode(vcpu) && max_irr_updated)
+ kvm_make_request(KVM_REQ_EVENT, vcpu);
} else {
max_irr = kvm_lapic_find_highest_irr(vcpu);
}
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index aabd3a2ec1bc..2686f2edb47c 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -68,7 +68,9 @@
#include <asm/mce.h>
#include <asm/pkru.h>
#include <linux/kernel_stat.h>
-#include <asm/fpu/internal.h> /* Ugh! */
+#include <asm/fpu/api.h>
+#include <asm/fpu/xcr.h>
+#include <asm/fpu/xstate.h>
#include <asm/pvclock.h>
#include <asm/div64.h>
#include <asm/irq_remapping.h>
@@ -293,8 +295,6 @@ u64 __read_mostly host_xcr0;
u64 __read_mostly supported_xcr0;
EXPORT_SYMBOL_GPL(supported_xcr0);
-static struct kmem_cache *x86_fpu_cache;
-
static struct kmem_cache *x86_emulator_cache;
/*
@@ -2542,7 +2542,7 @@ static void kvm_synchronize_tsc(struct kvm_vcpu *vcpu, u64 data)
kvm_vcpu_write_tsc_offset(vcpu, offset);
raw_spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags);
- spin_lock_irqsave(&kvm->arch.pvclock_gtod_sync_lock, flags);
+ raw_spin_lock_irqsave(&kvm->arch.pvclock_gtod_sync_lock, flags);
if (!matched) {
kvm->arch.nr_vcpus_matched_tsc = 0;
} else if (!already_matched) {
@@ -2550,7 +2550,7 @@ static void kvm_synchronize_tsc(struct kvm_vcpu *vcpu, u64 data)
}
kvm_track_tsc_matching(vcpu);
- spin_unlock_irqrestore(&kvm->arch.pvclock_gtod_sync_lock, flags);
+ raw_spin_unlock_irqrestore(&kvm->arch.pvclock_gtod_sync_lock, flags);
}
static inline void adjust_tsc_offset_guest(struct kvm_vcpu *vcpu,
@@ -2780,9 +2780,9 @@ static void kvm_gen_update_masterclock(struct kvm *kvm)
kvm_make_mclock_inprogress_request(kvm);
/* no guest entries from this point */
- spin_lock_irqsave(&ka->pvclock_gtod_sync_lock, flags);
+ raw_spin_lock_irqsave(&ka->pvclock_gtod_sync_lock, flags);
pvclock_update_vm_gtod_copy(kvm);
- spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags);
+ raw_spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags);
kvm_for_each_vcpu(i, vcpu, kvm)
kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
@@ -2800,15 +2800,15 @@ u64 get_kvmclock_ns(struct kvm *kvm)
unsigned long flags;
u64 ret;
- spin_lock_irqsave(&ka->pvclock_gtod_sync_lock, flags);
+ raw_spin_lock_irqsave(&ka->pvclock_gtod_sync_lock, flags);
if (!ka->use_master_clock) {
- spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags);
+ raw_spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags);
return get_kvmclock_base_ns() + ka->kvmclock_offset;
}
hv_clock.tsc_timestamp = ka->master_cycle_now;
hv_clock.system_time = ka->master_kernel_ns + ka->kvmclock_offset;
- spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags);
+ raw_spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags);
/* both __this_cpu_read() and rdtsc() should be on the same cpu */
get_cpu();
@@ -2902,13 +2902,13 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
* If the host uses TSC clock, then passthrough TSC as stable
* to the guest.
*/
- spin_lock_irqsave(&ka->pvclock_gtod_sync_lock, flags);
+ raw_spin_lock_irqsave(&ka->pvclock_gtod_sync_lock, flags);
use_master_clock = ka->use_master_clock;
if (use_master_clock) {
host_tsc = ka->master_cycle_now;
kernel_ns = ka->master_kernel_ns;
}
- spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags);
+ raw_spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags);
/* Keep irq disabled to prevent changes to the clock */
local_irq_save(flags);
@@ -4700,144 +4700,27 @@ static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu,
return 0;
}
-#define XSTATE_COMPACTION_ENABLED (1ULL << 63)
-
-static void fill_xsave(u8 *dest, struct kvm_vcpu *vcpu)
-{
- struct xregs_state *xsave = &vcpu->arch.guest_fpu->state.xsave;
- u64 xstate_bv = xsave->header.xfeatures;
- u64 valid;
-
- /*
- * Copy legacy XSAVE area, to avoid complications with CPUID
- * leaves 0 and 1 in the loop below.
- */
- memcpy(dest, xsave, XSAVE_HDR_OFFSET);
-
- /* Set XSTATE_BV */
- xstate_bv &= vcpu->arch.guest_supported_xcr0 | XFEATURE_MASK_FPSSE;
- *(u64 *)(dest + XSAVE_HDR_OFFSET) = xstate_bv;
-
- /*
- * Copy each region from the possibly compacted offset to the
- * non-compacted offset.
- */
- valid = xstate_bv & ~XFEATURE_MASK_FPSSE;
- while (valid) {
- u32 size, offset, ecx, edx;
- u64 xfeature_mask = valid & -valid;
- int xfeature_nr = fls64(xfeature_mask) - 1;
- void *src;
-
- cpuid_count(XSTATE_CPUID, xfeature_nr,
- &size, &offset, &ecx, &edx);
-
- if (xfeature_nr == XFEATURE_PKRU) {
- memcpy(dest + offset, &vcpu->arch.pkru,
- sizeof(vcpu->arch.pkru));
- } else {
- src = get_xsave_addr(xsave, xfeature_nr);
- if (src)
- memcpy(dest + offset, src, size);
- }
-
- valid -= xfeature_mask;
- }
-}
-
-static void load_xsave(struct kvm_vcpu *vcpu, u8 *src)
-{
- struct xregs_state *xsave = &vcpu->arch.guest_fpu->state.xsave;
- u64 xstate_bv = *(u64 *)(src + XSAVE_HDR_OFFSET);
- u64 valid;
-
- /*
- * Copy legacy XSAVE area, to avoid complications with CPUID
- * leaves 0 and 1 in the loop below.
- */
- memcpy(xsave, src, XSAVE_HDR_OFFSET);
-
- /* Set XSTATE_BV and possibly XCOMP_BV. */
- xsave->header.xfeatures = xstate_bv;
- if (boot_cpu_has(X86_FEATURE_XSAVES))
- xsave->header.xcomp_bv = host_xcr0 | XSTATE_COMPACTION_ENABLED;
-
- /*
- * Copy each region from the non-compacted offset to the
- * possibly compacted offset.
- */
- valid = xstate_bv & ~XFEATURE_MASK_FPSSE;
- while (valid) {
- u32 size, offset, ecx, edx;
- u64 xfeature_mask = valid & -valid;
- int xfeature_nr = fls64(xfeature_mask) - 1;
-
- cpuid_count(XSTATE_CPUID, xfeature_nr,
- &size, &offset, &ecx, &edx);
-
- if (xfeature_nr == XFEATURE_PKRU) {
- memcpy(&vcpu->arch.pkru, src + offset,
- sizeof(vcpu->arch.pkru));
- } else {
- void *dest = get_xsave_addr(xsave, xfeature_nr);
-
- if (dest)
- memcpy(dest, src + offset, size);
- }
-
- valid -= xfeature_mask;
- }
-}
-
static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu,
struct kvm_xsave *guest_xsave)
{
- if (!vcpu->arch.guest_fpu)
+ if (fpstate_is_confidential(&vcpu->arch.guest_fpu))
return;
- if (boot_cpu_has(X86_FEATURE_XSAVE)) {
- memset(guest_xsave, 0, sizeof(struct kvm_xsave));
- fill_xsave((u8 *) guest_xsave->region, vcpu);
- } else {
- memcpy(guest_xsave->region,
- &vcpu->arch.guest_fpu->state.fxsave,
- sizeof(struct fxregs_state));
- *(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)] =
- XFEATURE_MASK_FPSSE;
- }
+ fpu_copy_guest_fpstate_to_uabi(&vcpu->arch.guest_fpu,
+ guest_xsave->region,
+ sizeof(guest_xsave->region),
+ vcpu->arch.pkru);
}
-#define XSAVE_MXCSR_OFFSET 24
-
static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu,
struct kvm_xsave *guest_xsave)
{
- u64 xstate_bv;
- u32 mxcsr;
-
- if (!vcpu->arch.guest_fpu)
+ if (fpstate_is_confidential(&vcpu->arch.guest_fpu))
return 0;
- xstate_bv = *(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)];
- mxcsr = *(u32 *)&guest_xsave->region[XSAVE_MXCSR_OFFSET / sizeof(u32)];
-
- if (boot_cpu_has(X86_FEATURE_XSAVE)) {
- /*
- * Here we allow setting states that are not present in
- * CPUID leaf 0xD, index 0, EDX:EAX. This is for compatibility
- * with old userspace.
- */
- if (xstate_bv & ~supported_xcr0 || mxcsr & ~mxcsr_feature_mask)
- return -EINVAL;
- load_xsave(vcpu, (u8 *)guest_xsave->region);
- } else {
- if (xstate_bv & ~XFEATURE_MASK_FPSSE ||
- mxcsr & ~mxcsr_feature_mask)
- return -EINVAL;
- memcpy(&vcpu->arch.guest_fpu->state.fxsave,
- guest_xsave->region, sizeof(struct fxregs_state));
- }
- return 0;
+ return fpu_copy_uabi_to_guest_fpstate(&vcpu->arch.guest_fpu,
+ guest_xsave->region,
+ supported_xcr0, &vcpu->arch.pkru);
}
static void kvm_vcpu_ioctl_x86_get_xcrs(struct kvm_vcpu *vcpu,
@@ -6100,13 +5983,13 @@ set_pit2_out:
* is slightly ahead) here we risk going negative on unsigned
* 'system_time' when 'user_ns.clock' is very small.
*/
- spin_lock_irq(&ka->pvclock_gtod_sync_lock);
+ raw_spin_lock_irq(&ka->pvclock_gtod_sync_lock);
if (kvm->arch.use_master_clock)
now_ns = ka->master_kernel_ns;
else
now_ns = get_kvmclock_base_ns();
ka->kvmclock_offset = user_ns.clock - now_ns;
- spin_unlock_irq(&ka->pvclock_gtod_sync_lock);
+ raw_spin_unlock_irq(&ka->pvclock_gtod_sync_lock);
kvm_make_all_cpus_request(kvm, KVM_REQ_CLOCK_UPDATE);
break;
@@ -6906,7 +6789,7 @@ static int kernel_pio(struct kvm_vcpu *vcpu, void *pd)
}
static int emulator_pio_in_out(struct kvm_vcpu *vcpu, int size,
- unsigned short port, void *val,
+ unsigned short port,
unsigned int count, bool in)
{
vcpu->arch.pio.port = port;
@@ -6914,10 +6797,8 @@ static int emulator_pio_in_out(struct kvm_vcpu *vcpu, int size,
vcpu->arch.pio.count = count;
vcpu->arch.pio.size = size;
- if (!kernel_pio(vcpu, vcpu->arch.pio_data)) {
- vcpu->arch.pio.count = 0;
+ if (!kernel_pio(vcpu, vcpu->arch.pio_data))
return 1;
- }
vcpu->run->exit_reason = KVM_EXIT_IO;
vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT;
@@ -6929,26 +6810,39 @@ static int emulator_pio_in_out(struct kvm_vcpu *vcpu, int size,
return 0;
}
-static int emulator_pio_in(struct kvm_vcpu *vcpu, int size,
- unsigned short port, void *val, unsigned int count)
+static int __emulator_pio_in(struct kvm_vcpu *vcpu, int size,
+ unsigned short port, unsigned int count)
{
- int ret;
+ WARN_ON(vcpu->arch.pio.count);
+ memset(vcpu->arch.pio_data, 0, size * count);
+ return emulator_pio_in_out(vcpu, size, port, count, true);
+}
- if (vcpu->arch.pio.count)
- goto data_avail;
+static void complete_emulator_pio_in(struct kvm_vcpu *vcpu, void *val)
+{
+ int size = vcpu->arch.pio.size;
+ unsigned count = vcpu->arch.pio.count;
+ memcpy(val, vcpu->arch.pio_data, size * count);
+ trace_kvm_pio(KVM_PIO_IN, vcpu->arch.pio.port, size, count, vcpu->arch.pio_data);
+ vcpu->arch.pio.count = 0;
+}
- memset(vcpu->arch.pio_data, 0, size * count);
+static int emulator_pio_in(struct kvm_vcpu *vcpu, int size,
+ unsigned short port, void *val, unsigned int count)
+{
+ if (vcpu->arch.pio.count) {
+ /* Complete previous iteration. */
+ } else {
+ int r = __emulator_pio_in(vcpu, size, port, count);
+ if (!r)
+ return r;
- ret = emulator_pio_in_out(vcpu, size, port, val, count, true);
- if (ret) {
-data_avail:
- memcpy(val, vcpu->arch.pio_data, size * count);
- trace_kvm_pio(KVM_PIO_IN, port, size, count, vcpu->arch.pio_data);
- vcpu->arch.pio.count = 0;
- return 1;
+ /* Results already available, fall through. */
}
- return 0;
+ WARN_ON(count != vcpu->arch.pio.count);
+ complete_emulator_pio_in(vcpu, val);
+ return 1;
}
static int emulator_pio_in_emulated(struct x86_emulate_ctxt *ctxt,
@@ -6963,9 +6857,15 @@ static int emulator_pio_out(struct kvm_vcpu *vcpu, int size,
unsigned short port, const void *val,
unsigned int count)
{
+ int ret;
+
memcpy(vcpu->arch.pio_data, val, size * count);
trace_kvm_pio(KVM_PIO_OUT, port, size, count, vcpu->arch.pio_data);
- return emulator_pio_in_out(vcpu, size, port, (void *)val, count, false);
+ ret = emulator_pio_in_out(vcpu, size, port, count, false);
+ if (ret)
+ vcpu->arch.pio.count = 0;
+
+ return ret;
}
static int emulator_pio_out_emulated(struct x86_emulate_ctxt *ctxt,
@@ -8139,9 +8039,9 @@ static void kvm_hyperv_tsc_notifier(void)
list_for_each_entry(kvm, &vm_list, vm_list) {
struct kvm_arch *ka = &kvm->arch;
- spin_lock_irqsave(&ka->pvclock_gtod_sync_lock, flags);
+ raw_spin_lock_irqsave(&ka->pvclock_gtod_sync_lock, flags);
pvclock_update_vm_gtod_copy(kvm);
- spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags);
+ raw_spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags);
kvm_for_each_vcpu(cpu, vcpu, kvm)
kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
@@ -8417,18 +8317,11 @@ int kvm_arch_init(void *opaque)
}
r = -ENOMEM;
- x86_fpu_cache = kmem_cache_create("x86_fpu", sizeof(struct fpu),
- __alignof__(struct fpu), SLAB_ACCOUNT,
- NULL);
- if (!x86_fpu_cache) {
- printk(KERN_ERR "kvm: failed to allocate cache for x86 fpu\n");
- goto out;
- }
x86_emulator_cache = kvm_alloc_emulator_cache();
if (!x86_emulator_cache) {
pr_err("kvm: failed to allocate cache for x86 emulator\n");
- goto out_free_x86_fpu_cache;
+ goto out;
}
user_return_msrs = alloc_percpu(struct kvm_user_return_msrs);
@@ -8466,8 +8359,6 @@ out_free_percpu:
free_percpu(user_return_msrs);
out_free_x86_emulator_cache:
kmem_cache_destroy(x86_emulator_cache);
-out_free_x86_fpu_cache:
- kmem_cache_destroy(x86_fpu_cache);
out:
return r;
}
@@ -8494,7 +8385,6 @@ void kvm_arch_exit(void)
kvm_mmu_module_exit();
free_percpu(user_return_msrs);
kmem_cache_destroy(x86_emulator_cache);
- kmem_cache_destroy(x86_fpu_cache);
#ifdef CONFIG_KVM_XEN
static_key_deferred_flush(&kvm_xen_enabled);
WARN_ON(static_branch_unlikely(&kvm_xen_enabled.key));
@@ -8783,9 +8673,17 @@ static void post_kvm_run_save(struct kvm_vcpu *vcpu)
kvm_run->cr8 = kvm_get_cr8(vcpu);
kvm_run->apic_base = kvm_get_apic_base(vcpu);
+
+ /*
+ * The call to kvm_ready_for_interrupt_injection() may end up in
+ * kvm_xen_has_interrupt() which may require the srcu lock to be
+ * held, to protect against changes in the vcpu_info address.
+ */
+ vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
kvm_run->ready_for_interrupt_injection =
pic_in_kernel(vcpu->kvm) ||
kvm_vcpu_ready_for_interrupt_injection(vcpu);
+ srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
if (is_smm(vcpu))
kvm_run->flags |= KVM_RUN_X86_SMM;
@@ -9643,14 +9541,14 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
if (likely(exit_fastpath != EXIT_FASTPATH_REENTER_GUEST))
break;
- if (unlikely(kvm_vcpu_exit_request(vcpu))) {
+ if (vcpu->arch.apicv_active)
+ static_call(kvm_x86_sync_pir_to_irr)(vcpu);
+
+ if (unlikely(kvm_vcpu_exit_request(vcpu))) {
exit_fastpath = EXIT_FASTPATH_EXIT_HANDLED;
break;
}
-
- if (vcpu->arch.apicv_active)
- static_call(kvm_x86_sync_pir_to_irr)(vcpu);
- }
+ }
/*
* Do this here before restoring debug registers on the host. And
@@ -9913,58 +9811,21 @@ static int complete_emulated_mmio(struct kvm_vcpu *vcpu)
return 0;
}
-static void kvm_save_current_fpu(struct fpu *fpu)
-{
- /*
- * If the target FPU state is not resident in the CPU registers, just
- * memcpy() from current, else save CPU state directly to the target.
- */
- if (test_thread_flag(TIF_NEED_FPU_LOAD))
- memcpy(&fpu->state, &current->thread.fpu.state,
- fpu_kernel_xstate_size);
- else
- save_fpregs_to_fpstate(fpu);
-}
-
/* Swap (qemu) user FPU context for the guest FPU context. */
static void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
{
- fpregs_lock();
-
- kvm_save_current_fpu(vcpu->arch.user_fpu);
-
/*
- * Guests with protected state can't have it set by the hypervisor,
- * so skip trying to set it.
+ * Exclude PKRU from restore as restored separately in
+ * kvm_x86_ops.run().
*/
- if (vcpu->arch.guest_fpu)
- /* PKRU is separately restored in kvm_x86_ops.run. */
- __restore_fpregs_from_fpstate(&vcpu->arch.guest_fpu->state,
- ~XFEATURE_MASK_PKRU);
-
- fpregs_mark_activate();
- fpregs_unlock();
-
+ fpu_swap_kvm_fpstate(&vcpu->arch.guest_fpu, true);
trace_kvm_fpu(1);
}
/* When vcpu_run ends, restore user space FPU context. */
static void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
{
- fpregs_lock();
-
- /*
- * Guests with protected state can't have it read by the hypervisor,
- * so skip trying to save it.
- */
- if (vcpu->arch.guest_fpu)
- kvm_save_current_fpu(vcpu->arch.guest_fpu);
-
- restore_fpregs_from_fpstate(&vcpu->arch.user_fpu->state);
-
- fpregs_mark_activate();
- fpregs_unlock();
-
+ fpu_swap_kvm_fpstate(&vcpu->arch.guest_fpu, false);
++vcpu->stat.fpu_reload;
trace_kvm_fpu(0);
}
@@ -10545,12 +10406,12 @@ int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
struct fxregs_state *fxsave;
- if (!vcpu->arch.guest_fpu)
+ if (fpstate_is_confidential(&vcpu->arch.guest_fpu))
return 0;
vcpu_load(vcpu);
- fxsave = &vcpu->arch.guest_fpu->state.fxsave;
+ fxsave = &vcpu->arch.guest_fpu.fpstate->regs.fxsave;
memcpy(fpu->fpr, fxsave->st_space, 128);
fpu->fcw = fxsave->cwd;
fpu->fsw = fxsave->swd;
@@ -10568,12 +10429,12 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
struct fxregs_state *fxsave;
- if (!vcpu->arch.guest_fpu)
+ if (fpstate_is_confidential(&vcpu->arch.guest_fpu))
return 0;
vcpu_load(vcpu);
- fxsave = &vcpu->arch.guest_fpu->state.fxsave;
+ fxsave = &vcpu->arch.guest_fpu.fpstate->regs.fxsave;
memcpy(fxsave->st_space, fpu->fpr, 128);
fxsave->cwd = fpu->fcw;
@@ -10626,14 +10487,6 @@ static int sync_regs(struct kvm_vcpu *vcpu)
static void fx_init(struct kvm_vcpu *vcpu)
{
- if (!vcpu->arch.guest_fpu)
- return;
-
- fpstate_init(&vcpu->arch.guest_fpu->state);
- if (boot_cpu_has(X86_FEATURE_XSAVES))
- vcpu->arch.guest_fpu->state.xsave.header.xcomp_bv =
- host_xcr0 | XSTATE_COMPACTION_ENABLED;
-
/*
* Ensure guest xcr0 is valid for loading
*/
@@ -10642,15 +10495,6 @@ static void fx_init(struct kvm_vcpu *vcpu)
vcpu->arch.cr0 |= X86_CR0_ET;
}
-void kvm_free_guest_fpu(struct kvm_vcpu *vcpu)
-{
- if (vcpu->arch.guest_fpu) {
- kmem_cache_free(x86_fpu_cache, vcpu->arch.guest_fpu);
- vcpu->arch.guest_fpu = NULL;
- }
-}
-EXPORT_SYMBOL_GPL(kvm_free_guest_fpu);
-
int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
{
if (kvm_check_tsc_unstable() && atomic_read(&kvm->online_vcpus) != 0)
@@ -10707,19 +10551,11 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
if (!alloc_emulate_ctxt(vcpu))
goto free_wbinvd_dirty_mask;
- vcpu->arch.user_fpu = kmem_cache_zalloc(x86_fpu_cache,
- GFP_KERNEL_ACCOUNT);
- if (!vcpu->arch.user_fpu) {
- pr_err("kvm: failed to allocate userspace's fpu\n");
+ if (!fpu_alloc_guest_fpstate(&vcpu->arch.guest_fpu)) {
+ pr_err("kvm: failed to allocate vcpu's fpu\n");
goto free_emulate_ctxt;
}
- vcpu->arch.guest_fpu = kmem_cache_zalloc(x86_fpu_cache,
- GFP_KERNEL_ACCOUNT);
- if (!vcpu->arch.guest_fpu) {
- pr_err("kvm: failed to allocate vcpu's fpu\n");
- goto free_user_fpu;
- }
fx_init(vcpu);
vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu);
@@ -10752,9 +10588,7 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
return 0;
free_guest_fpu:
- kvm_free_guest_fpu(vcpu);
-free_user_fpu:
- kmem_cache_free(x86_fpu_cache, vcpu->arch.user_fpu);
+ fpu_free_guest_fpstate(&vcpu->arch.guest_fpu);
free_emulate_ctxt:
kmem_cache_free(x86_emulator_cache, vcpu->arch.emulate_ctxt);
free_wbinvd_dirty_mask:
@@ -10803,8 +10637,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
kmem_cache_free(x86_emulator_cache, vcpu->arch.emulate_ctxt);
free_cpumask_var(vcpu->arch.wbinvd_dirty_mask);
- kmem_cache_free(x86_fpu_cache, vcpu->arch.user_fpu);
- kvm_free_guest_fpu(vcpu);
+ fpu_free_guest_fpstate(&vcpu->arch.guest_fpu);
kvm_hv_vcpu_uninit(vcpu);
kvm_pmu_destroy(vcpu);
@@ -10856,8 +10689,8 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
kvm_async_pf_hash_reset(vcpu);
vcpu->arch.apf.halted = false;
- if (vcpu->arch.guest_fpu && kvm_mpx_supported()) {
- void *mpx_state_buffer;
+ if (vcpu->arch.guest_fpu.fpstate && kvm_mpx_supported()) {
+ struct fpstate *fpstate = vcpu->arch.guest_fpu.fpstate;
/*
* To avoid have the INIT path from kvm_apic_has_events() that be
@@ -10865,14 +10698,10 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
*/
if (init_event)
kvm_put_guest_fpu(vcpu);
- mpx_state_buffer = get_xsave_addr(&vcpu->arch.guest_fpu->state.xsave,
- XFEATURE_BNDREGS);
- if (mpx_state_buffer)
- memset(mpx_state_buffer, 0, sizeof(struct mpx_bndreg_state));
- mpx_state_buffer = get_xsave_addr(&vcpu->arch.guest_fpu->state.xsave,
- XFEATURE_BNDCSR);
- if (mpx_state_buffer)
- memset(mpx_state_buffer, 0, sizeof(struct mpx_bndcsr));
+
+ fpstate_clear_xstate_component(fpstate, XFEATURE_BNDREGS);
+ fpstate_clear_xstate_component(fpstate, XFEATURE_BNDCSR);
+
if (init_event)
kvm_load_guest_fpu(vcpu);
}
@@ -11182,7 +11011,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
raw_spin_lock_init(&kvm->arch.tsc_write_lock);
mutex_init(&kvm->arch.apic_map_lock);
- spin_lock_init(&kvm->arch.pvclock_gtod_sync_lock);
+ raw_spin_lock_init(&kvm->arch.pvclock_gtod_sync_lock);
kvm->arch.kvmclock_offset = -get_kvmclock_base_ns();
pvclock_update_vm_gtod_copy(kvm);
@@ -11392,7 +11221,8 @@ static int memslot_rmap_alloc(struct kvm_memory_slot *slot,
int level = i + 1;
int lpages = __kvm_mmu_slot_lpages(slot, npages, level);
- WARN_ON(slot->arch.rmap[i]);
+ if (slot->arch.rmap[i])
+ continue;
slot->arch.rmap[i] = kvcalloc(lpages, sz, GFP_KERNEL_ACCOUNT);
if (!slot->arch.rmap[i]) {
@@ -12367,44 +12197,81 @@ int kvm_sev_es_mmio_read(struct kvm_vcpu *vcpu, gpa_t gpa, unsigned int bytes,
}
EXPORT_SYMBOL_GPL(kvm_sev_es_mmio_read);
-static int complete_sev_es_emulated_ins(struct kvm_vcpu *vcpu)
+static int kvm_sev_es_outs(struct kvm_vcpu *vcpu, unsigned int size,
+ unsigned int port);
+
+static int complete_sev_es_emulated_outs(struct kvm_vcpu *vcpu)
{
- memcpy(vcpu->arch.guest_ins_data, vcpu->arch.pio_data,
- vcpu->arch.pio.count * vcpu->arch.pio.size);
- vcpu->arch.pio.count = 0;
+ int size = vcpu->arch.pio.size;
+ int port = vcpu->arch.pio.port;
+ vcpu->arch.pio.count = 0;
+ if (vcpu->arch.sev_pio_count)
+ return kvm_sev_es_outs(vcpu, size, port);
return 1;
}
static int kvm_sev_es_outs(struct kvm_vcpu *vcpu, unsigned int size,
- unsigned int port, void *data, unsigned int count)
+ unsigned int port)
{
- int ret;
-
- ret = emulator_pio_out_emulated(vcpu->arch.emulate_ctxt, size, port,
- data, count);
- if (ret)
- return ret;
+ for (;;) {
+ unsigned int count =
+ min_t(unsigned int, PAGE_SIZE / size, vcpu->arch.sev_pio_count);
+ int ret = emulator_pio_out(vcpu, size, port, vcpu->arch.sev_pio_data, count);
+
+ /* memcpy done already by emulator_pio_out. */
+ vcpu->arch.sev_pio_count -= count;
+ vcpu->arch.sev_pio_data += count * vcpu->arch.pio.size;
+ if (!ret)
+ break;
- vcpu->arch.pio.count = 0;
+ /* Emulation done by the kernel. */
+ if (!vcpu->arch.sev_pio_count)
+ return 1;
+ }
+ vcpu->arch.complete_userspace_io = complete_sev_es_emulated_outs;
return 0;
}
static int kvm_sev_es_ins(struct kvm_vcpu *vcpu, unsigned int size,
- unsigned int port, void *data, unsigned int count)
+ unsigned int port);
+
+static void advance_sev_es_emulated_ins(struct kvm_vcpu *vcpu)
{
- int ret;
+ unsigned count = vcpu->arch.pio.count;
+ complete_emulator_pio_in(vcpu, vcpu->arch.sev_pio_data);
+ vcpu->arch.sev_pio_count -= count;
+ vcpu->arch.sev_pio_data += count * vcpu->arch.pio.size;
+}
- ret = emulator_pio_in_emulated(vcpu->arch.emulate_ctxt, size, port,
- data, count);
- if (ret) {
- vcpu->arch.pio.count = 0;
- } else {
- vcpu->arch.guest_ins_data = data;
- vcpu->arch.complete_userspace_io = complete_sev_es_emulated_ins;
+static int complete_sev_es_emulated_ins(struct kvm_vcpu *vcpu)
+{
+ int size = vcpu->arch.pio.size;
+ int port = vcpu->arch.pio.port;
+
+ advance_sev_es_emulated_ins(vcpu);
+ if (vcpu->arch.sev_pio_count)
+ return kvm_sev_es_ins(vcpu, size, port);
+ return 1;
+}
+
+static int kvm_sev_es_ins(struct kvm_vcpu *vcpu, unsigned int size,
+ unsigned int port)
+{
+ for (;;) {
+ unsigned int count =
+ min_t(unsigned int, PAGE_SIZE / size, vcpu->arch.sev_pio_count);
+ if (!__emulator_pio_in(vcpu, size, port, count))
+ break;
+
+ /* Emulation done by the kernel. */
+ advance_sev_es_emulated_ins(vcpu);
+ if (!vcpu->arch.sev_pio_count)
+ return 1;
}
+ vcpu->arch.complete_userspace_io = complete_sev_es_emulated_ins;
return 0;
}
@@ -12412,8 +12279,10 @@ int kvm_sev_es_string_io(struct kvm_vcpu *vcpu, unsigned int size,
unsigned int port, void *data, unsigned int count,
int in)
{
- return in ? kvm_sev_es_ins(vcpu, size, port, data, count)
- : kvm_sev_es_outs(vcpu, size, port, data, count);
+ vcpu->arch.sev_pio_data = data;
+ vcpu->arch.sev_pio_count = count;
+ return in ? kvm_sev_es_ins(vcpu, size, port)
+ : kvm_sev_es_outs(vcpu, size, port);
}
EXPORT_SYMBOL_GPL(kvm_sev_es_string_io);
diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c
index 9ea9c3dabe37..8f62baebd028 100644
--- a/arch/x86/kvm/xen.c
+++ b/arch/x86/kvm/xen.c
@@ -190,6 +190,7 @@ void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, int state)
int __kvm_xen_has_interrupt(struct kvm_vcpu *v)
{
+ int err;
u8 rc = 0;
/*
@@ -216,13 +217,29 @@ int __kvm_xen_has_interrupt(struct kvm_vcpu *v)
if (likely(slots->generation == ghc->generation &&
!kvm_is_error_hva(ghc->hva) && ghc->memslot)) {
/* Fast path */
- __get_user(rc, (u8 __user *)ghc->hva + offset);
- } else {
- /* Slow path */
- kvm_read_guest_offset_cached(v->kvm, ghc, &rc, offset,
- sizeof(rc));
+ pagefault_disable();
+ err = __get_user(rc, (u8 __user *)ghc->hva + offset);
+ pagefault_enable();
+ if (!err)
+ return rc;
}
+ /* Slow path */
+
+ /*
+ * This function gets called from kvm_vcpu_block() after setting the
+ * task to TASK_INTERRUPTIBLE, to see if it needs to wake immediately
+ * from a HLT. So we really mustn't sleep. If the page ended up absent
+ * at that point, just return 1 in order to trigger an immediate wake,
+ * and we'll end up getting called again from a context where we *can*
+ * fault in the page and wait for it.
+ */
+ if (in_atomic() || !task_is_running(current))
+ return 1;
+
+ kvm_read_guest_offset_cached(v->kvm, ghc, &rc, offset,
+ sizeof(rc));
+
return rc;
}
diff --git a/arch/x86/lib/copy_mc_64.S b/arch/x86/lib/copy_mc_64.S
index e5f77e293034..7334055157ba 100644
--- a/arch/x86/lib/copy_mc_64.S
+++ b/arch/x86/lib/copy_mc_64.S
@@ -107,9 +107,9 @@ SYM_FUNC_END(copy_mc_fragile)
.previous
- _ASM_EXTABLE_FAULT(.L_read_leading_bytes, .E_leading_bytes)
- _ASM_EXTABLE_FAULT(.L_read_words, .E_read_words)
- _ASM_EXTABLE_FAULT(.L_read_trailing_bytes, .E_trailing_bytes)
+ _ASM_EXTABLE_TYPE(.L_read_leading_bytes, .E_leading_bytes, EX_TYPE_DEFAULT_MCE_SAFE)
+ _ASM_EXTABLE_TYPE(.L_read_words, .E_read_words, EX_TYPE_DEFAULT_MCE_SAFE)
+ _ASM_EXTABLE_TYPE(.L_read_trailing_bytes, .E_trailing_bytes, EX_TYPE_DEFAULT_MCE_SAFE)
_ASM_EXTABLE(.L_write_leading_bytes, .E_leading_bytes)
_ASM_EXTABLE(.L_write_words, .E_write_words)
_ASM_EXTABLE(.L_write_trailing_bytes, .E_trailing_bytes)
@@ -149,5 +149,5 @@ SYM_FUNC_END(copy_mc_enhanced_fast_string)
.previous
- _ASM_EXTABLE_FAULT(.L_copy, .E_copy)
+ _ASM_EXTABLE_TYPE(.L_copy, .E_copy, EX_TYPE_DEFAULT_MCE_SAFE)
#endif /* !CONFIG_UML */
diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S
index 57b79c577496..2797e630b9b1 100644
--- a/arch/x86/lib/copy_user_64.S
+++ b/arch/x86/lib/copy_user_64.S
@@ -234,24 +234,11 @@ EXPORT_SYMBOL(copy_user_enhanced_fast_string)
*/
SYM_CODE_START_LOCAL(.Lcopy_user_handle_tail)
movl %edx,%ecx
- cmp $X86_TRAP_MC,%eax /* check if X86_TRAP_MC */
- je 3f
1: rep movsb
2: mov %ecx,%eax
ASM_CLAC
ret
- /*
- * Return zero to pretend that this copy succeeded. This
- * is counter-intuitive, but needed to prevent the code
- * in lib/iov_iter.c from retrying and running back into
- * the poison cache line again. The machine check handler
- * will ensure that a SIGBUS is sent to the task.
- */
-3: xorl %eax,%eax
- ASM_CLAC
- ret
-
_ASM_EXTABLE_CPY(1b, 2b)
SYM_CODE_END(.Lcopy_user_handle_tail)
diff --git a/arch/x86/lib/insn.c b/arch/x86/lib/insn.c
index c565def611e2..55e371cc69fd 100644
--- a/arch/x86/lib/insn.c
+++ b/arch/x86/lib/insn.c
@@ -13,6 +13,7 @@
#endif
#include <asm/inat.h> /*__ignore_sync_check__ */
#include <asm/insn.h> /* __ignore_sync_check__ */
+#include <asm/unaligned.h> /* __ignore_sync_check__ */
#include <linux/errno.h>
#include <linux/kconfig.h>
@@ -37,10 +38,10 @@
((insn)->next_byte + sizeof(t) + n <= (insn)->end_kaddr)
#define __get_next(t, insn) \
- ({ t r; memcpy(&r, insn->next_byte, sizeof(t)); insn->next_byte += sizeof(t); leXX_to_cpu(t, r); })
+ ({ t r = get_unaligned((t *)(insn)->next_byte); (insn)->next_byte += sizeof(t); leXX_to_cpu(t, r); })
#define __peek_nbyte_next(t, insn, n) \
- ({ t r; memcpy(&r, (insn)->next_byte + n, sizeof(t)); leXX_to_cpu(t, r); })
+ ({ t r = get_unaligned((t *)(insn)->next_byte + n); leXX_to_cpu(t, r); })
#define get_next(t, insn) \
({ if (unlikely(!validate_next(t, insn, 0))) goto err_out; __get_next(t, insn); })
diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S
index ec9922cba30a..cf0b39f97adc 100644
--- a/arch/x86/lib/retpoline.S
+++ b/arch/x86/lib/retpoline.S
@@ -28,46 +28,14 @@
.macro THUNK reg
- .align 32
-
-SYM_FUNC_START(__x86_indirect_thunk_\reg)
+ .align RETPOLINE_THUNK_SIZE
+SYM_INNER_LABEL(__x86_indirect_thunk_\reg, SYM_L_GLOBAL)
+ UNWIND_HINT_EMPTY
ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), \
__stringify(RETPOLINE \reg), X86_FEATURE_RETPOLINE, \
__stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), X86_FEATURE_RETPOLINE_AMD
-SYM_FUNC_END(__x86_indirect_thunk_\reg)
-
-.endm
-
-/*
- * This generates .altinstr_replacement symbols for use by objtool. They,
- * however, must not actually live in .altinstr_replacement since that will be
- * discarded after init, but module alternatives will also reference these
- * symbols.
- *
- * Their names matches the "__x86_indirect_" prefix to mark them as retpolines.
- */
-.macro ALT_THUNK reg
-
- .align 1
-
-SYM_FUNC_START_NOALIGN(__x86_indirect_alt_call_\reg)
- ANNOTATE_RETPOLINE_SAFE
-1: call *%\reg
-2: .skip 5-(2b-1b), 0x90
-SYM_FUNC_END(__x86_indirect_alt_call_\reg)
-
-STACK_FRAME_NON_STANDARD(__x86_indirect_alt_call_\reg)
-
-SYM_FUNC_START_NOALIGN(__x86_indirect_alt_jmp_\reg)
- ANNOTATE_RETPOLINE_SAFE
-1: jmp *%\reg
-2: .skip 5-(2b-1b), 0x90
-SYM_FUNC_END(__x86_indirect_alt_jmp_\reg)
-
-STACK_FRAME_NON_STANDARD(__x86_indirect_alt_jmp_\reg)
-
.endm
/*
@@ -85,22 +53,16 @@ STACK_FRAME_NON_STANDARD(__x86_indirect_alt_jmp_\reg)
#define __EXPORT_THUNK(sym) _ASM_NOKPROBE(sym); EXPORT_SYMBOL(sym)
#define EXPORT_THUNK(reg) __EXPORT_THUNK(__x86_indirect_thunk_ ## reg)
-#undef GEN
+ .align RETPOLINE_THUNK_SIZE
+SYM_CODE_START(__x86_indirect_thunk_array)
+
#define GEN(reg) THUNK reg
#include <asm/GEN-for-each-reg.h>
-
#undef GEN
-#define GEN(reg) EXPORT_THUNK(reg)
-#include <asm/GEN-for-each-reg.h>
-#undef GEN
-#define GEN(reg) ALT_THUNK reg
-#include <asm/GEN-for-each-reg.h>
+ .align RETPOLINE_THUNK_SIZE
+SYM_CODE_END(__x86_indirect_thunk_array)
-#undef GEN
-#define GEN(reg) __EXPORT_THUNK(__x86_indirect_alt_call_ ## reg)
+#define GEN(reg) EXPORT_THUNK(reg)
#include <asm/GEN-for-each-reg.h>
-
#undef GEN
-#define GEN(reg) __EXPORT_THUNK(__x86_indirect_alt_jmp_ ## reg)
-#include <asm/GEN-for-each-reg.h>
diff --git a/arch/x86/math-emu/fpu_aux.c b/arch/x86/math-emu/fpu_aux.c
index 034748459482..d62662bdd460 100644
--- a/arch/x86/math-emu/fpu_aux.c
+++ b/arch/x86/math-emu/fpu_aux.c
@@ -53,7 +53,7 @@ void fpstate_init_soft(struct swregs_state *soft)
void finit(void)
{
- fpstate_init_soft(&current->thread.fpu.state.soft);
+ fpstate_init_soft(&current->thread.fpu.fpstate->regs.soft);
}
/*
diff --git a/arch/x86/math-emu/fpu_entry.c b/arch/x86/math-emu/fpu_entry.c
index 8679a9d6c47f..7fe56c594aa6 100644
--- a/arch/x86/math-emu/fpu_entry.c
+++ b/arch/x86/math-emu/fpu_entry.c
@@ -31,7 +31,7 @@
#include <linux/uaccess.h>
#include <asm/traps.h>
#include <asm/user.h>
-#include <asm/fpu/internal.h>
+#include <asm/fpu/api.h>
#include "fpu_system.h"
#include "fpu_emu.h"
@@ -640,7 +640,7 @@ int fpregs_soft_set(struct task_struct *target,
unsigned int pos, unsigned int count,
const void *kbuf, const void __user *ubuf)
{
- struct swregs_state *s387 = &target->thread.fpu.state.soft;
+ struct swregs_state *s387 = &target->thread.fpu.fpstate->regs.soft;
void *space = s387->st_space;
int ret;
int offset, other, i, tags, regnr, tag, newtop;
@@ -691,7 +691,7 @@ int fpregs_soft_get(struct task_struct *target,
const struct user_regset *regset,
struct membuf to)
{
- struct swregs_state *s387 = &target->thread.fpu.state.soft;
+ struct swregs_state *s387 = &target->thread.fpu.fpstate->regs.soft;
const void *space = s387->st_space;
int offset = (S387->ftop & 7) * 10, other = 80 - offset;
diff --git a/arch/x86/math-emu/fpu_system.h b/arch/x86/math-emu/fpu_system.h
index 9b41391867dc..eec3e4805c75 100644
--- a/arch/x86/math-emu/fpu_system.h
+++ b/arch/x86/math-emu/fpu_system.h
@@ -73,7 +73,7 @@ static inline bool seg_writable(struct desc_struct *d)
return (d->type & SEG_TYPE_EXECUTE_MASK) == SEG_TYPE_WRITABLE;
}
-#define I387 (&current->thread.fpu.state)
+#define I387 (&current->thread.fpu.fpstate->regs)
#define FPU_info (I387->soft.info)
#define FPU_CS (*(unsigned short *) &(FPU_info->regs->cs))
diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c
index e1664e9f969c..5cd2a88930a9 100644
--- a/arch/x86/mm/extable.c
+++ b/arch/x86/mm/extable.c
@@ -4,46 +4,30 @@
#include <linux/sched/debug.h>
#include <xen/xen.h>
-#include <asm/fpu/internal.h>
+#include <asm/fpu/api.h>
#include <asm/sev.h>
#include <asm/traps.h>
#include <asm/kdebug.h>
-typedef bool (*ex_handler_t)(const struct exception_table_entry *,
- struct pt_regs *, int, unsigned long,
- unsigned long);
-
static inline unsigned long
ex_fixup_addr(const struct exception_table_entry *x)
{
return (unsigned long)&x->fixup + x->fixup;
}
-static inline ex_handler_t
-ex_fixup_handler(const struct exception_table_entry *x)
-{
- return (ex_handler_t)((unsigned long)&x->handler + x->handler);
-}
-__visible bool ex_handler_default(const struct exception_table_entry *fixup,
- struct pt_regs *regs, int trapnr,
- unsigned long error_code,
- unsigned long fault_addr)
+static bool ex_handler_default(const struct exception_table_entry *fixup,
+ struct pt_regs *regs)
{
regs->ip = ex_fixup_addr(fixup);
return true;
}
-EXPORT_SYMBOL(ex_handler_default);
-__visible bool ex_handler_fault(const struct exception_table_entry *fixup,
- struct pt_regs *regs, int trapnr,
- unsigned long error_code,
- unsigned long fault_addr)
+static bool ex_handler_fault(const struct exception_table_entry *fixup,
+ struct pt_regs *regs, int trapnr)
{
- regs->ip = ex_fixup_addr(fixup);
regs->ax = trapnr;
- return true;
+ return ex_handler_default(fixup, regs);
}
-EXPORT_SYMBOL_GPL(ex_handler_fault);
/*
* Handler for when we fail to restore a task's FPU state. We should never get
@@ -55,65 +39,47 @@ EXPORT_SYMBOL_GPL(ex_handler_fault);
* of vulnerability by restoring from the initial state (essentially, zeroing
* out all the FPU registers) if we can't restore from the task's FPU state.
*/
-__visible bool ex_handler_fprestore(const struct exception_table_entry *fixup,
- struct pt_regs *regs, int trapnr,
- unsigned long error_code,
- unsigned long fault_addr)
+static bool ex_handler_fprestore(const struct exception_table_entry *fixup,
+ struct pt_regs *regs)
{
regs->ip = ex_fixup_addr(fixup);
WARN_ONCE(1, "Bad FPU state detected at %pB, reinitializing FPU registers.",
(void *)instruction_pointer(regs));
- __restore_fpregs_from_fpstate(&init_fpstate, xfeatures_mask_fpstate());
+ fpu_reset_from_exception_fixup();
return true;
}
-EXPORT_SYMBOL_GPL(ex_handler_fprestore);
-__visible bool ex_handler_uaccess(const struct exception_table_entry *fixup,
- struct pt_regs *regs, int trapnr,
- unsigned long error_code,
- unsigned long fault_addr)
+static bool ex_handler_uaccess(const struct exception_table_entry *fixup,
+ struct pt_regs *regs, int trapnr)
{
WARN_ONCE(trapnr == X86_TRAP_GP, "General protection fault in user access. Non-canonical address?");
- regs->ip = ex_fixup_addr(fixup);
- return true;
+ return ex_handler_default(fixup, regs);
}
-EXPORT_SYMBOL(ex_handler_uaccess);
-__visible bool ex_handler_copy(const struct exception_table_entry *fixup,
- struct pt_regs *regs, int trapnr,
- unsigned long error_code,
- unsigned long fault_addr)
+static bool ex_handler_copy(const struct exception_table_entry *fixup,
+ struct pt_regs *regs, int trapnr)
{
WARN_ONCE(trapnr == X86_TRAP_GP, "General protection fault in user access. Non-canonical address?");
- regs->ip = ex_fixup_addr(fixup);
- regs->ax = trapnr;
- return true;
+ return ex_handler_fault(fixup, regs, trapnr);
}
-EXPORT_SYMBOL(ex_handler_copy);
-__visible bool ex_handler_rdmsr_unsafe(const struct exception_table_entry *fixup,
- struct pt_regs *regs, int trapnr,
- unsigned long error_code,
- unsigned long fault_addr)
+static bool ex_handler_rdmsr_unsafe(const struct exception_table_entry *fixup,
+ struct pt_regs *regs)
{
if (pr_warn_once("unchecked MSR access error: RDMSR from 0x%x at rIP: 0x%lx (%pS)\n",
(unsigned int)regs->cx, regs->ip, (void *)regs->ip))
show_stack_regs(regs);
/* Pretend that the read succeeded and returned 0. */
- regs->ip = ex_fixup_addr(fixup);
regs->ax = 0;
regs->dx = 0;
- return true;
+ return ex_handler_default(fixup, regs);
}
-EXPORT_SYMBOL(ex_handler_rdmsr_unsafe);
-__visible bool ex_handler_wrmsr_unsafe(const struct exception_table_entry *fixup,
- struct pt_regs *regs, int trapnr,
- unsigned long error_code,
- unsigned long fault_addr)
+static bool ex_handler_wrmsr_unsafe(const struct exception_table_entry *fixup,
+ struct pt_regs *regs)
{
if (pr_warn_once("unchecked MSR access error: WRMSR to 0x%x (tried to write 0x%08x%08x) at rIP: 0x%lx (%pS)\n",
(unsigned int)regs->cx, (unsigned int)regs->dx,
@@ -121,45 +87,29 @@ __visible bool ex_handler_wrmsr_unsafe(const struct exception_table_entry *fixup
show_stack_regs(regs);
/* Pretend that the write succeeded. */
- regs->ip = ex_fixup_addr(fixup);
- return true;
+ return ex_handler_default(fixup, regs);
}
-EXPORT_SYMBOL(ex_handler_wrmsr_unsafe);
-__visible bool ex_handler_clear_fs(const struct exception_table_entry *fixup,
- struct pt_regs *regs, int trapnr,
- unsigned long error_code,
- unsigned long fault_addr)
+static bool ex_handler_clear_fs(const struct exception_table_entry *fixup,
+ struct pt_regs *regs)
{
if (static_cpu_has(X86_BUG_NULL_SEG))
asm volatile ("mov %0, %%fs" : : "rm" (__USER_DS));
asm volatile ("mov %0, %%fs" : : "rm" (0));
- return ex_handler_default(fixup, regs, trapnr, error_code, fault_addr);
+ return ex_handler_default(fixup, regs);
}
-EXPORT_SYMBOL(ex_handler_clear_fs);
-enum handler_type ex_get_fault_handler_type(unsigned long ip)
+int ex_get_fixup_type(unsigned long ip)
{
- const struct exception_table_entry *e;
- ex_handler_t handler;
+ const struct exception_table_entry *e = search_exception_tables(ip);
- e = search_exception_tables(ip);
- if (!e)
- return EX_HANDLER_NONE;
- handler = ex_fixup_handler(e);
- if (handler == ex_handler_fault)
- return EX_HANDLER_FAULT;
- else if (handler == ex_handler_uaccess || handler == ex_handler_copy)
- return EX_HANDLER_UACCESS;
- else
- return EX_HANDLER_OTHER;
+ return e ? e->type : EX_TYPE_NONE;
}
int fixup_exception(struct pt_regs *regs, int trapnr, unsigned long error_code,
unsigned long fault_addr)
{
const struct exception_table_entry *e;
- ex_handler_t handler;
#ifdef CONFIG_PNPBIOS
if (unlikely(SEGMENT_IS_PNP_CODE(regs->cs))) {
@@ -179,8 +129,35 @@ int fixup_exception(struct pt_regs *regs, int trapnr, unsigned long error_code,
if (!e)
return 0;
- handler = ex_fixup_handler(e);
- return handler(e, regs, trapnr, error_code, fault_addr);
+ switch (e->type) {
+ case EX_TYPE_DEFAULT:
+ case EX_TYPE_DEFAULT_MCE_SAFE:
+ return ex_handler_default(e, regs);
+ case EX_TYPE_FAULT:
+ case EX_TYPE_FAULT_MCE_SAFE:
+ return ex_handler_fault(e, regs, trapnr);
+ case EX_TYPE_UACCESS:
+ return ex_handler_uaccess(e, regs, trapnr);
+ case EX_TYPE_COPY:
+ return ex_handler_copy(e, regs, trapnr);
+ case EX_TYPE_CLEAR_FS:
+ return ex_handler_clear_fs(e, regs);
+ case EX_TYPE_FPU_RESTORE:
+ return ex_handler_fprestore(e, regs);
+ case EX_TYPE_RDMSR:
+ return ex_handler_rdmsr_unsafe(e, regs);
+ case EX_TYPE_WRMSR:
+ return ex_handler_wrmsr_unsafe(e, regs);
+ case EX_TYPE_BPF:
+ return ex_handler_bpf(e, regs);
+ case EX_TYPE_RDMSR_IN_MCE:
+ ex_handler_msr_mce(regs, false);
+ break;
+ case EX_TYPE_WRMSR_IN_MCE:
+ ex_handler_msr_mce(regs, true);
+ break;
+ }
+ BUG();
}
extern unsigned int early_recursion_flag;
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index 60ade7dd71bd..026031b3b782 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -14,7 +14,7 @@
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/mmiotrace.h>
-#include <linux/mem_encrypt.h>
+#include <linux/cc_platform.h>
#include <linux/efi.h>
#include <linux/pgtable.h>
@@ -92,7 +92,7 @@ static unsigned int __ioremap_check_ram(struct resource *res)
*/
static unsigned int __ioremap_check_encrypted(struct resource *res)
{
- if (!sev_active())
+ if (!cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT))
return 0;
switch (res->desc) {
@@ -112,7 +112,7 @@ static unsigned int __ioremap_check_encrypted(struct resource *res)
*/
static void __ioremap_check_other(resource_size_t addr, struct ioremap_desc *desc)
{
- if (!sev_active())
+ if (!cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT))
return;
if (!IS_ENABLED(CONFIG_EFI))
@@ -508,6 +508,7 @@ void unxlate_dev_mem_ptr(phys_addr_t phys, void *addr)
memunmap((void *)((unsigned long)addr & PAGE_MASK));
}
+#ifdef CONFIG_AMD_MEM_ENCRYPT
/*
* Examine the physical address to determine if it is an area of memory
* that should be mapped decrypted. If the memory is not part of the
@@ -555,7 +556,7 @@ static bool memremap_should_map_decrypted(resource_size_t phys_addr,
case E820_TYPE_NVS:
case E820_TYPE_UNUSABLE:
/* For SEV, these areas are encrypted */
- if (sev_active())
+ if (cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT))
break;
fallthrough;
@@ -693,7 +694,7 @@ static bool __init early_memremap_is_setup_data(resource_size_t phys_addr,
bool arch_memremap_can_ram_remap(resource_size_t phys_addr, unsigned long size,
unsigned long flags)
{
- if (!mem_encrypt_active())
+ if (!cc_platform_has(CC_ATTR_MEM_ENCRYPT))
return true;
if (flags & MEMREMAP_ENC)
@@ -702,7 +703,7 @@ bool arch_memremap_can_ram_remap(resource_size_t phys_addr, unsigned long size,
if (flags & MEMREMAP_DEC)
return false;
- if (sme_active()) {
+ if (cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT)) {
if (memremap_is_setup_data(phys_addr, size) ||
memremap_is_efi_data(phys_addr, size))
return false;
@@ -723,12 +724,12 @@ pgprot_t __init early_memremap_pgprot_adjust(resource_size_t phys_addr,
{
bool encrypted_prot;
- if (!mem_encrypt_active())
+ if (!cc_platform_has(CC_ATTR_MEM_ENCRYPT))
return prot;
encrypted_prot = true;
- if (sme_active()) {
+ if (cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT)) {
if (early_memremap_is_setup_data(phys_addr, size) ||
memremap_is_efi_data(phys_addr, size))
encrypted_prot = false;
@@ -746,7 +747,6 @@ bool phys_mem_access_encrypted(unsigned long phys_addr, unsigned long size)
return arch_memremap_can_ram_remap(phys_addr, size, 0);
}
-#ifdef CONFIG_AMD_MEM_ENCRYPT
/* Remap memory with encryption */
void __init *early_memremap_encrypted(resource_size_t phys_addr,
unsigned long size)
diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
index ff08dc463634..23d54b810f08 100644
--- a/arch/x86/mm/mem_encrypt.c
+++ b/arch/x86/mm/mem_encrypt.c
@@ -20,6 +20,7 @@
#include <linux/bitops.h>
#include <linux/dma-mapping.h>
#include <linux/virtio_config.h>
+#include <linux/cc_platform.h>
#include <asm/tlbflush.h>
#include <asm/fixmap.h>
@@ -143,7 +144,7 @@ void __init sme_unmap_bootdata(char *real_mode_data)
struct boot_params *boot_data;
unsigned long cmdline_paddr;
- if (!sme_active())
+ if (!cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT))
return;
/* Get the command line address before unmapping the real_mode_data */
@@ -163,7 +164,7 @@ void __init sme_map_bootdata(char *real_mode_data)
struct boot_params *boot_data;
unsigned long cmdline_paddr;
- if (!sme_active())
+ if (!cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT))
return;
__sme_early_map_unmap_mem(real_mode_data, sizeof(boot_params), true);
@@ -193,7 +194,7 @@ void __init sme_early_init(void)
for (i = 0; i < ARRAY_SIZE(protection_map); i++)
protection_map[i] = pgprot_encrypted(protection_map[i]);
- if (sev_active())
+ if (cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT))
swiotlb_force = SWIOTLB_FORCE;
}
@@ -202,7 +203,7 @@ void __init sev_setup_arch(void)
phys_addr_t total_mem = memblock_phys_mem_size();
unsigned long size;
- if (!sev_active())
+ if (!cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT))
return;
/*
@@ -360,42 +361,13 @@ int __init early_set_memory_encrypted(unsigned long vaddr, unsigned long size)
return early_set_memory_enc_dec(vaddr, size, true);
}
-/*
- * SME and SEV are very similar but they are not the same, so there are
- * times that the kernel will need to distinguish between SME and SEV. The
- * sme_active() and sev_active() functions are used for this. When a
- * distinction isn't needed, the mem_encrypt_active() function can be used.
- *
- * The trampoline code is a good example for this requirement. Before
- * paging is activated, SME will access all memory as decrypted, but SEV
- * will access all memory as encrypted. So, when APs are being brought
- * up under SME the trampoline area cannot be encrypted, whereas under SEV
- * the trampoline area must be encrypted.
- */
-bool sev_active(void)
-{
- return sev_status & MSR_AMD64_SEV_ENABLED;
-}
-
-bool sme_active(void)
-{
- return sme_me_mask && !sev_active();
-}
-EXPORT_SYMBOL_GPL(sev_active);
-
-/* Needs to be called from non-instrumentable code */
-bool noinstr sev_es_active(void)
-{
- return sev_status & MSR_AMD64_SEV_ES_ENABLED;
-}
-
/* Override for DMA direct allocation check - ARCH_HAS_FORCE_DMA_UNENCRYPTED */
bool force_dma_unencrypted(struct device *dev)
{
/*
* For SEV, all DMA must be to unencrypted addresses.
*/
- if (sev_active())
+ if (cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT))
return true;
/*
@@ -403,7 +375,7 @@ bool force_dma_unencrypted(struct device *dev)
* device does not support DMA to addresses that include the
* encryption mask.
*/
- if (sme_active()) {
+ if (cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT)) {
u64 dma_enc_mask = DMA_BIT_MASK(__ffs64(sme_me_mask));
u64 dma_dev_mask = min_not_zero(dev->coherent_dma_mask,
dev->bus_dma_limit);
@@ -428,7 +400,7 @@ void __init mem_encrypt_free_decrypted_mem(void)
* The unused memory range was mapped decrypted, change the encryption
* attribute from decrypted to encrypted before freeing it.
*/
- if (mem_encrypt_active()) {
+ if (cc_platform_has(CC_ATTR_MEM_ENCRYPT)) {
r = set_memory_encrypted(vaddr, npages);
if (r) {
pr_warn("failed to free unused decrypted pages\n");
@@ -444,7 +416,7 @@ static void print_mem_encrypt_feature_info(void)
pr_info("AMD Memory Encryption Features active:");
/* Secure Memory Encryption */
- if (sme_active()) {
+ if (cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT)) {
/*
* SME is mutually exclusive with any of the SEV
* features below.
@@ -454,11 +426,11 @@ static void print_mem_encrypt_feature_info(void)
}
/* Secure Encrypted Virtualization */
- if (sev_active())
+ if (cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT))
pr_cont(" SEV");
/* Encrypted Register State */
- if (sev_es_active())
+ if (cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT))
pr_cont(" SEV-ES");
pr_cont("\n");
@@ -477,7 +449,8 @@ void __init mem_encrypt_init(void)
* With SEV, we need to unroll the rep string I/O instructions,
* but SEV-ES supports them through the #VC handler.
*/
- if (sev_active() && !sev_es_active())
+ if (cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT) &&
+ !cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT))
static_branch_enable(&sev_enable_key);
print_mem_encrypt_feature_info();
@@ -485,6 +458,6 @@ void __init mem_encrypt_init(void)
int arch_has_restricted_virtio_memory_access(void)
{
- return sev_active();
+ return cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT);
}
EXPORT_SYMBOL_GPL(arch_has_restricted_virtio_memory_access);
diff --git a/arch/x86/mm/mem_encrypt_identity.c b/arch/x86/mm/mem_encrypt_identity.c
index 700ce8fdea87..3f0abb403340 100644
--- a/arch/x86/mm/mem_encrypt_identity.c
+++ b/arch/x86/mm/mem_encrypt_identity.c
@@ -39,6 +39,7 @@
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/mem_encrypt.h>
+#include <linux/cc_platform.h>
#include <asm/setup.h>
#include <asm/sections.h>
@@ -296,7 +297,13 @@ void __init sme_encrypt_kernel(struct boot_params *bp)
unsigned long pgtable_area_len;
unsigned long decrypted_base;
- if (!sme_active())
+ /*
+ * This is early code, use an open coded check for SME instead of
+ * using cc_platform_has(). This eliminates worries about removing
+ * instrumentation or checking boot_cpu_data in the cc_platform_has()
+ * function.
+ */
+ if (!sme_get_me_mask() || sev_status & MSR_AMD64_SEV_ENABLED)
return;
/*
diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c
index ad8a5c586a35..527957586f3c 100644
--- a/arch/x86/mm/pat/set_memory.c
+++ b/arch/x86/mm/pat/set_memory.c
@@ -18,6 +18,7 @@
#include <linux/libnvdimm.h>
#include <linux/vmstat.h>
#include <linux/kernel.h>
+#include <linux/cc_platform.h>
#include <asm/e820/api.h>
#include <asm/processor.h>
@@ -1986,7 +1987,7 @@ static int __set_memory_enc_dec(unsigned long addr, int numpages, bool enc)
int ret;
/* Nothing to do if memory encryption is not active */
- if (!mem_encrypt_active())
+ if (!cc_platform_has(CC_ATTR_MEM_ENCRYPT))
return 0;
/* Should not be working on unaligned addresses */
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 9ea57389c554..8796559f62a4 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -15,7 +15,6 @@
#include <asm/set_memory.h>
#include <asm/nospec-branch.h>
#include <asm/text-patching.h>
-#include <asm/asm-prototypes.h>
static u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len)
{
@@ -225,6 +224,14 @@ static void jit_fill_hole(void *area, unsigned int size)
struct jit_context {
int cleanup_addr; /* Epilogue code offset */
+
+ /*
+ * Program specific offsets of labels in the code; these rely on the
+ * JIT doing at least 2 passes, recording the position on the first
+ * pass, only to generate the correct offset on the second pass.
+ */
+ int tail_call_direct_label;
+ int tail_call_indirect_label;
};
/* Maximum number of bytes emitted while JITing one eBPF insn */
@@ -380,20 +387,23 @@ int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
return __bpf_arch_text_poke(ip, t, old_addr, new_addr, true);
}
-static int get_pop_bytes(bool *callee_regs_used)
+#define EMIT_LFENCE() EMIT3(0x0F, 0xAE, 0xE8)
+
+static void emit_indirect_jump(u8 **pprog, int reg, u8 *ip)
{
- int bytes = 0;
+ u8 *prog = *pprog;
- if (callee_regs_used[3])
- bytes += 2;
- if (callee_regs_used[2])
- bytes += 2;
- if (callee_regs_used[1])
- bytes += 2;
- if (callee_regs_used[0])
- bytes += 1;
+#ifdef CONFIG_RETPOLINE
+ if (cpu_feature_enabled(X86_FEATURE_RETPOLINE_AMD)) {
+ EMIT_LFENCE();
+ EMIT2(0xFF, 0xE0 + reg);
+ } else if (cpu_feature_enabled(X86_FEATURE_RETPOLINE)) {
+ emit_jump(&prog, &__x86_indirect_thunk_array[reg], ip);
+ } else
+#endif
+ EMIT2(0xFF, 0xE0 + reg);
- return bytes;
+ *pprog = prog;
}
/*
@@ -411,29 +421,12 @@ static int get_pop_bytes(bool *callee_regs_used)
* out:
*/
static void emit_bpf_tail_call_indirect(u8 **pprog, bool *callee_regs_used,
- u32 stack_depth)
+ u32 stack_depth, u8 *ip,
+ struct jit_context *ctx)
{
int tcc_off = -4 - round_up(stack_depth, 8);
- u8 *prog = *pprog;
- int pop_bytes = 0;
- int off1 = 42;
- int off2 = 31;
- int off3 = 9;
-
- /* count the additional bytes used for popping callee regs from stack
- * that need to be taken into account for each of the offsets that
- * are used for bailing out of the tail call
- */
- pop_bytes = get_pop_bytes(callee_regs_used);
- off1 += pop_bytes;
- off2 += pop_bytes;
- off3 += pop_bytes;
-
- if (stack_depth) {
- off1 += 7;
- off2 += 7;
- off3 += 7;
- }
+ u8 *prog = *pprog, *start = *pprog;
+ int offset;
/*
* rdi - pointer to ctx
@@ -448,8 +441,9 @@ static void emit_bpf_tail_call_indirect(u8 **pprog, bool *callee_regs_used,
EMIT2(0x89, 0xD2); /* mov edx, edx */
EMIT3(0x39, 0x56, /* cmp dword ptr [rsi + 16], edx */
offsetof(struct bpf_array, map.max_entries));
-#define OFFSET1 (off1 + RETPOLINE_RCX_BPF_JIT_SIZE) /* Number of bytes to jump */
- EMIT2(X86_JBE, OFFSET1); /* jbe out */
+
+ offset = ctx->tail_call_indirect_label - (prog + 2 - start);
+ EMIT2(X86_JBE, offset); /* jbe out */
/*
* if (tail_call_cnt > MAX_TAIL_CALL_CNT)
@@ -457,8 +451,9 @@ static void emit_bpf_tail_call_indirect(u8 **pprog, bool *callee_regs_used,
*/
EMIT2_off32(0x8B, 0x85, tcc_off); /* mov eax, dword ptr [rbp - tcc_off] */
EMIT3(0x83, 0xF8, MAX_TAIL_CALL_CNT); /* cmp eax, MAX_TAIL_CALL_CNT */
-#define OFFSET2 (off2 + RETPOLINE_RCX_BPF_JIT_SIZE)
- EMIT2(X86_JA, OFFSET2); /* ja out */
+
+ offset = ctx->tail_call_indirect_label - (prog + 2 - start);
+ EMIT2(X86_JA, offset); /* ja out */
EMIT3(0x83, 0xC0, 0x01); /* add eax, 1 */
EMIT2_off32(0x89, 0x85, tcc_off); /* mov dword ptr [rbp - tcc_off], eax */
@@ -471,12 +466,11 @@ static void emit_bpf_tail_call_indirect(u8 **pprog, bool *callee_regs_used,
* goto out;
*/
EMIT3(0x48, 0x85, 0xC9); /* test rcx,rcx */
-#define OFFSET3 (off3 + RETPOLINE_RCX_BPF_JIT_SIZE)
- EMIT2(X86_JE, OFFSET3); /* je out */
- *pprog = prog;
- pop_callee_regs(pprog, callee_regs_used);
- prog = *pprog;
+ offset = ctx->tail_call_indirect_label - (prog + 2 - start);
+ EMIT2(X86_JE, offset); /* je out */
+
+ pop_callee_regs(&prog, callee_regs_used);
EMIT1(0x58); /* pop rax */
if (stack_depth)
@@ -493,41 +487,21 @@ static void emit_bpf_tail_call_indirect(u8 **pprog, bool *callee_regs_used,
* rdi == ctx (1st arg)
* rcx == prog->bpf_func + X86_TAIL_CALL_OFFSET
*/
- RETPOLINE_RCX_BPF_JIT();
+ emit_indirect_jump(&prog, 1 /* rcx */, ip + (prog - start));
/* out: */
+ ctx->tail_call_indirect_label = prog - start;
*pprog = prog;
}
static void emit_bpf_tail_call_direct(struct bpf_jit_poke_descriptor *poke,
- u8 **pprog, int addr, u8 *image,
- bool *callee_regs_used, u32 stack_depth)
+ u8 **pprog, u8 *ip,
+ bool *callee_regs_used, u32 stack_depth,
+ struct jit_context *ctx)
{
int tcc_off = -4 - round_up(stack_depth, 8);
- u8 *prog = *pprog;
- int pop_bytes = 0;
- int off1 = 20;
- int poke_off;
-
- /* count the additional bytes used for popping callee regs to stack
- * that need to be taken into account for jump offset that is used for
- * bailing out from of the tail call when limit is reached
- */
- pop_bytes = get_pop_bytes(callee_regs_used);
- off1 += pop_bytes;
-
- /*
- * total bytes for:
- * - nop5/ jmpq $off
- * - pop callee regs
- * - sub rsp, $val if depth > 0
- * - pop rax
- */
- poke_off = X86_PATCH_SIZE + pop_bytes + 1;
- if (stack_depth) {
- poke_off += 7;
- off1 += 7;
- }
+ u8 *prog = *pprog, *start = *pprog;
+ int offset;
/*
* if (tail_call_cnt > MAX_TAIL_CALL_CNT)
@@ -535,28 +509,30 @@ static void emit_bpf_tail_call_direct(struct bpf_jit_poke_descriptor *poke,
*/
EMIT2_off32(0x8B, 0x85, tcc_off); /* mov eax, dword ptr [rbp - tcc_off] */
EMIT3(0x83, 0xF8, MAX_TAIL_CALL_CNT); /* cmp eax, MAX_TAIL_CALL_CNT */
- EMIT2(X86_JA, off1); /* ja out */
+
+ offset = ctx->tail_call_direct_label - (prog + 2 - start);
+ EMIT2(X86_JA, offset); /* ja out */
EMIT3(0x83, 0xC0, 0x01); /* add eax, 1 */
EMIT2_off32(0x89, 0x85, tcc_off); /* mov dword ptr [rbp - tcc_off], eax */
- poke->tailcall_bypass = image + (addr - poke_off - X86_PATCH_SIZE);
+ poke->tailcall_bypass = ip + (prog - start);
poke->adj_off = X86_TAIL_CALL_OFFSET;
- poke->tailcall_target = image + (addr - X86_PATCH_SIZE);
+ poke->tailcall_target = ip + ctx->tail_call_direct_label - X86_PATCH_SIZE;
poke->bypass_addr = (u8 *)poke->tailcall_target + X86_PATCH_SIZE;
emit_jump(&prog, (u8 *)poke->tailcall_target + X86_PATCH_SIZE,
poke->tailcall_bypass);
- *pprog = prog;
- pop_callee_regs(pprog, callee_regs_used);
- prog = *pprog;
+ pop_callee_regs(&prog, callee_regs_used);
EMIT1(0x58); /* pop rax */
if (stack_depth)
EMIT3_off32(0x48, 0x81, 0xC4, round_up(stack_depth, 8));
memcpy(prog, x86_nops[5], X86_PATCH_SIZE);
prog += X86_PATCH_SIZE;
+
/* out: */
+ ctx->tail_call_direct_label = prog - start;
*pprog = prog;
}
@@ -827,9 +803,7 @@ static int emit_atomic(u8 **pprog, u8 atomic_op,
return 0;
}
-static bool ex_handler_bpf(const struct exception_table_entry *x,
- struct pt_regs *regs, int trapnr,
- unsigned long error_code, unsigned long fault_addr)
+bool ex_handler_bpf(const struct exception_table_entry *x, struct pt_regs *regs)
{
u32 reg = x->fixup >> 8;
@@ -1222,8 +1196,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
/* speculation barrier */
case BPF_ST | BPF_NOSPEC:
if (boot_cpu_has(X86_FEATURE_XMM2))
- /* Emit 'lfence' */
- EMIT3(0x0F, 0xAE, 0xE8);
+ EMIT_LFENCE();
break;
/* ST: *(u8*)(dst_reg + off) = imm */
@@ -1313,12 +1286,7 @@ st: if (is_imm8(insn->off))
}
ex->insn = delta;
- delta = (u8 *)ex_handler_bpf - (u8 *)&ex->handler;
- if (!is_simm32(delta)) {
- pr_err("extable->handler doesn't fit into 32-bit\n");
- return -EFAULT;
- }
- ex->handler = delta;
+ ex->type = EX_TYPE_BPF;
if (dst_reg > BPF_REG_9) {
pr_err("verifier error\n");
@@ -1412,13 +1380,16 @@ st: if (is_imm8(insn->off))
case BPF_JMP | BPF_TAIL_CALL:
if (imm32)
emit_bpf_tail_call_direct(&bpf_prog->aux->poke_tab[imm32 - 1],
- &prog, addrs[i], image,
+ &prog, image + addrs[i - 1],
callee_regs_used,
- bpf_prog->aux->stack_depth);
+ bpf_prog->aux->stack_depth,
+ ctx);
else
emit_bpf_tail_call_indirect(&prog,
callee_regs_used,
- bpf_prog->aux->stack_depth);
+ bpf_prog->aux->stack_depth,
+ image + addrs[i - 1],
+ ctx);
break;
/* cond jump */
@@ -2124,24 +2095,6 @@ cleanup:
return ret;
}
-static int emit_fallback_jump(u8 **pprog)
-{
- u8 *prog = *pprog;
- int err = 0;
-
-#ifdef CONFIG_RETPOLINE
- /* Note that this assumes the the compiler uses external
- * thunks for indirect calls. Both clang and GCC use the same
- * naming convention for external thunks.
- */
- err = emit_jump(&prog, __x86_indirect_thunk_rdx, prog);
-#else
- EMIT2(0xFF, 0xE2); /* jmp rdx */
-#endif
- *pprog = prog;
- return err;
-}
-
static int emit_bpf_dispatcher(u8 **pprog, int a, int b, s64 *progs)
{
u8 *jg_reloc, *prog = *pprog;
@@ -2163,9 +2116,7 @@ static int emit_bpf_dispatcher(u8 **pprog, int a, int b, s64 *progs)
if (err)
return err;
- err = emit_fallback_jump(&prog); /* jmp thunk/indirect */
- if (err)
- return err;
+ emit_indirect_jump(&prog, 2 /* rdx */, prog);
*pprog = prog;
return 0;
diff --git a/arch/x86/net/bpf_jit_comp32.c b/arch/x86/net/bpf_jit_comp32.c
index 3bfda5f502cb..da9b7cfa4632 100644
--- a/arch/x86/net/bpf_jit_comp32.c
+++ b/arch/x86/net/bpf_jit_comp32.c
@@ -15,6 +15,7 @@
#include <asm/cacheflush.h>
#include <asm/set_memory.h>
#include <asm/nospec-branch.h>
+#include <asm/asm-prototypes.h>
#include <linux/bpf.h>
/*
@@ -1267,6 +1268,21 @@ static void emit_epilogue(u8 **pprog, u32 stack_depth)
*pprog = prog;
}
+static int emit_jmp_edx(u8 **pprog, u8 *ip)
+{
+ u8 *prog = *pprog;
+ int cnt = 0;
+
+#ifdef CONFIG_RETPOLINE
+ EMIT1_off32(0xE9, (u8 *)__x86_indirect_thunk_edx - (ip + 5));
+#else
+ EMIT2(0xFF, 0xE2);
+#endif
+ *pprog = prog;
+
+ return cnt;
+}
+
/*
* Generate the following code:
* ... bpf_tail_call(void *ctx, struct bpf_array *array, u64 index) ...
@@ -1280,7 +1296,7 @@ static void emit_epilogue(u8 **pprog, u32 stack_depth)
* goto *(prog->bpf_func + prologue_size);
* out:
*/
-static void emit_bpf_tail_call(u8 **pprog)
+static void emit_bpf_tail_call(u8 **pprog, u8 *ip)
{
u8 *prog = *pprog;
int cnt = 0;
@@ -1362,7 +1378,7 @@ static void emit_bpf_tail_call(u8 **pprog)
* eax == ctx (1st arg)
* edx == prog->bpf_func + prologue_size
*/
- RETPOLINE_EDX_BPF_JIT();
+ cnt += emit_jmp_edx(&prog, ip + cnt);
if (jmp_label1 == -1)
jmp_label1 = cnt;
@@ -2122,7 +2138,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
break;
}
case BPF_JMP | BPF_TAIL_CALL:
- emit_bpf_tail_call(&prog);
+ emit_bpf_tail_call(&prog, image + addrs[i - 1]);
break;
/* cond jump */
diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
index 7515e78ef898..1f3675453a57 100644
--- a/arch/x86/platform/efi/efi_64.c
+++ b/arch/x86/platform/efi/efi_64.c
@@ -33,7 +33,7 @@
#include <linux/reboot.h>
#include <linux/slab.h>
#include <linux/ucs2_string.h>
-#include <linux/mem_encrypt.h>
+#include <linux/cc_platform.h>
#include <linux/sched/task.h>
#include <asm/setup.h>
@@ -284,7 +284,8 @@ static void __init __map_region(efi_memory_desc_t *md, u64 va)
if (!(md->attribute & EFI_MEMORY_WB))
flags |= _PAGE_PCD;
- if (sev_active() && md->type != EFI_MEMORY_MAPPED_IO)
+ if (cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT) &&
+ md->type != EFI_MEMORY_MAPPED_IO)
flags |= _PAGE_ENC;
pfn = md->phys_addr >> PAGE_SHIFT;
@@ -390,7 +391,7 @@ static int __init efi_update_mem_attr(struct mm_struct *mm, efi_memory_desc_t *m
if (!(md->attribute & EFI_MEMORY_RO))
pf |= _PAGE_RW;
- if (sev_active())
+ if (cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT))
pf |= _PAGE_ENC;
return efi_update_mappings(md, pf);
@@ -438,7 +439,7 @@ void __init efi_runtime_update_mappings(void)
(md->type != EFI_RUNTIME_SERVICES_CODE))
pf |= _PAGE_RW;
- if (sev_active())
+ if (cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT))
pf |= _PAGE_ENC;
efi_update_mappings(md, pf);
diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c
index 6665f8802098..9f2b251e83c5 100644
--- a/arch/x86/power/cpu.c
+++ b/arch/x86/power/cpu.c
@@ -20,7 +20,7 @@
#include <asm/page.h>
#include <asm/mce.h>
#include <asm/suspend.h>
-#include <asm/fpu/internal.h>
+#include <asm/fpu/api.h>
#include <asm/debugreg.h>
#include <asm/cpu.h>
#include <asm/mmu_context.h>
diff --git a/arch/x86/realmode/init.c b/arch/x86/realmode/init.c
index 31b5856010cb..4a3da7592b99 100644
--- a/arch/x86/realmode/init.c
+++ b/arch/x86/realmode/init.c
@@ -2,7 +2,7 @@
#include <linux/io.h>
#include <linux/slab.h>
#include <linux/memblock.h>
-#include <linux/mem_encrypt.h>
+#include <linux/cc_platform.h>
#include <linux/pgtable.h>
#include <asm/set_memory.h>
@@ -44,10 +44,10 @@ void __init reserve_real_mode(void)
static void sme_sev_setup_real_mode(struct trampoline_header *th)
{
#ifdef CONFIG_AMD_MEM_ENCRYPT
- if (sme_active())
+ if (cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT))
th->flags |= TH_FLAGS_SME_ACTIVE;
- if (sev_es_active()) {
+ if (cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT)) {
/*
* Skip the call to verify_cpu() in secondary_startup_64 as it
* will cause #VC exceptions when the AP can't handle them yet.
@@ -81,7 +81,7 @@ static void __init setup_real_mode(void)
* decrypted memory in order to bring up other processors
* successfully. This is not needed for SEV.
*/
- if (sme_active())
+ if (cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT))
set_memory_decrypted((unsigned long)base, size >> PAGE_SHIFT);
memcpy(base, real_mode_blob, size);
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index a7b7d674f500..4f63117f09bb 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -283,12 +283,12 @@ static void __init xen_init_capabilities(void)
}
}
-static void xen_set_debugreg(int reg, unsigned long val)
+static noinstr void xen_set_debugreg(int reg, unsigned long val)
{
HYPERVISOR_set_debugreg(reg, val);
}
-static unsigned long xen_get_debugreg(int reg)
+static noinstr unsigned long xen_get_debugreg(int reg)
{
return HYPERVISOR_get_debugreg(reg);
}
@@ -1025,52 +1025,54 @@ static const struct pv_info xen_info __initconst = {
.name = "Xen",
};
-static const struct pv_cpu_ops xen_cpu_ops __initconst = {
- .cpuid = xen_cpuid,
+static const typeof(pv_ops) xen_cpu_ops __initconst = {
+ .cpu = {
+ .cpuid = xen_cpuid,
- .set_debugreg = xen_set_debugreg,
- .get_debugreg = xen_get_debugreg,
+ .set_debugreg = xen_set_debugreg,
+ .get_debugreg = xen_get_debugreg,
- .read_cr0 = xen_read_cr0,
- .write_cr0 = xen_write_cr0,
+ .read_cr0 = xen_read_cr0,
+ .write_cr0 = xen_write_cr0,
- .write_cr4 = xen_write_cr4,
+ .write_cr4 = xen_write_cr4,
- .wbinvd = native_wbinvd,
+ .wbinvd = native_wbinvd,
- .read_msr = xen_read_msr,
- .write_msr = xen_write_msr,
+ .read_msr = xen_read_msr,
+ .write_msr = xen_write_msr,
- .read_msr_safe = xen_read_msr_safe,
- .write_msr_safe = xen_write_msr_safe,
+ .read_msr_safe = xen_read_msr_safe,
+ .write_msr_safe = xen_write_msr_safe,
- .read_pmc = xen_read_pmc,
+ .read_pmc = xen_read_pmc,
- .load_tr_desc = paravirt_nop,
- .set_ldt = xen_set_ldt,
- .load_gdt = xen_load_gdt,
- .load_idt = xen_load_idt,
- .load_tls = xen_load_tls,
- .load_gs_index = xen_load_gs_index,
+ .load_tr_desc = paravirt_nop,
+ .set_ldt = xen_set_ldt,
+ .load_gdt = xen_load_gdt,
+ .load_idt = xen_load_idt,
+ .load_tls = xen_load_tls,
+ .load_gs_index = xen_load_gs_index,
- .alloc_ldt = xen_alloc_ldt,
- .free_ldt = xen_free_ldt,
+ .alloc_ldt = xen_alloc_ldt,
+ .free_ldt = xen_free_ldt,
- .store_tr = xen_store_tr,
+ .store_tr = xen_store_tr,
- .write_ldt_entry = xen_write_ldt_entry,
- .write_gdt_entry = xen_write_gdt_entry,
- .write_idt_entry = xen_write_idt_entry,
- .load_sp0 = xen_load_sp0,
+ .write_ldt_entry = xen_write_ldt_entry,
+ .write_gdt_entry = xen_write_gdt_entry,
+ .write_idt_entry = xen_write_idt_entry,
+ .load_sp0 = xen_load_sp0,
#ifdef CONFIG_X86_IOPL_IOPERM
- .invalidate_io_bitmap = xen_invalidate_io_bitmap,
- .update_io_bitmap = xen_update_io_bitmap,
+ .invalidate_io_bitmap = xen_invalidate_io_bitmap,
+ .update_io_bitmap = xen_update_io_bitmap,
#endif
- .io_delay = xen_io_delay,
+ .io_delay = xen_io_delay,
- .start_context_switch = paravirt_start_context_switch,
- .end_context_switch = xen_end_context_switch,
+ .start_context_switch = paravirt_start_context_switch,
+ .end_context_switch = xen_end_context_switch,
+ },
};
static void xen_restart(char *msg)
@@ -1211,7 +1213,7 @@ asmlinkage __visible void __init xen_start_kernel(void)
/* Install Xen paravirt ops */
pv_info = xen_info;
- pv_ops.cpu = xen_cpu_ops;
+ pv_ops.cpu = xen_cpu_ops.cpu;
paravirt_iret = xen_iret;
xen_init_irq_ops();
diff --git a/arch/x86/xen/irq.c b/arch/x86/xen/irq.c
index dfa091d79c2e..4fe387e520af 100644
--- a/arch/x86/xen/irq.c
+++ b/arch/x86/xen/irq.c
@@ -19,12 +19,12 @@
* callback mask. We do this in a very simple manner, by making a call
* down into Xen. The pending flag will be checked by Xen on return.
*/
-void xen_force_evtchn_callback(void)
+noinstr void xen_force_evtchn_callback(void)
{
(void)HYPERVISOR_xen_version(0, NULL);
}
-asmlinkage __visible unsigned long xen_save_fl(void)
+asmlinkage __visible noinstr unsigned long xen_save_fl(void)
{
struct vcpu_info *vcpu;
unsigned long flags;
@@ -40,9 +40,9 @@ asmlinkage __visible unsigned long xen_save_fl(void)
*/
return (-flags) & X86_EFLAGS_IF;
}
-PV_CALLEE_SAVE_REGS_THUNK(xen_save_fl);
+__PV_CALLEE_SAVE_REGS_THUNK(xen_save_fl, ".noinstr.text");
-asmlinkage __visible void xen_irq_disable(void)
+asmlinkage __visible noinstr void xen_irq_disable(void)
{
/* There's a one instruction preempt window here. We need to
make sure we're don't switch CPUs between getting the vcpu
@@ -51,9 +51,9 @@ asmlinkage __visible void xen_irq_disable(void)
this_cpu_read(xen_vcpu)->evtchn_upcall_mask = 1;
preempt_enable_no_resched();
}
-PV_CALLEE_SAVE_REGS_THUNK(xen_irq_disable);
+__PV_CALLEE_SAVE_REGS_THUNK(xen_irq_disable, ".noinstr.text");
-asmlinkage __visible void xen_irq_enable(void)
+asmlinkage __visible noinstr void xen_irq_enable(void)
{
struct vcpu_info *vcpu;
@@ -76,7 +76,7 @@ asmlinkage __visible void xen_irq_enable(void)
preempt_enable();
}
-PV_CALLEE_SAVE_REGS_THUNK(xen_irq_enable);
+__PV_CALLEE_SAVE_REGS_THUNK(xen_irq_enable, ".noinstr.text");
static void xen_safe_halt(void)
{
@@ -94,17 +94,20 @@ static void xen_halt(void)
xen_safe_halt();
}
-static const struct pv_irq_ops xen_irq_ops __initconst = {
- .save_fl = PV_CALLEE_SAVE(xen_save_fl),
- .irq_disable = PV_CALLEE_SAVE(xen_irq_disable),
- .irq_enable = PV_CALLEE_SAVE(xen_irq_enable),
+static const typeof(pv_ops) xen_irq_ops __initconst = {
+ .irq = {
- .safe_halt = xen_safe_halt,
- .halt = xen_halt,
+ .save_fl = PV_CALLEE_SAVE(xen_save_fl),
+ .irq_disable = PV_CALLEE_SAVE(xen_irq_disable),
+ .irq_enable = PV_CALLEE_SAVE(xen_irq_enable),
+
+ .safe_halt = xen_safe_halt,
+ .halt = xen_halt,
+ },
};
void __init xen_init_irq_ops(void)
{
- pv_ops.irq = xen_irq_ops;
+ pv_ops.irq = xen_irq_ops.irq;
x86_init.irqs.intr_init = xen_init_IRQ;
}
diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
index 3359c23573c5..1ce436eeda15 100644
--- a/arch/x86/xen/mmu_pv.c
+++ b/arch/x86/xen/mmu_pv.c
@@ -1204,7 +1204,8 @@ static void __init xen_pagetable_init(void)
xen_remap_memory();
xen_setup_mfn_list_list();
}
-static void xen_write_cr2(unsigned long cr2)
+
+static noinstr void xen_write_cr2(unsigned long cr2)
{
this_cpu_read(xen_vcpu)->arch.cr2 = cr2;
}
@@ -2078,67 +2079,69 @@ static void xen_leave_lazy_mmu(void)
preempt_enable();
}
-static const struct pv_mmu_ops xen_mmu_ops __initconst = {
- .read_cr2 = __PV_IS_CALLEE_SAVE(xen_read_cr2),
- .write_cr2 = xen_write_cr2,
+static const typeof(pv_ops) xen_mmu_ops __initconst = {
+ .mmu = {
+ .read_cr2 = __PV_IS_CALLEE_SAVE(xen_read_cr2),
+ .write_cr2 = xen_write_cr2,
- .read_cr3 = xen_read_cr3,
- .write_cr3 = xen_write_cr3_init,
+ .read_cr3 = xen_read_cr3,
+ .write_cr3 = xen_write_cr3_init,
- .flush_tlb_user = xen_flush_tlb,
- .flush_tlb_kernel = xen_flush_tlb,
- .flush_tlb_one_user = xen_flush_tlb_one_user,
- .flush_tlb_multi = xen_flush_tlb_multi,
- .tlb_remove_table = tlb_remove_table,
+ .flush_tlb_user = xen_flush_tlb,
+ .flush_tlb_kernel = xen_flush_tlb,
+ .flush_tlb_one_user = xen_flush_tlb_one_user,
+ .flush_tlb_multi = xen_flush_tlb_multi,
+ .tlb_remove_table = tlb_remove_table,
- .pgd_alloc = xen_pgd_alloc,
- .pgd_free = xen_pgd_free,
+ .pgd_alloc = xen_pgd_alloc,
+ .pgd_free = xen_pgd_free,
- .alloc_pte = xen_alloc_pte_init,
- .release_pte = xen_release_pte_init,
- .alloc_pmd = xen_alloc_pmd_init,
- .release_pmd = xen_release_pmd_init,
+ .alloc_pte = xen_alloc_pte_init,
+ .release_pte = xen_release_pte_init,
+ .alloc_pmd = xen_alloc_pmd_init,
+ .release_pmd = xen_release_pmd_init,
- .set_pte = xen_set_pte_init,
- .set_pmd = xen_set_pmd_hyper,
+ .set_pte = xen_set_pte_init,
+ .set_pmd = xen_set_pmd_hyper,
- .ptep_modify_prot_start = xen_ptep_modify_prot_start,
- .ptep_modify_prot_commit = xen_ptep_modify_prot_commit,
+ .ptep_modify_prot_start = xen_ptep_modify_prot_start,
+ .ptep_modify_prot_commit = xen_ptep_modify_prot_commit,
- .pte_val = PV_CALLEE_SAVE(xen_pte_val),
- .pgd_val = PV_CALLEE_SAVE(xen_pgd_val),
+ .pte_val = PV_CALLEE_SAVE(xen_pte_val),
+ .pgd_val = PV_CALLEE_SAVE(xen_pgd_val),
- .make_pte = PV_CALLEE_SAVE(xen_make_pte_init),
- .make_pgd = PV_CALLEE_SAVE(xen_make_pgd),
+ .make_pte = PV_CALLEE_SAVE(xen_make_pte_init),
+ .make_pgd = PV_CALLEE_SAVE(xen_make_pgd),
- .set_pud = xen_set_pud_hyper,
+ .set_pud = xen_set_pud_hyper,
- .make_pmd = PV_CALLEE_SAVE(xen_make_pmd),
- .pmd_val = PV_CALLEE_SAVE(xen_pmd_val),
+ .make_pmd = PV_CALLEE_SAVE(xen_make_pmd),
+ .pmd_val = PV_CALLEE_SAVE(xen_pmd_val),
- .pud_val = PV_CALLEE_SAVE(xen_pud_val),
- .make_pud = PV_CALLEE_SAVE(xen_make_pud),
- .set_p4d = xen_set_p4d_hyper,
+ .pud_val = PV_CALLEE_SAVE(xen_pud_val),
+ .make_pud = PV_CALLEE_SAVE(xen_make_pud),
+ .set_p4d = xen_set_p4d_hyper,
- .alloc_pud = xen_alloc_pmd_init,
- .release_pud = xen_release_pmd_init,
+ .alloc_pud = xen_alloc_pmd_init,
+ .release_pud = xen_release_pmd_init,
#if CONFIG_PGTABLE_LEVELS >= 5
- .p4d_val = PV_CALLEE_SAVE(xen_p4d_val),
- .make_p4d = PV_CALLEE_SAVE(xen_make_p4d),
+ .p4d_val = PV_CALLEE_SAVE(xen_p4d_val),
+ .make_p4d = PV_CALLEE_SAVE(xen_make_p4d),
#endif
- .activate_mm = xen_activate_mm,
- .dup_mmap = xen_dup_mmap,
- .exit_mmap = xen_exit_mmap,
+ .activate_mm = xen_activate_mm,
+ .dup_mmap = xen_dup_mmap,
+ .exit_mmap = xen_exit_mmap,
- .lazy_mode = {
- .enter = paravirt_enter_lazy_mmu,
- .leave = xen_leave_lazy_mmu,
- .flush = paravirt_flush_lazy_mmu,
- },
+ .lazy_mode = {
+ .enter = paravirt_enter_lazy_mmu,
+ .leave = xen_leave_lazy_mmu,
+ .flush = paravirt_flush_lazy_mmu,
+ },
- .set_fixmap = xen_set_fixmap,
+ .set_fixmap = xen_set_fixmap,
+ },
};
void __init xen_init_mmu_ops(void)
@@ -2146,7 +2149,7 @@ void __init xen_init_mmu_ops(void)
x86_init.paging.pagetable_init = xen_pagetable_init;
x86_init.hyper.init_after_bootmem = xen_after_bootmem;
- pv_ops.mmu = xen_mmu_ops;
+ pv_ops.mmu = xen_mmu_ops.mmu;
memset(dummy_mapping, 0xff, PAGE_SIZE);
}
diff --git a/arch/x86/xen/xen-asm.S b/arch/x86/xen/xen-asm.S
index 1e626444712b..220dd9678494 100644
--- a/arch/x86/xen/xen-asm.S
+++ b/arch/x86/xen/xen-asm.S
@@ -21,6 +21,45 @@
#include <linux/init.h>
#include <linux/linkage.h>
+.pushsection .noinstr.text, "ax"
+/*
+ * Disabling events is simply a matter of making the event mask
+ * non-zero.
+ */
+SYM_FUNC_START(xen_irq_disable_direct)
+ movb $1, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask
+ ret
+SYM_FUNC_END(xen_irq_disable_direct)
+
+/*
+ * Force an event check by making a hypercall, but preserve regs
+ * before making the call.
+ */
+SYM_FUNC_START(check_events)
+ FRAME_BEGIN
+ push %rax
+ push %rcx
+ push %rdx
+ push %rsi
+ push %rdi
+ push %r8
+ push %r9
+ push %r10
+ push %r11
+ call xen_force_evtchn_callback
+ pop %r11
+ pop %r10
+ pop %r9
+ pop %r8
+ pop %rdi
+ pop %rsi
+ pop %rdx
+ pop %rcx
+ pop %rax
+ FRAME_END
+ ret
+SYM_FUNC_END(check_events)
+
/*
* Enable events. This clears the event mask and tests the pending
* event status with one and operation. If there are pending events,
@@ -47,16 +86,6 @@ SYM_FUNC_START(xen_irq_enable_direct)
ret
SYM_FUNC_END(xen_irq_enable_direct)
-
-/*
- * Disabling events is simply a matter of making the event mask
- * non-zero.
- */
-SYM_FUNC_START(xen_irq_disable_direct)
- movb $1, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask
- ret
-SYM_FUNC_END(xen_irq_disable_direct)
-
/*
* (xen_)save_fl is used to get the current interrupt enable status.
* Callers expect the status to be in X86_EFLAGS_IF, and other bits
@@ -73,35 +102,6 @@ SYM_FUNC_START(xen_save_fl_direct)
ret
SYM_FUNC_END(xen_save_fl_direct)
-/*
- * Force an event check by making a hypercall, but preserve regs
- * before making the call.
- */
-SYM_FUNC_START(check_events)
- FRAME_BEGIN
- push %rax
- push %rcx
- push %rdx
- push %rsi
- push %rdi
- push %r8
- push %r9
- push %r10
- push %r11
- call xen_force_evtchn_callback
- pop %r11
- pop %r10
- pop %r9
- pop %r8
- pop %rdi
- pop %rsi
- pop %rdx
- pop %rcx
- pop %rax
- FRAME_END
- ret
-SYM_FUNC_END(check_events)
-
SYM_FUNC_START(xen_read_cr2)
FRAME_BEGIN
_ASM_MOV PER_CPU_VAR(xen_vcpu), %_ASM_AX
@@ -116,6 +116,7 @@ SYM_FUNC_START(xen_read_cr2_direct)
FRAME_END
ret
SYM_FUNC_END(xen_read_cr2_direct);
+.popsection
.macro xen_pv_trap name
SYM_CODE_START(xen_\name)
diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S
index cb6538ae2fe0..9e27b86a0c31 100644
--- a/arch/x86/xen/xen-head.S
+++ b/arch/x86/xen/xen-head.S
@@ -20,6 +20,23 @@
#include <xen/interface/xen-mca.h>
#include <asm/xen/interface.h>
+.pushsection .noinstr.text, "ax"
+ .balign PAGE_SIZE
+SYM_CODE_START(hypercall_page)
+ .rept (PAGE_SIZE / 32)
+ UNWIND_HINT_FUNC
+ .skip 31, 0x90
+ ret
+ .endr
+
+#define HYPERCALL(n) \
+ .equ xen_hypercall_##n, hypercall_page + __HYPERVISOR_##n * 32; \
+ .type xen_hypercall_##n, @function; .size xen_hypercall_##n, 32
+#include <asm/xen-hypercalls.h>
+#undef HYPERCALL
+SYM_CODE_END(hypercall_page)
+.popsection
+
#ifdef CONFIG_XEN_PV
__INIT
SYM_CODE_START(startup_xen)
@@ -64,23 +81,6 @@ SYM_CODE_END(asm_cpu_bringup_and_idle)
#endif
#endif
-.pushsection .text
- .balign PAGE_SIZE
-SYM_CODE_START(hypercall_page)
- .rept (PAGE_SIZE / 32)
- UNWIND_HINT_FUNC
- .skip 31, 0x90
- ret
- .endr
-
-#define HYPERCALL(n) \
- .equ xen_hypercall_##n, hypercall_page + __HYPERVISOR_##n * 32; \
- .type xen_hypercall_##n, @function; .size xen_hypercall_##n, 32
-#include <asm/xen-hypercalls.h>
-#undef HYPERCALL
-SYM_CODE_END(hypercall_page)
-.popsection
-
ELFNOTE(Xen, XEN_ELFNOTE_GUEST_OS, .asciz "linux")
ELFNOTE(Xen, XEN_ELFNOTE_GUEST_VERSION, .asciz "2.6")
ELFNOTE(Xen, XEN_ELFNOTE_XEN_VERSION, .asciz "xen-3.0")
diff --git a/arch/xtensa/include/asm/cacheflush.h b/arch/xtensa/include/asm/cacheflush.h
index cf907e5bf2f2..a8a041609c5d 100644
--- a/arch/xtensa/include/asm/cacheflush.h
+++ b/arch/xtensa/include/asm/cacheflush.h
@@ -120,7 +120,8 @@ void flush_cache_page(struct vm_area_struct*,
#define flush_cache_vunmap(start,end) flush_cache_all()
#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
-extern void flush_dcache_page(struct page*);
+void flush_dcache_page(struct page *);
+void flush_dcache_folio(struct folio *);
void local_flush_cache_range(struct vm_area_struct *vma,
unsigned long start, unsigned long end);
@@ -137,7 +138,9 @@ void local_flush_cache_page(struct vm_area_struct *vma,
#define flush_cache_vunmap(start,end) do { } while (0)
#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 0
+#define ARCH_IMPLEMENTS_FLUSH_DCACHE_FOLIO
#define flush_dcache_page(page) do { } while (0)
+static inline void flush_dcache_folio(struct folio *folio) { }
#define flush_icache_range local_flush_icache_range
#define flush_cache_page(vma, addr, pfn) do { } while (0)
diff --git a/arch/xtensa/include/asm/processor.h b/arch/xtensa/include/asm/processor.h
index 7f63aca6a0d3..ad15fbc57283 100644
--- a/arch/xtensa/include/asm/processor.h
+++ b/arch/xtensa/include/asm/processor.h
@@ -215,7 +215,7 @@ struct mm_struct;
/* Free all resources held by a thread. */
#define release_thread(thread) do { } while(0)
-extern unsigned long get_wchan(struct task_struct *p);
+extern unsigned long __get_wchan(struct task_struct *p);
#define KSTK_EIP(tsk) (task_pt_regs(tsk)->pc)
#define KSTK_ESP(tsk) (task_pt_regs(tsk)->areg[1])
diff --git a/arch/xtensa/kernel/process.c b/arch/xtensa/kernel/process.c
index 060165340612..47f933fed870 100644
--- a/arch/xtensa/kernel/process.c
+++ b/arch/xtensa/kernel/process.c
@@ -298,15 +298,12 @@ int copy_thread(unsigned long clone_flags, unsigned long usp_thread_fn,
* These bracket the sleeping functions..
*/
-unsigned long get_wchan(struct task_struct *p)
+unsigned long __get_wchan(struct task_struct *p)
{
unsigned long sp, pc;
unsigned long stack_page = (unsigned long) task_stack_page(p);
int count = 0;
- if (!p || p == current || task_is_running(p))
- return 0;
-
sp = p->thread.sp;
pc = MAKE_PC_FROM_RA(p->thread.ra, p->thread.sp);
diff --git a/arch/xtensa/platforms/iss/simdisk.c b/arch/xtensa/platforms/iss/simdisk.c
index 3cdfa00738e0..07b642c1916a 100644
--- a/arch/xtensa/platforms/iss/simdisk.c
+++ b/arch/xtensa/platforms/iss/simdisk.c
@@ -100,7 +100,7 @@ static void simdisk_transfer(struct simdisk *dev, unsigned long sector,
spin_unlock(&dev->lock);
}
-static blk_qc_t simdisk_submit_bio(struct bio *bio)
+static void simdisk_submit_bio(struct bio *bio)
{
struct simdisk *dev = bio->bi_bdev->bd_disk->private_data;
struct bio_vec bvec;
@@ -118,7 +118,6 @@ static blk_qc_t simdisk_submit_bio(struct bio *bio)
}
bio_endio(bio);
- return BLK_QC_T_NONE;
}
static int simdisk_open(struct block_device *bdev, fmode_t mode)
@@ -259,6 +258,7 @@ static int __init simdisk_setup(struct simdisk *dev, int which,
struct proc_dir_entry *procdir)
{
char tmp[2] = { '0' + which, 0 };
+ int err = -ENOMEM;
dev->fd = -1;
dev->filename = NULL;
@@ -267,7 +267,7 @@ static int __init simdisk_setup(struct simdisk *dev, int which,
dev->gd = blk_alloc_disk(NUMA_NO_NODE);
if (!dev->gd)
- return -ENOMEM;
+ goto out;
dev->gd->major = simdisk_major;
dev->gd->first_minor = which;
dev->gd->minors = SIMDISK_MINORS;
@@ -275,10 +275,18 @@ static int __init simdisk_setup(struct simdisk *dev, int which,
dev->gd->private_data = dev;
snprintf(dev->gd->disk_name, 32, "simdisk%d", which);
set_capacity(dev->gd, 0);
- add_disk(dev->gd);
+ err = add_disk(dev->gd);
+ if (err)
+ goto out_cleanup_disk;
dev->procfile = proc_create_data(tmp, 0644, procdir, &simdisk_proc_ops, dev);
+
return 0;
+
+out_cleanup_disk:
+ blk_cleanup_disk(dev->gd);
+out:
+ return err;
}
static int __init simdisk_init(void)