From fb3bd914b3ec28f5fb697ac55c4846ac2d542855 Mon Sep 17 00:00:00 2001
From: "Borislav Petkov (AMD)" <bp@alien8.de>
Date: Wed, 28 Jun 2023 11:02:39 +0200
Subject: x86/srso: Add a Speculative RAS Overflow mitigation

Add a mitigation for the speculative return address stack overflow
vulnerability found on AMD processors.

The mitigation works by ensuring all RET instructions speculate to
a controlled location, similar to how speculation is controlled in the
retpoline sequence.  To accomplish this, the __x86_return_thunk forces
the CPU to mispredict every function return using a 'safe return'
sequence.

To ensure the safety of this mitigation, the kernel must ensure that the
safe return sequence is itself free from attacker interference.  In Zen3
and Zen4, this is accomplished by creating a BTB alias between the
untraining function srso_untrain_ret_alias() and the safe return
function srso_safe_ret_alias() which results in evicting a potentially
poisoned BTB entry and using that safe one for all function returns.

In older Zen1 and Zen2, this is accomplished using a reinterpretation
technique similar to Retbleed one: srso_untrain_ret() and
srso_safe_ret().

Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
---
 arch/x86/include/asm/processor.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'arch/x86/include/asm/processor.h')

diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index d46300e94f85..7c67db7c9f53 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -682,9 +682,11 @@ extern u16 get_llc_id(unsigned int cpu);
 #ifdef CONFIG_CPU_SUP_AMD
 extern u32 amd_get_nodes_per_socket(void);
 extern u32 amd_get_highest_perf(void);
+extern bool cpu_has_ibpb_brtype_microcode(void);
 #else
 static inline u32 amd_get_nodes_per_socket(void)	{ return 0; }
 static inline u32 amd_get_highest_perf(void)		{ return 0; }
+static inline bool cpu_has_ibpb_brtype_microcode(void)	{ return false; }
 #endif
 
 extern unsigned long arch_align_stack(unsigned long sp);
-- 
cgit v1.2.3-70-g09d2


From 77245f1c3c6495521f6a3af082696ee2f8ce3921 Mon Sep 17 00:00:00 2001
From: "Borislav Petkov (AMD)" <bp@alien8.de>
Date: Sat, 5 Aug 2023 00:06:43 +0200
Subject: x86/CPU/AMD: Do not leak quotient data after a division by 0

Under certain circumstances, an integer division by 0 which faults, can
leave stale quotient data from a previous division operation on Zen1
microarchitectures.

Do a dummy division 0/1 before returning from the #DE exception handler
in order to avoid any leaks of potentially sensitive data.

Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Cc: <stable@kernel.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/include/asm/cpufeatures.h |  1 +
 arch/x86/include/asm/processor.h   |  2 ++
 arch/x86/kernel/cpu/amd.c          | 19 +++++++++++++++++++
 arch/x86/kernel/traps.c            |  2 ++
 4 files changed, 24 insertions(+)

(limited to 'arch/x86/include/asm/processor.h')

diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 094f88fee536..b69b0d7756aa 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -495,4 +495,5 @@
 
 /* BUG word 2 */
 #define X86_BUG_SRSO			X86_BUG(1*32 + 0) /* AMD SRSO bug */
+#define X86_BUG_DIV0			X86_BUG(1*32 + 1) /* AMD DIV0 speculation bug */
 #endif /* _ASM_X86_CPUFEATURES_H */
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 7c67db7c9f53..973db0406528 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -683,10 +683,12 @@ extern u16 get_llc_id(unsigned int cpu);
 extern u32 amd_get_nodes_per_socket(void);
 extern u32 amd_get_highest_perf(void);
 extern bool cpu_has_ibpb_brtype_microcode(void);
+extern void amd_clear_divider(void);
 #else
 static inline u32 amd_get_nodes_per_socket(void)	{ return 0; }
 static inline u32 amd_get_highest_perf(void)		{ return 0; }
 static inline bool cpu_has_ibpb_brtype_microcode(void)	{ return false; }
+static inline void amd_clear_divider(void)		{ }
 #endif
 
 extern unsigned long arch_align_stack(unsigned long sp);
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 1e1e253038ce..b55d8f82b621 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -75,6 +75,10 @@ static const int amd_zenbleed[] =
 			   AMD_MODEL_RANGE(0x17, 0x60, 0x0, 0x7f, 0xf),
 			   AMD_MODEL_RANGE(0x17, 0xa0, 0x0, 0xaf, 0xf));
 
+static const int amd_div0[] =
+	AMD_LEGACY_ERRATUM(AMD_MODEL_RANGE(0x17, 0x00, 0x0, 0x2f, 0xf),
+			   AMD_MODEL_RANGE(0x17, 0x50, 0x0, 0x5f, 0xf));
+
 static bool cpu_has_amd_erratum(struct cpuinfo_x86 *cpu, const int *erratum)
 {
 	int osvw_id = *erratum++;
@@ -1130,6 +1134,11 @@ static void init_amd(struct cpuinfo_x86 *c)
 		WARN_ON_ONCE(msr_set_bit(MSR_EFER, _EFER_AUTOIBRS));
 
 	zenbleed_check(c);
+
+	if (cpu_has_amd_erratum(c, amd_div0)) {
+		pr_notice_once("AMD Zen1 DIV0 bug detected. Disable SMT for full protection.\n");
+		setup_force_cpu_bug(X86_BUG_DIV0);
+	}
 }
 
 #ifdef CONFIG_X86_32
@@ -1309,3 +1318,13 @@ bool cpu_has_ibpb_brtype_microcode(void)
 		return false;
 	}
 }
+
+/*
+ * Issue a DIV 0/1 insn to clear any division data from previous DIV
+ * operations.
+ */
+void noinstr amd_clear_divider(void)
+{
+	asm volatile(ALTERNATIVE("", "div %2\n\t", X86_BUG_DIV0)
+		     :: "a" (0), "d" (0), "r" (1));
+}
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 4a817d20ce3b..1885326a8f65 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -206,6 +206,8 @@ DEFINE_IDTENTRY(exc_divide_error)
 {
 	do_error_trap(regs, 0, "divide error", X86_TRAP_DE, SIGFPE,
 		      FPE_INTDIV, error_get_trap_addr(regs));
+
+	amd_clear_divider();
 }
 
 DEFINE_IDTENTRY(exc_overflow)
-- 
cgit v1.2.3-70-g09d2


From eb3515dc99c7c85f4170b50838136b2a193f8012 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 9 Aug 2023 15:05:00 +0200
Subject: x86: Move gds_ucode_mitigated() declaration to header

The declaration got placed in the .c file of the caller, but that
causes a warning for the definition:

arch/x86/kernel/cpu/bugs.c:682:6: error: no previous prototype for 'gds_ucode_mitigated' [-Werror=missing-prototypes]

Move it to a header where both sides can observe it instead.

Fixes: 81ac7e5d74174 ("KVM: Add GDS_NO support to KVM")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
Tested-by: Daniel Sneddon <daniel.sneddon@linux.intel.com>
Cc: stable@kernel.org
Link: https://lore.kernel.org/all/20230809130530.1913368-2-arnd%40kernel.org
---
 arch/x86/include/asm/processor.h | 2 ++
 arch/x86/kvm/x86.c               | 2 --
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch/x86/include/asm/processor.h')

diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 973db0406528..4ae2773b873d 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -731,4 +731,6 @@ bool arch_is_platform_page(u64 paddr);
 #define arch_is_platform_page arch_is_platform_page
 #endif
 
+extern bool gds_ucode_mitigated(void);
+
 #endif /* _ASM_X86_PROCESSOR_H */
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 19d9ff92d699..c381770bcbf1 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -314,8 +314,6 @@ u64 __read_mostly host_xcr0;
 
 static struct kmem_cache *x86_emulator_cache;
 
-extern bool gds_ucode_mitigated(void);
-
 /*
  * When called, it means the previous get/set msr reached an invalid msr.
  * Return true if we want to ignore/silent this failed msr access.
-- 
cgit v1.2.3-70-g09d2


From c8afaa1b0f8bc93d013ab2ea6b9649958af3f1d3 Mon Sep 17 00:00:00 2001
From: Mateusz Guzik <mjguzik@gmail.com>
Date: Sat, 12 Aug 2023 18:15:54 +0200
Subject: locking: remove spin_lock_prefetch

The only remaining consumer is new_inode, where it showed up in 2001 as
commit c37fa164f793 ("v2.4.9.9 -> v2.4.9.10") in a historical repo [1]
with a changelog which does not mention it.

Since then the line got only touched up to keep compiling.

While it may have been of benefit back in the day, it is guaranteed to
at best not get in the way in the multicore setting -- as the code
performs *a lot* of work between the prefetch and actual lock acquire,
any contention means the cacheline is already invalid by the time the
routine calls spin_lock().  It adds spurious traffic, for short.

On top of it prefetch is notoriously tricky to use for single-threaded
purposes, making it questionable from the get go.

As such, remove it.

I admit upfront I did not see value in benchmarking this change, but I
can do it if that is deemed appropriate.

Removal from new_inode and of the entire thing are in the same patch as
requested by Linus, so whatever weird looks can be directed at that guy.

Link: https://git.kernel.org/pub/scm/linux/kernel/git/tglx/history.git/commit/fs/inode.c?id=c37fa164f793735b32aa3f53154ff1a7659e6442 [1]
Signed-off-by: Mateusz Guzik <mjguzik@gmail.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/alpha/include/asm/processor.h                          | 13 -------------
 arch/arm64/include/asm/processor.h                          |  8 --------
 arch/ia64/include/asm/processor.h                           |  3 ---
 .../include/asm/mach-cavium-octeon/cpu-feature-overrides.h  |  2 --
 arch/powerpc/include/asm/processor.h                        |  3 ---
 arch/sparc/include/asm/processor_64.h                       |  3 ---
 arch/x86/include/asm/processor.h                            |  6 ------
 fs/inode.c                                                  |  3 ---
 include/linux/prefetch.h                                    |  7 +------
 9 files changed, 1 insertion(+), 47 deletions(-)

(limited to 'arch/x86/include/asm/processor.h')

diff --git a/arch/alpha/include/asm/processor.h b/arch/alpha/include/asm/processor.h
index 714abe494e5f..55bb1c09fd39 100644
--- a/arch/alpha/include/asm/processor.h
+++ b/arch/alpha/include/asm/processor.h
@@ -47,12 +47,6 @@ unsigned long __get_wchan(struct task_struct *p);
 
 #define ARCH_HAS_PREFETCH
 #define ARCH_HAS_PREFETCHW
-#define ARCH_HAS_SPINLOCK_PREFETCH
-
-#ifndef CONFIG_SMP
-/* Nothing to prefetch. */
-#define spin_lock_prefetch(lock)  	do { } while (0)
-#endif
 
 extern inline void prefetch(const void *ptr)  
 { 
@@ -64,11 +58,4 @@ extern inline void prefetchw(const void *ptr)
 	__builtin_prefetch(ptr, 1, 3);
 }
 
-#ifdef CONFIG_SMP
-extern inline void spin_lock_prefetch(const void *ptr)  
-{
-	__builtin_prefetch(ptr, 1, 3);
-}
-#endif
-
 #endif /* __ASM_ALPHA_PROCESSOR_H */
diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
index 3918f2a67970..e5bc54522e71 100644
--- a/arch/arm64/include/asm/processor.h
+++ b/arch/arm64/include/asm/processor.h
@@ -359,14 +359,6 @@ static inline void prefetchw(const void *ptr)
 	asm volatile("prfm pstl1keep, %a0\n" : : "p" (ptr));
 }
 
-#define ARCH_HAS_SPINLOCK_PREFETCH
-static inline void spin_lock_prefetch(const void *ptr)
-{
-	asm volatile(ARM64_LSE_ATOMIC_INSN(
-		     "prfm pstl1strm, %a0",
-		     "nop") : : "p" (ptr));
-}
-
 extern unsigned long __ro_after_init signal_minsigstksz; /* sigframe size */
 extern void __init minsigstksz_setup(void);
 
diff --git a/arch/ia64/include/asm/processor.h b/arch/ia64/include/asm/processor.h
index d1978e004054..47e3801b526a 100644
--- a/arch/ia64/include/asm/processor.h
+++ b/arch/ia64/include/asm/processor.h
@@ -634,7 +634,6 @@ ia64_imva (void *addr)
 
 #define ARCH_HAS_PREFETCH
 #define ARCH_HAS_PREFETCHW
-#define ARCH_HAS_SPINLOCK_PREFETCH
 #define PREFETCH_STRIDE			L1_CACHE_BYTES
 
 static inline void
@@ -649,8 +648,6 @@ prefetchw (const void *x)
 	ia64_lfetch_excl(ia64_lfhint_none, x);
 }
 
-#define spin_lock_prefetch(x)	prefetchw(x)
-
 extern unsigned long boot_option_idle_override;
 
 enum idle_boot_override {IDLE_NO_OVERRIDE=0, IDLE_HALT, IDLE_FORCE_MWAIT,
diff --git a/arch/mips/include/asm/mach-cavium-octeon/cpu-feature-overrides.h b/arch/mips/include/asm/mach-cavium-octeon/cpu-feature-overrides.h
index 9151dcd9d0d5..af9cea21c853 100644
--- a/arch/mips/include/asm/mach-cavium-octeon/cpu-feature-overrides.h
+++ b/arch/mips/include/asm/mach-cavium-octeon/cpu-feature-overrides.h
@@ -58,8 +58,6 @@
 
 #define cpu_has_rixi		(cpu_data[0].cputype != CPU_CAVIUM_OCTEON)
 
-#define ARCH_HAS_SPINLOCK_PREFETCH 1
-#define spin_lock_prefetch(x) prefetch(x)
 #define PREFETCH_STRIDE 128
 
 #ifdef __OCTEON__
diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h
index 8a6754ffdc7e..a6c7069bec5d 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -393,7 +393,6 @@ int validate_sp_size(unsigned long sp, struct task_struct *p,
  */
 #define ARCH_HAS_PREFETCH
 #define ARCH_HAS_PREFETCHW
-#define ARCH_HAS_SPINLOCK_PREFETCH
 
 static inline void prefetch(const void *x)
 {
@@ -411,8 +410,6 @@ static inline void prefetchw(const void *x)
 	__asm__ __volatile__ ("dcbtst 0,%0" : : "r" (x));
 }
 
-#define spin_lock_prefetch(x)	prefetchw(x)
-
 /* asm stubs */
 extern unsigned long isa300_idle_stop_noloss(unsigned long psscr_val);
 extern unsigned long isa300_idle_stop_mayloss(unsigned long psscr_val);
diff --git a/arch/sparc/include/asm/processor_64.h b/arch/sparc/include/asm/processor_64.h
index 2667f35d5ea5..0a0d5c3d184c 100644
--- a/arch/sparc/include/asm/processor_64.h
+++ b/arch/sparc/include/asm/processor_64.h
@@ -213,7 +213,6 @@ unsigned long __get_wchan(struct task_struct *task);
  */
 #define ARCH_HAS_PREFETCH
 #define ARCH_HAS_PREFETCHW
-#define ARCH_HAS_SPINLOCK_PREFETCH
 
 static inline void prefetch(const void *x)
 {
@@ -239,8 +238,6 @@ static inline void prefetchw(const void *x)
 			     : "r" (x));
 }
 
-#define spin_lock_prefetch(x)	prefetchw(x)
-
 #define HAVE_ARCH_PICK_MMAP_LAYOUT
 
 int do_mathemu(struct pt_regs *regs, struct fpustate *f, bool illegal_insn_trap);
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 4ae2773b873d..fd750247ca89 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -586,7 +586,6 @@ extern char			ignore_fpu_irq;
 
 #define HAVE_ARCH_PICK_MMAP_LAYOUT 1
 #define ARCH_HAS_PREFETCHW
-#define ARCH_HAS_SPINLOCK_PREFETCH
 
 #ifdef CONFIG_X86_32
 # define BASE_PREFETCH		""
@@ -620,11 +619,6 @@ static __always_inline void prefetchw(const void *x)
 			  "m" (*(const char *)x));
 }
 
-static inline void spin_lock_prefetch(const void *x)
-{
-	prefetchw(x);
-}
-
 #define TOP_OF_INIT_STACK ((unsigned long)&init_stack + sizeof(init_stack) - \
 			   TOP_OF_KERNEL_STACK_PADDING)
 
diff --git a/fs/inode.c b/fs/inode.c
index 8fefb69e1f84..67611a360031 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -16,7 +16,6 @@
 #include <linux/fsnotify.h>
 #include <linux/mount.h>
 #include <linux/posix_acl.h>
-#include <linux/prefetch.h>
 #include <linux/buffer_head.h> /* for inode_has_buffers */
 #include <linux/ratelimit.h>
 #include <linux/list_lru.h>
@@ -1041,8 +1040,6 @@ struct inode *new_inode(struct super_block *sb)
 {
 	struct inode *inode;
 
-	spin_lock_prefetch(&sb->s_inode_list_lock);
-
 	inode = new_inode_pseudo(sb);
 	if (inode)
 		inode_sb_list_add(inode);
diff --git a/include/linux/prefetch.h b/include/linux/prefetch.h
index b83a3f944f28..b068e2e60939 100644
--- a/include/linux/prefetch.h
+++ b/include/linux/prefetch.h
@@ -25,11 +25,10 @@ struct page;
 	prefetch() should be defined by the architecture, if not, the 
 	#define below provides a no-op define.	
 	
-	There are 3 prefetch() macros:
+	There are 2 prefetch() macros:
 	
 	prefetch(x)  	- prefetches the cacheline at "x" for read
 	prefetchw(x)	- prefetches the cacheline at "x" for write
-	spin_lock_prefetch(x) - prefetches the spinlock *x for taking
 	
 	there is also PREFETCH_STRIDE which is the architecure-preferred 
 	"lookahead" size for prefetching streamed operations.
@@ -44,10 +43,6 @@ struct page;
 #define prefetchw(x) __builtin_prefetch(x,1)
 #endif
 
-#ifndef ARCH_HAS_SPINLOCK_PREFETCH
-#define spin_lock_prefetch(x) prefetchw(x)
-#endif
-
 #ifndef PREFETCH_STRIDE
 #define PREFETCH_STRIDE (4*L1_CACHE_BYTES)
 #endif
-- 
cgit v1.2.3-70-g09d2