-rw-r--r--   arch/x86/include/asm/pgtable_types.h    8
-rw-r--r--   arch/x86/kernel/cpu/amd.c               2
-rw-r--r--   arch/x86/kernel/cpu/cacheinfo.c        43
-rw-r--r--   arch/x86/kernel/cpu/intel.c             4
-rw-r--r--   arch/x86/kernel/cpu/topology.c          6
-rw-r--r--   arch/x86/kernel/fpu/signal.c           20
-rw-r--r--   arch/x86/kernel/fpu/xstate.h           27
-rw-r--r--   arch/x86/kernel/relocate_kernel_64.S    7
-rw-r--r--   arch/x86/mm/ident_map.c                 6
-rw-r--r--   arch/x86/mm/pti.c                       2
-rw-r--r--   drivers/base/cacheinfo.c               14
11 files changed, 81 insertions(+), 58 deletions(-)
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index 6f82e75b6149..4b804531b03c 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -36,10 +36,12 @@
#define _PAGE_BIT_DEVMAP _PAGE_BIT_SOFTW4
#ifdef CONFIG_X86_64
-#define _PAGE_BIT_SAVED_DIRTY _PAGE_BIT_SOFTW5 /* Saved Dirty bit */
+#define _PAGE_BIT_SAVED_DIRTY _PAGE_BIT_SOFTW5 /* Saved Dirty bit (leaf) */
+#define _PAGE_BIT_NOPTISHADOW _PAGE_BIT_SOFTW5 /* No PTI shadow (root PGD) */
#else
/* Shared with _PAGE_BIT_UFFD_WP which is not supported on 32 bit */
-#define _PAGE_BIT_SAVED_DIRTY _PAGE_BIT_SOFTW2 /* Saved Dirty bit */
+#define _PAGE_BIT_SAVED_DIRTY _PAGE_BIT_SOFTW2 /* Saved Dirty bit (leaf) */
+#define _PAGE_BIT_NOPTISHADOW _PAGE_BIT_SOFTW2 /* No PTI shadow (root PGD) */
#endif
/* If _PAGE_BIT_PRESENT is clear, we use these: */
@@ -139,6 +141,8 @@
#define _PAGE_PROTNONE (_AT(pteval_t, 1) << _PAGE_BIT_PROTNONE)
+#define _PAGE_NOPTISHADOW (_AT(pteval_t, 1) << _PAGE_BIT_NOPTISHADOW)
+
/*
* Set of bits not changed in pte_modify. The pte's
* protection key is treated like _PAGE_RW, for
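The two flags can share one software page-table bit because they apply to
disjoint entry types: Saved Dirty is only interpreted in leaf entries, while
_PAGE_NOPTISHADOW is only consulted on root PGD entries. A minimal sketch of
the resulting invariant (the helper name is hypothetical; the real consumer
is __pti_set_user_pgtbl() in the pti.c hunk below):

static inline bool pgd_user_shadow_allowed(pgd_t pgd)
{
	/* Root PGD entries tagged _PAGE_NOPTISHADOW, e.g. the identity
	 * mappings built by kernel_ident_mapping_init(), must never be
	 * propagated into the PTI user page-table half. */
	return !(pgd.pgd & _PAGE_NOPTISHADOW);
}
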
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index d8408aafeed9..79d2e17f6582 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -1065,7 +1065,7 @@ static void init_amd(struct cpuinfo_x86 *c)
*/
if (spectre_v2_in_eibrs_mode(spectre_v2_enabled) &&
cpu_has(c, X86_FEATURE_AUTOIBRS))
- WARN_ON_ONCE(msr_set_bit(MSR_EFER, _EFER_AUTOIBRS));
+ WARN_ON_ONCE(msr_set_bit(MSR_EFER, _EFER_AUTOIBRS) < 0);
/* AMD CPUs don't need fencing after x2APIC/TSC_DEADLINE MSR writes. */
clear_cpu_cap(c, X86_FEATURE_APIC_MSRS_FENCE);
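The fix depends on msr_set_bit()'s three-way return convention: negative when
the MSR access faults, 0 when the bit was already set, 1 when the bit was
actually written. Only the negative case is an error, so the old code warned
spuriously whenever the write legitimately changed EFER. A sketch of a caller
handling the convention (message text is illustrative):

int ret = msr_set_bit(MSR_EFER, _EFER_AUTOIBRS);

if (ret < 0)
	pr_warn_once("could not set EFER.AUTOIBRS\n");	/* MSR access faulted */
/* ret == 0: bit was already set; ret == 1: bit was clear and is now set.
 * Neither is an error, which is what the old WARN_ON_ONCE() missed. */
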
diff --git a/arch/x86/kernel/cpu/cacheinfo.c b/arch/x86/kernel/cpu/cacheinfo.c
index 392d09c936d6..e6fa03ed9172 100644
--- a/arch/x86/kernel/cpu/cacheinfo.c
+++ b/arch/x86/kernel/cpu/cacheinfo.c
@@ -178,8 +178,6 @@ struct _cpuid4_info_regs {
struct amd_northbridge *nb;
};
-static unsigned short num_cache_leaves;
-
/* AMD doesn't have CPUID4. Emulate it here to report the same
information to the user. This makes some assumptions about the machine:
L2 not shared, no SMT etc. that is currently true on AMD CPUs.
@@ -717,20 +715,23 @@ void cacheinfo_hygon_init_llc_id(struct cpuinfo_x86 *c)
void init_amd_cacheinfo(struct cpuinfo_x86 *c)
{
+ struct cpu_cacheinfo *ci = get_cpu_cacheinfo(c->cpu_index);
if (boot_cpu_has(X86_FEATURE_TOPOEXT)) {
- num_cache_leaves = find_num_cache_leaves(c);
+ ci->num_leaves = find_num_cache_leaves(c);
} else if (c->extended_cpuid_level >= 0x80000006) {
if (cpuid_edx(0x80000006) & 0xf000)
- num_cache_leaves = 4;
+ ci->num_leaves = 4;
else
- num_cache_leaves = 3;
+ ci->num_leaves = 3;
}
}
void init_hygon_cacheinfo(struct cpuinfo_x86 *c)
{
- num_cache_leaves = find_num_cache_leaves(c);
+ struct cpu_cacheinfo *ci = get_cpu_cacheinfo(c->cpu_index);
+
+ ci->num_leaves = find_num_cache_leaves(c);
}
void init_intel_cacheinfo(struct cpuinfo_x86 *c)
@@ -740,21 +741,21 @@ void init_intel_cacheinfo(struct cpuinfo_x86 *c)
unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */
unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */
unsigned int l2_id = 0, l3_id = 0, num_threads_sharing, index_msb;
+ struct cpu_cacheinfo *ci = get_cpu_cacheinfo(c->cpu_index);
if (c->cpuid_level > 3) {
- static int is_initialized;
-
- if (is_initialized == 0) {
- /* Init num_cache_leaves from boot CPU */
- num_cache_leaves = find_num_cache_leaves(c);
- is_initialized++;
- }
+ /*
+ * There should be at least one leaf. A non-zero value means
+ * that the number of leaves has been initialized.
+ */
+ if (!ci->num_leaves)
+ ci->num_leaves = find_num_cache_leaves(c);
/*
* Whenever possible use cpuid(4), deterministic cache
* parameters cpuid leaf to find the cache details
*/
- for (i = 0; i < num_cache_leaves; i++) {
+ for (i = 0; i < ci->num_leaves; i++) {
struct _cpuid4_info_regs this_leaf = {};
int retval;
@@ -790,14 +791,14 @@ void init_intel_cacheinfo(struct cpuinfo_x86 *c)
* Don't use cpuid2 if cpuid4 is supported. For P4, we use cpuid2 for
* trace cache
*/
- if ((num_cache_leaves == 0 || c->x86 == 15) && c->cpuid_level > 1) {
+ if ((!ci->num_leaves || c->x86 == 15) && c->cpuid_level > 1) {
/* supports eax=2 call */
int j, n;
unsigned int regs[4];
unsigned char *dp = (unsigned char *)regs;
int only_trace = 0;
- if (num_cache_leaves != 0 && c->x86 == 15)
+ if (ci->num_leaves && c->x86 == 15)
only_trace = 1;
/* Number of times to iterate */
@@ -991,14 +992,12 @@ static void ci_leaf_init(struct cacheinfo *this_leaf,
int init_cache_level(unsigned int cpu)
{
- struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
+ struct cpu_cacheinfo *ci = get_cpu_cacheinfo(cpu);
- if (!num_cache_leaves)
+ /* There should be at least one leaf. */
+ if (!ci->num_leaves)
return -ENOENT;
- if (!this_cpu_ci)
- return -EINVAL;
- this_cpu_ci->num_levels = 3;
- this_cpu_ci->num_leaves = num_cache_leaves;
+
return 0;
}
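With the static num_cache_leaves gone, the leaf count lives in the per-CPU
struct cpu_cacheinfo, so CPUs that enumerate different numbers of cache
leaves no longer overwrite one shared value (assumed motivation). The lookup
pattern now used throughout this file, in sketch form:

struct cpu_cacheinfo *ci = get_cpu_cacheinfo(c->cpu_index);
unsigned int i;

/* Enumerate once per CPU; a non-zero count means already initialized. */
if (!ci->num_leaves)
	ci->num_leaves = find_num_cache_leaves(c);

for (i = 0; i < ci->num_leaves; i++) {
	/* ... fill one struct _cpuid4_info_regs per CPUID.4 subleaf ... */
}
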
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index d1de300af173..8ded9f859a3a 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -555,7 +555,9 @@ static void init_intel(struct cpuinfo_x86 *c)
c->x86_vfm == INTEL_WESTMERE_EX))
set_cpu_bug(c, X86_BUG_CLFLUSH_MONITOR);
- if (boot_cpu_has(X86_FEATURE_MWAIT) && c->x86_vfm == INTEL_ATOM_GOLDMONT)
+ if (boot_cpu_has(X86_FEATURE_MWAIT) &&
+ (c->x86_vfm == INTEL_ATOM_GOLDMONT ||
+ c->x86_vfm == INTEL_LUNARLAKE_M))
set_cpu_bug(c, X86_BUG_MONITOR);
#ifdef CONFIG_X86_64
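c->x86_vfm packs vendor, family and model into a single integer (via the
VFM_* helpers in <asm/cpu_device_id.h>), so extending the erratum to Lunar
Lake is one extra equality test rather than a new family/model pair check.
Sketch, with the flag's meaning spelled out:

/* X86_BUG_MONITOR: MWAIT may miss a write to the monitored cacheline,
 * so remote wakeups need an IPI instead of relying on MONITOR/MWAIT. */
if (c->x86_vfm == INTEL_ATOM_GOLDMONT || c->x86_vfm == INTEL_LUNARLAKE_M)
	set_cpu_bug(c, X86_BUG_MONITOR);
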
diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c
index 621a151ccf7d..b2e313ea17bf 100644
--- a/arch/x86/kernel/cpu/topology.c
+++ b/arch/x86/kernel/cpu/topology.c
@@ -428,8 +428,8 @@ void __init topology_apply_cmdline_limits_early(void)
{
unsigned int possible = nr_cpu_ids;
- /* 'maxcpus=0' 'nosmp' 'nolapic' 'disableapic' 'noapic' */
- if (!setup_max_cpus || ioapic_is_disabled || apic_is_disabled)
+ /* 'maxcpus=0' 'nosmp' 'nolapic' 'disableapic' */
+ if (!setup_max_cpus || apic_is_disabled)
possible = 1;
/* 'possible_cpus=N' */
@@ -443,7 +443,7 @@ void __init topology_apply_cmdline_limits_early(void)
static __init bool restrict_to_up(void)
{
- if (!smp_found_config || ioapic_is_disabled)
+ if (!smp_found_config)
return true;
/*
* XEN PV is special as it does not advertise the local APIC
diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c
index 1065ab995305..8f62e0666dea 100644
--- a/arch/x86/kernel/fpu/signal.c
+++ b/arch/x86/kernel/fpu/signal.c
@@ -64,16 +64,6 @@ setfx:
}
/*
- * Update the value of PKRU register that was already pushed onto the signal frame.
- */
-static inline int update_pkru_in_sigframe(struct xregs_state __user *buf, u32 pkru)
-{
- if (unlikely(!cpu_feature_enabled(X86_FEATURE_OSPKE)))
- return 0;
- return __put_user(pkru, (unsigned int __user *)get_xsave_addr_user(buf, XFEATURE_PKRU));
-}
-
-/*
* Signal frame handlers.
*/
static inline bool save_fsave_header(struct task_struct *tsk, void __user *buf)
@@ -168,14 +158,8 @@ static inline bool save_xstate_epilog(void __user *buf, int ia32_frame,
static inline int copy_fpregs_to_sigframe(struct xregs_state __user *buf, u32 pkru)
{
- int err = 0;
-
- if (use_xsave()) {
- err = xsave_to_user_sigframe(buf);
- if (!err)
- err = update_pkru_in_sigframe(buf, pkru);
- return err;
- }
+ if (use_xsave())
+ return xsave_to_user_sigframe(buf, pkru);
if (use_fxsr())
return fxsave_to_user_sigframe((struct fxregs_state __user *) buf);
diff --git a/arch/x86/kernel/fpu/xstate.h b/arch/x86/kernel/fpu/xstate.h
index 0b86a5002c84..aa16f1a1bbcf 100644
--- a/arch/x86/kernel/fpu/xstate.h
+++ b/arch/x86/kernel/fpu/xstate.h
@@ -69,6 +69,28 @@ static inline u64 xfeatures_mask_independent(void)
return fpu_kernel_cfg.independent_features;
}
+/*
+ * Update the value of PKRU register that was already pushed onto the signal frame.
+ */
+static inline int update_pkru_in_sigframe(struct xregs_state __user *buf, u64 mask, u32 pkru)
+{
+ u64 xstate_bv;
+ int err;
+
+ if (unlikely(!cpu_feature_enabled(X86_FEATURE_OSPKE)))
+ return 0;
+
+ /* Mark PKRU as in-use so that it is restored correctly. */
+ xstate_bv = (mask & xfeatures_in_use()) | XFEATURE_MASK_PKRU;
+
+ err = __put_user(xstate_bv, &buf->header.xfeatures);
+ if (err)
+ return err;
+
+ /* Update PKRU value in the userspace xsave buffer. */
+ return __put_user(pkru, (unsigned int __user *)get_xsave_addr_user(buf, XFEATURE_PKRU));
+}
+
/* XSAVE/XRSTOR wrapper functions */
#ifdef CONFIG_X86_64
@@ -256,7 +278,7 @@ static inline u64 xfeatures_need_sigframe_write(void)
* The caller has to zero buf::header before calling this because XSAVE*
* does not touch the reserved fields in the header.
*/
-static inline int xsave_to_user_sigframe(struct xregs_state __user *buf)
+static inline int xsave_to_user_sigframe(struct xregs_state __user *buf, u32 pkru)
{
/*
* Include the features which are not xsaved/rstored by the kernel
@@ -281,6 +303,9 @@ static inline int xsave_to_user_sigframe(struct xregs_state __user *buf)
XSTATE_OP(XSAVE, buf, lmask, hmask, err);
clac();
+ if (!err)
+ err = update_pkru_in_sigframe(buf, mask, pkru);
+
return err;
}
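The xstate_bv fixup is the crux: at sigreturn, XRSTOR initializes every
component whose bit is clear in the frame's xstate_bv rather than loading it
from the buffer. If the thread's PKRU happened to be in its init state, XSAVE
leaves that bit clear, and a bare __put_user() of the new value would be
silently discarded. Hence the helper recomputes the header after XSAVE has
written it; in sketch form:

/* Sketch of the idea:
 *   xstate_bv = (mask & xfeatures_in_use()) | XFEATURE_MASK_PKRU;
 * xfeatures_in_use() reads XINUSE (XGETBV(1)), i.e. which components are
 * currently non-init; OR-ing in the PKRU bit marks it in-use even if the
 * hardware considered it init, so sigreturn's XRSTOR loads the value
 * written below instead of resetting PKRU.  Order matters: this must run
 * after XSAVE, which (re)writes the header, or the bit would be lost.
 */
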
diff --git a/arch/x86/kernel/relocate_kernel_64.S b/arch/x86/kernel/relocate_kernel_64.S
index e9e88c342f75..1236f25fc8d1 100644
--- a/arch/x86/kernel/relocate_kernel_64.S
+++ b/arch/x86/kernel/relocate_kernel_64.S
@@ -242,6 +242,13 @@ SYM_CODE_START_LOCAL_NOALIGN(virtual_mapped)
movq CR0(%r8), %r8
movq %rax, %cr3
movq %r8, %cr0
+
+#ifdef CONFIG_KEXEC_JUMP
+ /* Saved in save_processor_state. */
+ movq $saved_context, %rax
+ lgdt saved_context_gdt_desc(%rax)
+#endif
+
movq %rbp, %rax
popf
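The lgdt pairs with the save side in save_processor_state(): across a kexec
jump the trampoline runs on a minimal identity-mapped GDT, so the original
descriptor has to be reloaded before returning to the saved kernel context.
A sketch of the save-side counterpart (field names per struct saved_context;
treat the details as an assumption):

/* __save_processor_state(), arch/x86/power/cpu.c (sketch) */
ctxt->gdt_desc.size    = GDT_SIZE - 1;
ctxt->gdt_desc.address = (unsigned long)get_cpu_gdt_rw(smp_processor_id());
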
diff --git a/arch/x86/mm/ident_map.c b/arch/x86/mm/ident_map.c
index 437e96fb4977..5ab7bd2f1983 100644
--- a/arch/x86/mm/ident_map.c
+++ b/arch/x86/mm/ident_map.c
@@ -174,7 +174,7 @@ static int ident_p4d_init(struct x86_mapping_info *info, p4d_t *p4d_page,
if (result)
return result;
- set_p4d(p4d, __p4d(__pa(pud) | info->kernpg_flag));
+ set_p4d(p4d, __p4d(__pa(pud) | info->kernpg_flag | _PAGE_NOPTISHADOW));
}
return 0;
@@ -218,14 +218,14 @@ int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page,
if (result)
return result;
if (pgtable_l5_enabled()) {
- set_pgd(pgd, __pgd(__pa(p4d) | info->kernpg_flag));
+ set_pgd(pgd, __pgd(__pa(p4d) | info->kernpg_flag | _PAGE_NOPTISHADOW));
} else {
/*
* With p4d folded, pgd is equal to p4d.
* The pgd entry has to point to the pud page table in this case.
*/
pud_t *pud = pud_offset(p4d, 0);
- set_pgd(pgd, __pgd(__pa(pud) | info->kernpg_flag));
+ set_pgd(pgd, __pgd(__pa(pud) | info->kernpg_flag | _PAGE_NOPTISHADOW));
}
}
diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c
index 851ec8f1363a..5f0d579932c6 100644
--- a/arch/x86/mm/pti.c
+++ b/arch/x86/mm/pti.c
@@ -132,7 +132,7 @@ pgd_t __pti_set_user_pgtbl(pgd_t *pgdp, pgd_t pgd)
* Top-level entries added to init_mm's usermode pgd after boot
* will not be automatically propagated to other mms.
*/
- if (!pgdp_maps_userspace(pgdp))
+ if (!pgdp_maps_userspace(pgdp) || (pgd.pgd & _PAGE_NOPTISHADOW))
return pgd;
/*
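Together with the ident_map.c hunks above, this closes the loop: the kexec
identity map can cover user-half addresses, and without the flag
__pti_set_user_pgtbl() would dutifully mirror those PGD entries into the PTI
user shadow (assumed failure mode). The producer/consumer pair in sketch
form:

/* Producer: the identity-map builder tags its root entries. */
set_pgd(pgd, __pgd(__pa(p4d) | info->kernpg_flag | _PAGE_NOPTISHADOW));

/* Consumer: PTI skips shadowing anything so tagged. */
if (!pgdp_maps_userspace(pgdp) || (pgd.pgd & _PAGE_NOPTISHADOW))
	return pgd;	/* update the kernel copy only */
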
diff --git a/drivers/base/cacheinfo.c b/drivers/base/cacheinfo.c
index 609935ad5091..cf0d455209d7 100644
--- a/drivers/base/cacheinfo.c
+++ b/drivers/base/cacheinfo.c
@@ -58,7 +58,7 @@ bool last_level_cache_is_valid(unsigned int cpu)
{
struct cacheinfo *llc;
- if (!cache_leaves(cpu))
+ if (!cache_leaves(cpu) || !per_cpu_cacheinfo(cpu))
return false;
llc = per_cpu_cacheinfo_idx(cpu, cache_leaves(cpu) - 1);
@@ -458,11 +458,9 @@ int __weak populate_cache_leaves(unsigned int cpu)
return -ENOENT;
}
-static inline
-int allocate_cache_info(int cpu)
+static inline int allocate_cache_info(int cpu)
{
- per_cpu_cacheinfo(cpu) = kcalloc(cache_leaves(cpu),
- sizeof(struct cacheinfo), GFP_ATOMIC);
+ per_cpu_cacheinfo(cpu) = kcalloc(cache_leaves(cpu), sizeof(struct cacheinfo), GFP_ATOMIC);
if (!per_cpu_cacheinfo(cpu)) {
cache_leaves(cpu) = 0;
return -ENOMEM;
@@ -534,7 +532,11 @@ static inline int init_level_allocate_ci(unsigned int cpu)
*/
ci_cacheinfo(cpu)->early_ci_levels = false;
- if (cache_leaves(cpu) <= early_leaves)
+ /*
+ * Some architectures (e.g., x86) do not use early initialization.
+ * Allocate memory now in such case.
+ */
+ if (cache_leaves(cpu) <= early_leaves && per_cpu_cacheinfo(cpu))
return 0;
kfree(per_cpu_cacheinfo(cpu));
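Net effect of the driver-side changes: per_cpu_cacheinfo() may legitimately
be NULL until this point on architectures that skip early initialization, so
both the validity check above and the allocation path must tolerate it. The
assumed init ordering on x86 after this series, in sketch form:

/*
 * init_intel_cacheinfo()/init_amd_cacheinfo()   // set ci->num_leaves
 *   ...
 *   detect_cache_attributes(cpu)
 *     init_cache_level(cpu)           // -ENOENT when num_leaves == 0
 *     init_level_allocate_ci(cpu)     // kcalloc here: no early buffer
 *                                     // existed, so allocate it now
 */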