Diffstat (limited to 'arch')
-rw-r--r--  arch/arc/net/bpf_jit.h | 2
-rw-r--r--  arch/arc/net/bpf_jit_arcv2.c | 10
-rw-r--r--  arch/arc/net/bpf_jit_core.c | 22
-rw-r--r--  arch/arm/boot/dts/nxp/imx/imx53-qsb-common.dtsi | 2
-rw-r--r--  arch/arm/boot/dts/nxp/imx/imx53-qsb-hdmi.dtso | 6
-rw-r--r--  arch/arm/include/asm/efi.h | 13
-rw-r--r--  arch/arm/kernel/ftrace.c | 17
-rw-r--r--  arch/arm64/boot/dts/freescale/imx8mm-verdin.dtsi | 3
-rw-r--r--  arch/arm64/boot/dts/freescale/imx8mp-dhcom-som.dtsi | 2
-rw-r--r--  arch/arm64/boot/dts/freescale/imx8mp-venice-gw73xx.dtsi | 2
-rw-r--r--  arch/arm64/boot/dts/freescale/imx8qm-mek.dts | 2
-rw-r--r--  arch/arm64/boot/dts/freescale/imx93-11x11-evk.dts | 1
-rw-r--r--  arch/arm64/include/asm/el2_setup.h | 6
-rw-r--r--  arch/arm64/include/asm/io.h | 36
-rw-r--r--  arch/arm64/include/asm/kvm_arm.h | 6
-rw-r--r--  arch/arm64/include/asm/kvm_emulate.h | 71
-rw-r--r--  arch/arm64/include/asm/kvm_host.h | 25
-rw-r--r--  arch/arm64/include/asm/kvm_hyp.h | 4
-rw-r--r--  arch/arm64/include/asm/kvm_pkvm.h | 9
-rw-r--r--  arch/arm64/kernel/armv8_deprecated.c | 3
-rw-r--r--  arch/arm64/kernel/efi.c | 2
-rw-r--r--  arch/arm64/kvm/arm.c | 76
-rw-r--r--  arch/arm64/kvm/emulate-nested.c | 21
-rw-r--r--  arch/arm64/kvm/fpsimd.c | 11
-rw-r--r--  arch/arm64/kvm/guest.c | 3
-rw-r--r--  arch/arm64/kvm/hyp/aarch32.c | 18
-rw-r--r--  arch/arm64/kvm/hyp/fpsimd.S | 6
-rw-r--r--  arch/arm64/kvm/hyp/include/hyp/switch.h | 36
-rw-r--r--  arch/arm64/kvm/hyp/include/nvhe/pkvm.h | 1
-rw-r--r--  arch/arm64/kvm/hyp/nvhe/ffa.c | 12
-rw-r--r--  arch/arm64/kvm/hyp/nvhe/hyp-main.c | 84
-rw-r--r--  arch/arm64/kvm/hyp/nvhe/pkvm.c | 17
-rw-r--r--  arch/arm64/kvm/hyp/nvhe/setup.c | 25
-rw-r--r--  arch/arm64/kvm/hyp/nvhe/switch.c | 24
-rw-r--r--  arch/arm64/kvm/hyp/vhe/switch.c | 12
-rw-r--r--  arch/arm64/kvm/nested.c | 6
-rw-r--r--  arch/arm64/kvm/reset.c | 3
-rw-r--r--  arch/arm64/kvm/vgic/vgic-init.c | 2
-rw-r--r--  arch/arm64/kvm/vgic/vgic-mmio-v3.c | 15
-rw-r--r--  arch/arm64/kvm/vgic/vgic.h | 2
-rw-r--r--  arch/arm64/mm/contpte.c | 4
-rw-r--r--  arch/loongarch/Kconfig | 5
-rw-r--r--  arch/loongarch/Kconfig.debug | 1
-rw-r--r--  arch/loongarch/boot/dts/loongson-2k0500-ref.dts | 4
-rw-r--r--  arch/loongarch/boot/dts/loongson-2k1000-ref.dts | 4
-rw-r--r--  arch/loongarch/boot/dts/loongson-2k2000-ref.dts | 2
-rw-r--r--  arch/loongarch/include/asm/hw_breakpoint.h | 4
-rw-r--r--  arch/loongarch/include/asm/numa.h | 1
-rw-r--r--  arch/loongarch/include/asm/stackframe.h | 2
-rw-r--r--  arch/loongarch/kernel/head.S | 2
-rw-r--r--  arch/loongarch/kernel/hw_breakpoint.c | 96
-rw-r--r--  arch/loongarch/kernel/ptrace.c | 47
-rw-r--r--  arch/loongarch/kernel/setup.c | 6
-rw-r--r--  arch/loongarch/kernel/smp.c | 5
-rw-r--r--  arch/loongarch/kernel/vmlinux.lds.S | 10
-rw-r--r--  arch/loongarch/kvm/exit.c | 2
-rw-r--r--  arch/mips/bmips/setup.c | 3
-rw-r--r--  arch/mips/include/asm/mipsmtregs.h | 2
-rw-r--r--  arch/mips/kernel/syscalls/syscall_o32.tbl | 2
-rw-r--r--  arch/mips/pci/ops-rc32434.c | 4
-rw-r--r--  arch/parisc/include/asm/cacheflush.h | 15
-rw-r--r--  arch/parisc/include/asm/pgtable.h | 27
-rw-r--r--  arch/parisc/kernel/cache.c | 413
-rw-r--r--  arch/powerpc/Kconfig | 2
-rw-r--r--  arch/powerpc/crypto/.gitignore | 2
-rw-r--r--  arch/powerpc/include/asm/uaccess.h | 27
-rw-r--r--  arch/powerpc/kvm/book3s_64_vio.c | 18
-rw-r--r--  arch/powerpc/net/bpf_jit_comp32.c | 12
-rw-r--r--  arch/powerpc/net/bpf_jit_comp64.c | 12
-rw-r--r--  arch/powerpc/platforms/pseries/lparcfg.c | 4
-rw-r--r--  arch/riscv/Kconfig | 2
-rw-r--r--  arch/riscv/boot/dts/sophgo/cv1800b-milkv-duo.dts | 1
-rw-r--r--  arch/riscv/include/asm/cmpxchg.h | 22
-rw-r--r--  arch/riscv/include/asm/insn.h | 2
-rw-r--r--  arch/riscv/kernel/cpu_ops_sbi.c | 2
-rw-r--r--  arch/riscv/kernel/cpu_ops_spinwait.c | 3
-rw-r--r--  arch/riscv/kernel/ftrace.c | 7
-rw-r--r--  arch/riscv/kernel/patch.c | 26
-rw-r--r--  arch/riscv/kvm/aia_device.c | 7
-rw-r--r--  arch/riscv/kvm/vcpu_onereg.c | 4
-rw-r--r--  arch/riscv/mm/fault.c | 4
-rw-r--r--  arch/riscv/mm/init.c | 21
-rw-r--r--  arch/s390/boot/startup.c | 27
-rw-r--r--  arch/s390/boot/vmem.c | 12
-rw-r--r--  arch/s390/boot/vmlinux.lds.S | 1
-rw-r--r--  arch/s390/configs/debug_defconfig | 43
-rw-r--r--  arch/s390/configs/defconfig | 40
-rw-r--r--  arch/s390/configs/zfcpdump_defconfig | 5
-rw-r--r--  arch/s390/kernel/crash_dump.c | 54
-rw-r--r--  arch/x86/boot/compressed/Makefile | 4
-rw-r--r--  arch/x86/events/intel/cstate.c | 1
-rw-r--r--  arch/x86/events/intel/uncore.c | 1
-rw-r--r--  arch/x86/events/rapl.c | 1
-rw-r--r--  arch/x86/include/asm/efi.h | 1
-rw-r--r--  arch/x86/include/asm/kvm_host.h | 1
-rw-r--r--  arch/x86/include/asm/uaccess.h | 4
-rw-r--r--  arch/x86/include/asm/vmxfeatures.h | 2
-rw-r--r--  arch/x86/kernel/amd_nb.c | 9
-rw-r--r--  arch/x86/kernel/cpu/aperfmperf.c | 1
-rw-r--r--  arch/x86/kernel/cpu/common.c | 7
-rw-r--r--  arch/x86/kernel/cpu/cpu.h | 2
-rw-r--r--  arch/x86/kernel/cpu/intel.c | 25
-rw-r--r--  arch/x86/kernel/cpu/resctrl/monitor.c | 3
-rw-r--r--  arch/x86/kernel/cpu/topology_amd.c | 4
-rw-r--r--  arch/x86/kernel/machine_kexec_64.c | 11
-rw-r--r--  arch/x86/kvm/Kconfig | 11
-rw-r--r--  arch/x86/kvm/lapic.c | 39
-rw-r--r--  arch/x86/kvm/lapic.h | 2
-rw-r--r--  arch/x86/kvm/mmu/mmu.c | 46
-rw-r--r--  arch/x86/kvm/mmu/spte.h | 9
-rw-r--r--  arch/x86/kvm/mmu/tdp_iter.h | 2
-rw-r--r--  arch/x86/kvm/mmu/tdp_mmu.c | 2
-rw-r--r--  arch/x86/kvm/svm/sev.c | 19
-rw-r--r--  arch/x86/kvm/svm/svm.c | 69
-rw-r--r--  arch/x86/kvm/svm/svm.h | 4
-rw-r--r--  arch/x86/kvm/vmx/nested.c | 5
-rw-r--r--  arch/x86/kvm/vmx/vmx.c | 11
-rw-r--r--  arch/x86/kvm/x86.c | 20
-rw-r--r--  arch/x86/lib/getuser.S | 6
-rw-r--r--  arch/x86/mm/numa.c | 6
-rw-r--r--  arch/x86/platform/efi/memmap.c | 12
121 files changed, 1384 insertions(+), 595 deletions(-)
diff --git a/arch/arc/net/bpf_jit.h b/arch/arc/net/bpf_jit.h
index ec44873c42d1..495f3023e4c1 100644
--- a/arch/arc/net/bpf_jit.h
+++ b/arch/arc/net/bpf_jit.h
@@ -39,7 +39,7 @@
/************** Functions that the back-end must provide **************/
/* Extension for 32-bit operations. */
-inline u8 zext(u8 *buf, u8 rd);
+u8 zext(u8 *buf, u8 rd);
/***** Moves *****/
u8 mov_r32(u8 *buf, u8 rd, u8 rs, u8 sign_ext);
u8 mov_r32_i32(u8 *buf, u8 reg, s32 imm);
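Note on the hunk above: together with the matching change to the definition in bpf_jit_arcv2.c further down, this drops 'inline' from a function whose declaration and definition live in different translation units. Under C99 inline semantics, a function that is only ever declared 'inline' (without 'extern') need not emit an out-of-line body, so other objects can fail to link against it. A minimal two-file sketch of the pitfall (file names hypothetical):

	/* lib.c */
	inline int twice(int x) { return 2 * x; }	/* C99 inline definition:
							   no external symbol is
							   guaranteed to be emitted */

	/* main.c */
	int twice(int x);	/* expects an external definition */
	int main(void) { return twice(21) == 42 ? 0 : 1; }

	/* Linking lib.o + main.o can fail with "undefined reference to
	 * 'twice'"; dropping 'inline', as the patch does, guarantees a
	 * real out-of-line definition exists. */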
diff --git a/arch/arc/net/bpf_jit_arcv2.c b/arch/arc/net/bpf_jit_arcv2.c
index 31bfb6e9ce00..4458e409ca0a 100644
--- a/arch/arc/net/bpf_jit_arcv2.c
+++ b/arch/arc/net/bpf_jit_arcv2.c
@@ -62,7 +62,7 @@ enum {
* If/when we decide to add ARCv2 instructions that do use register pairs,
* the mapping, hopefully, doesn't need to be revisited.
*/
-const u8 bpf2arc[][2] = {
+static const u8 bpf2arc[][2] = {
/* Return value from in-kernel function, and exit value from eBPF */
[BPF_REG_0] = {ARC_R_8, ARC_R_9},
/* Arguments from eBPF program to in-kernel function */
@@ -1302,7 +1302,7 @@ static u8 arc_b(u8 *buf, s32 offset)
/************* Packers (Deal with BPF_REGs) **************/
-inline u8 zext(u8 *buf, u8 rd)
+u8 zext(u8 *buf, u8 rd)
{
if (rd != BPF_REG_FP)
return arc_movi_r(buf, REG_HI(rd), 0);
@@ -2235,6 +2235,7 @@ u8 gen_swap(u8 *buf, u8 rd, u8 size, u8 endian, bool force, bool do_zext)
break;
default:
/* The caller must have handled this. */
+ break;
}
} else {
/*
@@ -2253,6 +2254,7 @@ u8 gen_swap(u8 *buf, u8 rd, u8 size, u8 endian, bool force, bool do_zext)
break;
default:
/* The caller must have handled this. */
+ break;
}
}
@@ -2517,7 +2519,7 @@ u8 arc_epilogue(u8 *buf, u32 usage, u16 frame_size)
#define JCC64_NR_OF_JMPS 3 /* Number of jumps in jcc64 template. */
#define JCC64_INSNS_TO_END 3 /* Number of insn. inclusive the 2nd jmp to end. */
#define JCC64_SKIP_JMP 1 /* Index of the "skip" jump to "end". */
-const struct {
+static const struct {
/*
* "jit_off" is common between all "jmp[]" and is coupled with
* "cond" of each "jmp[]" instance. e.g.:
@@ -2883,7 +2885,7 @@ u8 gen_jmp_64(u8 *buf, u8 rd, u8 rs, u8 cond, u32 curr_off, u32 targ_off)
* The "ARC_CC_SET" becomes "CC_unequal" because of the "tst"
* instruction that precedes the conditional branch.
*/
-const u8 arcv2_32_jmps[ARC_CC_LAST] = {
+static const u8 arcv2_32_jmps[ARC_CC_LAST] = {
[ARC_CC_UGT] = CC_great_u,
[ARC_CC_UGE] = CC_great_eq_u,
[ARC_CC_ULT] = CC_less_u,
diff --git a/arch/arc/net/bpf_jit_core.c b/arch/arc/net/bpf_jit_core.c
index 6f6b4ffccf2c..e3628922c24a 100644
--- a/arch/arc/net/bpf_jit_core.c
+++ b/arch/arc/net/bpf_jit_core.c
@@ -159,7 +159,7 @@ static void jit_dump(const struct jit_context *ctx)
/* Initialise the context so there's no garbage. */
static int jit_ctx_init(struct jit_context *ctx, struct bpf_prog *prog)
{
- memset(ctx, 0, sizeof(ctx));
+ memset(ctx, 0, sizeof(*ctx));
ctx->orig_prog = prog;
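The memset() fix above is the classic sizeof-of-a-pointer bug; a standalone illustration (the struct contents are a stand-in):

	#include <stdio.h>

	struct jit_context { char state[256]; };

	int main(void)
	{
		struct jit_context c, *ctx = &c;

		/* sizeof(ctx) is the size of the pointer (8 bytes on LP64),
		 * so memset(ctx, 0, sizeof(ctx)) zeroes only the first 8
		 * bytes; sizeof(*ctx) covers the whole structure. */
		printf("%zu vs %zu\n", sizeof(ctx), sizeof(*ctx));
		return 0;
	}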
@@ -167,7 +167,7 @@ static int jit_ctx_init(struct jit_context *ctx, struct bpf_prog *prog)
ctx->prog = bpf_jit_blind_constants(prog);
if (IS_ERR(ctx->prog))
return PTR_ERR(ctx->prog);
- ctx->blinded = (ctx->prog == ctx->orig_prog ? false : true);
+ ctx->blinded = (ctx->prog != ctx->orig_prog);
/* If the verifier doesn't zero-extend, then we have to do it. */
ctx->do_zext = !ctx->prog->aux->verifier_zext;
@@ -1182,12 +1182,12 @@ static int jit_prepare(struct jit_context *ctx)
}
/*
- * All the "handle_*()" functions have been called before by the
- * "jit_prepare()". If there was an error, we would know by now.
- * Therefore, no extra error checking at this point, other than
- * a sanity check at the end that expects the calculated length
- * (jit.len) to be equal to the length of generated instructions
- * (jit.index).
+ * jit_compile() is the real compilation phase. jit_prepare() is
+ * invoked before jit_compile() as a dry-run to make sure everything
+ * will go OK and allocate the necessary memory.
+ *
+ * In the end, jit_compile() checks if it has produced the same number
+ * of instructions as jit_prepare() would.
*/
static int jit_compile(struct jit_context *ctx)
{
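The dry-run/emit split described in the new comment can be sketched standalone (an assumed shape, not the actual ARC emitters: every emit helper returns its length and writes nothing when the buffer is NULL):

	#include <assert.h>
	#include <stdint.h>
	#include <stdlib.h>

	/* Each emitter returns the bytes it would write; with buf == NULL
	 * it only measures, mirroring the jit_prepare() dry-run. */
	static uint32_t emit_insn(uint8_t *buf, uint8_t opcode)
	{
		if (buf)
			*buf = opcode;
		return 1;
	}

	int main(void)
	{
		uint32_t len = 0, idx = 0;
		uint8_t *image;

		/* Pass 1 (jit_prepare): compute the image length. */
		len += emit_insn(NULL, 0x90);
		len += emit_insn(NULL, 0xc3);

		image = malloc(len);
		if (!image)
			return 1;

		/* Pass 2 (jit_compile): emit for real, then verify the two
		 * passes agree, like the jit.len == jit.index check. */
		idx += emit_insn(image + idx, 0x90);
		idx += emit_insn(image + idx, 0xc3);
		assert(idx == len);

		free(image);
		return 0;
	}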
@@ -1407,9 +1407,9 @@ static struct bpf_prog *do_extra_pass(struct bpf_prog *prog)
/*
* This function may be invoked twice for the same stream of BPF
- * instructions. The "extra pass" happens, when there are "call"s
- * involved that their addresses are not known during the first
- * invocation.
+ * instructions. The "extra pass" happens when there are
+ * (re)locations involved whose addresses are not known
+ * during the first run.
*/
struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
{
diff --git a/arch/arm/boot/dts/nxp/imx/imx53-qsb-common.dtsi b/arch/arm/boot/dts/nxp/imx/imx53-qsb-common.dtsi
index d80440446473..05d7a462ea25 100644
--- a/arch/arm/boot/dts/nxp/imx/imx53-qsb-common.dtsi
+++ b/arch/arm/boot/dts/nxp/imx/imx53-qsb-common.dtsi
@@ -85,7 +85,7 @@
};
};
- panel {
+ panel_dpi: panel {
compatible = "sii,43wvf1g";
pinctrl-names = "default";
pinctrl-0 = <&pinctrl_display_power>;
diff --git a/arch/arm/boot/dts/nxp/imx/imx53-qsb-hdmi.dtso b/arch/arm/boot/dts/nxp/imx/imx53-qsb-hdmi.dtso
index c84e9b052527..151e9cee3c87 100644
--- a/arch/arm/boot/dts/nxp/imx/imx53-qsb-hdmi.dtso
+++ b/arch/arm/boot/dts/nxp/imx/imx53-qsb-hdmi.dtso
@@ -10,8 +10,6 @@
/plugin/;
&{/} {
- /delete-node/ panel;
-
hdmi: connector-hdmi {
compatible = "hdmi-connector";
label = "hdmi";
@@ -82,6 +80,10 @@
};
};
+&panel_dpi {
+ status = "disabled";
+};
+
&tve {
status = "disabled";
};
diff --git a/arch/arm/include/asm/efi.h b/arch/arm/include/asm/efi.h
index 78282ced5038..e408399d5f0e 100644
--- a/arch/arm/include/asm/efi.h
+++ b/arch/arm/include/asm/efi.h
@@ -14,6 +14,7 @@
#include <asm/mach/map.h>
#include <asm/mmu_context.h>
#include <asm/ptrace.h>
+#include <asm/uaccess.h>
#ifdef CONFIG_EFI
void efi_init(void);
@@ -25,6 +26,18 @@ int efi_set_mapping_permissions(struct mm_struct *mm, efi_memory_desc_t *md, boo
#define arch_efi_call_virt_setup() efi_virtmap_load()
#define arch_efi_call_virt_teardown() efi_virtmap_unload()
+#ifdef CONFIG_CPU_TTBR0_PAN
+#undef arch_efi_call_virt
+#define arch_efi_call_virt(p, f, args...) ({ \
+ unsigned int flags = uaccess_save_and_enable(); \
+ efi_status_t res = _Generic((p)->f(args), \
+ efi_status_t: (p)->f(args), \
+ default: ((p)->f(args), EFI_ABORTED)); \
+ uaccess_restore(flags); \
+ res; \
+})
+#endif
+
#define ARCH_EFI_IRQ_FLAGS_MASK \
(PSR_J_BIT | PSR_E_BIT | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT | \
PSR_T_BIT | MODE_MASK)
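The _Generic trick in the new arch_efi_call_virt() keeps the wrapper usable for EFI services that don't return efi_status_t, substituting EFI_ABORTED at compile time. A freestanding sketch (the service functions are made up):

	#include <stdio.h>

	typedef unsigned long efi_status_t;
	#define EFI_ABORTED 21

	/* The controlling expression of _Generic is never evaluated; it
	 * only selects a branch by type, so the service runs exactly once. */
	#define call_virt(fn, ...)                                          \
		_Generic((fn)(__VA_ARGS__),                                 \
			 efi_status_t: (fn)(__VA_ARGS__),                   \
			 default: ((fn)(__VA_ARGS__), (efi_status_t)EFI_ABORTED))

	static efi_status_t get_time(int x) { (void)x; return 0; }
	static void reset_system(int x) { (void)x; }

	int main(void)
	{
		printf("%lu\n", call_virt(get_time, 0));     /* prints 0 */
		printf("%lu\n", call_virt(reset_system, 0)); /* prints 21 */
		return 0;
	}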
diff --git a/arch/arm/kernel/ftrace.c b/arch/arm/kernel/ftrace.c
index a0b6d1e3812f..e61591f33a6c 100644
--- a/arch/arm/kernel/ftrace.c
+++ b/arch/arm/kernel/ftrace.c
@@ -232,11 +232,24 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
unsigned long old;
if (unlikely(atomic_read(&current->tracing_graph_pause)))
+err_out:
return;
if (IS_ENABLED(CONFIG_UNWINDER_FRAME_POINTER)) {
- /* FP points one word below parent's top of stack */
- frame_pointer += 4;
+ /*
+ * Usually, the stack frames are contiguous in memory but cases
+ * have been observed where the next stack frame does not live
+ * at 'frame_pointer + 4' as this code used to assume.
+ *
+ * Instead, dereference the field in the stack frame that
+ * stores the SP of the calling frame: to avoid unbounded
+ * recursion, this cannot involve any ftrace instrumented
+ * functions, so use the __get_kernel_nofault() primitive
+ * directly.
+ */
+ __get_kernel_nofault(&frame_pointer,
+ (unsigned long *)(frame_pointer - 8),
+ unsigned long, err_out);
} else {
struct stackframe frame = {
.fp = frame_pointer,
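For orientation, a sketch of the frame layout the load above assumes (the APCS-style {fp, sp, lr, pc} save block used with CONFIG_UNWINDER_FRAME_POINTER builds):

	/*
	 * Assumed APCS frame layout (sketch):
	 *
	 *   [fp]       saved pc
	 *   [fp - 4]   saved lr
	 *   [fp - 8]   saved sp of the caller   <- what the new code loads
	 *   [fp - 12]  saved fp of the caller
	 *
	 * The old 'frame_pointer += 4' assumed the caller's frame began
	 * immediately above this one; loading the saved sp holds even
	 * when frames are not contiguous in memory.
	 */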
diff --git a/arch/arm64/boot/dts/freescale/imx8mm-verdin.dtsi b/arch/arm64/boot/dts/freescale/imx8mm-verdin.dtsi
index 4768b05fd765..98544741ce17 100644
--- a/arch/arm64/boot/dts/freescale/imx8mm-verdin.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8mm-verdin.dtsi
@@ -6,6 +6,7 @@
#include <dt-bindings/phy/phy-imx8-pcie.h>
#include <dt-bindings/pwm/pwm.h>
#include "imx8mm.dtsi"
+#include "imx8mm-overdrive.dtsi"
/ {
chosen {
@@ -935,7 +936,7 @@
/* Verdin GPIO_9_DSI (pulled-up as active-low) */
pinctrl_gpio_9_dsi: gpio9dsigrp {
fsl,pins =
- <MX8MM_IOMUXC_NAND_RE_B_GPIO3_IO15 0x146>; /* SODIMM 17 */
+ <MX8MM_IOMUXC_NAND_RE_B_GPIO3_IO15 0x1c6>; /* SODIMM 17 */
};
/* Verdin GPIO_10_DSI (pulled-up as active-low) */
diff --git a/arch/arm64/boot/dts/freescale/imx8mp-dhcom-som.dtsi b/arch/arm64/boot/dts/freescale/imx8mp-dhcom-som.dtsi
index 43f1d45ccc96..f5115f9e8c47 100644
--- a/arch/arm64/boot/dts/freescale/imx8mp-dhcom-som.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8mp-dhcom-som.dtsi
@@ -254,7 +254,7 @@
<&clk IMX8MP_CLK_CLKOUT2>,
<&clk IMX8MP_AUDIO_PLL2_OUT>;
assigned-clock-parents = <&clk IMX8MP_AUDIO_PLL2_OUT>;
- assigned-clock-rates = <13000000>, <13000000>, <156000000>;
+ assigned-clock-rates = <13000000>, <13000000>, <208000000>;
reset-gpios = <&gpio4 1 GPIO_ACTIVE_HIGH>;
status = "disabled";
diff --git a/arch/arm64/boot/dts/freescale/imx8mp-venice-gw73xx.dtsi b/arch/arm64/boot/dts/freescale/imx8mp-venice-gw73xx.dtsi
index dec57fad6828..e2b5e7ac3e46 100644
--- a/arch/arm64/boot/dts/freescale/imx8mp-venice-gw73xx.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8mp-venice-gw73xx.dtsi
@@ -219,7 +219,7 @@
bluetooth {
compatible = "brcm,bcm4330-bt";
- shutdown-gpios = <&gpio4 16 GPIO_ACTIVE_HIGH>;
+ shutdown-gpios = <&gpio1 3 GPIO_ACTIVE_HIGH>;
};
};
diff --git a/arch/arm64/boot/dts/freescale/imx8qm-mek.dts b/arch/arm64/boot/dts/freescale/imx8qm-mek.dts
index 5c6b39c6933f..6e05361c1ffb 100644
--- a/arch/arm64/boot/dts/freescale/imx8qm-mek.dts
+++ b/arch/arm64/boot/dts/freescale/imx8qm-mek.dts
@@ -36,7 +36,7 @@
regulator-name = "SD1_SPWR";
regulator-min-microvolt = <3000000>;
regulator-max-microvolt = <3000000>;
- gpio = <&lsio_gpio4 19 GPIO_ACTIVE_HIGH>;
+ gpio = <&lsio_gpio4 7 GPIO_ACTIVE_HIGH>;
enable-active-high;
};
diff --git a/arch/arm64/boot/dts/freescale/imx93-11x11-evk.dts b/arch/arm64/boot/dts/freescale/imx93-11x11-evk.dts
index d400d85f42a9..bd98eff4d685 100644
--- a/arch/arm64/boot/dts/freescale/imx93-11x11-evk.dts
+++ b/arch/arm64/boot/dts/freescale/imx93-11x11-evk.dts
@@ -296,7 +296,6 @@
vmmc-supply = <&reg_usdhc2_vmmc>;
bus-width = <4>;
status = "okay";
- no-sdio;
no-mmc;
};
diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h
index e4546b29dd0c..fd87c4b8f984 100644
--- a/arch/arm64/include/asm/el2_setup.h
+++ b/arch/arm64/include/asm/el2_setup.h
@@ -146,7 +146,7 @@
/* Coprocessor traps */
.macro __init_el2_cptr
__check_hvhe .LnVHE_\@, x1
- mov x0, #(CPACR_EL1_FPEN_EL1EN | CPACR_EL1_FPEN_EL0EN)
+ mov x0, #CPACR_ELx_FPEN
msr cpacr_el1, x0
b .Lskip_set_cptr_\@
.LnVHE_\@:
@@ -277,7 +277,7 @@
// (h)VHE case
mrs x0, cpacr_el1 // Disable SVE traps
- orr x0, x0, #(CPACR_EL1_ZEN_EL1EN | CPACR_EL1_ZEN_EL0EN)
+ orr x0, x0, #CPACR_ELx_ZEN
msr cpacr_el1, x0
b .Lskip_set_cptr_\@
@@ -298,7 +298,7 @@
// (h)VHE case
mrs x0, cpacr_el1 // Disable SME traps
- orr x0, x0, #(CPACR_EL1_SMEN_EL0EN | CPACR_EL1_SMEN_EL1EN)
+ orr x0, x0, #CPACR_ELx_SMEN
msr cpacr_el1, x0
b .Lskip_set_cptr_sme_\@
diff --git a/arch/arm64/include/asm/io.h b/arch/arm64/include/asm/io.h
index 4ff0ae3f6d66..41fd90895dfc 100644
--- a/arch/arm64/include/asm/io.h
+++ b/arch/arm64/include/asm/io.h
@@ -153,8 +153,9 @@ extern void __memset_io(volatile void __iomem *, int, size_t);
* emit the large TLP from the CPU.
*/
-static inline void __const_memcpy_toio_aligned32(volatile u32 __iomem *to,
- const u32 *from, size_t count)
+static __always_inline void
+__const_memcpy_toio_aligned32(volatile u32 __iomem *to, const u32 *from,
+ size_t count)
{
switch (count) {
case 8:
@@ -196,24 +197,22 @@ static inline void __const_memcpy_toio_aligned32(volatile u32 __iomem *to,
void __iowrite32_copy_full(void __iomem *to, const void *from, size_t count);
-static inline void __const_iowrite32_copy(void __iomem *to, const void *from,
- size_t count)
+static __always_inline void
+__iowrite32_copy(void __iomem *to, const void *from, size_t count)
{
- if (count == 8 || count == 4 || count == 2 || count == 1) {
+ if (__builtin_constant_p(count) &&
+ (count == 8 || count == 4 || count == 2 || count == 1)) {
__const_memcpy_toio_aligned32(to, from, count);
dgh();
} else {
__iowrite32_copy_full(to, from, count);
}
}
+#define __iowrite32_copy __iowrite32_copy
-#define __iowrite32_copy(to, from, count) \
- (__builtin_constant_p(count) ? \
- __const_iowrite32_copy(to, from, count) : \
- __iowrite32_copy_full(to, from, count))
-
-static inline void __const_memcpy_toio_aligned64(volatile u64 __iomem *to,
- const u64 *from, size_t count)
+static __always_inline void
+__const_memcpy_toio_aligned64(volatile u64 __iomem *to, const u64 *from,
+ size_t count)
{
switch (count) {
case 8:
@@ -255,21 +254,18 @@ static inline void __const_memcpy_toio_aligned64(volatile u64 __iomem *to,
void __iowrite64_copy_full(void __iomem *to, const void *from, size_t count);
-static inline void __const_iowrite64_copy(void __iomem *to, const void *from,
- size_t count)
+static __always_inline void
+__iowrite64_copy(void __iomem *to, const void *from, size_t count)
{
- if (count == 8 || count == 4 || count == 2 || count == 1) {
+ if (__builtin_constant_p(count) &&
+ (count == 8 || count == 4 || count == 2 || count == 1)) {
__const_memcpy_toio_aligned64(to, from, count);
dgh();
} else {
__iowrite64_copy_full(to, from, count);
}
}
-
-#define __iowrite64_copy(to, from, count) \
- (__builtin_constant_p(count) ? \
- __const_iowrite64_copy(to, from, count) : \
- __iowrite64_copy_full(to, from, count))
+#define __iowrite64_copy __iowrite64_copy
/*
* I/O memory mapping functions.
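Folding the old ternary macro into an __always_inline function works because __builtin_constant_p() is evaluated after inlining, in the caller's context. A freestanding sketch of the same pattern (GCC/Clang builtins assumed):

	#include <stdio.h>
	#include <stddef.h>

	static void copy_unrolled(size_t n) { printf("unrolled x%zu\n", n); }
	static void copy_generic(size_t n)  { printf("generic, n=%zu\n", n); }

	/* Once force-inlined into the caller, __builtin_constant_p(count)
	 * folds to 1 for literal counts and the branch disappears, which
	 * is what the old macro achieved with an explicit ternary. */
	static inline __attribute__((always_inline))
	void iowrite_copy(size_t count)
	{
		if (__builtin_constant_p(count) &&
		    (count == 8 || count == 4 || count == 2 || count == 1))
			copy_unrolled(count);
		else
			copy_generic(count);
	}

	int main(void)
	{
		volatile size_t n = 4;	/* volatile: defeat constant folding */

		iowrite_copy(8);	/* constant count: fast path */
		iowrite_copy(n);	/* runtime count: full path */
		return 0;
	}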
diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index e01bb5ca13b7..b2adc2c6c82a 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -305,6 +305,12 @@
GENMASK(19, 14) | \
BIT(11))
+#define CPTR_VHE_EL2_RES0 (GENMASK(63, 32) | \
+ GENMASK(27, 26) | \
+ GENMASK(23, 22) | \
+ GENMASK(19, 18) | \
+ GENMASK(15, 0))
+
/* Hyp Debug Configuration Register bits */
#define MDCR_EL2_E2TB_MASK (UL(0x3))
#define MDCR_EL2_E2TB_SHIFT (UL(24))
diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index 501e3e019c93..21650e7924d4 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -557,6 +557,68 @@ static __always_inline void kvm_incr_pc(struct kvm_vcpu *vcpu)
vcpu_set_flag((v), e); \
} while (0)
+#define __build_check_all_or_none(r, bits) \
+ BUILD_BUG_ON(((r) & (bits)) && ((r) & (bits)) != (bits))
+
+#define __cpacr_to_cptr_clr(clr, set) \
+ ({ \
+ u64 cptr = 0; \
+ \
+ if ((set) & CPACR_ELx_FPEN) \
+ cptr |= CPTR_EL2_TFP; \
+ if ((set) & CPACR_ELx_ZEN) \
+ cptr |= CPTR_EL2_TZ; \
+ if ((set) & CPACR_ELx_SMEN) \
+ cptr |= CPTR_EL2_TSM; \
+ if ((clr) & CPACR_ELx_TTA) \
+ cptr |= CPTR_EL2_TTA; \
+ if ((clr) & CPTR_EL2_TAM) \
+ cptr |= CPTR_EL2_TAM; \
+ if ((clr) & CPTR_EL2_TCPAC) \
+ cptr |= CPTR_EL2_TCPAC; \
+ \
+ cptr; \
+ })
+
+#define __cpacr_to_cptr_set(clr, set) \
+ ({ \
+ u64 cptr = 0; \
+ \
+ if ((clr) & CPACR_ELx_FPEN) \
+ cptr |= CPTR_EL2_TFP; \
+ if ((clr) & CPACR_ELx_ZEN) \
+ cptr |= CPTR_EL2_TZ; \
+ if ((clr) & CPACR_ELx_SMEN) \
+ cptr |= CPTR_EL2_TSM; \
+ if ((set) & CPACR_ELx_TTA) \
+ cptr |= CPTR_EL2_TTA; \
+ if ((set) & CPTR_EL2_TAM) \
+ cptr |= CPTR_EL2_TAM; \
+ if ((set) & CPTR_EL2_TCPAC) \
+ cptr |= CPTR_EL2_TCPAC; \
+ \
+ cptr; \
+ })
+
+#define cpacr_clear_set(clr, set) \
+ do { \
+ BUILD_BUG_ON((set) & CPTR_VHE_EL2_RES0); \
+ BUILD_BUG_ON((clr) & CPACR_ELx_E0POE); \
+ __build_check_all_or_none((clr), CPACR_ELx_FPEN); \
+ __build_check_all_or_none((set), CPACR_ELx_FPEN); \
+ __build_check_all_or_none((clr), CPACR_ELx_ZEN); \
+ __build_check_all_or_none((set), CPACR_ELx_ZEN); \
+ __build_check_all_or_none((clr), CPACR_ELx_SMEN); \
+ __build_check_all_or_none((set), CPACR_ELx_SMEN); \
+ \
+ if (has_vhe() || has_hvhe()) \
+ sysreg_clear_set(cpacr_el1, clr, set); \
+ else \
+ sysreg_clear_set(cptr_el2, \
+ __cpacr_to_cptr_clr(clr, set), \
+ __cpacr_to_cptr_set(clr, set));\
+ } while (0)
+
static __always_inline void kvm_write_cptr_el2(u64 val)
{
if (has_vhe() || has_hvhe())
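What cpacr_clear_set() buys: callers express clear/set pairs once, in the enable-polarity CPACR encoding, and on nVHE the macro translates them into the opposite-polarity CPTR_EL2 trap bits. A toy model of that inversion (the bit values are placeholders, not the architected layouts):

	#include <assert.h>
	#include <stdint.h>

	#define CPACR_FPEN (3u << 20)	/* enable polarity (VHE view) */
	#define CPTR_TFP   (1u << 10)	/* trap polarity (nVHE view)  */

	/* Mirror of __cpacr_to_cptr_set(): clearing an enable bit in the
	 * CPACR view must become setting the matching trap bit in the
	 * CPTR_EL2 view, because the two registers have opposite polarity. */
	static uint64_t cpacr_to_cptr_set(uint64_t clr)
	{
		uint64_t cptr = 0;

		if (clr & CPACR_FPEN)
			cptr |= CPTR_TFP;
		return cptr;
	}

	int main(void)
	{
		/* cpacr_clear_set(CPACR_FPEN, 0) on nVHE ends up *setting*
		 * the FP trap rather than clearing an enable bit. */
		assert(cpacr_to_cptr_set(CPACR_FPEN) == CPTR_TFP);
		return 0;
	}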
@@ -570,17 +632,16 @@ static __always_inline u64 kvm_get_reset_cptr_el2(struct kvm_vcpu *vcpu)
u64 val;
if (has_vhe()) {
- val = (CPACR_EL1_FPEN_EL0EN | CPACR_EL1_FPEN_EL1EN |
- CPACR_EL1_ZEN_EL1EN);
+ val = (CPACR_ELx_FPEN | CPACR_EL1_ZEN_EL1EN);
if (cpus_have_final_cap(ARM64_SME))
val |= CPACR_EL1_SMEN_EL1EN;
} else if (has_hvhe()) {
- val = (CPACR_EL1_FPEN_EL0EN | CPACR_EL1_FPEN_EL1EN);
+ val = CPACR_ELx_FPEN;
if (!vcpu_has_sve(vcpu) || !guest_owns_fp_regs())
- val |= CPACR_EL1_ZEN_EL1EN | CPACR_EL1_ZEN_EL0EN;
+ val |= CPACR_ELx_ZEN;
if (cpus_have_final_cap(ARM64_SME))
- val |= CPACR_EL1_SMEN_EL1EN | CPACR_EL1_SMEN_EL0EN;
+ val |= CPACR_ELx_SMEN;
} else {
val = CPTR_NVHE_EL2_RES1;
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 8170c04fde91..36b8e97bf49e 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -76,6 +76,7 @@ static inline enum kvm_mode kvm_get_mode(void) { return KVM_MODE_NONE; };
DECLARE_STATIC_KEY_FALSE(userspace_irqchip_in_use);
extern unsigned int __ro_after_init kvm_sve_max_vl;
+extern unsigned int __ro_after_init kvm_host_sve_max_vl;
int __init kvm_arm_init_sve(void);
u32 __attribute_const__ kvm_target_cpu(void);
@@ -521,6 +522,20 @@ struct kvm_cpu_context {
u64 *vncr_array;
};
+struct cpu_sve_state {
+ __u64 zcr_el1;
+
+ /*
+ * Ordering is important since __sve_save_state/__sve_restore_state
+ * relies on it.
+ */
+ __u32 fpsr;
+ __u32 fpcr;
+
+ /* Must be SVE_VQ_BYTES (128 bit) aligned. */
+ __u8 sve_regs[];
+};
+
/*
* This structure is instantiated on a per-CPU basis, and contains
* data that is:
@@ -534,7 +549,15 @@ struct kvm_cpu_context {
*/
struct kvm_host_data {
struct kvm_cpu_context host_ctxt;
- struct user_fpsimd_state *fpsimd_state; /* hyp VA */
+
+ /*
+ * All pointers in this union are hyp VA.
+ * sve_state is only used in pKVM and if system_supports_sve().
+ */
+ union {
+ struct user_fpsimd_state *fpsimd_state;
+ struct cpu_sve_state *sve_state;
+ };
/* Ownership of the FP regs */
enum {
diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h
index 3e80464f8953..b05bceca3385 100644
--- a/arch/arm64/include/asm/kvm_hyp.h
+++ b/arch/arm64/include/asm/kvm_hyp.h
@@ -111,7 +111,8 @@ void __debug_restore_host_buffers_nvhe(struct kvm_vcpu *vcpu);
void __fpsimd_save_state(struct user_fpsimd_state *fp_regs);
void __fpsimd_restore_state(struct user_fpsimd_state *fp_regs);
-void __sve_restore_state(void *sve_pffr, u32 *fpsr);
+void __sve_save_state(void *sve_pffr, u32 *fpsr, int save_ffr);
+void __sve_restore_state(void *sve_pffr, u32 *fpsr, int restore_ffr);
u64 __guest_enter(struct kvm_vcpu *vcpu);
@@ -142,5 +143,6 @@ extern u64 kvm_nvhe_sym(id_aa64smfr0_el1_sys_val);
extern unsigned long kvm_nvhe_sym(__icache_flags);
extern unsigned int kvm_nvhe_sym(kvm_arm_vmid_bits);
+extern unsigned int kvm_nvhe_sym(kvm_host_sve_max_vl);
#endif /* __ARM64_KVM_HYP_H__ */
diff --git a/arch/arm64/include/asm/kvm_pkvm.h b/arch/arm64/include/asm/kvm_pkvm.h
index ad9cfb5c1ff4..cd56acd9a842 100644
--- a/arch/arm64/include/asm/kvm_pkvm.h
+++ b/arch/arm64/include/asm/kvm_pkvm.h
@@ -128,4 +128,13 @@ static inline unsigned long hyp_ffa_proxy_pages(void)
return (2 * KVM_FFA_MBOX_NR_PAGES) + DIV_ROUND_UP(desc_max, PAGE_SIZE);
}
+static inline size_t pkvm_host_sve_state_size(void)
+{
+ if (!system_supports_sve())
+ return 0;
+
+ return size_add(sizeof(struct cpu_sve_state),
+ SVE_SIG_REGS_SIZE(sve_vq_from_vl(kvm_host_sve_max_vl)));
+}
+
#endif /* __ARM64_KVM_PKVM_H__ */
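pkvm_host_sve_state_size() above is the usual header-plus-flexible-array sizing, with size_add() guarding the addition against overflow. A simplified stand-in (the field layout and payload size are illustrative):

	#include <stddef.h>
	#include <stdint.h>
	#include <stdlib.h>

	/* Simplified model of struct cpu_sve_state: a fixed header followed
	 * by a flexible array whose length depends on the host's maximum
	 * vector length. */
	struct sve_state {
		uint64_t zcr_el1;
		uint32_t fpsr, fpcr;
		uint8_t  regs[];	/* VL-dependent payload */
	};

	int main(void)
	{
		size_t payload = 34 * 64;	/* e.g. Z/P/FFR data at VL = 64 */
		struct sve_state *s = malloc(sizeof(*s) + payload);

		if (!s)
			return 1;
		free(s);
		return 0;
	}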
diff --git a/arch/arm64/kernel/armv8_deprecated.c b/arch/arm64/kernel/armv8_deprecated.c
index dd6ce86d4332..b776e7424fe9 100644
--- a/arch/arm64/kernel/armv8_deprecated.c
+++ b/arch/arm64/kernel/armv8_deprecated.c
@@ -462,6 +462,9 @@ static int run_all_insn_set_hw_mode(unsigned int cpu)
for (int i = 0; i < ARRAY_SIZE(insn_emulations); i++) {
struct insn_emulation *insn = insn_emulations[i];
bool enable = READ_ONCE(insn->current_mode) == INSN_HW;
+ if (insn->status == INSN_UNAVAILABLE)
+ continue;
+
if (insn->set_hw_mode && insn->set_hw_mode(enable)) {
pr_warn("CPU[%u] cannot support the emulation of %s",
cpu, insn->name);
diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c
index 4a92096db34e..712718aed5dd 100644
--- a/arch/arm64/kernel/efi.c
+++ b/arch/arm64/kernel/efi.c
@@ -9,6 +9,7 @@
#include <linux/efi.h>
#include <linux/init.h>
+#include <linux/kmemleak.h>
#include <linux/screen_info.h>
#include <linux/vmalloc.h>
@@ -213,6 +214,7 @@ l: if (!p) {
return -ENOMEM;
}
+ kmemleak_not_leak(p);
efi_rt_stack_top = p + THREAD_SIZE;
return 0;
}
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index 9996a989b52e..59716789fe0f 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -1931,6 +1931,11 @@ static unsigned long nvhe_percpu_order(void)
return size ? get_order(size) : 0;
}
+static size_t pkvm_host_sve_state_order(void)
+{
+ return get_order(pkvm_host_sve_state_size());
+}
+
/* A lookup table holding the hypervisor VA for each vector slot */
static void *hyp_spectre_vector_selector[BP_HARDEN_EL2_SLOTS];
@@ -2310,12 +2315,20 @@ static void __init teardown_subsystems(void)
static void __init teardown_hyp_mode(void)
{
+ bool free_sve = system_supports_sve() && is_protected_kvm_enabled();
int cpu;
free_hyp_pgds();
for_each_possible_cpu(cpu) {
free_page(per_cpu(kvm_arm_hyp_stack_page, cpu));
free_pages(kvm_nvhe_sym(kvm_arm_hyp_percpu_base)[cpu], nvhe_percpu_order());
+
+ if (free_sve) {
+ struct cpu_sve_state *sve_state;
+
+ sve_state = per_cpu_ptr_nvhe_sym(kvm_host_data, cpu)->sve_state;
+ free_pages((unsigned long) sve_state, pkvm_host_sve_state_order());
+ }
}
}
@@ -2398,6 +2411,58 @@ static int __init kvm_hyp_init_protection(u32 hyp_va_bits)
return 0;
}
+static int init_pkvm_host_sve_state(void)
+{
+ int cpu;
+
+ if (!system_supports_sve())
+ return 0;
+
+ /* Allocate pages for host sve state in protected mode. */
+ for_each_possible_cpu(cpu) {
+ struct page *page = alloc_pages(GFP_KERNEL, pkvm_host_sve_state_order());
+
+ if (!page)
+ return -ENOMEM;
+
+ per_cpu_ptr_nvhe_sym(kvm_host_data, cpu)->sve_state = page_address(page);
+ }
+
+ /*
+ * Don't map the pages in hyp since these are only used in protected
+ * mode, which will (re)create its own mapping when initialized.
+ */
+
+ return 0;
+}
+
+/*
+ * Finalizes the initialization of hyp mode, once everything else is initialized
+ * and the initialization process cannot fail.
+ */
+static void finalize_init_hyp_mode(void)
+{
+ int cpu;
+
+ if (system_supports_sve() && is_protected_kvm_enabled()) {
+ for_each_possible_cpu(cpu) {
+ struct cpu_sve_state *sve_state;
+
+ sve_state = per_cpu_ptr_nvhe_sym(kvm_host_data, cpu)->sve_state;
+ per_cpu_ptr_nvhe_sym(kvm_host_data, cpu)->sve_state =
+ kern_hyp_va(sve_state);
+ }
+ } else {
+ for_each_possible_cpu(cpu) {
+ struct user_fpsimd_state *fpsimd_state;
+
+ fpsimd_state = &per_cpu_ptr_nvhe_sym(kvm_host_data, cpu)->host_ctxt.fp_regs;
+ per_cpu_ptr_nvhe_sym(kvm_host_data, cpu)->fpsimd_state =
+ kern_hyp_va(fpsimd_state);
+ }
+ }
+}
+
static void pkvm_hyp_init_ptrauth(void)
{
struct kvm_cpu_context *hyp_ctxt;
@@ -2566,6 +2631,10 @@ static int __init init_hyp_mode(void)
goto out_err;
}
+ err = init_pkvm_host_sve_state();
+ if (err)
+ goto out_err;
+
err = kvm_hyp_init_protection(hyp_va_bits);
if (err) {
kvm_err("Failed to init hyp memory protection\n");
@@ -2730,6 +2799,13 @@ static __init int kvm_arm_init(void)
if (err)
goto out_subs;
+ /*
+ * This should be called after initialization is done and failure isn't
+ * possible anymore.
+ */
+ if (!in_hyp_mode)
+ finalize_init_hyp_mode();
+
kvm_arm_initialised = true;
return 0;
diff --git a/arch/arm64/kvm/emulate-nested.c b/arch/arm64/kvm/emulate-nested.c
index 72d733c74a38..54090967a335 100644
--- a/arch/arm64/kvm/emulate-nested.c
+++ b/arch/arm64/kvm/emulate-nested.c
@@ -2181,16 +2181,23 @@ void kvm_emulate_nested_eret(struct kvm_vcpu *vcpu)
if (forward_traps(vcpu, HCR_NV))
return;
+ spsr = vcpu_read_sys_reg(vcpu, SPSR_EL2);
+ spsr = kvm_check_illegal_exception_return(vcpu, spsr);
+
/* Check for an ERETAx */
esr = kvm_vcpu_get_esr(vcpu);
if (esr_iss_is_eretax(esr) && !kvm_auth_eretax(vcpu, &elr)) {
/*
- * Oh no, ERETAx failed to authenticate. If we have
- * FPACCOMBINE, deliver an exception right away. If we
- * don't, then let the mangled ELR value trickle down the
+ * Oh no, ERETAx failed to authenticate.
+ *
+ * If we have FPACCOMBINE and we don't have a pending
+ * Illegal Execution State exception (which has priority
+ * over FPAC), deliver an exception right away.
+ *
+ * Otherwise, let the mangled ELR value trickle down the
* ERET handling, and the guest will have a little surprise.
*/
- if (kvm_has_pauth(vcpu->kvm, FPACCOMBINE)) {
+ if (kvm_has_pauth(vcpu->kvm, FPACCOMBINE) && !(spsr & PSR_IL_BIT)) {
esr &= ESR_ELx_ERET_ISS_ERETA;
esr |= FIELD_PREP(ESR_ELx_EC_MASK, ESR_ELx_EC_FPAC);
kvm_inject_nested_sync(vcpu, esr);
@@ -2201,17 +2208,11 @@ void kvm_emulate_nested_eret(struct kvm_vcpu *vcpu)
preempt_disable();
kvm_arch_vcpu_put(vcpu);
- spsr = __vcpu_sys_reg(vcpu, SPSR_EL2);
- spsr = kvm_check_illegal_exception_return(vcpu, spsr);
if (!esr_iss_is_eretax(esr))
elr = __vcpu_sys_reg(vcpu, ELR_EL2);
trace_kvm_nested_eret(vcpu, elr, spsr);
- /*
- * Note that the current exception level is always the virtual EL2,
- * since we set HCR_EL2.NV bit only when entering the virtual EL2.
- */
*vcpu_pc(vcpu) = elr;
*vcpu_cpsr(vcpu) = spsr;
diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c
index 1807d3a79a8a..521b32868d0d 100644
--- a/arch/arm64/kvm/fpsimd.c
+++ b/arch/arm64/kvm/fpsimd.c
@@ -90,6 +90,13 @@ void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu)
fpsimd_save_and_flush_cpu_state();
}
}
+
+ /*
+ * If normal guests gain SME support, maintain this behavior for pKVM
+ * guests, which don't support SME.
+ */
+ WARN_ON(is_protected_kvm_enabled() && system_supports_sme() &&
+ read_sysreg_s(SYS_SVCR));
}
/*
@@ -161,9 +168,7 @@ void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu)
if (has_vhe() && system_supports_sme()) {
/* Also restore EL0 state seen on entry */
if (vcpu_get_flag(vcpu, HOST_SME_ENABLED))
- sysreg_clear_set(CPACR_EL1, 0,
- CPACR_EL1_SMEN_EL0EN |
- CPACR_EL1_SMEN_EL1EN);
+ sysreg_clear_set(CPACR_EL1, 0, CPACR_ELx_SMEN);
else
sysreg_clear_set(CPACR_EL1,
CPACR_EL1_SMEN_EL0EN,
diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
index e2f762d959bb..11098eb7eb44 100644
--- a/arch/arm64/kvm/guest.c
+++ b/arch/arm64/kvm/guest.c
@@ -251,6 +251,7 @@ static int set_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
case PSR_AA32_MODE_SVC:
case PSR_AA32_MODE_ABT:
case PSR_AA32_MODE_UND:
+ case PSR_AA32_MODE_SYS:
if (!vcpu_el1_is_32bit(vcpu))
return -EINVAL;
break;
@@ -276,7 +277,7 @@ static int set_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
if (*vcpu_cpsr(vcpu) & PSR_MODE32_BIT) {
int i, nr_reg;
- switch (*vcpu_cpsr(vcpu)) {
+ switch (*vcpu_cpsr(vcpu) & PSR_AA32_MODE_MASK) {
/*
* Either we are dealing with user mode, and only the
* first 15 registers (+ PC) must be narrowed to 32bit.
diff --git a/arch/arm64/kvm/hyp/aarch32.c b/arch/arm64/kvm/hyp/aarch32.c
index 8d9670e6615d..449fa58cf3b6 100644
--- a/arch/arm64/kvm/hyp/aarch32.c
+++ b/arch/arm64/kvm/hyp/aarch32.c
@@ -50,9 +50,23 @@ bool kvm_condition_valid32(const struct kvm_vcpu *vcpu)
u32 cpsr_cond;
int cond;
- /* Top two bits non-zero? Unconditional. */
- if (kvm_vcpu_get_esr(vcpu) >> 30)
+ /*
+ * These are the exception classes that could fire with a
+ * conditional instruction.
+ */
+ switch (kvm_vcpu_trap_get_class(vcpu)) {
+ case ESR_ELx_EC_CP15_32:
+ case ESR_ELx_EC_CP15_64:
+ case ESR_ELx_EC_CP14_MR:
+ case ESR_ELx_EC_CP14_LS:
+ case ESR_ELx_EC_FP_ASIMD:
+ case ESR_ELx_EC_CP10_ID:
+ case ESR_ELx_EC_CP14_64:
+ case ESR_ELx_EC_SVC32:
+ break;
+ default:
return true;
+ }
/* Is condition field valid? */
cond = kvm_vcpu_get_condition(vcpu);
diff --git a/arch/arm64/kvm/hyp/fpsimd.S b/arch/arm64/kvm/hyp/fpsimd.S
index 61e6f3ba7b7d..e950875e31ce 100644
--- a/arch/arm64/kvm/hyp/fpsimd.S
+++ b/arch/arm64/kvm/hyp/fpsimd.S
@@ -25,3 +25,9 @@ SYM_FUNC_START(__sve_restore_state)
sve_load 0, x1, x2, 3
ret
SYM_FUNC_END(__sve_restore_state)
+
+SYM_FUNC_START(__sve_save_state)
+ mov x2, #1
+ sve_save 0, x1, x2, 3
+ ret
+SYM_FUNC_END(__sve_save_state)
diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h
index a92566f36022..0c4de44534b7 100644
--- a/arch/arm64/kvm/hyp/include/hyp/switch.h
+++ b/arch/arm64/kvm/hyp/include/hyp/switch.h
@@ -316,10 +316,24 @@ static inline void __hyp_sve_restore_guest(struct kvm_vcpu *vcpu)
{
sve_cond_update_zcr_vq(vcpu_sve_max_vq(vcpu) - 1, SYS_ZCR_EL2);
__sve_restore_state(vcpu_sve_pffr(vcpu),
- &vcpu->arch.ctxt.fp_regs.fpsr);
+ &vcpu->arch.ctxt.fp_regs.fpsr,
+ true);
write_sysreg_el1(__vcpu_sys_reg(vcpu, ZCR_EL1), SYS_ZCR);
}
+static inline void __hyp_sve_save_host(void)
+{
+ struct cpu_sve_state *sve_state = *host_data_ptr(sve_state);
+
+ sve_state->zcr_el1 = read_sysreg_el1(SYS_ZCR);
+ write_sysreg_s(ZCR_ELx_LEN_MASK, SYS_ZCR_EL2);
+ __sve_save_state(sve_state->sve_regs + sve_ffr_offset(kvm_host_sve_max_vl),
+ &sve_state->fpsr,
+ true);
+}
+
+static void kvm_hyp_save_fpsimd_host(struct kvm_vcpu *vcpu);
+
/*
* We trap the first access to the FP/SIMD to save the host context and
* restore the guest context lazily.
@@ -330,7 +344,6 @@ static bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code)
{
bool sve_guest;
u8 esr_ec;
- u64 reg;
if (!system_supports_fpsimd())
return false;
@@ -353,24 +366,15 @@ static bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code)
/* Valid trap. Switch the context: */
/* First disable enough traps to allow us to update the registers */
- if (has_vhe() || has_hvhe()) {
- reg = CPACR_EL1_FPEN_EL0EN | CPACR_EL1_FPEN_EL1EN;
- if (sve_guest)
- reg |= CPACR_EL1_ZEN_EL0EN | CPACR_EL1_ZEN_EL1EN;
-
- sysreg_clear_set(cpacr_el1, 0, reg);
- } else {
- reg = CPTR_EL2_TFP;
- if (sve_guest)
- reg |= CPTR_EL2_TZ;
-
- sysreg_clear_set(cptr_el2, reg, 0);
- }
+ if (sve_guest || (is_protected_kvm_enabled() && system_supports_sve()))
+ cpacr_clear_set(0, CPACR_ELx_FPEN | CPACR_ELx_ZEN);
+ else
+ cpacr_clear_set(0, CPACR_ELx_FPEN);
isb();
/* Write out the host state if it's in the registers */
if (host_owns_fp_regs())
- __fpsimd_save_state(*host_data_ptr(fpsimd_state));
+ kvm_hyp_save_fpsimd_host(vcpu);
/* Restore the guest state */
if (sve_guest)
diff --git a/arch/arm64/kvm/hyp/include/nvhe/pkvm.h b/arch/arm64/kvm/hyp/include/nvhe/pkvm.h
index 22f374e9f532..24a9a8330d19 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/pkvm.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/pkvm.h
@@ -59,7 +59,6 @@ static inline bool pkvm_hyp_vcpu_is_protected(struct pkvm_hyp_vcpu *hyp_vcpu)
}
void pkvm_hyp_vm_table_init(void *tbl);
-void pkvm_host_fpsimd_state_init(void);
int __pkvm_init_vm(struct kvm *host_kvm, unsigned long vm_hva,
unsigned long pgd_hva);
diff --git a/arch/arm64/kvm/hyp/nvhe/ffa.c b/arch/arm64/kvm/hyp/nvhe/ffa.c
index 02746f9d0980..efb053af331c 100644
--- a/arch/arm64/kvm/hyp/nvhe/ffa.c
+++ b/arch/arm64/kvm/hyp/nvhe/ffa.c
@@ -177,6 +177,14 @@ static void ffa_retrieve_req(struct arm_smccc_res *res, u32 len)
res);
}
+static void ffa_rx_release(struct arm_smccc_res *res)
+{
+ arm_smccc_1_1_smc(FFA_RX_RELEASE,
+ 0, 0,
+ 0, 0, 0, 0, 0,
+ res);
+}
+
static void do_ffa_rxtx_map(struct arm_smccc_res *res,
struct kvm_cpu_context *ctxt)
{
@@ -543,16 +551,19 @@ static void do_ffa_mem_reclaim(struct arm_smccc_res *res,
if (WARN_ON(offset > len ||
fraglen > KVM_FFA_MBOX_NR_PAGES * PAGE_SIZE)) {
ret = FFA_RET_ABORTED;
+ ffa_rx_release(res);
goto out_unlock;
}
if (len > ffa_desc_buf.len) {
ret = FFA_RET_NO_MEMORY;
+ ffa_rx_release(res);
goto out_unlock;
}
buf = ffa_desc_buf.buf;
memcpy(buf, hyp_buffers.rx, fraglen);
+ ffa_rx_release(res);
for (fragoff = fraglen; fragoff < len; fragoff += fraglen) {
ffa_mem_frag_rx(res, handle_lo, handle_hi, fragoff);
@@ -563,6 +574,7 @@ static void do_ffa_mem_reclaim(struct arm_smccc_res *res,
fraglen = res->a3;
memcpy((void *)buf + fragoff, hyp_buffers.rx, fraglen);
+ ffa_rx_release(res);
}
ffa_mem_reclaim(res, handle_lo, handle_hi, flags);
diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
index d5c48dc98f67..f43d845f3c4e 100644
--- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c
+++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
@@ -23,20 +23,80 @@ DEFINE_PER_CPU(struct kvm_nvhe_init_params, kvm_init_params);
void __kvm_hyp_host_forward_smc(struct kvm_cpu_context *host_ctxt);
+static void __hyp_sve_save_guest(struct kvm_vcpu *vcpu)
+{
+ __vcpu_sys_reg(vcpu, ZCR_EL1) = read_sysreg_el1(SYS_ZCR);
+ /*
+ * On saving/restoring guest sve state, always use the maximum VL for
+ * the guest. The layout of the data when saving the sve state depends
+ * on the VL, so use a consistent (i.e., the maximum) guest VL.
+ */
+ sve_cond_update_zcr_vq(vcpu_sve_max_vq(vcpu) - 1, SYS_ZCR_EL2);
+ __sve_save_state(vcpu_sve_pffr(vcpu), &vcpu->arch.ctxt.fp_regs.fpsr, true);
+ write_sysreg_s(ZCR_ELx_LEN_MASK, SYS_ZCR_EL2);
+}
+
+static void __hyp_sve_restore_host(void)
+{
+ struct cpu_sve_state *sve_state = *host_data_ptr(sve_state);
+
+ /*
+ * On saving/restoring host sve state, always use the maximum VL for
+ * the host. The layout of the data when saving the sve state depends
+ * on the VL, so use a consistent (i.e., the maximum) host VL.
+ *
+ * Setting ZCR_EL2 to ZCR_ELx_LEN_MASK sets the effective length
+ * to the maximum supported by the system (or the limit imposed at EL3).
+ */
+ write_sysreg_s(ZCR_ELx_LEN_MASK, SYS_ZCR_EL2);
+ __sve_restore_state(sve_state->sve_regs + sve_ffr_offset(kvm_host_sve_max_vl),
+ &sve_state->fpsr,
+ true);
+ write_sysreg_el1(sve_state->zcr_el1, SYS_ZCR);
+}
+
+static void fpsimd_sve_flush(void)
+{
+ *host_data_ptr(fp_owner) = FP_STATE_HOST_OWNED;
+}
+
+static void fpsimd_sve_sync(struct kvm_vcpu *vcpu)
+{
+ if (!guest_owns_fp_regs())
+ return;
+
+ cpacr_clear_set(0, CPACR_ELx_FPEN | CPACR_ELx_ZEN);
+ isb();
+
+ if (vcpu_has_sve(vcpu))
+ __hyp_sve_save_guest(vcpu);
+ else
+ __fpsimd_save_state(&vcpu->arch.ctxt.fp_regs);
+
+ if (system_supports_sve())
+ __hyp_sve_restore_host();
+ else
+ __fpsimd_restore_state(*host_data_ptr(fpsimd_state));
+
+ *host_data_ptr(fp_owner) = FP_STATE_HOST_OWNED;
+}
+
static void flush_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu)
{
struct kvm_vcpu *host_vcpu = hyp_vcpu->host_vcpu;
+ fpsimd_sve_flush();
+
hyp_vcpu->vcpu.arch.ctxt = host_vcpu->arch.ctxt;
hyp_vcpu->vcpu.arch.sve_state = kern_hyp_va(host_vcpu->arch.sve_state);
- hyp_vcpu->vcpu.arch.sve_max_vl = host_vcpu->arch.sve_max_vl;
+ /* Limit guest vector length to the maximum supported by the host. */
+ hyp_vcpu->vcpu.arch.sve_max_vl = min(host_vcpu->arch.sve_max_vl, kvm_host_sve_max_vl);
hyp_vcpu->vcpu.arch.hw_mmu = host_vcpu->arch.hw_mmu;
hyp_vcpu->vcpu.arch.hcr_el2 = host_vcpu->arch.hcr_el2;
hyp_vcpu->vcpu.arch.mdcr_el2 = host_vcpu->arch.mdcr_el2;
- hyp_vcpu->vcpu.arch.cptr_el2 = host_vcpu->arch.cptr_el2;
hyp_vcpu->vcpu.arch.iflags = host_vcpu->arch.iflags;
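The "always use the maximum VL" comments in __hyp_sve_save_guest() and __hyp_sve_restore_host() above reduce to the save-area layout being a function of the vector length. A toy model (the real offsets come from SVE_SIG_REGS_SIZE(); this one just packs Z registers back to back):

	#include <assert.h>
	#include <stddef.h>

	/* Zn stored at offset n * VL in a packed save area (toy layout). */
	static size_t zreg_offset(size_t vl_bytes, unsigned int n)
	{
		return (size_t)n * vl_bytes;
	}

	int main(void)
	{
		/* Saved with a 64-byte VL but restored with a 32-byte VL:
		 * every register past Z0 is read from the wrong offset,
		 * which is why both paths pin ZCR_EL2 to the maximum VL
		 * before touching the buffer. */
		assert(zreg_offset(64, 3) != zreg_offset(32, 3));
		return 0;
	}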
@@ -54,10 +114,11 @@ static void sync_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu)
struct vgic_v3_cpu_if *host_cpu_if = &host_vcpu->arch.vgic_cpu.vgic_v3;
unsigned int i;
+ fpsimd_sve_sync(&hyp_vcpu->vcpu);
+
host_vcpu->arch.ctxt = hyp_vcpu->vcpu.arch.ctxt;
host_vcpu->arch.hcr_el2 = hyp_vcpu->vcpu.arch.hcr_el2;
- host_vcpu->arch.cptr_el2 = hyp_vcpu->vcpu.arch.cptr_el2;
host_vcpu->arch.fault = hyp_vcpu->vcpu.arch.fault;
@@ -79,6 +140,17 @@ static void handle___kvm_vcpu_run(struct kvm_cpu_context *host_ctxt)
struct pkvm_hyp_vcpu *hyp_vcpu;
struct kvm *host_kvm;
+ /*
+ * KVM (and pKVM) doesn't support SME guests for now, and
+ * ensures that SME features aren't enabled in pstate when
+ * loading a vcpu. Therefore, if SME features are enabled, the
+ * host is misbehaving.
+ */
+ if (unlikely(system_supports_sme() && read_sysreg_s(SYS_SVCR))) {
+ ret = -EINVAL;
+ goto out;
+ }
+
host_kvm = kern_hyp_va(host_vcpu->kvm);
hyp_vcpu = pkvm_load_hyp_vcpu(host_kvm->arch.pkvm.handle,
host_vcpu->vcpu_idx);
@@ -405,11 +477,7 @@ void handle_trap(struct kvm_cpu_context *host_ctxt)
handle_host_smc(host_ctxt);
break;
case ESR_ELx_EC_SVE:
- if (has_hvhe())
- sysreg_clear_set(cpacr_el1, 0, (CPACR_EL1_ZEN_EL1EN |
- CPACR_EL1_ZEN_EL0EN));
- else
- sysreg_clear_set(cptr_el2, CPTR_EL2_TZ, 0);
+ cpacr_clear_set(0, CPACR_ELx_ZEN);
isb();
sve_cond_update_zcr_vq(ZCR_ELx_LEN_MASK, SYS_ZCR_EL2);
break;
diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c
index 16aa4875ddb8..95cf18574251 100644
--- a/arch/arm64/kvm/hyp/nvhe/pkvm.c
+++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c
@@ -18,6 +18,8 @@ unsigned long __icache_flags;
/* Used by kvm_get_vttbr(). */
unsigned int kvm_arm_vmid_bits;
+unsigned int kvm_host_sve_max_vl;
+
/*
* Set trap register values based on features in ID_AA64PFR0.
*/
@@ -63,7 +65,7 @@ static void pvm_init_traps_aa64pfr0(struct kvm_vcpu *vcpu)
/* Trap SVE */
if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_SVE), feature_ids)) {
if (has_hvhe())
- cptr_clear |= CPACR_EL1_ZEN_EL0EN | CPACR_EL1_ZEN_EL1EN;
+ cptr_clear |= CPACR_ELx_ZEN;
else
cptr_set |= CPTR_EL2_TZ;
}
@@ -247,17 +249,6 @@ void pkvm_hyp_vm_table_init(void *tbl)
vm_table = tbl;
}
-void pkvm_host_fpsimd_state_init(void)
-{
- unsigned long i;
-
- for (i = 0; i < hyp_nr_cpus; i++) {
- struct kvm_host_data *host_data = per_cpu_ptr(&kvm_host_data, i);
-
- host_data->fpsimd_state = &host_data->host_ctxt.fp_regs;
- }
-}
-
/*
* Return the hyp vm structure corresponding to the handle.
*/
@@ -586,6 +577,8 @@ unlock:
if (ret)
unmap_donated_memory(hyp_vcpu, sizeof(*hyp_vcpu));
+ hyp_vcpu->vcpu.arch.cptr_el2 = kvm_get_reset_cptr_el2(&hyp_vcpu->vcpu);
+
return ret;
}
diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c
index 859f22f754d3..f4350ba07b0b 100644
--- a/arch/arm64/kvm/hyp/nvhe/setup.c
+++ b/arch/arm64/kvm/hyp/nvhe/setup.c
@@ -67,6 +67,28 @@ static int divide_memory_pool(void *virt, unsigned long size)
return 0;
}
+static int pkvm_create_host_sve_mappings(void)
+{
+ void *start, *end;
+ int ret, i;
+
+ if (!system_supports_sve())
+ return 0;
+
+ for (i = 0; i < hyp_nr_cpus; i++) {
+ struct kvm_host_data *host_data = per_cpu_ptr(&kvm_host_data, i);
+ struct cpu_sve_state *sve_state = host_data->sve_state;
+
+ start = kern_hyp_va(sve_state);
+ end = start + PAGE_ALIGN(pkvm_host_sve_state_size());
+ ret = pkvm_create_mappings(start, end, PAGE_HYP);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
static int recreate_hyp_mappings(phys_addr_t phys, unsigned long size,
unsigned long *per_cpu_base,
u32 hyp_va_bits)
@@ -125,6 +147,8 @@ static int recreate_hyp_mappings(phys_addr_t phys, unsigned long size,
return ret;
}
+ pkvm_create_host_sve_mappings();
+
/*
* Map the host sections RO in the hypervisor, but transfer the
* ownership from the host to the hypervisor itself to make sure they
@@ -300,7 +324,6 @@ void __noreturn __pkvm_init_finalise(void)
goto out;
pkvm_hyp_vm_table_init(vm_table_base);
- pkvm_host_fpsimd_state_init();
out:
/*
* We tail-called to here from handle___pkvm_init() and will not return,
diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c
index 6758cd905570..6af179c6356d 100644
--- a/arch/arm64/kvm/hyp/nvhe/switch.c
+++ b/arch/arm64/kvm/hyp/nvhe/switch.c
@@ -48,15 +48,14 @@ static void __activate_traps(struct kvm_vcpu *vcpu)
val |= has_hvhe() ? CPACR_EL1_TTA : CPTR_EL2_TTA;
if (cpus_have_final_cap(ARM64_SME)) {
if (has_hvhe())
- val &= ~(CPACR_EL1_SMEN_EL1EN | CPACR_EL1_SMEN_EL0EN);
+ val &= ~CPACR_ELx_SMEN;
else
val |= CPTR_EL2_TSM;
}
if (!guest_owns_fp_regs()) {
if (has_hvhe())
- val &= ~(CPACR_EL1_FPEN_EL0EN | CPACR_EL1_FPEN_EL1EN |
- CPACR_EL1_ZEN_EL0EN | CPACR_EL1_ZEN_EL1EN);
+ val &= ~(CPACR_ELx_FPEN | CPACR_ELx_ZEN);
else
val |= CPTR_EL2_TFP | CPTR_EL2_TZ;
@@ -182,6 +181,25 @@ static bool kvm_handle_pvm_sys64(struct kvm_vcpu *vcpu, u64 *exit_code)
kvm_handle_pvm_sysreg(vcpu, exit_code));
}
+static void kvm_hyp_save_fpsimd_host(struct kvm_vcpu *vcpu)
+{
+ /*
+ * Non-protected kvm relies on the host restoring its sve state.
+ * Protected kvm restores the host's sve state so as not to reveal
+ * that fpsimd was used by a guest or leak upper sve bits.
+ */
+ if (unlikely(is_protected_kvm_enabled() && system_supports_sve())) {
+ __hyp_sve_save_host();
+
+ /* Re-enable SVE traps if not supported for the guest vcpu. */
+ if (!vcpu_has_sve(vcpu))
+ cpacr_clear_set(CPACR_ELx_ZEN, 0);
+
+ } else {
+ __fpsimd_save_state(*host_data_ptr(fpsimd_state));
+ }
+}
+
static const exit_handler_fn hyp_exit_handlers[] = {
[0 ... ESR_ELx_EC_MAX] = NULL,
[ESR_ELx_EC_CP15_32] = kvm_hyp_handle_cp15_32,
diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c
index d7af5f46f22a..8fbb6a2e0559 100644
--- a/arch/arm64/kvm/hyp/vhe/switch.c
+++ b/arch/arm64/kvm/hyp/vhe/switch.c
@@ -93,8 +93,7 @@ static void __activate_traps(struct kvm_vcpu *vcpu)
val = read_sysreg(cpacr_el1);
val |= CPACR_ELx_TTA;
- val &= ~(CPACR_EL1_ZEN_EL0EN | CPACR_EL1_ZEN_EL1EN |
- CPACR_EL1_SMEN_EL0EN | CPACR_EL1_SMEN_EL1EN);
+ val &= ~(CPACR_ELx_ZEN | CPACR_ELx_SMEN);
/*
* With VHE (HCR.E2H == 1), accesses to CPACR_EL1 are routed to
@@ -109,9 +108,9 @@ static void __activate_traps(struct kvm_vcpu *vcpu)
if (guest_owns_fp_regs()) {
if (vcpu_has_sve(vcpu))
- val |= CPACR_EL1_ZEN_EL0EN | CPACR_EL1_ZEN_EL1EN;
+ val |= CPACR_ELx_ZEN;
} else {
- val &= ~(CPACR_EL1_FPEN_EL0EN | CPACR_EL1_FPEN_EL1EN);
+ val &= ~CPACR_ELx_FPEN;
__activate_traps_fpsimd32(vcpu);
}
@@ -262,6 +261,11 @@ static bool kvm_hyp_handle_eret(struct kvm_vcpu *vcpu, u64 *exit_code)
return true;
}
+static void kvm_hyp_save_fpsimd_host(struct kvm_vcpu *vcpu)
+{
+ __fpsimd_save_state(*host_data_ptr(fpsimd_state));
+}
+
static const exit_handler_fn hyp_exit_handlers[] = {
[0 ... ESR_ELx_EC_MAX] = NULL,
[ESR_ELx_EC_CP15_32] = kvm_hyp_handle_cp15_32,
diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c
index 6813c7c7f00a..bae8536cbf00 100644
--- a/arch/arm64/kvm/nested.c
+++ b/arch/arm64/kvm/nested.c
@@ -58,8 +58,10 @@ static u64 limit_nv_id_reg(u32 id, u64 val)
break;
case SYS_ID_AA64PFR1_EL1:
- /* Only support SSBS */
- val &= NV_FTR(PFR1, SSBS);
+ /* Only support BTI, SSBS, CSV2_frac */
+ val &= (NV_FTR(PFR1, BT) |
+ NV_FTR(PFR1, SSBS) |
+ NV_FTR(PFR1, CSV2_frac));
break;
case SYS_ID_AA64MMFR0_EL1:
diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
index 1b7b58cb121f..3fc8ca164dbe 100644
--- a/arch/arm64/kvm/reset.c
+++ b/arch/arm64/kvm/reset.c
@@ -32,6 +32,7 @@
/* Maximum phys_shift supported for any VM on this host */
static u32 __ro_after_init kvm_ipa_limit;
+unsigned int __ro_after_init kvm_host_sve_max_vl;
/*
* ARMv8 Reset Values
@@ -51,6 +52,8 @@ int __init kvm_arm_init_sve(void)
{
if (system_supports_sve()) {
kvm_sve_max_vl = sve_max_virtualisable_vl();
+ kvm_host_sve_max_vl = sve_max_vl();
+ kvm_nvhe_sym(kvm_host_sve_max_vl) = kvm_host_sve_max_vl;
/*
* The get_sve_reg()/set_sve_reg() ioctl interface will need
diff --git a/arch/arm64/kvm/vgic/vgic-init.c b/arch/arm64/kvm/vgic/vgic-init.c
index 8f5b7a3e7009..7f68cf58b978 100644
--- a/arch/arm64/kvm/vgic/vgic-init.c
+++ b/arch/arm64/kvm/vgic/vgic-init.c
@@ -391,7 +391,7 @@ static void kvm_vgic_dist_destroy(struct kvm *kvm)
if (dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) {
list_for_each_entry_safe(rdreg, next, &dist->rd_regions, list)
- vgic_v3_free_redist_region(rdreg);
+ vgic_v3_free_redist_region(kvm, rdreg);
INIT_LIST_HEAD(&dist->rd_regions);
} else {
dist->vgic_cpu_base = VGIC_ADDR_UNDEF;
diff --git a/arch/arm64/kvm/vgic/vgic-mmio-v3.c b/arch/arm64/kvm/vgic/vgic-mmio-v3.c
index a3983a631b5a..9e50928f5d7d 100644
--- a/arch/arm64/kvm/vgic/vgic-mmio-v3.c
+++ b/arch/arm64/kvm/vgic/vgic-mmio-v3.c
@@ -919,8 +919,19 @@ free:
return ret;
}
-void vgic_v3_free_redist_region(struct vgic_redist_region *rdreg)
+void vgic_v3_free_redist_region(struct kvm *kvm, struct vgic_redist_region *rdreg)
{
+ struct kvm_vcpu *vcpu;
+ unsigned long c;
+
+ lockdep_assert_held(&kvm->arch.config_lock);
+
+ /* Garbage collect the region */
+ kvm_for_each_vcpu(c, vcpu, kvm) {
+ if (vcpu->arch.vgic_cpu.rdreg == rdreg)
+ vcpu->arch.vgic_cpu.rdreg = NULL;
+ }
+
list_del(&rdreg->list);
kfree(rdreg);
}
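The new kvm argument exists so the free path can sweep vcpu back-references before the kfree(); a minimal sketch of that clear-before-free pattern (the types are stand-ins):

	#include <stdlib.h>

	struct region { int id; };
	struct vcpu   { struct region *rdreg; };

	/* Before freeing a shared object, clear every back-reference to
	 * it, so later users see NULL instead of a dangling pointer. */
	static void free_region(struct vcpu *vcpus, int n, struct region *r)
	{
		for (int i = 0; i < n; i++)
			if (vcpus[i].rdreg == r)
				vcpus[i].rdreg = NULL;
		free(r);
	}

	int main(void)
	{
		struct region *r = malloc(sizeof(*r));
		struct vcpu vcpus[2] = { { r }, { r } };

		if (!r)
			return 1;
		free_region(vcpus, 2, r);
		return vcpus[0].rdreg == NULL ? 0 : 1;
	}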
@@ -945,7 +956,7 @@ int vgic_v3_set_redist_base(struct kvm *kvm, u32 index, u64 addr, u32 count)
mutex_lock(&kvm->arch.config_lock);
rdreg = vgic_v3_rdist_region_from_index(kvm, index);
- vgic_v3_free_redist_region(rdreg);
+ vgic_v3_free_redist_region(kvm, rdreg);
mutex_unlock(&kvm->arch.config_lock);
return ret;
}
diff --git a/arch/arm64/kvm/vgic/vgic.h b/arch/arm64/kvm/vgic/vgic.h
index 6106ebd5ba42..03d356a12377 100644
--- a/arch/arm64/kvm/vgic/vgic.h
+++ b/arch/arm64/kvm/vgic/vgic.h
@@ -316,7 +316,7 @@ vgic_v3_rd_region_size(struct kvm *kvm, struct vgic_redist_region *rdreg)
struct vgic_redist_region *vgic_v3_rdist_region_from_index(struct kvm *kvm,
u32 index);
-void vgic_v3_free_redist_region(struct vgic_redist_region *rdreg);
+void vgic_v3_free_redist_region(struct kvm *kvm, struct vgic_redist_region *rdreg);
bool vgic_v3_rdist_overlap(struct kvm *kvm, gpa_t base, size_t size);
diff --git a/arch/arm64/mm/contpte.c b/arch/arm64/mm/contpte.c
index 9f9486de0004..a3edced29ac1 100644
--- a/arch/arm64/mm/contpte.c
+++ b/arch/arm64/mm/contpte.c
@@ -376,7 +376,7 @@ void contpte_clear_young_dirty_ptes(struct vm_area_struct *vma,
* clearing access/dirty for the whole block.
*/
unsigned long start = addr;
- unsigned long end = start + nr;
+ unsigned long end = start + nr * PAGE_SIZE;
if (pte_cont(__ptep_get(ptep + nr - 1)))
end = ALIGN(end, CONT_PTE_SIZE);
@@ -386,7 +386,7 @@ void contpte_clear_young_dirty_ptes(struct vm_area_struct *vma,
ptep = contpte_align_down(ptep);
}
- __clear_young_dirty_ptes(vma, start, ptep, end - start, flags);
+ __clear_young_dirty_ptes(vma, start, ptep, (end - start) / PAGE_SIZE, flags);
}
EXPORT_SYMBOL_GPL(contpte_clear_young_dirty_ptes);
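Both contpte fixes above are unit conversions: nr counts PTEs while start/end are byte addresses. A worked check:

	#include <assert.h>

	#define PAGE_SIZE 4096UL

	int main(void)
	{
		unsigned long start = 0x1000, nr = 3;

		/* The span covers nr pages, i.e. nr * PAGE_SIZE bytes
		 * (the old code added the bare PTE count instead)... */
		unsigned long end = start + nr * PAGE_SIZE;

		/* ...and converting a byte span back to a PTE count
		 * divides by PAGE_SIZE again, as the fixed code does. */
		assert((end - start) / PAGE_SIZE == nr);
		return 0;
	}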
diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
index e38139c576ee..ddc042895d01 100644
--- a/arch/loongarch/Kconfig
+++ b/arch/loongarch/Kconfig
@@ -143,7 +143,7 @@ config LOONGARCH
select HAVE_LIVEPATCH
select HAVE_MOD_ARCH_SPECIFIC
select HAVE_NMI
- select HAVE_OBJTOOL if AS_HAS_EXPLICIT_RELOCS
+ select HAVE_OBJTOOL if AS_HAS_EXPLICIT_RELOCS && AS_HAS_THIN_ADD_SUB && !CC_IS_CLANG
select HAVE_PCI
select HAVE_PERF_EVENTS
select HAVE_PERF_REGS
@@ -261,6 +261,9 @@ config AS_HAS_EXPLICIT_RELOCS
config AS_HAS_FCSR_CLASS
def_bool $(as-instr,movfcsr2gr \$t0$(comma)\$fcsr0)
+config AS_HAS_THIN_ADD_SUB
+ def_bool $(cc-option,-Wa$(comma)-mthin-add-sub)
+
config AS_HAS_LSX_EXTENSION
def_bool $(as-instr,vld \$vr0$(comma)\$a0$(comma)0)
diff --git a/arch/loongarch/Kconfig.debug b/arch/loongarch/Kconfig.debug
index 98d60630c3d4..8b2ce5b5d43e 100644
--- a/arch/loongarch/Kconfig.debug
+++ b/arch/loongarch/Kconfig.debug
@@ -28,6 +28,7 @@ config UNWINDER_PROLOGUE
config UNWINDER_ORC
bool "ORC unwinder"
+ depends on HAVE_OBJTOOL
select OBJTOOL
help
This option enables the ORC (Oops Rewind Capability) unwinder for
diff --git a/arch/loongarch/boot/dts/loongson-2k0500-ref.dts b/arch/loongarch/boot/dts/loongson-2k0500-ref.dts
index 8aefb0c12672..a34734a6c3ce 100644
--- a/arch/loongarch/boot/dts/loongson-2k0500-ref.dts
+++ b/arch/loongarch/boot/dts/loongson-2k0500-ref.dts
@@ -44,14 +44,14 @@
&gmac0 {
status = "okay";
- phy-mode = "rgmii";
+ phy-mode = "rgmii-id";
bus_id = <0x0>;
};
&gmac1 {
status = "okay";
- phy-mode = "rgmii";
+ phy-mode = "rgmii-id";
bus_id = <0x1>;
};
diff --git a/arch/loongarch/boot/dts/loongson-2k1000-ref.dts b/arch/loongarch/boot/dts/loongson-2k1000-ref.dts
index 8463fe035386..23cf26cc3e5f 100644
--- a/arch/loongarch/boot/dts/loongson-2k1000-ref.dts
+++ b/arch/loongarch/boot/dts/loongson-2k1000-ref.dts
@@ -43,7 +43,7 @@
&gmac0 {
status = "okay";
- phy-mode = "rgmii";
+ phy-mode = "rgmii-id";
phy-handle = <&phy0>;
mdio {
compatible = "snps,dwmac-mdio";
@@ -58,7 +58,7 @@
&gmac1 {
status = "okay";
- phy-mode = "rgmii";
+ phy-mode = "rgmii-id";
phy-handle = <&phy1>;
mdio {
compatible = "snps,dwmac-mdio";
diff --git a/arch/loongarch/boot/dts/loongson-2k2000-ref.dts b/arch/loongarch/boot/dts/loongson-2k2000-ref.dts
index 74b99bd234cc..ea9e6985d0e9 100644
--- a/arch/loongarch/boot/dts/loongson-2k2000-ref.dts
+++ b/arch/loongarch/boot/dts/loongson-2k2000-ref.dts
@@ -92,7 +92,7 @@
&gmac2 {
status = "okay";
- phy-mode = "rgmii";
+ phy-mode = "rgmii-id";
phy-handle = <&phy2>;
mdio {
compatible = "snps,dwmac-mdio";
diff --git a/arch/loongarch/include/asm/hw_breakpoint.h b/arch/loongarch/include/asm/hw_breakpoint.h
index 21447fb1efc7..d78330916bd1 100644
--- a/arch/loongarch/include/asm/hw_breakpoint.h
+++ b/arch/loongarch/include/asm/hw_breakpoint.h
@@ -75,6 +75,8 @@ do { \
#define CSR_MWPC_NUM 0x3f
#define CTRL_PLV_ENABLE 0x1e
+#define CTRL_PLV0_ENABLE 0x02
+#define CTRL_PLV3_ENABLE 0x10
#define MWPnCFG3_LoadEn 8
#define MWPnCFG3_StoreEn 9
@@ -101,7 +103,7 @@ struct perf_event;
struct perf_event_attr;
extern int arch_bp_generic_fields(struct arch_hw_breakpoint_ctrl ctrl,
- int *gen_len, int *gen_type, int *offset);
+ int *gen_len, int *gen_type);
extern int arch_check_bp_in_kernelspace(struct arch_hw_breakpoint *hw);
extern int hw_breakpoint_arch_parse(struct perf_event *bp,
const struct perf_event_attr *attr,
diff --git a/arch/loongarch/include/asm/numa.h b/arch/loongarch/include/asm/numa.h
index 27f319b49862..b5f9de9f102e 100644
--- a/arch/loongarch/include/asm/numa.h
+++ b/arch/loongarch/include/asm/numa.h
@@ -56,6 +56,7 @@ extern int early_cpu_to_node(int cpu);
static inline void early_numa_add_cpu(int cpuid, s16 node) { }
static inline void numa_add_cpu(unsigned int cpu) { }
static inline void numa_remove_cpu(unsigned int cpu) { }
+static inline void set_cpuid_to_node(int cpuid, s16 node) { }
static inline int early_cpu_to_node(int cpu)
{
diff --git a/arch/loongarch/include/asm/stackframe.h b/arch/loongarch/include/asm/stackframe.h
index 45b507a7b06f..d9eafd3ee3d1 100644
--- a/arch/loongarch/include/asm/stackframe.h
+++ b/arch/loongarch/include/asm/stackframe.h
@@ -42,7 +42,7 @@
.macro JUMP_VIRT_ADDR temp1 temp2
li.d \temp1, CACHE_BASE
pcaddi \temp2, 0
- or \temp1, \temp1, \temp2
+ bstrins.d \temp1, \temp2, (DMW_PABITS - 1), 0
jirl zero, \temp1, 0xc
.endm
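Why bstrins.d instead of or: pcaddi can yield a PC whose bits above DMW_PABITS - 1 belong to some other mapping window, and a plain or keeps those stale bits. bstrins.d \temp1, \temp2, (DMW_PABITS - 1), 0 overwrites only the low bits of the window base. A worked example with assumed values (DMW_PABITS = 48, cached window at 0x9000...0):

	#include <assert.h>
	#include <stdint.h>

	int main(void)
	{
		const uint64_t CACHE_BASE = 0x9000000000000000ull;
		const uint64_t pc   = 0xffff000000234567ull; /* e.g. a TLB-mapped
								PC, not a 1:1
								direct-map address */
		const uint64_t mask = (1ull << 48) - 1;

		/* Plain 'or' keeps the stale high bits of pc... */
		uint64_t bad  = CACHE_BASE | pc;
		/* ...while bstrins.d inserts only pc[47:0] into the base. */
		uint64_t good = CACHE_BASE | (pc & mask);

		assert(bad != good);
		assert(good == 0x9000000000234567ull);
		return 0;
	}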
diff --git a/arch/loongarch/kernel/head.S b/arch/loongarch/kernel/head.S
index c4f7de2e2805..4677ea8fa8e9 100644
--- a/arch/loongarch/kernel/head.S
+++ b/arch/loongarch/kernel/head.S
@@ -22,7 +22,7 @@
_head:
.word MZ_MAGIC /* "MZ", MS-DOS header */
.org 0x8
- .dword kernel_entry /* Kernel entry point */
+ .dword _kernel_entry /* Kernel entry point (physical address) */
.dword _kernel_asize /* Kernel image effective size */
.quad PHYS_LINK_KADDR /* Kernel image load offset from start of RAM */
.org 0x38 /* 0x20 ~ 0x37 reserved */
diff --git a/arch/loongarch/kernel/hw_breakpoint.c b/arch/loongarch/kernel/hw_breakpoint.c
index fc55c4de2a11..621ad7634df7 100644
--- a/arch/loongarch/kernel/hw_breakpoint.c
+++ b/arch/loongarch/kernel/hw_breakpoint.c
@@ -174,11 +174,21 @@ void flush_ptrace_hw_breakpoint(struct task_struct *tsk)
static int hw_breakpoint_control(struct perf_event *bp,
enum hw_breakpoint_ops ops)
{
- u32 ctrl;
+ u32 ctrl, privilege;
int i, max_slots, enable;
+ struct pt_regs *regs;
struct perf_event **slots;
struct arch_hw_breakpoint *info = counter_arch_bp(bp);
+ if (arch_check_bp_in_kernelspace(info))
+ privilege = CTRL_PLV0_ENABLE;
+ else
+ privilege = CTRL_PLV3_ENABLE;
+
+ /* Whether bp belongs to a task. */
+ if (bp->hw.target)
+ regs = task_pt_regs(bp->hw.target);
+
if (info->ctrl.type == LOONGARCH_BREAKPOINT_EXECUTE) {
/* Breakpoint */
slots = this_cpu_ptr(bp_on_reg);
@@ -197,31 +207,38 @@ static int hw_breakpoint_control(struct perf_event *bp,
switch (ops) {
case HW_BREAKPOINT_INSTALL:
/* Set the FWPnCFG/MWPnCFG 1~4 register. */
- write_wb_reg(CSR_CFG_ADDR, i, 0, info->address);
- write_wb_reg(CSR_CFG_ADDR, i, 1, info->address);
- write_wb_reg(CSR_CFG_MASK, i, 0, info->mask);
- write_wb_reg(CSR_CFG_MASK, i, 1, info->mask);
- write_wb_reg(CSR_CFG_ASID, i, 0, 0);
- write_wb_reg(CSR_CFG_ASID, i, 1, 0);
if (info->ctrl.type == LOONGARCH_BREAKPOINT_EXECUTE) {
- write_wb_reg(CSR_CFG_CTRL, i, 0, CTRL_PLV_ENABLE);
+ write_wb_reg(CSR_CFG_ADDR, i, 0, info->address);
+ write_wb_reg(CSR_CFG_MASK, i, 0, info->mask);
+ write_wb_reg(CSR_CFG_ASID, i, 0, 0);
+ write_wb_reg(CSR_CFG_CTRL, i, 0, privilege);
} else {
+ write_wb_reg(CSR_CFG_ADDR, i, 1, info->address);
+ write_wb_reg(CSR_CFG_MASK, i, 1, info->mask);
+ write_wb_reg(CSR_CFG_ASID, i, 1, 0);
ctrl = encode_ctrl_reg(info->ctrl);
- write_wb_reg(CSR_CFG_CTRL, i, 1, ctrl | CTRL_PLV_ENABLE);
+ write_wb_reg(CSR_CFG_CTRL, i, 1, ctrl | privilege);
}
enable = csr_read64(LOONGARCH_CSR_CRMD);
csr_write64(CSR_CRMD_WE | enable, LOONGARCH_CSR_CRMD);
+ if (bp->hw.target)
+ regs->csr_prmd |= CSR_PRMD_PWE;
break;
case HW_BREAKPOINT_UNINSTALL:
/* Reset the FWPnCFG/MWPnCFG 1~4 register. */
- write_wb_reg(CSR_CFG_ADDR, i, 0, 0);
- write_wb_reg(CSR_CFG_ADDR, i, 1, 0);
- write_wb_reg(CSR_CFG_MASK, i, 0, 0);
- write_wb_reg(CSR_CFG_MASK, i, 1, 0);
- write_wb_reg(CSR_CFG_CTRL, i, 0, 0);
- write_wb_reg(CSR_CFG_CTRL, i, 1, 0);
- write_wb_reg(CSR_CFG_ASID, i, 0, 0);
- write_wb_reg(CSR_CFG_ASID, i, 1, 0);
+ if (info->ctrl.type == LOONGARCH_BREAKPOINT_EXECUTE) {
+ write_wb_reg(CSR_CFG_ADDR, i, 0, 0);
+ write_wb_reg(CSR_CFG_MASK, i, 0, 0);
+ write_wb_reg(CSR_CFG_CTRL, i, 0, 0);
+ write_wb_reg(CSR_CFG_ASID, i, 0, 0);
+ } else {
+ write_wb_reg(CSR_CFG_ADDR, i, 1, 0);
+ write_wb_reg(CSR_CFG_MASK, i, 1, 0);
+ write_wb_reg(CSR_CFG_CTRL, i, 1, 0);
+ write_wb_reg(CSR_CFG_ASID, i, 1, 0);
+ }
+ if (bp->hw.target)
+ regs->csr_prmd &= ~CSR_PRMD_PWE;
break;
}
@@ -283,7 +300,7 @@ int arch_check_bp_in_kernelspace(struct arch_hw_breakpoint *hw)
* to generic breakpoint descriptions.
*/
int arch_bp_generic_fields(struct arch_hw_breakpoint_ctrl ctrl,
- int *gen_len, int *gen_type, int *offset)
+ int *gen_len, int *gen_type)
{
/* Type */
switch (ctrl.type) {
@@ -303,11 +320,6 @@ int arch_bp_generic_fields(struct arch_hw_breakpoint_ctrl ctrl,
return -EINVAL;
}
- if (!ctrl.len)
- return -EINVAL;
-
- *offset = __ffs(ctrl.len);
-
/* Len */
switch (ctrl.len) {
case LOONGARCH_BREAKPOINT_LEN_1:
@@ -386,21 +398,17 @@ int hw_breakpoint_arch_parse(struct perf_event *bp,
struct arch_hw_breakpoint *hw)
{
int ret;
- u64 alignment_mask, offset;
+ u64 alignment_mask;
/* Build the arch_hw_breakpoint. */
ret = arch_build_bp_info(bp, attr, hw);
if (ret)
return ret;
- if (hw->ctrl.type != LOONGARCH_BREAKPOINT_EXECUTE)
- alignment_mask = 0x7;
- else
+ if (hw->ctrl.type == LOONGARCH_BREAKPOINT_EXECUTE) {
alignment_mask = 0x3;
- offset = hw->address & alignment_mask;
-
- hw->address &= ~alignment_mask;
- hw->ctrl.len <<= offset;
+ hw->address &= ~alignment_mask;
+ }
return 0;
}
@@ -471,12 +479,15 @@ void breakpoint_handler(struct pt_regs *regs)
slots = this_cpu_ptr(bp_on_reg);
for (i = 0; i < boot_cpu_data.watch_ireg_count; ++i) {
- bp = slots[i];
- if (bp == NULL)
- continue;
- perf_bp_event(bp, regs);
+ if ((csr_read32(LOONGARCH_CSR_FWPS) & (0x1 << i))) {
+ bp = slots[i];
+ if (bp == NULL)
+ continue;
+ perf_bp_event(bp, regs);
+ csr_write32(0x1 << i, LOONGARCH_CSR_FWPS);
+ update_bp_registers(regs, 0, 0);
+ }
}
- update_bp_registers(regs, 0, 0);
}
NOKPROBE_SYMBOL(breakpoint_handler);
@@ -488,12 +499,15 @@ void watchpoint_handler(struct pt_regs *regs)
slots = this_cpu_ptr(wp_on_reg);
for (i = 0; i < boot_cpu_data.watch_dreg_count; ++i) {
- wp = slots[i];
- if (wp == NULL)
- continue;
- perf_bp_event(wp, regs);
+ if ((csr_read32(LOONGARCH_CSR_MWPS) & (0x1 << i))) {
+ wp = slots[i];
+ if (wp == NULL)
+ continue;
+ perf_bp_event(wp, regs);
+ csr_write32(0x1 << i, LOONGARCH_CSR_MWPS);
+ update_bp_registers(regs, 0, 1);
+ }
}
- update_bp_registers(regs, 0, 1);
}
NOKPROBE_SYMBOL(watchpoint_handler);
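
The install/uninstall split above hinges on the new constants from the hw_breakpoint.h hunk: judging by their values, bit n + 1 of the watchpoint control register enables traps at privilege level n, so the old CTRL_PLV_ENABLE (0x1e) armed PLV0 through PLV3 at once while the new code arms exactly one level. A hypothetical helper makes the assumed encoding explicit:

/* illustrative only, not part of the patch */
#define CTRL_PLVn_ENABLE(n)	(0x1 << ((n) + 1))

/*
 * CTRL_PLVn_ENABLE(0) == 0x02 == CTRL_PLV0_ENABLE   (kernel-space bp)
 * CTRL_PLVn_ENABLE(3) == 0x10 == CTRL_PLV3_ENABLE   (user-space bp)
 * CTRL_PLV_ENABLE     == 0x1e == PLV0..PLV3 at once (the old behaviour)
 */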
diff --git a/arch/loongarch/kernel/ptrace.c b/arch/loongarch/kernel/ptrace.c
index c114c5ef1332..200109de1971 100644
--- a/arch/loongarch/kernel/ptrace.c
+++ b/arch/loongarch/kernel/ptrace.c
@@ -494,28 +494,14 @@ static int ptrace_hbp_fill_attr_ctrl(unsigned int note_type,
struct arch_hw_breakpoint_ctrl ctrl,
struct perf_event_attr *attr)
{
- int err, len, type, offset;
+ int err, len, type;
- err = arch_bp_generic_fields(ctrl, &len, &type, &offset);
+ err = arch_bp_generic_fields(ctrl, &len, &type);
if (err)
return err;
- switch (note_type) {
- case NT_LOONGARCH_HW_BREAK:
- if ((type & HW_BREAKPOINT_X) != type)
- return -EINVAL;
- break;
- case NT_LOONGARCH_HW_WATCH:
- if ((type & HW_BREAKPOINT_RW) != type)
- return -EINVAL;
- break;
- default:
- return -EINVAL;
- }
-
attr->bp_len = len;
attr->bp_type = type;
- attr->bp_addr += offset;
return 0;
}
@@ -609,10 +595,27 @@ static int ptrace_hbp_set_ctrl(unsigned int note_type,
return PTR_ERR(bp);
attr = bp->attr;
- decode_ctrl_reg(uctrl, &ctrl);
- err = ptrace_hbp_fill_attr_ctrl(note_type, ctrl, &attr);
- if (err)
- return err;
+
+ switch (note_type) {
+ case NT_LOONGARCH_HW_BREAK:
+ ctrl.type = LOONGARCH_BREAKPOINT_EXECUTE;
+ ctrl.len = LOONGARCH_BREAKPOINT_LEN_4;
+ break;
+ case NT_LOONGARCH_HW_WATCH:
+ decode_ctrl_reg(uctrl, &ctrl);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ if (uctrl & CTRL_PLV_ENABLE) {
+ err = ptrace_hbp_fill_attr_ctrl(note_type, ctrl, &attr);
+ if (err)
+ return err;
+ attr.disabled = 0;
+ } else {
+ attr.disabled = 1;
+ }
return modify_user_hw_breakpoint(bp, &attr);
}
@@ -643,6 +646,10 @@ static int ptrace_hbp_set_addr(unsigned int note_type,
struct perf_event *bp;
struct perf_event_attr attr;
+ /* Kernel-space address cannot be monitored by user-space */
+ if ((unsigned long)addr >= XKPRANGE)
+ return -EINVAL;
+
bp = ptrace_hbp_get_initialised_bp(note_type, tsk, idx);
if (IS_ERR(bp))
return PTR_ERR(bp);
diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c
index 60e0fe97f61a..3d048f1be143 100644
--- a/arch/loongarch/kernel/setup.c
+++ b/arch/loongarch/kernel/setup.c
@@ -282,7 +282,7 @@ static void __init fdt_setup(void)
return;
/* Prefer to use built-in dtb, checking its legality first. */
- if (!fdt_check_header(__dtb_start))
+ if (IS_ENABLED(CONFIG_BUILTIN_DTB) && !fdt_check_header(__dtb_start))
fdt_pointer = __dtb_start;
else
fdt_pointer = efi_fdt_pointer(); /* Fallback to firmware dtb */
@@ -351,10 +351,8 @@ void __init platform_init(void)
arch_reserve_vmcore();
arch_reserve_crashkernel();
-#ifdef CONFIG_ACPI_TABLE_UPGRADE
- acpi_table_upgrade();
-#endif
#ifdef CONFIG_ACPI
+ acpi_table_upgrade();
acpi_gbl_use_default_register_widths = false;
acpi_boot_table_init();
#endif
diff --git a/arch/loongarch/kernel/smp.c b/arch/loongarch/kernel/smp.c
index 0dfe2388ef41..1436d2465939 100644
--- a/arch/loongarch/kernel/smp.c
+++ b/arch/loongarch/kernel/smp.c
@@ -273,7 +273,6 @@ static void __init fdt_smp_setup(void)
if (cpuid == loongson_sysconf.boot_cpu_id) {
cpu = 0;
- numa_add_cpu(cpu);
} else {
cpu = cpumask_next_zero(-1, cpu_present_mask);
}
@@ -283,6 +282,9 @@ static void __init fdt_smp_setup(void)
set_cpu_present(cpu, true);
__cpu_number_map[cpuid] = cpu;
__cpu_logical_map[cpu] = cpuid;
+
+ early_numa_add_cpu(cpu, 0);
+ set_cpuid_to_node(cpuid, 0);
}
loongson_sysconf.nr_cpus = num_processors;
@@ -468,6 +470,7 @@ void smp_prepare_boot_cpu(void)
set_cpu_possible(0, true);
set_cpu_online(0, true);
set_my_cpu_offset(per_cpu_offset(0));
+ numa_add_cpu(0);
rr_node = first_node(node_online_map);
for_each_possible_cpu(cpu) {
diff --git a/arch/loongarch/kernel/vmlinux.lds.S b/arch/loongarch/kernel/vmlinux.lds.S
index e8e97dbf9ca4..3c7595342730 100644
--- a/arch/loongarch/kernel/vmlinux.lds.S
+++ b/arch/loongarch/kernel/vmlinux.lds.S
@@ -6,6 +6,7 @@
#define PAGE_SIZE _PAGE_SIZE
#define RO_EXCEPTION_TABLE_ALIGN 4
+#define PHYSADDR_MASK 0xffffffffffff /* 48-bit */
/*
* Put .bss..swapper_pg_dir as the first thing in .bss. This will
@@ -142,10 +143,11 @@ SECTIONS
#ifdef CONFIG_EFI_STUB
/* header symbols */
- _kernel_asize = _end - _text;
- _kernel_fsize = _edata - _text;
- _kernel_vsize = _end - __initdata_begin;
- _kernel_rsize = _edata - __initdata_begin;
+ _kernel_entry = ABSOLUTE(kernel_entry & PHYSADDR_MASK);
+ _kernel_asize = ABSOLUTE(_end - _text);
+ _kernel_fsize = ABSOLUTE(_edata - _text);
+ _kernel_vsize = ABSOLUTE(_end - __initdata_begin);
+ _kernel_rsize = ABSOLUTE(_edata - __initdata_begin);
#endif
.gptab.sdata : {
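
kernel_entry is a link-time virtual address inside the direct-mapped window, so masking with the new 48-bit PHYSADDR_MASK strips the window tag and the MS-DOS header (see the head.S hunk above) ends up carrying a physical entry point. For example, with addresses assumed purely for illustration:

/* virtual entry in the cached DMW window (assumed value) */
kernel_entry  = 0x9000000000200000
/* what _kernel_entry becomes after & PHYSADDR_MASK */
_kernel_entry = 0x0000000000200000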
diff --git a/arch/loongarch/kvm/exit.c b/arch/loongarch/kvm/exit.c
index c86e099af5ca..a68573e091c0 100644
--- a/arch/loongarch/kvm/exit.c
+++ b/arch/loongarch/kvm/exit.c
@@ -761,7 +761,7 @@ static void kvm_handle_service(struct kvm_vcpu *vcpu)
default:
ret = KVM_HCALL_INVALID_CODE;
break;
- };
+ }
kvm_write_reg(vcpu, LOONGARCH_GPR_A0, ret);
}
diff --git a/arch/mips/bmips/setup.c b/arch/mips/bmips/setup.c
index ec180ab92eaa..66a8ba19c287 100644
--- a/arch/mips/bmips/setup.c
+++ b/arch/mips/bmips/setup.c
@@ -110,7 +110,8 @@ static void bcm6358_quirks(void)
* RAC flush causes kernel panics on BCM6358 when booting from TP1
* because the bootloader is not initializing it properly.
*/
- bmips_rac_flush_disable = !!(read_c0_brcm_cmt_local() & (1 << 31));
+ bmips_rac_flush_disable = !!(read_c0_brcm_cmt_local() & (1 << 31)) ||
+ !!BMIPS_GET_CBR();
}
static void bcm6368_quirks(void)
diff --git a/arch/mips/include/asm/mipsmtregs.h b/arch/mips/include/asm/mipsmtregs.h
index 30e86861c206..b1ee3c48e84b 100644
--- a/arch/mips/include/asm/mipsmtregs.h
+++ b/arch/mips/include/asm/mipsmtregs.h
@@ -322,7 +322,7 @@ static inline void ehb(void)
" .set push \n" \
" .set "MIPS_ISA_LEVEL" \n" \
_ASM_SET_MFTC0 \
- " mftc0 $1, " #rt ", " #sel " \n" \
+ " mftc0 %0, " #rt ", " #sel " \n" \
_ASM_UNSET_MFTC0 \
" .set pop \n" \
: "=r" (__res)); \
diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl b/arch/mips/kernel/syscalls/syscall_o32.tbl
index 008ebe60263e..81428a2eb660 100644
--- a/arch/mips/kernel/syscalls/syscall_o32.tbl
+++ b/arch/mips/kernel/syscalls/syscall_o32.tbl
@@ -27,7 +27,7 @@
17 o32 break sys_ni_syscall
# 18 was sys_stat
18 o32 unused18 sys_ni_syscall
-19 o32 lseek sys_lseek
+19 o32 lseek sys_lseek compat_sys_lseek
20 o32 getpid sys_getpid
21 o32 mount sys_mount
22 o32 umount sys_oldumount
diff --git a/arch/mips/pci/ops-rc32434.c b/arch/mips/pci/ops-rc32434.c
index 874ed6df9768..34b9323bdabb 100644
--- a/arch/mips/pci/ops-rc32434.c
+++ b/arch/mips/pci/ops-rc32434.c
@@ -112,8 +112,8 @@ retry:
* gives them time to settle
*/
if (where == PCI_VENDOR_ID) {
- if (ret == 0xffffffff || ret == 0x00000000 ||
- ret == 0x0000ffff || ret == 0xffff0000) {
+ if (*val == 0xffffffff || *val == 0x00000000 ||
+ *val == 0x0000ffff || *val == 0xffff0000) {
if (delay > 4)
return 0;
delay *= 2;
diff --git a/arch/parisc/include/asm/cacheflush.h b/arch/parisc/include/asm/cacheflush.h
index ba4c05bc24d6..8394718870e1 100644
--- a/arch/parisc/include/asm/cacheflush.h
+++ b/arch/parisc/include/asm/cacheflush.h
@@ -31,18 +31,17 @@ void flush_cache_all_local(void);
void flush_cache_all(void);
void flush_cache_mm(struct mm_struct *mm);
-void flush_kernel_dcache_page_addr(const void *addr);
-
#define flush_kernel_dcache_range(start,size) \
flush_kernel_dcache_range_asm((start), (start)+(size));
+/* The only way to flush a vmap range is to flush the whole cache */
#define ARCH_IMPLEMENTS_FLUSH_KERNEL_VMAP_RANGE 1
void flush_kernel_vmap_range(void *vaddr, int size);
void invalidate_kernel_vmap_range(void *vaddr, int size);
-#define flush_cache_vmap(start, end) flush_cache_all()
+void flush_cache_vmap(unsigned long start, unsigned long end);
#define flush_cache_vmap_early(start, end) do { } while (0)
-#define flush_cache_vunmap(start, end) flush_cache_all()
+void flush_cache_vunmap(unsigned long start, unsigned long end);
void flush_dcache_folio(struct folio *folio);
#define flush_dcache_folio flush_dcache_folio
@@ -77,17 +76,11 @@ void flush_cache_page(struct vm_area_struct *vma, unsigned long vmaddr,
void flush_cache_range(struct vm_area_struct *vma,
unsigned long start, unsigned long end);
-/* defined in pacache.S exported in cache.c used by flush_anon_page */
-void flush_dcache_page_asm(unsigned long phys_addr, unsigned long vaddr);
-
#define ARCH_HAS_FLUSH_ANON_PAGE
void flush_anon_page(struct vm_area_struct *vma, struct page *page, unsigned long vmaddr);
#define ARCH_HAS_FLUSH_ON_KUNMAP
-static inline void kunmap_flush_on_unmap(const void *addr)
-{
- flush_kernel_dcache_page_addr(addr);
-}
+void kunmap_flush_on_unmap(const void *addr);
#endif /* _PARISC_CACHEFLUSH_H */
diff --git a/arch/parisc/include/asm/pgtable.h b/arch/parisc/include/asm/pgtable.h
index 974accac05cd..babf65751e81 100644
--- a/arch/parisc/include/asm/pgtable.h
+++ b/arch/parisc/include/asm/pgtable.h
@@ -448,14 +448,17 @@ static inline pte_t pte_swp_clear_exclusive(pte_t pte)
return pte;
}
+static inline pte_t ptep_get(pte_t *ptep)
+{
+ return READ_ONCE(*ptep);
+}
+#define ptep_get ptep_get
+
static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep)
{
pte_t pte;
- if (!pte_young(*ptep))
- return 0;
-
- pte = *ptep;
+ pte = ptep_get(ptep);
if (!pte_young(pte)) {
return 0;
}
@@ -463,17 +466,10 @@ static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned
return 1;
}
-struct mm_struct;
-static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
-{
- pte_t old_pte;
-
- old_pte = *ptep;
- set_pte(ptep, __pte(0));
-
- return old_pte;
-}
+int ptep_clear_flush_young(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep);
+pte_t ptep_clear_flush(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep);
+struct mm_struct;
static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
{
set_pte(ptep, pte_wrprotect(*ptep));
@@ -511,7 +507,8 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr,
#define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN
#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
-#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
+#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
+#define __HAVE_ARCH_PTEP_CLEAR_FLUSH
#define __HAVE_ARCH_PTEP_SET_WRPROTECT
#define __HAVE_ARCH_PTE_SAME
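
The new ptep_get() wrapper gives every reader a single coherent snapshot of the PTE: without READ_ONCE() the compiler may legally reload *ptep between the test and the use, racing with concurrent updates. The intended pattern, as now used in ptep_test_and_clear_young() above:

pte_t pte = ptep_get(ptep);	/* exactly one READ_ONCE(*ptep) */

if (!pte_young(pte))		/* test the snapshot ...        */
	return 0;
/* ... and keep using the same snapshot, never a second load */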
diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c
index 422f3e1e6d9c..483bfafd930c 100644
--- a/arch/parisc/kernel/cache.c
+++ b/arch/parisc/kernel/cache.c
@@ -20,6 +20,7 @@
#include <linux/sched.h>
#include <linux/sched/mm.h>
#include <linux/syscalls.h>
+#include <linux/vmalloc.h>
#include <asm/pdc.h>
#include <asm/cache.h>
#include <asm/cacheflush.h>
@@ -31,20 +32,31 @@
#include <asm/mmu_context.h>
#include <asm/cachectl.h>
+#define PTR_PAGE_ALIGN_DOWN(addr) PTR_ALIGN_DOWN(addr, PAGE_SIZE)
+
+/*
+ * When nonzero, use the _PAGE_ACCESSED bit to try to reduce the number
+ * of page flushes done by flush_cache_page_if_present. There are some
+ * pros and cons in using this option. It may increase the risk of
+ * random segmentation faults.
+ */
+#define CONFIG_FLUSH_PAGE_ACCESSED 0
+
int split_tlb __ro_after_init;
int dcache_stride __ro_after_init;
int icache_stride __ro_after_init;
EXPORT_SYMBOL(dcache_stride);
+/* Internal implementation in arch/parisc/kernel/pacache.S */
void flush_dcache_page_asm(unsigned long phys_addr, unsigned long vaddr);
EXPORT_SYMBOL(flush_dcache_page_asm);
void purge_dcache_page_asm(unsigned long phys_addr, unsigned long vaddr);
void flush_icache_page_asm(unsigned long phys_addr, unsigned long vaddr);
-
-/* Internal implementation in arch/parisc/kernel/pacache.S */
void flush_data_cache_local(void *); /* flushes local data-cache only */
void flush_instruction_cache_local(void); /* flushes local code-cache only */
+static void flush_kernel_dcache_page_addr(const void *addr);
+
/* On some machines (i.e., ones with the Merced bus), there can be
* only a single PxTLB broadcast at a time; this must be guaranteed
* by software. We need a spinlock around all TLB flushes to ensure
@@ -321,6 +333,18 @@ __flush_cache_page(struct vm_area_struct *vma, unsigned long vmaddr,
{
if (!static_branch_likely(&parisc_has_cache))
return;
+
+ /*
+ * The TLB is the engine of coherence on parisc. The CPU is
+ * entitled to speculate any page with a TLB mapping, so here
+ * we kill the mapping then flush the page along a special flush
+ * only alias mapping. This guarantees that the page is no-longer
+ * in the cache for any process and nor may it be speculatively
+ * read in (until the user or kernel specifically accesses it,
+ * of course).
+ */
+ flush_tlb_page(vma, vmaddr);
+
preempt_disable();
flush_dcache_page_asm(physaddr, vmaddr);
if (vma->vm_flags & VM_EXEC)
@@ -328,46 +352,44 @@ __flush_cache_page(struct vm_area_struct *vma, unsigned long vmaddr,
preempt_enable();
}
-static void flush_user_cache_page(struct vm_area_struct *vma, unsigned long vmaddr)
+static void flush_kernel_dcache_page_addr(const void *addr)
{
- unsigned long flags, space, pgd, prot;
-#ifdef CONFIG_TLB_PTLOCK
- unsigned long pgd_lock;
-#endif
+ unsigned long vaddr = (unsigned long)addr;
+ unsigned long flags;
- vmaddr &= PAGE_MASK;
+ /* Purge TLB entry to remove translation on all CPUs */
+ purge_tlb_start(flags);
+ pdtlb(SR_KERNEL, addr);
+ purge_tlb_end(flags);
+ /* Use tmpalias flush to prevent data cache move-in */
preempt_disable();
+ flush_dcache_page_asm(__pa(vaddr), vaddr);
+ preempt_enable();
+}
- /* Set context for flush */
- local_irq_save(flags);
- prot = mfctl(8);
- space = mfsp(SR_USER);
- pgd = mfctl(25);
-#ifdef CONFIG_TLB_PTLOCK
- pgd_lock = mfctl(28);
-#endif
- switch_mm_irqs_off(NULL, vma->vm_mm, NULL);
- local_irq_restore(flags);
-
- flush_user_dcache_range_asm(vmaddr, vmaddr + PAGE_SIZE);
- if (vma->vm_flags & VM_EXEC)
- flush_user_icache_range_asm(vmaddr, vmaddr + PAGE_SIZE);
- flush_tlb_page(vma, vmaddr);
+static void flush_kernel_icache_page_addr(const void *addr)
+{
+ unsigned long vaddr = (unsigned long)addr;
+ unsigned long flags;
- /* Restore previous context */
- local_irq_save(flags);
-#ifdef CONFIG_TLB_PTLOCK
- mtctl(pgd_lock, 28);
-#endif
- mtctl(pgd, 25);
- mtsp(space, SR_USER);
- mtctl(prot, 8);
- local_irq_restore(flags);
+ /* Purge TLB entry to remove translation on all CPUs */
+ purge_tlb_start(flags);
+ pdtlb(SR_KERNEL, addr);
+ purge_tlb_end(flags);
+ /* Use tmpalias flush to prevent instruction cache move-in */
+ preempt_disable();
+ flush_icache_page_asm(__pa(vaddr), vaddr);
preempt_enable();
}
+void kunmap_flush_on_unmap(const void *addr)
+{
+ flush_kernel_dcache_page_addr(addr);
+}
+EXPORT_SYMBOL(kunmap_flush_on_unmap);
+
void flush_icache_pages(struct vm_area_struct *vma, struct page *page,
unsigned int nr)
{
@@ -375,13 +397,16 @@ void flush_icache_pages(struct vm_area_struct *vma, struct page *page,
for (;;) {
flush_kernel_dcache_page_addr(kaddr);
- flush_kernel_icache_page(kaddr);
+ flush_kernel_icache_page_addr(kaddr);
if (--nr == 0)
break;
kaddr += PAGE_SIZE;
}
}
+/*
+ * Walk page directory for MM to find PTEP pointer for address ADDR.
+ */
static inline pte_t *get_ptep(struct mm_struct *mm, unsigned long addr)
{
pte_t *ptep = NULL;
@@ -410,6 +435,41 @@ static inline bool pte_needs_flush(pte_t pte)
== (_PAGE_PRESENT | _PAGE_ACCESSED);
}
+/*
+ * Return user physical address. Returns 0 if page is not present.
+ */
+static inline unsigned long get_upa(struct mm_struct *mm, unsigned long addr)
+{
+ unsigned long flags, space, pgd, prot, pa;
+#ifdef CONFIG_TLB_PTLOCK
+ unsigned long pgd_lock;
+#endif
+
+ /* Save context */
+ local_irq_save(flags);
+ prot = mfctl(8);
+ space = mfsp(SR_USER);
+ pgd = mfctl(25);
+#ifdef CONFIG_TLB_PTLOCK
+ pgd_lock = mfctl(28);
+#endif
+
+ /* Set context for lpa_user */
+ switch_mm_irqs_off(NULL, mm, NULL);
+ pa = lpa_user(addr);
+
+ /* Restore previous context */
+#ifdef CONFIG_TLB_PTLOCK
+ mtctl(pgd_lock, 28);
+#endif
+ mtctl(pgd, 25);
+ mtsp(space, SR_USER);
+ mtctl(prot, 8);
+ local_irq_restore(flags);
+
+ return pa;
+}
+
void flush_dcache_folio(struct folio *folio)
{
struct address_space *mapping = folio_flush_mapping(folio);
@@ -458,50 +518,23 @@ void flush_dcache_folio(struct folio *folio)
if (addr + nr * PAGE_SIZE > vma->vm_end)
nr = (vma->vm_end - addr) / PAGE_SIZE;
- if (parisc_requires_coherency()) {
- for (i = 0; i < nr; i++) {
- pte_t *ptep = get_ptep(vma->vm_mm,
- addr + i * PAGE_SIZE);
- if (!ptep)
- continue;
- if (pte_needs_flush(*ptep))
- flush_user_cache_page(vma,
- addr + i * PAGE_SIZE);
- /* Optimise accesses to the same table? */
- pte_unmap(ptep);
- }
- } else {
+ if (old_addr == 0 || (old_addr & (SHM_COLOUR - 1))
+ != (addr & (SHM_COLOUR - 1))) {
+ for (i = 0; i < nr; i++)
+ __flush_cache_page(vma,
+ addr + i * PAGE_SIZE,
+ (pfn + i) * PAGE_SIZE);
/*
- * The TLB is the engine of coherence on parisc:
- * The CPU is entitled to speculate any page
- * with a TLB mapping, so here we kill the
- * mapping then flush the page along a special
- * flush only alias mapping. This guarantees that
- * the page is no-longer in the cache for any
- * process and nor may it be speculatively read
- * in (until the user or kernel specifically
- * accesses it, of course)
+ * Software is allowed to have any number
+ * of private mappings to a page.
*/
- for (i = 0; i < nr; i++)
- flush_tlb_page(vma, addr + i * PAGE_SIZE);
- if (old_addr == 0 || (old_addr & (SHM_COLOUR - 1))
- != (addr & (SHM_COLOUR - 1))) {
- for (i = 0; i < nr; i++)
- __flush_cache_page(vma,
- addr + i * PAGE_SIZE,
- (pfn + i) * PAGE_SIZE);
- /*
- * Software is allowed to have any number
- * of private mappings to a page.
- */
- if (!(vma->vm_flags & VM_SHARED))
- continue;
- if (old_addr)
- pr_err("INEQUIVALENT ALIASES 0x%lx and 0x%lx in file %pD\n",
- old_addr, addr, vma->vm_file);
- if (nr == folio_nr_pages(folio))
- old_addr = addr;
- }
+ if (!(vma->vm_flags & VM_SHARED))
+ continue;
+ if (old_addr)
+ pr_err("INEQUIVALENT ALIASES 0x%lx and 0x%lx in file %pD\n",
+ old_addr, addr, vma->vm_file);
+ if (nr == folio_nr_pages(folio))
+ old_addr = addr;
}
WARN_ON(++count == 4096);
}
@@ -591,35 +624,28 @@ extern void purge_kernel_dcache_page_asm(unsigned long);
extern void clear_user_page_asm(void *, unsigned long);
extern void copy_user_page_asm(void *, void *, unsigned long);
-void flush_kernel_dcache_page_addr(const void *addr)
-{
- unsigned long flags;
-
- flush_kernel_dcache_page_asm(addr);
- purge_tlb_start(flags);
- pdtlb(SR_KERNEL, addr);
- purge_tlb_end(flags);
-}
-EXPORT_SYMBOL(flush_kernel_dcache_page_addr);
-
static void flush_cache_page_if_present(struct vm_area_struct *vma,
- unsigned long vmaddr, unsigned long pfn)
+ unsigned long vmaddr)
{
+#if CONFIG_FLUSH_PAGE_ACCESSED
bool needs_flush = false;
- pte_t *ptep;
+ pte_t *ptep, pte;
- /*
- * The pte check is racy and sometimes the flush will trigger
- * a non-access TLB miss. Hopefully, the page has already been
- * flushed.
- */
ptep = get_ptep(vma->vm_mm, vmaddr);
if (ptep) {
- needs_flush = pte_needs_flush(*ptep);
+ pte = ptep_get(ptep);
+ needs_flush = pte_needs_flush(pte);
pte_unmap(ptep);
}
if (needs_flush)
- flush_cache_page(vma, vmaddr, pfn);
+ __flush_cache_page(vma, vmaddr, PFN_PHYS(pte_pfn(pte)));
+#else
+ struct mm_struct *mm = vma->vm_mm;
+ unsigned long physaddr = get_upa(mm, vmaddr);
+
+ if (physaddr)
+ __flush_cache_page(vma, vmaddr, PAGE_ALIGN_DOWN(physaddr));
+#endif
}
void copy_user_highpage(struct page *to, struct page *from,
@@ -629,7 +655,7 @@ void copy_user_highpage(struct page *to, struct page *from,
kfrom = kmap_local_page(from);
kto = kmap_local_page(to);
- flush_cache_page_if_present(vma, vaddr, page_to_pfn(from));
+ __flush_cache_page(vma, vaddr, PFN_PHYS(page_to_pfn(from)));
copy_page_asm(kto, kfrom);
kunmap_local(kto);
kunmap_local(kfrom);
@@ -638,16 +664,17 @@ void copy_user_highpage(struct page *to, struct page *from,
void copy_to_user_page(struct vm_area_struct *vma, struct page *page,
unsigned long user_vaddr, void *dst, void *src, int len)
{
- flush_cache_page_if_present(vma, user_vaddr, page_to_pfn(page));
+ __flush_cache_page(vma, user_vaddr, PFN_PHYS(page_to_pfn(page)));
memcpy(dst, src, len);
- flush_kernel_dcache_range_asm((unsigned long)dst, (unsigned long)dst + len);
+ flush_kernel_dcache_page_addr(PTR_PAGE_ALIGN_DOWN(dst));
}
void copy_from_user_page(struct vm_area_struct *vma, struct page *page,
unsigned long user_vaddr, void *dst, void *src, int len)
{
- flush_cache_page_if_present(vma, user_vaddr, page_to_pfn(page));
+ __flush_cache_page(vma, user_vaddr, PFN_PHYS(page_to_pfn(page)));
memcpy(dst, src, len);
+ flush_kernel_dcache_page_addr(PTR_PAGE_ALIGN_DOWN(src));
}
/* __flush_tlb_range()
@@ -681,32 +708,10 @@ int __flush_tlb_range(unsigned long sid, unsigned long start,
static void flush_cache_pages(struct vm_area_struct *vma, unsigned long start, unsigned long end)
{
- unsigned long addr, pfn;
- pte_t *ptep;
-
- for (addr = start; addr < end; addr += PAGE_SIZE) {
- bool needs_flush = false;
- /*
- * The vma can contain pages that aren't present. Although
- * the pte search is expensive, we need the pte to find the
- * page pfn and to check whether the page should be flushed.
- */
- ptep = get_ptep(vma->vm_mm, addr);
- if (ptep) {
- needs_flush = pte_needs_flush(*ptep);
- pfn = pte_pfn(*ptep);
- pte_unmap(ptep);
- }
- if (needs_flush) {
- if (parisc_requires_coherency()) {
- flush_user_cache_page(vma, addr);
- } else {
- if (WARN_ON(!pfn_valid(pfn)))
- return;
- __flush_cache_page(vma, addr, PFN_PHYS(pfn));
- }
- }
- }
+ unsigned long addr;
+
+ for (addr = start; addr < end; addr += PAGE_SIZE)
+ flush_cache_page_if_present(vma, addr);
}
static inline unsigned long mm_total_size(struct mm_struct *mm)
@@ -757,21 +762,19 @@ void flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned
if (WARN_ON(IS_ENABLED(CONFIG_SMP) && arch_irqs_disabled()))
return;
flush_tlb_range(vma, start, end);
- flush_cache_all();
+ if (vma->vm_flags & VM_EXEC)
+ flush_cache_all();
+ else
+ flush_data_cache();
return;
}
- flush_cache_pages(vma, start, end);
+ flush_cache_pages(vma, start & PAGE_MASK, end);
}
void flush_cache_page(struct vm_area_struct *vma, unsigned long vmaddr, unsigned long pfn)
{
- if (WARN_ON(!pfn_valid(pfn)))
- return;
- if (parisc_requires_coherency())
- flush_user_cache_page(vma, vmaddr);
- else
- __flush_cache_page(vma, vmaddr, PFN_PHYS(pfn));
+ __flush_cache_page(vma, vmaddr, PFN_PHYS(pfn));
}
void flush_anon_page(struct vm_area_struct *vma, struct page *page, unsigned long vmaddr)
@@ -779,34 +782,133 @@ void flush_anon_page(struct vm_area_struct *vma, struct page *page, unsigned lon
if (!PageAnon(page))
return;
- if (parisc_requires_coherency()) {
- if (vma->vm_flags & VM_SHARED)
- flush_data_cache();
- else
- flush_user_cache_page(vma, vmaddr);
+ __flush_cache_page(vma, vmaddr, PFN_PHYS(page_to_pfn(page)));
+}
+
+int ptep_clear_flush_young(struct vm_area_struct *vma, unsigned long addr,
+ pte_t *ptep)
+{
+ pte_t pte = ptep_get(ptep);
+
+ if (!pte_young(pte))
+ return 0;
+ set_pte(ptep, pte_mkold(pte));
+#if CONFIG_FLUSH_PAGE_ACCESSED
+ __flush_cache_page(vma, addr, PFN_PHYS(pte_pfn(pte)));
+#endif
+ return 1;
+}
+
+/*
+ * After a PTE is cleared, we have no way to flush the cache for
+ * the physical page. On PA8800 and PA8900 processors, the stale
+ * cache lines can cause random corruption. Thus, we must flush the cache
+ * as well as the TLB when clearing a PTE that's valid.
+ */
+pte_t ptep_clear_flush(struct vm_area_struct *vma, unsigned long addr,
+ pte_t *ptep)
+{
+ struct mm_struct *mm = vma->vm_mm;
+ pte_t pte = ptep_get_and_clear(mm, addr, ptep);
+ unsigned long pfn = pte_pfn(pte);
+
+ if (pfn_valid(pfn))
+ __flush_cache_page(vma, addr, PFN_PHYS(pfn));
+ else if (pte_accessible(mm, pte))
+ flush_tlb_page(vma, addr);
+
+ return pte;
+}
+
+/*
+ * The physical address for pages in the ioremap case can be obtained
+ * from the vm_struct struct. I wasn't able to successfully handle the
+ * vmalloc and vmap cases. We have an array of struct page pointers in
+ * the uninitialized vmalloc case but the flush failed using page_to_pfn.
+ */
+void flush_cache_vmap(unsigned long start, unsigned long end)
+{
+ unsigned long addr, physaddr;
+ struct vm_struct *vm;
+
+ /* Prevent cache move-in */
+ flush_tlb_kernel_range(start, end);
+
+ if (end - start >= parisc_cache_flush_threshold) {
+ flush_cache_all();
return;
}
- flush_tlb_page(vma, vmaddr);
- preempt_disable();
- flush_dcache_page_asm(page_to_phys(page), vmaddr);
- preempt_enable();
+ if (WARN_ON_ONCE(!is_vmalloc_addr((void *)start))) {
+ flush_cache_all();
+ return;
+ }
+
+ vm = find_vm_area((void *)start);
+ if (WARN_ON_ONCE(!vm)) {
+ flush_cache_all();
+ return;
+ }
+
+ /* The physical addresses of IOREMAP regions are contiguous */
+ if (vm->flags & VM_IOREMAP) {
+ physaddr = vm->phys_addr;
+ for (addr = start; addr < end; addr += PAGE_SIZE) {
+ preempt_disable();
+ flush_dcache_page_asm(physaddr, addr);
+ flush_icache_page_asm(physaddr, addr);
+ preempt_enable();
+ physaddr += PAGE_SIZE;
+ }
+ return;
+ }
+
+ flush_cache_all();
}
+EXPORT_SYMBOL(flush_cache_vmap);
+/*
+ * The vm_struct has been retired and the page table is set up. The
+ * last page in the range is a guard page. Its physical address can't
+ * be determined using lpa, so there is no way to flush the range
+ * using flush_dcache_page_asm.
+ */
+void flush_cache_vunmap(unsigned long start, unsigned long end)
+{
+ /* Prevent cache move-in */
+ flush_tlb_kernel_range(start, end);
+ flush_data_cache();
+}
+EXPORT_SYMBOL(flush_cache_vunmap);
+
+/*
+ * On systems with PA8800/PA8900 processors, there is no way to flush
+ * a vmap range other than using the architected loop to flush the
+ * entire cache. The page directory is not set up, so we can't use
+ * fdc, etc. FDCE/FICE don't work to flush a portion of the cache.
+ * L2 is physically indexed but FDCE/FICE instructions in virtual
+ * mode output their virtual address on the core bus, not their
+ * real address. As a result, the L2 cache index formed from the
+ * virtual address will most likely not be the same as the L2 index
+ * formed from the real address.
+ */
void flush_kernel_vmap_range(void *vaddr, int size)
{
unsigned long start = (unsigned long)vaddr;
unsigned long end = start + size;
- if ((!IS_ENABLED(CONFIG_SMP) || !arch_irqs_disabled()) &&
- (unsigned long)size >= parisc_cache_flush_threshold) {
- flush_tlb_kernel_range(start, end);
- flush_data_cache();
+ flush_tlb_kernel_range(start, end);
+
+ if (!static_branch_likely(&parisc_has_dcache))
+ return;
+
+ /* If interrupts are disabled, we can only do local flush */
+ if (WARN_ON(IS_ENABLED(CONFIG_SMP) && arch_irqs_disabled())) {
+ flush_data_cache_local(NULL);
return;
}
- flush_kernel_dcache_range_asm(start, end);
- flush_tlb_kernel_range(start, end);
+ flush_data_cache();
}
EXPORT_SYMBOL(flush_kernel_vmap_range);
@@ -818,15 +920,18 @@ void invalidate_kernel_vmap_range(void *vaddr, int size)
/* Ensure DMA is complete */
asm_syncdma();
- if ((!IS_ENABLED(CONFIG_SMP) || !arch_irqs_disabled()) &&
- (unsigned long)size >= parisc_cache_flush_threshold) {
- flush_tlb_kernel_range(start, end);
- flush_data_cache();
+ flush_tlb_kernel_range(start, end);
+
+ if (!static_branch_likely(&parisc_has_dcache))
+ return;
+
+ /* If interrupts are disabled, we can only do local flush */
+ if (WARN_ON(IS_ENABLED(CONFIG_SMP) && arch_irqs_disabled())) {
+ flush_data_cache_local(NULL);
return;
}
- purge_kernel_dcache_range_asm(start, end);
- flush_tlb_kernel_range(start, end);
+ flush_data_cache();
}
EXPORT_SYMBOL(invalidate_kernel_vmap_range);
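
For context, these two helpers implement the contract described in Documentation/core-api/cachetlb.rst: they bracket DMA to a vmalloc'ed buffer. A usage sketch, with buf and len hypothetical:

void *buf = vmalloc(len);

/* CPU filled buf through the vmalloc alias: push it out before DMA */
flush_kernel_vmap_range(buf, len);

/* ... device DMA reads from / writes into buf ... */

/* device wrote buf: drop stale lines before the CPU reads the data */
invalidate_kernel_vmap_range(buf, len);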
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 3c968f2f4ac4..c88c6d46a5bc 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -137,7 +137,7 @@ config PPC
select ARCH_HAS_GCOV_PROFILE_ALL
select ARCH_HAS_HUGEPD if HUGETLB_PAGE
select ARCH_HAS_KCOV
- select ARCH_HAS_KERNEL_FPU_SUPPORT if PPC_FPU
+ select ARCH_HAS_KERNEL_FPU_SUPPORT if PPC64 && PPC_FPU
select ARCH_HAS_MEMBARRIER_CALLBACKS
select ARCH_HAS_MEMBARRIER_SYNC_CORE
select ARCH_HAS_MEMREMAP_COMPAT_ALIGN if PPC_64S_HASH_MMU
diff --git a/arch/powerpc/crypto/.gitignore b/arch/powerpc/crypto/.gitignore
index e1094f08f713..e9fe73aac8b6 100644
--- a/arch/powerpc/crypto/.gitignore
+++ b/arch/powerpc/crypto/.gitignore
@@ -1,3 +1,5 @@
# SPDX-License-Identifier: GPL-2.0-only
aesp10-ppc.S
+aesp8-ppc.S
ghashp10-ppc.S
+ghashp8-ppc.S
diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h
index de10437fd206..fd594bf6c6a9 100644
--- a/arch/powerpc/include/asm/uaccess.h
+++ b/arch/powerpc/include/asm/uaccess.h
@@ -92,9 +92,25 @@ __pu_failed: \
: label)
#endif
+#ifdef CONFIG_CC_IS_CLANG
+#define DS_FORM_CONSTRAINT "Z<>"
+#else
+#define DS_FORM_CONSTRAINT "YZ<>"
+#endif
+
#ifdef __powerpc64__
+#ifdef CONFIG_PPC_KERNEL_PREFIXED
#define __put_user_asm2_goto(x, ptr, label) \
__put_user_asm_goto(x, ptr, label, "std")
+#else
+#define __put_user_asm2_goto(x, addr, label) \
+ asm goto ("1: std%U1%X1 %0,%1 # put_user\n" \
+ EX_TABLE(1b, %l2) \
+ : \
+ : "r" (x), DS_FORM_CONSTRAINT (*addr) \
+ : \
+ : label)
+#endif // CONFIG_PPC_KERNEL_PREFIXED
#else /* __powerpc64__ */
#define __put_user_asm2_goto(x, addr, label) \
asm goto( \
@@ -165,8 +181,19 @@ do { \
#endif
#ifdef __powerpc64__
+#ifdef CONFIG_PPC_KERNEL_PREFIXED
#define __get_user_asm2_goto(x, addr, label) \
__get_user_asm_goto(x, addr, label, "ld")
+#else
+#define __get_user_asm2_goto(x, addr, label) \
+ asm_goto_output( \
+ "1: ld%U1%X1 %0, %1 # get_user\n" \
+ EX_TABLE(1b, %l2) \
+ : "=r" (x) \
+ : DS_FORM_CONSTRAINT (*addr) \
+ : \
+ : label)
+#endif // CONFIG_PPC_KERNEL_PREFIXED
#else /* __powerpc64__ */
#define __get_user_asm2_goto(x, addr, label) \
asm_goto_output( \
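
Background for DS_FORM_CONSTRAINT: `ld` and `std` are DS-form instructions, so their 16-bit displacement must have the low two bits clear. GCC's "Y" constraint selects such operands; clang does not implement "Y", so it is limited to "Z<>" (indexed/indirect plus auto-update). In assembler terms:

ld	r9, 8(r3)	/* valid: displacement is a multiple of 4      */
ld	r9, 10(r3)	/* invalid DS-form: low bits set, rejected     */
ldx	r9, r4, r3	/* indexed form, the kind "Z" can fall back to */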
diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c
index b569ebaa590e..3ff3de9a52ac 100644
--- a/arch/powerpc/kvm/book3s_64_vio.c
+++ b/arch/powerpc/kvm/book3s_64_vio.c
@@ -130,14 +130,16 @@ long kvm_spapr_tce_attach_iommu_group(struct kvm *kvm, int tablefd,
}
rcu_read_unlock();
- fdput(f);
-
- if (!found)
+ if (!found) {
+ fdput(f);
return -EINVAL;
+ }
table_group = iommu_group_get_iommudata(grp);
- if (WARN_ON(!table_group))
+ if (WARN_ON(!table_group)) {
+ fdput(f);
return -EFAULT;
+ }
for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
struct iommu_table *tbltmp = table_group->tables[i];
@@ -158,8 +160,10 @@ long kvm_spapr_tce_attach_iommu_group(struct kvm *kvm, int tablefd,
break;
}
}
- if (!tbl)
+ if (!tbl) {
+ fdput(f);
return -EINVAL;
+ }
rcu_read_lock();
list_for_each_entry_rcu(stit, &stt->iommu_tables, next) {
@@ -170,6 +174,7 @@ long kvm_spapr_tce_attach_iommu_group(struct kvm *kvm, int tablefd,
/* stit is being destroyed */
iommu_tce_table_put(tbl);
rcu_read_unlock();
+ fdput(f);
return -ENOTTY;
}
/*
@@ -177,6 +182,7 @@ long kvm_spapr_tce_attach_iommu_group(struct kvm *kvm, int tablefd,
* its KVM reference counter and can return.
*/
rcu_read_unlock();
+ fdput(f);
return 0;
}
rcu_read_unlock();
@@ -184,6 +190,7 @@ long kvm_spapr_tce_attach_iommu_group(struct kvm *kvm, int tablefd,
stit = kzalloc(sizeof(*stit), GFP_KERNEL);
if (!stit) {
iommu_tce_table_put(tbl);
+ fdput(f);
return -ENOMEM;
}
@@ -192,6 +199,7 @@ long kvm_spapr_tce_attach_iommu_group(struct kvm *kvm, int tablefd,
list_add_rcu(&stit->next, &stt->iommu_tables);
+ fdput(f);
return 0;
}
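
The hunks above plug a file-reference leak: every early return between fdget() and the old single fdput() bailed out with the reference still held. The same fix is often written with one exit label instead; a sketch of that shape, not what this patch does:

struct fd f = fdget(tablefd);
long ret = -EINVAL;

if (!f.file)
	return -EBADF;

/* ... group and table lookup ... */
if (!found)
	goto out_fdput;

/* ... attach logic, setting ret on each path ... */
ret = 0;
out_fdput:
	fdput(f);
	return ret;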
diff --git a/arch/powerpc/net/bpf_jit_comp32.c b/arch/powerpc/net/bpf_jit_comp32.c
index 43b97032a91c..a0c4f1bde83e 100644
--- a/arch/powerpc/net/bpf_jit_comp32.c
+++ b/arch/powerpc/net/bpf_jit_comp32.c
@@ -900,6 +900,15 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, u32 *fimage, struct code
/* Get offset into TMP_REG */
EMIT(PPC_RAW_LI(tmp_reg, off));
+ /*
+ * Enforce full ordering for operations with BPF_FETCH by emitting a 'sync'
+ * before and after the operation.
+ *
+ * This is a requirement in the Linux Kernel Memory Model.
+ * See __cmpxchg_u32() in asm/cmpxchg.h as an example.
+ */
+ if ((imm & BPF_FETCH) && IS_ENABLED(CONFIG_SMP))
+ EMIT(PPC_RAW_SYNC());
tmp_idx = ctx->idx * 4;
/* load value from memory into r0 */
EMIT(PPC_RAW_LWARX(_R0, tmp_reg, dst_reg, 0));
@@ -953,6 +962,9 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, u32 *fimage, struct code
/* For the BPF_FETCH variant, get old data into src_reg */
if (imm & BPF_FETCH) {
+ /* Emit 'sync' to enforce full ordering */
+ if (IS_ENABLED(CONFIG_SMP))
+ EMIT(PPC_RAW_SYNC());
EMIT(PPC_RAW_MR(ret_reg, ax_reg));
if (!fp->aux->verifier_zext)
EMIT(PPC_RAW_LI(ret_reg - 1, 0)); /* higher 32-bit */
diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c
index 8afc14a4a125..7703dcf48be8 100644
--- a/arch/powerpc/net/bpf_jit_comp64.c
+++ b/arch/powerpc/net/bpf_jit_comp64.c
@@ -846,6 +846,15 @@ emit_clear:
/* Get offset into TMP_REG_1 */
EMIT(PPC_RAW_LI(tmp1_reg, off));
+ /*
+ * Enforce full ordering for operations with BPF_FETCH by emitting a 'sync'
+ * before and after the operation.
+ *
+ * This is a requirement in the Linux Kernel Memory Model.
+ * See __cmpxchg_u64() in asm/cmpxchg.h as an example.
+ */
+ if ((imm & BPF_FETCH) && IS_ENABLED(CONFIG_SMP))
+ EMIT(PPC_RAW_SYNC());
tmp_idx = ctx->idx * 4;
/* load value from memory into TMP_REG_2 */
if (size == BPF_DW)
@@ -908,6 +917,9 @@ emit_clear:
PPC_BCC_SHORT(COND_NE, tmp_idx);
if (imm & BPF_FETCH) {
+ /* Emit 'sync' to enforce full ordering */
+ if (IS_ENABLED(CONFIG_SMP))
+ EMIT(PPC_RAW_SYNC());
EMIT(PPC_RAW_MR(ret_reg, _R0));
/*
* Skip unnecessary zero-extension for 32-bit cmpxchg.
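
The barrier placement mirrors the hand-written atomics the comments cite: a full `sync` before the ll/sc sequence and another one after it, so a BPF_FETCH atomic is fully ordered as the LKMM requires. The emitted shape, sketched for the 64-bit case:

	sync			/* full barrier before the atomic      */
1:	ldarx	r0, 0, rA	/* load-reserve the old value          */
	/* ... compute the new value into rN ... */
	stdcx.	rN, 0, rA	/* store-conditional                   */
	bne-	1b		/* retry if the reservation was lost   */
	sync			/* full barrier after: the fetched old */
				/* value is now fully ordered          */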
diff --git a/arch/powerpc/platforms/pseries/lparcfg.c b/arch/powerpc/platforms/pseries/lparcfg.c
index 6e7029640c0c..62da20f9700a 100644
--- a/arch/powerpc/platforms/pseries/lparcfg.c
+++ b/arch/powerpc/platforms/pseries/lparcfg.c
@@ -371,8 +371,8 @@ static int read_dt_lpar_name(struct seq_file *m)
static void read_lpar_name(struct seq_file *m)
{
- if (read_rtas_lpar_name(m) && read_dt_lpar_name(m))
- pr_err_once("Error can't get the LPAR name");
+ if (read_rtas_lpar_name(m))
+ read_dt_lpar_name(m);
}
#define SPLPAR_MAXLENGTH 1026*(sizeof(char))
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index b94176e25be1..0525ee2d63c7 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -106,7 +106,7 @@ config RISCV
select HAS_IOPORT if MMU
select HAVE_ARCH_AUDITSYSCALL
select HAVE_ARCH_HUGE_VMALLOC if HAVE_ARCH_HUGE_VMAP
- select HAVE_ARCH_HUGE_VMAP if MMU && 64BIT && !XIP_KERNEL
+ select HAVE_ARCH_HUGE_VMAP if MMU && 64BIT
select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL
select HAVE_ARCH_JUMP_LABEL_RELATIVE if !XIP_KERNEL
select HAVE_ARCH_KASAN if MMU && 64BIT
diff --git a/arch/riscv/boot/dts/sophgo/cv1800b-milkv-duo.dts b/arch/riscv/boot/dts/sophgo/cv1800b-milkv-duo.dts
index cd013588adc0..375ff2661b6e 100644
--- a/arch/riscv/boot/dts/sophgo/cv1800b-milkv-duo.dts
+++ b/arch/riscv/boot/dts/sophgo/cv1800b-milkv-duo.dts
@@ -45,6 +45,7 @@
no-1-8-v;
no-mmc;
no-sdio;
+ disable-wp;
};
&uart0 {
diff --git a/arch/riscv/include/asm/cmpxchg.h b/arch/riscv/include/asm/cmpxchg.h
index ddb002ed89de..808b4c78462e 100644
--- a/arch/riscv/include/asm/cmpxchg.h
+++ b/arch/riscv/include/asm/cmpxchg.h
@@ -10,7 +10,7 @@
#include <asm/fence.h>
-#define __arch_xchg_masked(prepend, append, r, p, n) \
+#define __arch_xchg_masked(sc_sfx, prepend, append, r, p, n) \
({ \
u32 *__ptr32b = (u32 *)((ulong)(p) & ~0x3); \
ulong __s = ((ulong)(p) & (0x4 - sizeof(*p))) * BITS_PER_BYTE; \
@@ -25,7 +25,7 @@
"0: lr.w %0, %2\n" \
" and %1, %0, %z4\n" \
" or %1, %1, %z3\n" \
- " sc.w %1, %1, %2\n" \
+ " sc.w" sc_sfx " %1, %1, %2\n" \
" bnez %1, 0b\n" \
append \
: "=&r" (__retx), "=&r" (__rc), "+A" (*(__ptr32b)) \
@@ -46,7 +46,8 @@
: "memory"); \
})
-#define _arch_xchg(ptr, new, sfx, prepend, append) \
+#define _arch_xchg(ptr, new, sc_sfx, swap_sfx, prepend, \
+ sc_append, swap_append) \
({ \
__typeof__(ptr) __ptr = (ptr); \
__typeof__(*(__ptr)) __new = (new); \
@@ -55,15 +56,15 @@
switch (sizeof(*__ptr)) { \
case 1: \
case 2: \
- __arch_xchg_masked(prepend, append, \
+ __arch_xchg_masked(sc_sfx, prepend, sc_append, \
__ret, __ptr, __new); \
break; \
case 4: \
- __arch_xchg(".w" sfx, prepend, append, \
+ __arch_xchg(".w" swap_sfx, prepend, swap_append, \
__ret, __ptr, __new); \
break; \
case 8: \
- __arch_xchg(".d" sfx, prepend, append, \
+ __arch_xchg(".d" swap_sfx, prepend, swap_append, \
__ret, __ptr, __new); \
break; \
default: \
@@ -73,16 +74,17 @@
})
#define arch_xchg_relaxed(ptr, x) \
- _arch_xchg(ptr, x, "", "", "")
+ _arch_xchg(ptr, x, "", "", "", "", "")
#define arch_xchg_acquire(ptr, x) \
- _arch_xchg(ptr, x, "", "", RISCV_ACQUIRE_BARRIER)
+ _arch_xchg(ptr, x, "", "", "", \
+ RISCV_ACQUIRE_BARRIER, RISCV_ACQUIRE_BARRIER)
#define arch_xchg_release(ptr, x) \
- _arch_xchg(ptr, x, "", RISCV_RELEASE_BARRIER, "")
+ _arch_xchg(ptr, x, "", "", RISCV_RELEASE_BARRIER, "", "")
#define arch_xchg(ptr, x) \
- _arch_xchg(ptr, x, ".aqrl", "", "")
+ _arch_xchg(ptr, x, ".rl", ".aqrl", "", RISCV_FULL_BARRIER, "")
#define xchg32(ptr, x) \
({ \
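
After this change the fully ordered arch_xchg() takes two shapes, matching the suffixes threaded through _arch_xchg() above: word and doubleword sizes keep a single AMO, while the byte/halfword masked loop gets release semantics on the `sc.w` plus a trailing full fence, since no fully ordered `sc` exists. Sketched:

/* 32/64-bit: one fully ordered AMO */
amoswap.w.aqrl	a0, a1, (a2)

/* 8/16-bit masked path: lr/sc on the containing word */
0:	lr.w	a0, (a2)
	and	a3, a0, a4	/* keep the untouched bytes      */
	or	a3, a3, a5	/* insert the new byte/halfword  */
	sc.w.rl	a3, a3, (a2)
	bnez	a3, 0b
	fence	rw, rw		/* RISCV_FULL_BARRIER            */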
diff --git a/arch/riscv/include/asm/insn.h b/arch/riscv/include/asm/insn.h
index 06e439eeef9a..09fde95a5e8f 100644
--- a/arch/riscv/include/asm/insn.h
+++ b/arch/riscv/include/asm/insn.h
@@ -145,7 +145,7 @@
/* parts of opcode for RVF, RVD and RVQ */
#define RVFDQ_FL_FS_WIDTH_OFF 12
-#define RVFDQ_FL_FS_WIDTH_MASK GENMASK(3, 0)
+#define RVFDQ_FL_FS_WIDTH_MASK GENMASK(2, 0)
#define RVFDQ_FL_FS_WIDTH_W 2
#define RVFDQ_FL_FS_WIDTH_D 3
#define RVFDQ_LS_FS_WIDTH_Q 4
diff --git a/arch/riscv/kernel/cpu_ops_sbi.c b/arch/riscv/kernel/cpu_ops_sbi.c
index 1cc7df740edd..e6fbaaf54956 100644
--- a/arch/riscv/kernel/cpu_ops_sbi.c
+++ b/arch/riscv/kernel/cpu_ops_sbi.c
@@ -72,7 +72,7 @@ static int sbi_cpu_start(unsigned int cpuid, struct task_struct *tidle)
/* Make sure tidle is updated */
smp_mb();
bdata->task_ptr = tidle;
- bdata->stack_ptr = task_stack_page(tidle) + THREAD_SIZE;
+ bdata->stack_ptr = task_pt_regs(tidle);
/* Make sure boot data is updated */
smp_mb();
hsm_data = __pa(bdata);
diff --git a/arch/riscv/kernel/cpu_ops_spinwait.c b/arch/riscv/kernel/cpu_ops_spinwait.c
index 613872b0a21a..24869eb88908 100644
--- a/arch/riscv/kernel/cpu_ops_spinwait.c
+++ b/arch/riscv/kernel/cpu_ops_spinwait.c
@@ -34,8 +34,7 @@ static void cpu_update_secondary_bootdata(unsigned int cpuid,
/* Make sure tidle is updated */
smp_mb();
- WRITE_ONCE(__cpu_spinwait_stack_pointer[hartid],
- task_stack_page(tidle) + THREAD_SIZE);
+ WRITE_ONCE(__cpu_spinwait_stack_pointer[hartid], task_pt_regs(tidle));
WRITE_ONCE(__cpu_spinwait_task_pointer[hartid], tidle);
}
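
Both boot paths now hand a secondary hart an initial stack pointer sitting below the pt_regs area reserved at the very top of the task stack, rather than at the top itself. Layout sketch:

task_stack_page(tidle) + THREAD_SIZE   -->  end of the stack area
    [ struct pt_regs ]                      reserved at the top
task_pt_regs(tidle)                    -->  the correct initial sp
    [ usable kernel stack ]                 grows down from here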
diff --git a/arch/riscv/kernel/ftrace.c b/arch/riscv/kernel/ftrace.c
index 87cbd86576b2..4b95c574fd04 100644
--- a/arch/riscv/kernel/ftrace.c
+++ b/arch/riscv/kernel/ftrace.c
@@ -120,9 +120,6 @@ int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec)
out = ftrace_make_nop(mod, rec, MCOUNT_ADDR);
mutex_unlock(&text_mutex);
- if (!mod)
- local_flush_icache_range(rec->ip, rec->ip + MCOUNT_INSN_SIZE);
-
return out;
}
@@ -156,9 +153,9 @@ static int __ftrace_modify_code(void *data)
} else {
while (atomic_read(&param->cpu_count) <= num_online_cpus())
cpu_relax();
- }
- local_flush_icache_all();
+ local_flush_icache_all();
+ }
return 0;
}
diff --git a/arch/riscv/kernel/patch.c b/arch/riscv/kernel/patch.c
index 4007563fb607..ab03732d06c4 100644
--- a/arch/riscv/kernel/patch.c
+++ b/arch/riscv/kernel/patch.c
@@ -89,6 +89,14 @@ static int __patch_insn_set(void *addr, u8 c, size_t len)
memset(waddr, c, len);
+ /*
+ * We could have just patched a function that is about to be
+ * called, so make sure we don't execute partially patched
+ * instructions by flushing the icache as soon as possible.
+ */
+ local_flush_icache_range((unsigned long)waddr,
+ (unsigned long)waddr + len);
+
patch_unmap(FIX_TEXT_POKE0);
if (across_pages)
@@ -135,6 +143,14 @@ static int __patch_insn_write(void *addr, const void *insn, size_t len)
ret = copy_to_kernel_nofault(waddr, insn, len);
+ /*
+ * We could have just patched a function that is about to be
+ * called, so make sure we don't execute partially patched
+ * instructions by flushing the icache as soon as possible.
+ */
+ local_flush_icache_range((unsigned long)waddr,
+ (unsigned long)waddr + len);
+
patch_unmap(FIX_TEXT_POKE0);
if (across_pages)
@@ -189,9 +205,6 @@ int patch_text_set_nosync(void *addr, u8 c, size_t len)
ret = patch_insn_set(tp, c, len);
- if (!ret)
- flush_icache_range((uintptr_t)tp, (uintptr_t)tp + len);
-
return ret;
}
NOKPROBE_SYMBOL(patch_text_set_nosync);
@@ -224,9 +237,6 @@ int patch_text_nosync(void *addr, const void *insns, size_t len)
ret = patch_insn_write(tp, insns, len);
- if (!ret)
- flush_icache_range((uintptr_t) tp, (uintptr_t) tp + len);
-
return ret;
}
NOKPROBE_SYMBOL(patch_text_nosync);
@@ -253,9 +263,9 @@ static int patch_text_cb(void *data)
} else {
while (atomic_read(&patch->cpu_count) <= num_online_cpus())
cpu_relax();
- }
- local_flush_icache_all();
+ local_flush_icache_all();
+ }
return ret;
}
diff --git a/arch/riscv/kvm/aia_device.c b/arch/riscv/kvm/aia_device.c
index 0eb689351b7d..5cd407c6a8e4 100644
--- a/arch/riscv/kvm/aia_device.c
+++ b/arch/riscv/kvm/aia_device.c
@@ -237,10 +237,11 @@ static gpa_t aia_imsic_ppn(struct kvm_aia *aia, gpa_t addr)
static u32 aia_imsic_hart_index(struct kvm_aia *aia, gpa_t addr)
{
- u32 hart, group = 0;
+ u32 hart = 0, group = 0;
- hart = (addr >> (aia->nr_guest_bits + IMSIC_MMIO_PAGE_SHIFT)) &
- GENMASK_ULL(aia->nr_hart_bits - 1, 0);
+ if (aia->nr_hart_bits)
+ hart = (addr >> (aia->nr_guest_bits + IMSIC_MMIO_PAGE_SHIFT)) &
+ GENMASK_ULL(aia->nr_hart_bits - 1, 0);
if (aia->nr_group_bits)
group = (addr >> aia->nr_group_shift) &
GENMASK_ULL(aia->nr_group_bits - 1, 0);
diff --git a/arch/riscv/kvm/vcpu_onereg.c b/arch/riscv/kvm/vcpu_onereg.c
index c676275ea0a0..62874fbca29f 100644
--- a/arch/riscv/kvm/vcpu_onereg.c
+++ b/arch/riscv/kvm/vcpu_onereg.c
@@ -724,9 +724,9 @@ static int kvm_riscv_vcpu_set_reg_isa_ext(struct kvm_vcpu *vcpu,
switch (reg_subtype) {
case KVM_REG_RISCV_ISA_SINGLE:
return riscv_vcpu_set_isa_ext_single(vcpu, reg_num, reg_val);
- case KVM_REG_RISCV_SBI_MULTI_EN:
+ case KVM_REG_RISCV_ISA_MULTI_EN:
return riscv_vcpu_set_isa_ext_multi(vcpu, reg_num, reg_val, true);
- case KVM_REG_RISCV_SBI_MULTI_DIS:
+ case KVM_REG_RISCV_ISA_MULTI_DIS:
return riscv_vcpu_set_isa_ext_multi(vcpu, reg_num, reg_val, false);
default:
return -ENOENT;
diff --git a/arch/riscv/mm/fault.c b/arch/riscv/mm/fault.c
index b3fcf7d67efb..5224f3733802 100644
--- a/arch/riscv/mm/fault.c
+++ b/arch/riscv/mm/fault.c
@@ -293,8 +293,8 @@ void handle_page_fault(struct pt_regs *regs)
if (unlikely(access_error(cause, vma))) {
vma_end_read(vma);
count_vm_vma_lock_event(VMA_LOCK_SUCCESS);
- tsk->thread.bad_cause = SEGV_ACCERR;
- bad_area_nosemaphore(regs, code, addr);
+ tsk->thread.bad_cause = cause;
+ bad_area_nosemaphore(regs, SEGV_ACCERR, addr);
return;
}
diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
index e3218d65f21d..e3405e4b99af 100644
--- a/arch/riscv/mm/init.c
+++ b/arch/riscv/mm/init.c
@@ -250,18 +250,19 @@ static void __init setup_bootmem(void)
kernel_map.va_pa_offset = PAGE_OFFSET - phys_ram_base;
/*
- * memblock allocator is not aware of the fact that last 4K bytes of
- * the addressable memory can not be mapped because of IS_ERR_VALUE
- * macro. Make sure that last 4k bytes are not usable by memblock
- * if end of dram is equal to maximum addressable memory. For 64-bit
- * kernel, this problem can't happen here as the end of the virtual
- * address space is occupied by the kernel mapping then this check must
- * be done as soon as the kernel mapping base address is determined.
+ * Reserve physical address space that would be mapped to virtual
+ * addresses at or above (void *)(-PAGE_SIZE) because:
+ * - This memory would overlap with ERR_PTR
+ * - This memory belongs to high memory, which is not supported
+ *
+ * This is not applicable to 64-bit kernel, because virtual addresses
+ * after (void *)(-PAGE_SIZE) are not linearly mapped: they are
+ * occupied by kernel mapping. Also it is unrealistic for high memory
+ * to exist on 64-bit platforms.
*/
if (!IS_ENABLED(CONFIG_64BIT)) {
- max_mapped_addr = __pa(~(ulong)0);
- if (max_mapped_addr == (phys_ram_end - 1))
- memblock_set_current_limit(max_mapped_addr - 4096);
+ max_mapped_addr = __va_to_pa_nodebug(-PAGE_SIZE);
+ memblock_reserve(max_mapped_addr, (phys_addr_t)-max_mapped_addr);
}
min_low_pfn = PFN_UP(phys_ram_base);
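
The new reservation in concrete numbers, for a 32-bit layout with PAGE_OFFSET and the RAM base assumed purely for illustration:

/* PAGE_OFFSET = 0xc0000000, phys_ram_base = 0x80000000 (assumed) */
max_mapped_addr = __va_to_pa_nodebug(-PAGE_SIZE);
		/* = 0xfffff000 - 0xc0000000 + 0x80000000 = 0xbffff000 */
memblock_reserve(max_mapped_addr, (phys_addr_t)-max_mapped_addr);
		/* size wraps to 0x40001000: everything from 0xbffff000
		   up is reserved, keeping the linear map clear of the
		   ERR_PTR page at the top of the virtual space */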
diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c
index 182aac6a0f77..48ef5fe5c08a 100644
--- a/arch/s390/boot/startup.c
+++ b/arch/s390/boot/startup.c
@@ -384,7 +384,7 @@ static void fixup_vmlinux_info(void)
void startup_kernel(void)
{
unsigned long kernel_size = vmlinux.image_size + vmlinux.bss_size;
- unsigned long nokaslr_offset_phys = mem_safe_offset();
+ unsigned long nokaslr_offset_phys, kaslr_large_page_offset;
unsigned long amode31_lma = 0;
unsigned long max_physmem_end;
unsigned long asce_limit;
@@ -393,6 +393,12 @@ void startup_kernel(void)
fixup_vmlinux_info();
setup_lpp();
+
+ /*
+ * Non-randomized kernel physical start address must be _SEGMENT_SIZE
+ * aligned (see below).
+ */
+ nokaslr_offset_phys = ALIGN(mem_safe_offset(), _SEGMENT_SIZE);
safe_addr = PAGE_ALIGN(nokaslr_offset_phys + kernel_size);
/*
@@ -425,10 +431,25 @@ void startup_kernel(void)
save_ipl_cert_comp_list();
rescue_initrd(safe_addr, ident_map_size);
- if (kaslr_enabled())
- __kaslr_offset_phys = randomize_within_range(kernel_size, THREAD_SIZE, 0, ident_map_size);
+ /*
+ * __kaslr_offset_phys must be _SEGMENT_SIZE aligned, so the lower
+ * 20 bits (the offset within a large page) are zero. Copy the last
+ * 20 bits of __kaslr_offset, which is THREAD_SIZE aligned, to
+ * __kaslr_offset_phys.
+ *
+ * With this the last 20 bits of __kaslr_offset_phys and __kaslr_offset
+ * are identical, which is required to allow for large mappings of the
+ * kernel image.
+ */
+ kaslr_large_page_offset = __kaslr_offset & ~_SEGMENT_MASK;
+ if (kaslr_enabled()) {
+ unsigned long end = ident_map_size - kaslr_large_page_offset;
+
+ __kaslr_offset_phys = randomize_within_range(kernel_size, _SEGMENT_SIZE, 0, end);
+ }
if (!__kaslr_offset_phys)
__kaslr_offset_phys = nokaslr_offset_phys;
+ __kaslr_offset_phys |= kaslr_large_page_offset;
kaslr_adjust_vmlinux_info(__kaslr_offset_phys);
physmem_reserve(RR_VMLINUX, __kaslr_offset_phys, kernel_size);
deploy_kernel((void *)__kaslr_offset_phys);
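
A worked example of the offset stitching, with every value assumed: _SEGMENT_SIZE is 1 MiB, so ~_SEGMENT_MASK keeps the low 20 bits.

__kaslr_offset          = 0x12345000;	/* virtual, THREAD_SIZE aligned    */
kaslr_large_page_offset = 0x00045000;	/* __kaslr_offset & ~_SEGMENT_MASK */

__kaslr_offset_phys     = 0x78900000;	/* randomize_within_range(), 1 MiB */
__kaslr_offset_phys    |= kaslr_large_page_offset;
			/* = 0x78945000: virtual and physical start now
			   agree modulo 1 MiB, so the image can be mapped
			   with large (segment) pages */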
diff --git a/arch/s390/boot/vmem.c b/arch/s390/boot/vmem.c
index 96d48b7112d4..40cfce2687c4 100644
--- a/arch/s390/boot/vmem.c
+++ b/arch/s390/boot/vmem.c
@@ -261,21 +261,27 @@ static unsigned long _pa(unsigned long addr, unsigned long size, enum populate_m
static bool large_allowed(enum populate_mode mode)
{
- return (mode == POPULATE_DIRECT) || (mode == POPULATE_IDENTITY);
+ return (mode == POPULATE_DIRECT) || (mode == POPULATE_IDENTITY) || (mode == POPULATE_KERNEL);
}
static bool can_large_pud(pud_t *pu_dir, unsigned long addr, unsigned long end,
enum populate_mode mode)
{
+ unsigned long size = end - addr;
+
return machine.has_edat2 && large_allowed(mode) &&
- IS_ALIGNED(addr, PUD_SIZE) && (end - addr) >= PUD_SIZE;
+ IS_ALIGNED(addr, PUD_SIZE) && (size >= PUD_SIZE) &&
+ IS_ALIGNED(_pa(addr, size, mode), PUD_SIZE);
}
static bool can_large_pmd(pmd_t *pm_dir, unsigned long addr, unsigned long end,
enum populate_mode mode)
{
+ unsigned long size = end - addr;
+
return machine.has_edat1 && large_allowed(mode) &&
- IS_ALIGNED(addr, PMD_SIZE) && (end - addr) >= PMD_SIZE;
+ IS_ALIGNED(addr, PMD_SIZE) && (size >= PMD_SIZE) &&
+ IS_ALIGNED(_pa(addr, size, mode), PMD_SIZE);
}
static void pgtable_pte_populate(pmd_t *pmd, unsigned long addr, unsigned long end,
diff --git a/arch/s390/boot/vmlinux.lds.S b/arch/s390/boot/vmlinux.lds.S
index 1fe5a1d3ff60..a750711d44c8 100644
--- a/arch/s390/boot/vmlinux.lds.S
+++ b/arch/s390/boot/vmlinux.lds.S
@@ -109,6 +109,7 @@ SECTIONS
#ifdef CONFIG_KERNEL_UNCOMPRESSED
. = ALIGN(PAGE_SIZE);
. += AMODE31_SIZE; /* .amode31 section */
+ . = ALIGN(1 << 20); /* _SEGMENT_SIZE */
#else
. = ALIGN(8);
#endif
diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig
index 145342e46ea8..8c4adece8911 100644
--- a/arch/s390/configs/debug_defconfig
+++ b/arch/s390/configs/debug_defconfig
@@ -43,7 +43,6 @@ CONFIG_PROFILING=y
CONFIG_KEXEC=y
CONFIG_KEXEC_FILE=y
CONFIG_KEXEC_SIG=y
-CONFIG_CRASH_DUMP=y
CONFIG_LIVEPATCH=y
CONFIG_MARCH_Z13=y
CONFIG_NR_CPUS=512
@@ -51,6 +50,7 @@ CONFIG_NUMA=y
CONFIG_HZ_100=y
CONFIG_CERT_STORE=y
CONFIG_EXPOLINE=y
+# CONFIG_EXPOLINE_EXTERN is not set
CONFIG_EXPOLINE_AUTO=y
CONFIG_CHSC_SCH=y
CONFIG_VFIO_CCW=m
@@ -76,6 +76,7 @@ CONFIG_MODULE_FORCE_UNLOAD=y
CONFIG_MODULE_UNLOAD_TAINT_TRACKING=y
CONFIG_MODVERSIONS=y
CONFIG_MODULE_SRCVERSION_ALL=y
+CONFIG_MODULE_SIG_SHA256=y
CONFIG_BLK_DEV_THROTTLING=y
CONFIG_BLK_WBT=y
CONFIG_BLK_CGROUP_IOLATENCY=y
@@ -100,7 +101,6 @@ CONFIG_MEMORY_HOTPLUG=y
CONFIG_MEMORY_HOTREMOVE=y
CONFIG_KSM=y
CONFIG_TRANSPARENT_HUGEPAGE=y
-CONFIG_CMA_DEBUG=y
CONFIG_CMA_DEBUGFS=y
CONFIG_CMA_SYSFS=y
CONFIG_CMA_AREAS=7
@@ -119,6 +119,7 @@ CONFIG_UNIX_DIAG=m
CONFIG_XFRM_USER=m
CONFIG_NET_KEY=m
CONFIG_SMC_DIAG=m
+CONFIG_SMC_LO=y
CONFIG_INET=y
CONFIG_IP_MULTICAST=y
CONFIG_IP_ADVANCED_ROUTER=y
@@ -133,7 +134,6 @@ CONFIG_IP_MROUTE=y
CONFIG_IP_MROUTE_MULTIPLE_TABLES=y
CONFIG_IP_PIMSM_V1=y
CONFIG_IP_PIMSM_V2=y
-CONFIG_SYN_COOKIES=y
CONFIG_NET_IPVTI=m
CONFIG_INET_AH=m
CONFIG_INET_ESP=m
@@ -167,6 +167,7 @@ CONFIG_BRIDGE_NETFILTER=m
CONFIG_NETFILTER_NETLINK_HOOK=m
CONFIG_NF_CONNTRACK=m
CONFIG_NF_CONNTRACK_SECMARK=y
+CONFIG_NF_CONNTRACK_ZONES=y
CONFIG_NF_CONNTRACK_PROCFS=y
CONFIG_NF_CONNTRACK_EVENTS=y
CONFIG_NF_CONNTRACK_TIMEOUT=y
@@ -183,17 +184,39 @@ CONFIG_NF_CONNTRACK_SIP=m
CONFIG_NF_CONNTRACK_TFTP=m
CONFIG_NF_CT_NETLINK=m
CONFIG_NF_CT_NETLINK_TIMEOUT=m
+CONFIG_NF_CT_NETLINK_HELPER=m
+CONFIG_NETFILTER_NETLINK_GLUE_CT=y
CONFIG_NF_TABLES=m
CONFIG_NF_TABLES_INET=y
+CONFIG_NF_TABLES_NETDEV=y
+CONFIG_NFT_NUMGEN=m
CONFIG_NFT_CT=m
+CONFIG_NFT_FLOW_OFFLOAD=m
+CONFIG_NFT_CONNLIMIT=m
CONFIG_NFT_LOG=m
CONFIG_NFT_LIMIT=m
+CONFIG_NFT_MASQ=m
+CONFIG_NFT_REDIR=m
CONFIG_NFT_NAT=m
+CONFIG_NFT_TUNNEL=m
+CONFIG_NFT_QUEUE=m
+CONFIG_NFT_QUOTA=m
CONFIG_NFT_REJECT=m
CONFIG_NFT_COMPAT=m
CONFIG_NFT_HASH=m
CONFIG_NFT_FIB_INET=m
-CONFIG_NETFILTER_XTABLES_COMPAT=y
+CONFIG_NFT_XFRM=m
+CONFIG_NFT_SOCKET=m
+CONFIG_NFT_OSF=m
+CONFIG_NFT_TPROXY=m
+CONFIG_NFT_SYNPROXY=m
+CONFIG_NFT_DUP_NETDEV=m
+CONFIG_NFT_FWD_NETDEV=m
+CONFIG_NFT_FIB_NETDEV=m
+CONFIG_NFT_REJECT_NETDEV=m
+CONFIG_NF_FLOW_TABLE_INET=m
+CONFIG_NF_FLOW_TABLE=m
+CONFIG_NF_FLOW_TABLE_PROCFS=y
CONFIG_NETFILTER_XT_SET=m
CONFIG_NETFILTER_XT_TARGET_AUDIT=m
CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m
@@ -206,8 +229,10 @@ CONFIG_NETFILTER_XT_TARGET_HMARK=m
CONFIG_NETFILTER_XT_TARGET_IDLETIMER=m
CONFIG_NETFILTER_XT_TARGET_LOG=m
CONFIG_NETFILTER_XT_TARGET_MARK=m
+CONFIG_NETFILTER_XT_TARGET_NETMAP=m
CONFIG_NETFILTER_XT_TARGET_NFLOG=m
CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m
+CONFIG_NETFILTER_XT_TARGET_REDIRECT=m
CONFIG_NETFILTER_XT_TARGET_TEE=m
CONFIG_NETFILTER_XT_TARGET_TPROXY=m
CONFIG_NETFILTER_XT_TARGET_TRACE=m
@@ -216,6 +241,7 @@ CONFIG_NETFILTER_XT_TARGET_TCPMSS=m
CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP=m
CONFIG_NETFILTER_XT_MATCH_ADDRTYPE=m
CONFIG_NETFILTER_XT_MATCH_BPF=m
+CONFIG_NETFILTER_XT_MATCH_CGROUP=m
CONFIG_NETFILTER_XT_MATCH_CLUSTER=m
CONFIG_NETFILTER_XT_MATCH_COMMENT=m
CONFIG_NETFILTER_XT_MATCH_CONNBYTES=m
@@ -230,6 +256,7 @@ CONFIG_NETFILTER_XT_MATCH_DSCP=m
CONFIG_NETFILTER_XT_MATCH_ESP=m
CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=m
CONFIG_NETFILTER_XT_MATCH_HELPER=m
+CONFIG_NETFILTER_XT_MATCH_IPCOMP=m
CONFIG_NETFILTER_XT_MATCH_IPRANGE=m
CONFIG_NETFILTER_XT_MATCH_IPVS=m
CONFIG_NETFILTER_XT_MATCH_LENGTH=m
@@ -247,6 +274,7 @@ CONFIG_NETFILTER_XT_MATCH_QUOTA=m
CONFIG_NETFILTER_XT_MATCH_RATEEST=m
CONFIG_NETFILTER_XT_MATCH_REALM=m
CONFIG_NETFILTER_XT_MATCH_RECENT=m
+CONFIG_NETFILTER_XT_MATCH_SOCKET=m
CONFIG_NETFILTER_XT_MATCH_STATE=m
CONFIG_NETFILTER_XT_MATCH_STATISTIC=m
CONFIG_NETFILTER_XT_MATCH_STRING=m
@@ -302,7 +330,6 @@ CONFIG_IP_NF_TARGET_ECN=m
CONFIG_IP_NF_TARGET_TTL=m
CONFIG_IP_NF_RAW=m
CONFIG_IP_NF_SECURITY=m
-CONFIG_IP_NF_ARPTABLES=m
CONFIG_IP_NF_ARPFILTER=m
CONFIG_IP_NF_ARP_MANGLE=m
CONFIG_NFT_FIB_IPV6=m
@@ -373,7 +400,6 @@ CONFIG_NET_ACT_POLICE=m
CONFIG_NET_ACT_GACT=m
CONFIG_GACT_PROB=y
CONFIG_NET_ACT_MIRRED=m
-CONFIG_NET_ACT_IPT=m
CONFIG_NET_ACT_NAT=m
CONFIG_NET_ACT_PEDIT=m
CONFIG_NET_ACT_SIMP=m
@@ -462,6 +488,7 @@ CONFIG_DM_VERITY=m
CONFIG_DM_VERITY_VERIFY_ROOTHASH_SIG=y
CONFIG_DM_SWITCH=m
CONFIG_DM_INTEGRITY=m
+CONFIG_DM_VDO=m
CONFIG_NETDEVICES=y
CONFIG_BONDING=m
CONFIG_DUMMY=m
@@ -574,7 +601,6 @@ CONFIG_WATCHDOG=y
CONFIG_WATCHDOG_NOWAYOUT=y
CONFIG_SOFT_WATCHDOG=m
CONFIG_DIAG288_WATCHDOG=m
-# CONFIG_DRM_DEBUG_MODESET_LOCK is not set
CONFIG_FB=y
# CONFIG_FB_DEVICE is not set
CONFIG_FRAMEBUFFER_CONSOLE=y
@@ -645,7 +671,6 @@ CONFIG_MSDOS_FS=m
CONFIG_VFAT_FS=m
CONFIG_EXFAT_FS=m
CONFIG_NTFS_FS=m
-CONFIG_NTFS_RW=y
CONFIG_PROC_KCORE=y
CONFIG_TMPFS=y
CONFIG_TMPFS_POSIX_ACL=y
@@ -663,6 +688,7 @@ CONFIG_SQUASHFS_XZ=y
CONFIG_SQUASHFS_ZSTD=y
CONFIG_ROMFS_FS=m
CONFIG_NFS_FS=m
+CONFIG_NFS_V2=m
CONFIG_NFS_V3_ACL=y
CONFIG_NFS_V4=m
CONFIG_NFS_SWAP=y
@@ -879,6 +905,5 @@ CONFIG_RBTREE_TEST=y
CONFIG_INTERVAL_TREE_TEST=m
CONFIG_PERCPU_TEST=m
CONFIG_ATOMIC64_SELFTEST=y
-CONFIG_STRING_SELFTEST=y
CONFIG_TEST_BITOPS=m
CONFIG_TEST_BPF=m
diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig
index dc237896f99d..6dd11d3b6aaa 100644
--- a/arch/s390/configs/defconfig
+++ b/arch/s390/configs/defconfig
@@ -41,7 +41,6 @@ CONFIG_PROFILING=y
CONFIG_KEXEC=y
CONFIG_KEXEC_FILE=y
CONFIG_KEXEC_SIG=y
-CONFIG_CRASH_DUMP=y
CONFIG_LIVEPATCH=y
CONFIG_MARCH_Z13=y
CONFIG_NR_CPUS=512
@@ -49,6 +48,7 @@ CONFIG_NUMA=y
CONFIG_HZ_100=y
CONFIG_CERT_STORE=y
CONFIG_EXPOLINE=y
+# CONFIG_EXPOLINE_EXTERN is not set
CONFIG_EXPOLINE_AUTO=y
CONFIG_CHSC_SCH=y
CONFIG_VFIO_CCW=m
@@ -71,6 +71,7 @@ CONFIG_MODULE_FORCE_UNLOAD=y
CONFIG_MODULE_UNLOAD_TAINT_TRACKING=y
CONFIG_MODVERSIONS=y
CONFIG_MODULE_SRCVERSION_ALL=y
+CONFIG_MODULE_SIG_SHA256=y
CONFIG_BLK_DEV_THROTTLING=y
CONFIG_BLK_WBT=y
CONFIG_BLK_CGROUP_IOLATENCY=y
@@ -110,6 +111,7 @@ CONFIG_UNIX_DIAG=m
CONFIG_XFRM_USER=m
CONFIG_NET_KEY=m
CONFIG_SMC_DIAG=m
+CONFIG_SMC_LO=y
CONFIG_INET=y
CONFIG_IP_MULTICAST=y
CONFIG_IP_ADVANCED_ROUTER=y
@@ -124,7 +126,6 @@ CONFIG_IP_MROUTE=y
CONFIG_IP_MROUTE_MULTIPLE_TABLES=y
CONFIG_IP_PIMSM_V1=y
CONFIG_IP_PIMSM_V2=y
-CONFIG_SYN_COOKIES=y
CONFIG_NET_IPVTI=m
CONFIG_INET_AH=m
CONFIG_INET_ESP=m
@@ -158,6 +159,7 @@ CONFIG_BRIDGE_NETFILTER=m
CONFIG_NETFILTER_NETLINK_HOOK=m
CONFIG_NF_CONNTRACK=m
CONFIG_NF_CONNTRACK_SECMARK=y
+CONFIG_NF_CONNTRACK_ZONES=y
CONFIG_NF_CONNTRACK_PROCFS=y
CONFIG_NF_CONNTRACK_EVENTS=y
CONFIG_NF_CONNTRACK_TIMEOUT=y
@@ -174,17 +176,39 @@ CONFIG_NF_CONNTRACK_SIP=m
CONFIG_NF_CONNTRACK_TFTP=m
CONFIG_NF_CT_NETLINK=m
CONFIG_NF_CT_NETLINK_TIMEOUT=m
+CONFIG_NF_CT_NETLINK_HELPER=m
+CONFIG_NETFILTER_NETLINK_GLUE_CT=y
CONFIG_NF_TABLES=m
CONFIG_NF_TABLES_INET=y
+CONFIG_NF_TABLES_NETDEV=y
+CONFIG_NFT_NUMGEN=m
CONFIG_NFT_CT=m
+CONFIG_NFT_FLOW_OFFLOAD=m
+CONFIG_NFT_CONNLIMIT=m
CONFIG_NFT_LOG=m
CONFIG_NFT_LIMIT=m
+CONFIG_NFT_MASQ=m
+CONFIG_NFT_REDIR=m
CONFIG_NFT_NAT=m
+CONFIG_NFT_TUNNEL=m
+CONFIG_NFT_QUEUE=m
+CONFIG_NFT_QUOTA=m
CONFIG_NFT_REJECT=m
CONFIG_NFT_COMPAT=m
CONFIG_NFT_HASH=m
CONFIG_NFT_FIB_INET=m
-CONFIG_NETFILTER_XTABLES_COMPAT=y
+CONFIG_NFT_XFRM=m
+CONFIG_NFT_SOCKET=m
+CONFIG_NFT_OSF=m
+CONFIG_NFT_TPROXY=m
+CONFIG_NFT_SYNPROXY=m
+CONFIG_NFT_DUP_NETDEV=m
+CONFIG_NFT_FWD_NETDEV=m
+CONFIG_NFT_FIB_NETDEV=m
+CONFIG_NFT_REJECT_NETDEV=m
+CONFIG_NF_FLOW_TABLE_INET=m
+CONFIG_NF_FLOW_TABLE=m
+CONFIG_NF_FLOW_TABLE_PROCFS=y
CONFIG_NETFILTER_XT_SET=m
CONFIG_NETFILTER_XT_TARGET_AUDIT=m
CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m
@@ -197,8 +221,10 @@ CONFIG_NETFILTER_XT_TARGET_HMARK=m
CONFIG_NETFILTER_XT_TARGET_IDLETIMER=m
CONFIG_NETFILTER_XT_TARGET_LOG=m
CONFIG_NETFILTER_XT_TARGET_MARK=m
+CONFIG_NETFILTER_XT_TARGET_NETMAP=m
CONFIG_NETFILTER_XT_TARGET_NFLOG=m
CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m
+CONFIG_NETFILTER_XT_TARGET_REDIRECT=m
CONFIG_NETFILTER_XT_TARGET_TEE=m
CONFIG_NETFILTER_XT_TARGET_TPROXY=m
CONFIG_NETFILTER_XT_TARGET_TRACE=m
@@ -207,6 +233,7 @@ CONFIG_NETFILTER_XT_TARGET_TCPMSS=m
CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP=m
CONFIG_NETFILTER_XT_MATCH_ADDRTYPE=m
CONFIG_NETFILTER_XT_MATCH_BPF=m
+CONFIG_NETFILTER_XT_MATCH_CGROUP=m
CONFIG_NETFILTER_XT_MATCH_CLUSTER=m
CONFIG_NETFILTER_XT_MATCH_COMMENT=m
CONFIG_NETFILTER_XT_MATCH_CONNBYTES=m
@@ -221,6 +248,7 @@ CONFIG_NETFILTER_XT_MATCH_DSCP=m
CONFIG_NETFILTER_XT_MATCH_ESP=m
CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=m
CONFIG_NETFILTER_XT_MATCH_HELPER=m
+CONFIG_NETFILTER_XT_MATCH_IPCOMP=m
CONFIG_NETFILTER_XT_MATCH_IPRANGE=m
CONFIG_NETFILTER_XT_MATCH_IPVS=m
CONFIG_NETFILTER_XT_MATCH_LENGTH=m
@@ -238,6 +266,7 @@ CONFIG_NETFILTER_XT_MATCH_QUOTA=m
CONFIG_NETFILTER_XT_MATCH_RATEEST=m
CONFIG_NETFILTER_XT_MATCH_REALM=m
CONFIG_NETFILTER_XT_MATCH_RECENT=m
+CONFIG_NETFILTER_XT_MATCH_SOCKET=m
CONFIG_NETFILTER_XT_MATCH_STATE=m
CONFIG_NETFILTER_XT_MATCH_STATISTIC=m
CONFIG_NETFILTER_XT_MATCH_STRING=m
@@ -293,7 +322,6 @@ CONFIG_IP_NF_TARGET_ECN=m
CONFIG_IP_NF_TARGET_TTL=m
CONFIG_IP_NF_RAW=m
CONFIG_IP_NF_SECURITY=m
-CONFIG_IP_NF_ARPTABLES=m
CONFIG_IP_NF_ARPFILTER=m
CONFIG_IP_NF_ARP_MANGLE=m
CONFIG_NFT_FIB_IPV6=m
@@ -363,7 +391,6 @@ CONFIG_NET_ACT_POLICE=m
CONFIG_NET_ACT_GACT=m
CONFIG_GACT_PROB=y
CONFIG_NET_ACT_MIRRED=m
-CONFIG_NET_ACT_IPT=m
CONFIG_NET_ACT_NAT=m
CONFIG_NET_ACT_PEDIT=m
CONFIG_NET_ACT_SIMP=m
@@ -452,6 +479,7 @@ CONFIG_DM_VERITY=m
CONFIG_DM_VERITY_VERIFY_ROOTHASH_SIG=y
CONFIG_DM_SWITCH=m
CONFIG_DM_INTEGRITY=m
+CONFIG_DM_VDO=m
CONFIG_NETDEVICES=y
CONFIG_BONDING=m
CONFIG_DUMMY=m
@@ -630,7 +658,6 @@ CONFIG_MSDOS_FS=m
CONFIG_VFAT_FS=m
CONFIG_EXFAT_FS=m
CONFIG_NTFS_FS=m
-CONFIG_NTFS_RW=y
CONFIG_PROC_KCORE=y
CONFIG_TMPFS=y
CONFIG_TMPFS_POSIX_ACL=y
@@ -649,6 +676,7 @@ CONFIG_SQUASHFS_XZ=y
CONFIG_SQUASHFS_ZSTD=y
CONFIG_ROMFS_FS=m
CONFIG_NFS_FS=m
+CONFIG_NFS_V2=m
CONFIG_NFS_V3_ACL=y
CONFIG_NFS_V4=m
CONFIG_NFS_SWAP=y
diff --git a/arch/s390/configs/zfcpdump_defconfig b/arch/s390/configs/zfcpdump_defconfig
index c51f3ec4eb28..8c2b61363bab 100644
--- a/arch/s390/configs/zfcpdump_defconfig
+++ b/arch/s390/configs/zfcpdump_defconfig
@@ -9,25 +9,22 @@ CONFIG_BPF_SYSCALL=y
CONFIG_BLK_DEV_INITRD=y
CONFIG_CC_OPTIMIZE_FOR_SIZE=y
CONFIG_KEXEC=y
-CONFIG_CRASH_DUMP=y
CONFIG_MARCH_Z13=y
CONFIG_NR_CPUS=2
CONFIG_HZ_100=y
# CONFIG_CHSC_SCH is not set
# CONFIG_SCM_BUS is not set
+# CONFIG_AP is not set
# CONFIG_PFAULT is not set
# CONFIG_S390_HYPFS is not set
# CONFIG_VIRTUALIZATION is not set
# CONFIG_S390_GUEST is not set
# CONFIG_SECCOMP is not set
-# CONFIG_GCC_PLUGINS is not set
# CONFIG_BLOCK_LEGACY_AUTOLOAD is not set
CONFIG_PARTITION_ADVANCED=y
# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
# CONFIG_SWAP is not set
# CONFIG_COMPAT_BRK is not set
-# CONFIG_COMPACTION is not set
-# CONFIG_MIGRATION is not set
CONFIG_NET=y
# CONFIG_IUCV is not set
# CONFIG_PCPU_DEV_REFCNT is not set
diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c
index 9863ebe75019..edae13416196 100644
--- a/arch/s390/kernel/crash_dump.c
+++ b/arch/s390/kernel/crash_dump.c
@@ -451,7 +451,7 @@ static void *nt_final(void *ptr)
/*
* Initialize ELF header (new kernel)
*/
-static void *ehdr_init(Elf64_Ehdr *ehdr, int mem_chunk_cnt)
+static void *ehdr_init(Elf64_Ehdr *ehdr, int phdr_count)
{
memset(ehdr, 0, sizeof(*ehdr));
memcpy(ehdr->e_ident, ELFMAG, SELFMAG);
@@ -465,11 +465,8 @@ static void *ehdr_init(Elf64_Ehdr *ehdr, int mem_chunk_cnt)
ehdr->e_phoff = sizeof(Elf64_Ehdr);
ehdr->e_ehsize = sizeof(Elf64_Ehdr);
ehdr->e_phentsize = sizeof(Elf64_Phdr);
- /*
- * Number of memory chunk PT_LOAD program headers plus one kernel
- * image PT_LOAD program header plus one PT_NOTE program header.
- */
- ehdr->e_phnum = mem_chunk_cnt + 1 + 1;
+ /* Number of PT_LOAD program headers plus PT_NOTE program header */
+ ehdr->e_phnum = phdr_count + 1;
return ehdr + 1;
}
@@ -503,12 +500,14 @@ static int get_mem_chunk_cnt(void)
/*
* Initialize ELF loads (new kernel)
*/
-static void loads_init(Elf64_Phdr *phdr)
+static void loads_init(Elf64_Phdr *phdr, bool os_info_has_vm)
{
- unsigned long old_identity_base = os_info_old_value(OS_INFO_IDENTITY_BASE);
+ unsigned long old_identity_base = 0;
phys_addr_t start, end;
u64 idx;
+ if (os_info_has_vm)
+ old_identity_base = os_info_old_value(OS_INFO_IDENTITY_BASE);
for_each_physmem_range(idx, &oldmem_type, &start, &end) {
phdr->p_type = PT_LOAD;
phdr->p_vaddr = old_identity_base + start;
@@ -522,6 +521,11 @@ static void loads_init(Elf64_Phdr *phdr)
}
}
+static bool os_info_has_vm(void)
+{
+ return os_info_old_value(OS_INFO_KASLR_OFFSET);
+}
+
/*
* Prepare PT_LOAD type program header for kernel image region
*/
@@ -566,7 +570,7 @@ static void *notes_init(Elf64_Phdr *phdr, void *ptr, u64 notes_offset)
return ptr;
}
-static size_t get_elfcorehdr_size(int mem_chunk_cnt)
+static size_t get_elfcorehdr_size(int phdr_count)
{
size_t size;
@@ -581,10 +585,8 @@ static size_t get_elfcorehdr_size(int mem_chunk_cnt)
size += nt_vmcoreinfo_size();
/* nt_final */
size += sizeof(Elf64_Nhdr);
- /* PT_LOAD type program header for kernel text region */
- size += sizeof(Elf64_Phdr);
/* PT_LOADS */
- size += mem_chunk_cnt * sizeof(Elf64_Phdr);
+ size += phdr_count * sizeof(Elf64_Phdr);
return size;
}
@@ -595,8 +597,8 @@ static size_t get_elfcorehdr_size(int mem_chunk_cnt)
int elfcorehdr_alloc(unsigned long long *addr, unsigned long long *size)
{
Elf64_Phdr *phdr_notes, *phdr_loads, *phdr_text;
+ int mem_chunk_cnt, phdr_text_cnt;
size_t alloc_size;
- int mem_chunk_cnt;
void *ptr, *hdr;
u64 hdr_off;
@@ -615,12 +617,14 @@ int elfcorehdr_alloc(unsigned long long *addr, unsigned long long *size)
}
mem_chunk_cnt = get_mem_chunk_cnt();
+ phdr_text_cnt = os_info_has_vm() ? 1 : 0;
- alloc_size = get_elfcorehdr_size(mem_chunk_cnt);
+ alloc_size = get_elfcorehdr_size(mem_chunk_cnt + phdr_text_cnt);
hdr = kzalloc(alloc_size, GFP_KERNEL);
- /* Without elfcorehdr /proc/vmcore cannot be created. Thus creating
+ /*
+ * Without elfcorehdr /proc/vmcore cannot be created. Thus creating
* a dump with this crash kernel will fail. Panic now to allow other
* dump mechanisms to take over.
*/
@@ -628,21 +632,23 @@ int elfcorehdr_alloc(unsigned long long *addr, unsigned long long *size)
panic("s390 kdump allocating elfcorehdr failed");
/* Init elf header */
- ptr = ehdr_init(hdr, mem_chunk_cnt);
+ phdr_notes = ehdr_init(hdr, mem_chunk_cnt + phdr_text_cnt);
/* Init program headers */
- phdr_notes = ptr;
- ptr = PTR_ADD(ptr, sizeof(Elf64_Phdr));
- phdr_text = ptr;
- ptr = PTR_ADD(ptr, sizeof(Elf64_Phdr));
- phdr_loads = ptr;
- ptr = PTR_ADD(ptr, sizeof(Elf64_Phdr) * mem_chunk_cnt);
+ if (phdr_text_cnt) {
+ phdr_text = phdr_notes + 1;
+ phdr_loads = phdr_text + 1;
+ } else {
+ phdr_loads = phdr_notes + 1;
+ }
+ ptr = PTR_ADD(phdr_loads, sizeof(Elf64_Phdr) * mem_chunk_cnt);
/* Init notes */
hdr_off = PTR_DIFF(ptr, hdr);
ptr = notes_init(phdr_notes, ptr, ((unsigned long) hdr) + hdr_off);
/* Init kernel text program header */
- text_init(phdr_text);
+ if (phdr_text_cnt)
+ text_init(phdr_text);
/* Init loads */
- loads_init(phdr_loads);
+ loads_init(phdr_loads, phdr_text_cnt);
/* Finalize program headers */
hdr_off = PTR_DIFF(ptr, hdr);
*addr = (unsigned long long) hdr;
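Annotation (not part of the patch): the rework above makes the kernel-image PT_LOAD header optional, so the count passed around becomes mem_chunk_cnt + phdr_text_cnt, with PT_NOTE as the only other program header. A minimal userspace sketch of that layout arithmetic, using the standard Elf64 types from Linux's elf.h; variable names mirror the patch, the notes payload is omitted, and nothing here is s390-specific:

#include <elf.h>
#include <stdio.h>
#include <stdlib.h>

/* Layout modeled on the patch: Ehdr, one PT_NOTE Phdr, an optional
 * kernel-image PT_LOAD Phdr, then one PT_LOAD Phdr per memory chunk. */
static size_t corehdr_size(int phdr_count)
{
    return sizeof(Elf64_Ehdr) +
           sizeof(Elf64_Phdr) +             /* PT_NOTE */
           phdr_count * sizeof(Elf64_Phdr); /* PT_LOADs */
}

int main(void)
{
    int mem_chunk_cnt = 3;
    int phdr_text_cnt = 1; /* 0 when os_info lacks the old VM info */
    int phdr_count = mem_chunk_cnt + phdr_text_cnt;

    Elf64_Ehdr *ehdr = calloc(1, corehdr_size(phdr_count));
    if (!ehdr)
        return 1;
    Elf64_Phdr *phdr_notes = (Elf64_Phdr *)(ehdr + 1);
    Elf64_Phdr *phdr_loads = phdr_notes + 1 + phdr_text_cnt;

    ehdr->e_phnum = phdr_count + 1; /* PT_LOADs plus the PT_NOTE header */
    printf("e_phnum=%u, loads start at offset %zu\n", ehdr->e_phnum,
           (size_t)((char *)phdr_loads - (char *)ehdr));
    free(ehdr);
    return 0;
}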
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index 243ee86cb1b1..f2051644de94 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -105,9 +105,9 @@ vmlinux-objs-$(CONFIG_UNACCEPTED_MEMORY) += $(obj)/mem.o
vmlinux-objs-$(CONFIG_EFI) += $(obj)/efi.o
vmlinux-objs-$(CONFIG_EFI_MIXED) += $(obj)/efi_mixed.o
-vmlinux-objs-$(CONFIG_EFI_STUB) += $(objtree)/drivers/firmware/efi/libstub/lib.a
+vmlinux-libs-$(CONFIG_EFI_STUB) += $(objtree)/drivers/firmware/efi/libstub/lib.a
-$(obj)/vmlinux: $(vmlinux-objs-y) FORCE
+$(obj)/vmlinux: $(vmlinux-objs-y) $(vmlinux-libs-y) FORCE
$(call if_changed,ld)
OBJCOPYFLAGS_vmlinux.bin := -R .comment -S
diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c
index e64eaa8dda5a..9d6e8f13d13a 100644
--- a/arch/x86/events/intel/cstate.c
+++ b/arch/x86/events/intel/cstate.c
@@ -114,6 +114,7 @@
#include "../perf_event.h"
#include "../probe.h"
+MODULE_DESCRIPTION("Support for Intel cstate performance events");
MODULE_LICENSE("GPL");
#define DEFINE_CSTATE_FORMAT_ATTR(_var, _name, _format) \
diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c
index 419c517b8594..c68f5b39952b 100644
--- a/arch/x86/events/intel/uncore.c
+++ b/arch/x86/events/intel/uncore.c
@@ -34,6 +34,7 @@ static struct event_constraint uncore_constraint_fixed =
struct event_constraint uncore_constraint_empty =
EVENT_CONSTRAINT(0, 0, 0);
+MODULE_DESCRIPTION("Support for Intel uncore performance events");
MODULE_LICENSE("GPL");
int uncore_pcibus_to_dieid(struct pci_bus *bus)
diff --git a/arch/x86/events/rapl.c b/arch/x86/events/rapl.c
index 46e673585560..0c5e7a7c43ac 100644
--- a/arch/x86/events/rapl.c
+++ b/arch/x86/events/rapl.c
@@ -64,6 +64,7 @@
#include "perf_event.h"
#include "probe.h"
+MODULE_DESCRIPTION("Support Intel/AMD RAPL energy consumption counters");
MODULE_LICENSE("GPL");
/*
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index 1dc600fa3ba5..481096177500 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -401,7 +401,6 @@ extern int __init efi_memmap_alloc(unsigned int num_entries,
struct efi_memory_map_data *data);
extern void __efi_memmap_free(u64 phys, unsigned long size,
unsigned long flags);
-#define __efi_memmap_free __efi_memmap_free
extern int __init efi_memmap_install(struct efi_memory_map_data *data);
extern int __init efi_memmap_split_count(efi_memory_desc_t *md,
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index ece45b3f6f20..f8ca74e7678f 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -2154,6 +2154,7 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu);
int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 error_code,
void *insn, int insn_len);
+void kvm_mmu_print_sptes(struct kvm_vcpu *vcpu, gpa_t gpa, const char *msg);
void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva);
void kvm_mmu_invalidate_addr(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
u64 addr, unsigned long roots);
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index 0f9bab92a43d..3a7755c1a441 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -78,10 +78,10 @@ extern int __get_user_bad(void);
int __ret_gu; \
register __inttype(*(ptr)) __val_gu asm("%"_ASM_DX); \
__chk_user_ptr(ptr); \
- asm volatile("call __" #fn "_%c4" \
+ asm volatile("call __" #fn "_%c[size]" \
: "=a" (__ret_gu), "=r" (__val_gu), \
ASM_CALL_CONSTRAINT \
- : "0" (ptr), "i" (sizeof(*(ptr)))); \
+ : "0" (ptr), [size] "i" (sizeof(*(ptr)))); \
instrument_get_user(__val_gu); \
(x) = (__force __typeof__(*(ptr))) __val_gu; \
__builtin_expect(__ret_gu, 0); \
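Annotation: the hunk above replaces the positional %c4 reference with a named %c[size] operand, so the constraint list can be reordered without silently breaking the asm template (%c prints the bare constant, which the kernel uses to form the __get_user_1/2/4/8 symbol name). A tiny standalone illustration of named operands in GNU C inline asm, x86 AT&T syntax; the function is hypothetical:

#include <stdio.h>

/* Named operands: %[out] and %[size] refer to constraints by name, so
 * reordering the constraint list can't break the template the way
 * positional references like %c4 can. */
static int operand_width(const int *p)
{
    int ret;
    asm("mov %[size], %[out]"
        : [out] "=r" (ret)
        : [size] "i" (sizeof(*p)));
    return ret;
}

int main(void)
{
    int x = 0;
    printf("width: %d bytes\n", operand_width(&x));
    return 0;
}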
diff --git a/arch/x86/include/asm/vmxfeatures.h b/arch/x86/include/asm/vmxfeatures.h
index 266daf5b5b84..695f36664889 100644
--- a/arch/x86/include/asm/vmxfeatures.h
+++ b/arch/x86/include/asm/vmxfeatures.h
@@ -77,7 +77,7 @@
#define VMX_FEATURE_ENCLS_EXITING ( 2*32+ 15) /* "" VM-Exit on ENCLS (leaf dependent) */
#define VMX_FEATURE_RDSEED_EXITING ( 2*32+ 16) /* "" VM-Exit on RDSEED */
#define VMX_FEATURE_PAGE_MOD_LOGGING ( 2*32+ 17) /* "pml" Log dirty pages into buffer */
-#define VMX_FEATURE_EPT_VIOLATION_VE ( 2*32+ 18) /* "" Conditionally reflect EPT violations as #VE exceptions */
+#define VMX_FEATURE_EPT_VIOLATION_VE ( 2*32+ 18) /* Conditionally reflect EPT violations as #VE exceptions */
#define VMX_FEATURE_PT_CONCEAL_VMX ( 2*32+ 19) /* "" Suppress VMX indicators in Processor Trace */
#define VMX_FEATURE_XSAVES ( 2*32+ 20) /* "" Enable XSAVES and XRSTORS in guest */
#define VMX_FEATURE_MODE_BASED_EPT_EXEC ( 2*32+ 22) /* "ept_mode_based_exec" Enable separate EPT EXEC bits for supervisor vs. user */
diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c
index 3cf156f70859..027a8c7a2c9e 100644
--- a/arch/x86/kernel/amd_nb.c
+++ b/arch/x86/kernel/amd_nb.c
@@ -215,7 +215,14 @@ out:
int amd_smn_read(u16 node, u32 address, u32 *value)
{
- return __amd_smn_rw(node, address, value, false);
+ int err = __amd_smn_rw(node, address, value, false);
+
+ if (PCI_POSSIBLE_ERROR(*value)) {
+ err = -ENODEV;
+ *value = 0;
+ }
+
+ return err;
}
EXPORT_SYMBOL_GPL(amd_smn_read);
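Annotation: the change above treats an all-ones read as a device error, since reads from a surprise-removed or failed PCI device float high. A userspace model of the pattern (POSSIBLE_ERROR mirrors the kernel's PCI_POSSIBLE_ERROR shape; typeof is a GNU C extension, and -1 stands in for -ENODEV):

#include <stdint.h>
#include <stdio.h>

#define POSSIBLE_ERROR(val) ((val) == ((typeof(val))~0))

static int smn_read_model(uint32_t raw, uint32_t *value)
{
    *value = raw;
    if (POSSIBLE_ERROR(*value)) {
        *value = 0; /* don't leak the bogus all-ones pattern to callers */
        return -1;  /* stands in for -ENODEV */
    }
    return 0;
}

int main(void)
{
    uint32_t v;
    printf("ok read:  %d (v=0x%x)\n", smn_read_model(0x1234, &v), v);
    printf("bad read: %d (v=0x%x)\n", smn_read_model(0xffffffff, &v), v);
    return 0;
}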
diff --git a/arch/x86/kernel/cpu/aperfmperf.c b/arch/x86/kernel/cpu/aperfmperf.c
index f9a8c7b7943f..b3fa61d45352 100644
--- a/arch/x86/kernel/cpu/aperfmperf.c
+++ b/arch/x86/kernel/cpu/aperfmperf.c
@@ -345,6 +345,7 @@ static DECLARE_WORK(disable_freq_invariance_work,
disable_freq_invariance_workfn);
DEFINE_PER_CPU(unsigned long, arch_freq_scale) = SCHED_CAPACITY_SCALE;
+EXPORT_PER_CPU_SYMBOL_GPL(arch_freq_scale);
static void scale_freq_tick(u64 acnt, u64 mcnt)
{
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 2b170da84f97..d4e539d4e158 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1075,6 +1075,10 @@ void get_cpu_address_sizes(struct cpuinfo_x86 *c)
c->x86_virt_bits = (eax >> 8) & 0xff;
c->x86_phys_bits = eax & 0xff;
+
+ /* Provide a sane default if not enumerated: */
+ if (!c->x86_clflush_size)
+ c->x86_clflush_size = 32;
}
c->x86_cache_bits = c->x86_phys_bits;
@@ -1585,6 +1589,7 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
if (have_cpuid_p()) {
cpu_detect(c);
get_cpu_vendor(c);
+ intel_unlock_cpuid_leafs(c);
get_cpu_cap(c);
setup_force_cpu_cap(X86_FEATURE_CPUID);
get_cpu_address_sizes(c);
@@ -1744,7 +1749,7 @@ static void generic_identify(struct cpuinfo_x86 *c)
cpu_detect(c);
get_cpu_vendor(c);
-
+ intel_unlock_cpuid_leafs(c);
get_cpu_cap(c);
get_cpu_address_sizes(c);
diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h
index ea9e07d57c8d..1beccefbaff9 100644
--- a/arch/x86/kernel/cpu/cpu.h
+++ b/arch/x86/kernel/cpu/cpu.h
@@ -61,9 +61,11 @@ extern __ro_after_init enum tsx_ctrl_states tsx_ctrl_state;
extern void __init tsx_init(void);
void tsx_ap_init(void);
+void intel_unlock_cpuid_leafs(struct cpuinfo_x86 *c);
#else
static inline void tsx_init(void) { }
static inline void tsx_ap_init(void) { }
+static inline void intel_unlock_cpuid_leafs(struct cpuinfo_x86 *c) { }
#endif /* CONFIG_CPU_SUP_INTEL */
extern void init_spectral_chicken(struct cpuinfo_x86 *c);
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 3c3e7e5695ba..fdf3489d92a4 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -269,19 +269,26 @@ detect_keyid_bits:
c->x86_phys_bits -= keyid_bits;
}
+void intel_unlock_cpuid_leafs(struct cpuinfo_x86 *c)
+{
+ if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
+ return;
+
+ if (c->x86 < 6 || (c->x86 == 6 && c->x86_model < 0xd))
+ return;
+
+ /*
+ * The BIOS may have limited CPUID to leaf 2, which breaks feature
+ * enumeration. Unlock it and update the maximum leaf info.
+ */
+ if (msr_clear_bit(MSR_IA32_MISC_ENABLE, MSR_IA32_MISC_ENABLE_LIMIT_CPUID_BIT) > 0)
+ c->cpuid_level = cpuid_eax(0);
+}
+
static void early_init_intel(struct cpuinfo_x86 *c)
{
u64 misc_enable;
- /* Unmask CPUID levels if masked: */
- if (c->x86 > 6 || (c->x86 == 6 && c->x86_model >= 0xd)) {
- if (msr_clear_bit(MSR_IA32_MISC_ENABLE,
- MSR_IA32_MISC_ENABLE_LIMIT_CPUID_BIT) > 0) {
- c->cpuid_level = cpuid_eax(0);
- get_cpu_cap(c);
- }
- }
-
if ((c->x86 == 0xf && c->x86_model >= 0x03) ||
(c->x86 == 0x6 && c->x86_model >= 0x0e))
set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
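Annotation: the helper above keys off msr_clear_bit()'s tristate return, negative on error, 0 if the bit was already clear, positive if it was actually cleared, so cpuid_eax(0) is re-read only when the limit bit really was set. A small model of that pattern (bit 22 mirrors MSR_IA32_MISC_ENABLE_LIMIT_CPUID_BIT as I understand it; the helper name is made up):

#include <stdio.h>

/* Tristate like msr_clear_bit(): <0 error, 0 no change, >0 bit cleared. */
static int clear_bit_model(unsigned long long *msr, int bit)
{
    if (!msr)
        return -1;
    if (!(*msr & (1ULL << bit)))
        return 0;
    *msr &= ~(1ULL << bit);
    return 1;
}

int main(void)
{
    unsigned long long misc_enable = 1ULL << 22; /* LIMIT_CPUID-style bit */
    /* Only refresh the cached max-leaf value if we really unlocked it. */
    if (clear_bit_model(&misc_enable, 22) > 0)
        printf("bit was set: refresh cpuid_level\n");
    if (clear_bit_model(&misc_enable, 22) == 0)
        printf("already clear: nothing to refresh\n");
    return 0;
}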
diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c
index 2345e6836593..366f496ca3ce 100644
--- a/arch/x86/kernel/cpu/resctrl/monitor.c
+++ b/arch/x86/kernel/cpu/resctrl/monitor.c
@@ -519,7 +519,8 @@ void free_rmid(u32 closid, u32 rmid)
* allows architectures that ignore the closid parameter to avoid an
* unnecessary check.
*/
- if (idx == resctrl_arch_rmid_idx_encode(RESCTRL_RESERVED_CLOSID,
+ if (!resctrl_arch_mon_capable() ||
+ idx == resctrl_arch_rmid_idx_encode(RESCTRL_RESERVED_CLOSID,
RESCTRL_RESERVED_RMID))
return;
diff --git a/arch/x86/kernel/cpu/topology_amd.c b/arch/x86/kernel/cpu/topology_amd.c
index d419deed6a48..7d476fa697ca 100644
--- a/arch/x86/kernel/cpu/topology_amd.c
+++ b/arch/x86/kernel/cpu/topology_amd.c
@@ -84,9 +84,9 @@ static bool parse_8000_001e(struct topo_scan *tscan, bool has_topoext)
/*
* If leaf 0xb is available, then the domain shifts are set
- * already and nothing to do here.
+ * already and there is nothing to do here. Only valid for family >= 0x17.
*/
- if (!has_topoext) {
+ if (!has_topoext && tscan->c->x86 >= 0x17) {
/*
* Leaf 0x80000008 set the CORE domain shift already.
* Update the SMT domain, but do not propagate it.
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index b180d8e497c3..cc0f7f70b17b 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -295,8 +295,15 @@ void machine_kexec_cleanup(struct kimage *image)
void machine_kexec(struct kimage *image)
{
unsigned long page_list[PAGES_NR];
- void *control_page;
+ unsigned int host_mem_enc_active;
int save_ftrace_enabled;
+ void *control_page;
+
+ /*
+ * This must be done before load_segments(): if call depth tracking is
+ * used, GS must be valid to make any function calls.
+ */
+ host_mem_enc_active = cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT);
#ifdef CONFIG_KEXEC_JUMP
if (image->preserve_context)
@@ -358,7 +365,7 @@ void machine_kexec(struct kimage *image)
(unsigned long)page_list,
image->start,
image->preserve_context,
- cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT));
+ host_mem_enc_active);
#ifdef CONFIG_KEXEC_JUMP
if (image->preserve_context)
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index d64fb2b3eb69..fec95a770270 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -44,6 +44,7 @@ config KVM
select KVM_VFIO
select HAVE_KVM_PM_NOTIFIER if PM
select KVM_GENERIC_HARDWARE_ENABLING
+ select KVM_WERROR if WERROR
help
Support hosting fully virtualized guest machines using hardware
virtualization extensions. You will need a fairly recent
@@ -66,7 +67,7 @@ config KVM_WERROR
# FRAME_WARN, i.e. KVM_WERROR=y with KASAN=y requires special tuning.
# Building KVM with -Werror and KASAN is still doable via enabling
# the kernel-wide WERROR=y.
- depends on KVM && EXPERT && !KASAN
+ depends on KVM && ((EXPERT && !KASAN) || WERROR)
help
Add -Werror to the build flags for KVM.
@@ -97,15 +98,17 @@ config KVM_INTEL
config KVM_INTEL_PROVE_VE
bool "Check that guests do not receive #VE exceptions"
- default KVM_PROVE_MMU || DEBUG_KERNEL
- depends on KVM_INTEL
+ depends on KVM_INTEL && EXPERT
help
-
Checks that KVM's page table management code will not incorrectly
let guests receive a virtualization exception. Virtualization
exceptions will be trapped by the hypervisor rather than injected
in the guest.
+ Note: some CPUs appear to generate spurious EPT Violation #VEs
+ that trigger KVM's WARN, in particular with eptad=0 and/or nested
+ virtualization.
+
If unsure, say N.
config X86_SGX_KVM
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index ebf41023be38..acd7d48100a1 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -59,7 +59,17 @@
#define MAX_APIC_VECTOR 256
#define APIC_VECTORS_PER_REG 32
-static bool lapic_timer_advance_dynamic __read_mostly;
+/*
+ * Enable local APIC timer advancement (tscdeadline mode only) with adaptive
+ * tuning. When enabled, KVM programs the host timer event to fire early, i.e.
+ * before the deadline expires, to account for the delay between taking the
+ * VM-Exit (to inject the guest event) and the subsequent VM-Enter to resume
+ * the guest, i.e. so that the interrupt arrives in the guest with minimal
+ * latency relative to the deadline programmed by the guest.
+ */
+static bool lapic_timer_advance __read_mostly = true;
+module_param(lapic_timer_advance, bool, 0444);
+
#define LAPIC_TIMER_ADVANCE_ADJUST_MIN 100 /* clock cycles */
#define LAPIC_TIMER_ADVANCE_ADJUST_MAX 10000 /* clock cycles */
#define LAPIC_TIMER_ADVANCE_NS_INIT 1000
@@ -1854,16 +1864,14 @@ static void __kvm_wait_lapic_expire(struct kvm_vcpu *vcpu)
guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
trace_kvm_wait_lapic_expire(vcpu->vcpu_id, guest_tsc - tsc_deadline);
- if (lapic_timer_advance_dynamic) {
- adjust_lapic_timer_advance(vcpu, guest_tsc - tsc_deadline);
- /*
- * If the timer fired early, reread the TSC to account for the
- * overhead of the above adjustment to avoid waiting longer
- * than is necessary.
- */
- if (guest_tsc < tsc_deadline)
- guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
- }
+ adjust_lapic_timer_advance(vcpu, guest_tsc - tsc_deadline);
+
+ /*
+ * If the timer fired early, reread the TSC to account for the overhead
+ * of the above adjustment to avoid waiting longer than is necessary.
+ */
+ if (guest_tsc < tsc_deadline)
+ guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
if (guest_tsc < tsc_deadline)
__wait_lapic_expire(vcpu, tsc_deadline - guest_tsc);
@@ -2812,7 +2820,7 @@ static enum hrtimer_restart apic_timer_fn(struct hrtimer *data)
return HRTIMER_NORESTART;
}
-int kvm_create_lapic(struct kvm_vcpu *vcpu, int timer_advance_ns)
+int kvm_create_lapic(struct kvm_vcpu *vcpu)
{
struct kvm_lapic *apic;
@@ -2845,13 +2853,8 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu, int timer_advance_ns)
hrtimer_init(&apic->lapic_timer.timer, CLOCK_MONOTONIC,
HRTIMER_MODE_ABS_HARD);
apic->lapic_timer.timer.function = apic_timer_fn;
- if (timer_advance_ns == -1) {
+ if (lapic_timer_advance)
apic->lapic_timer.timer_advance_ns = LAPIC_TIMER_ADVANCE_NS_INIT;
- lapic_timer_advance_dynamic = true;
- } else {
- apic->lapic_timer.timer_advance_ns = timer_advance_ns;
- lapic_timer_advance_dynamic = false;
- }
/*
* Stuff the APIC ENABLE bit in lieu of temporarily incrementing
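Annotation: with the boolean lapic_timer_advance parameter above, the advance starts at LAPIC_TIMER_ADVANCE_NS_INIT and is then nudged toward the observed early/late delta, bounded by the ADJUST_MIN/MAX constants in the context. A userspace sketch of such a feedback loop; the step divisor and clamp here are illustrative, not the exact kernel tuning:

#include <stdio.h>

#define ADVANCE_NS_INIT 1000
#define ADVANCE_NS_MAX  5000

/* Illustrative adaptive tuning: positive delta means the interrupt arrived
 * late (advance too small), negative means it fired early (too large). */
static long tune_advance(long advance_ns, long delta_ns)
{
    advance_ns += delta_ns / 8;
    if (advance_ns < 0)
        advance_ns = 0;
    if (advance_ns > ADVANCE_NS_MAX)
        advance_ns = ADVANCE_NS_MAX;
    return advance_ns;
}

int main(void)
{
    long adv = ADVANCE_NS_INIT;
    long deltas[] = { 400, 250, -120, 60, -30 };
    for (unsigned i = 0; i < sizeof(deltas) / sizeof(deltas[0]); i++) {
        adv = tune_advance(adv, deltas[i]);
        printf("delta=%5ld -> advance=%ld ns\n", deltas[i], adv);
    }
    return 0;
}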
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index 0a0ea4b5dd8c..a69e706b9080 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -85,7 +85,7 @@ struct kvm_lapic {
struct dest_map;
-int kvm_create_lapic(struct kvm_vcpu *vcpu, int timer_advance_ns);
+int kvm_create_lapic(struct kvm_vcpu *vcpu);
void kvm_free_lapic(struct kvm_vcpu *vcpu);
int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 662f62dfb2aa..8d74bdef68c1 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -336,16 +336,19 @@ static int is_cpuid_PSE36(void)
#ifdef CONFIG_X86_64
static void __set_spte(u64 *sptep, u64 spte)
{
+ KVM_MMU_WARN_ON(is_ept_ve_possible(spte));
WRITE_ONCE(*sptep, spte);
}
static void __update_clear_spte_fast(u64 *sptep, u64 spte)
{
+ KVM_MMU_WARN_ON(is_ept_ve_possible(spte));
WRITE_ONCE(*sptep, spte);
}
static u64 __update_clear_spte_slow(u64 *sptep, u64 spte)
{
+ KVM_MMU_WARN_ON(is_ept_ve_possible(spte));
return xchg(sptep, spte);
}
@@ -4101,23 +4104,31 @@ static int get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes, int *root_level
return leaf;
}
-/* return true if reserved bit(s) are detected on a valid, non-MMIO SPTE. */
-static bool get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep)
+static int get_sptes_lockless(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes,
+ int *root_level)
{
- u64 sptes[PT64_ROOT_MAX_LEVEL + 1];
- struct rsvd_bits_validate *rsvd_check;
- int root, leaf, level;
- bool reserved = false;
+ int leaf;
walk_shadow_page_lockless_begin(vcpu);
if (is_tdp_mmu_active(vcpu))
- leaf = kvm_tdp_mmu_get_walk(vcpu, addr, sptes, &root);
+ leaf = kvm_tdp_mmu_get_walk(vcpu, addr, sptes, root_level);
else
- leaf = get_walk(vcpu, addr, sptes, &root);
+ leaf = get_walk(vcpu, addr, sptes, root_level);
walk_shadow_page_lockless_end(vcpu);
+ return leaf;
+}
+
+/* return true if reserved bit(s) are detected on a valid, non-MMIO SPTE. */
+static bool get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep)
+{
+ u64 sptes[PT64_ROOT_MAX_LEVEL + 1];
+ struct rsvd_bits_validate *rsvd_check;
+ int root, leaf, level;
+ bool reserved = false;
+ leaf = get_sptes_lockless(vcpu, addr, sptes, &root);
if (unlikely(leaf < 0)) {
*sptep = 0ull;
return reserved;
@@ -4400,9 +4411,6 @@ static int kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault,
return RET_PF_EMULATE;
}
- fault->mmu_seq = vcpu->kvm->mmu_invalidate_seq;
- smp_rmb();
-
/*
* Check for a relevant mmu_notifier invalidation event before getting
* the pfn from the primary MMU, and before acquiring mmu_lock.
@@ -5921,6 +5929,22 @@ emulate:
}
EXPORT_SYMBOL_GPL(kvm_mmu_page_fault);
+void kvm_mmu_print_sptes(struct kvm_vcpu *vcpu, gpa_t gpa, const char *msg)
+{
+ u64 sptes[PT64_ROOT_MAX_LEVEL + 1];
+ int root_level, leaf, level;
+
+ leaf = get_sptes_lockless(vcpu, gpa, sptes, &root_level);
+ if (unlikely(leaf < 0))
+ return;
+
+ pr_err("%s %llx", msg, gpa);
+ for (level = root_level; level >= leaf; level--)
+ pr_cont(", spte[%d] = 0x%llx", level, sptes[level]);
+ pr_cont("\n");
+}
+EXPORT_SYMBOL_GPL(kvm_mmu_print_sptes);
+
static void __kvm_mmu_invalidate_addr(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
u64 addr, hpa_t root_hpa)
{
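Annotation: kvm_mmu_print_sptes() above walks the paging structure locklessly and prints one SPTE per level on a single diagnostic line (pr_err plus pr_cont). A userspace model of just the formatting loop, with made-up SPTE values:

#include <stdint.h>
#include <stdio.h>

#define ROOT_MAX_LEVEL 5

/* Walk from the root level down to the lowest populated leaf and print
 * one SPTE per level, mirroring the pr_err/pr_cont loop above. */
static void print_sptes(const char *msg, uint64_t gpa,
                        const uint64_t *sptes, int root_level, int leaf)
{
    printf("%s %llx", msg, (unsigned long long)gpa);
    for (int level = root_level; level >= leaf; level--)
        printf(", spte[%d] = 0x%llx",
               level, (unsigned long long)sptes[level]);
    printf("\n");
}

int main(void)
{
    uint64_t sptes[ROOT_MAX_LEVEL + 1] = {
        [4] = 0x80000000000000f7ULL, /* illustrative values */
        [3] = 0x80000000000010f7ULL,
        [2] = 0x8000000000fff877ULL,
    };
    print_sptes("#VE", 0x1234000, sptes, 4, 2);
    return 0;
}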
diff --git a/arch/x86/kvm/mmu/spte.h b/arch/x86/kvm/mmu/spte.h
index 5dd5405fa07a..52fa004a1fbc 100644
--- a/arch/x86/kvm/mmu/spte.h
+++ b/arch/x86/kvm/mmu/spte.h
@@ -3,6 +3,8 @@
#ifndef KVM_X86_MMU_SPTE_H
#define KVM_X86_MMU_SPTE_H
+#include <asm/vmx.h>
+
#include "mmu.h"
#include "mmu_internal.h"
@@ -276,6 +278,13 @@ static inline bool is_shadow_present_pte(u64 pte)
return !!(pte & SPTE_MMU_PRESENT_MASK);
}
+static inline bool is_ept_ve_possible(u64 spte)
+{
+ return (shadow_present_mask & VMX_EPT_SUPPRESS_VE_BIT) &&
+ !(spte & VMX_EPT_SUPPRESS_VE_BIT) &&
+ (spte & VMX_EPT_RWX_MASK) != VMX_EPT_MISCONFIG_WX_VALUE;
+}
+
/*
* Returns true if A/D bits are supported in hardware and are enabled by KVM.
* When enabled, KVM uses A/D bits for all non-nested MMUs. Because L1 can
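Annotation: is_ept_ve_possible() above flags SPTEs that could deliver a #VE to the guest: the suppress-#VE bit is in use but clear in this SPTE, and the RWX bits do not form the write+exec-without-read pattern that always faults as an EPT misconfig. A self-contained demonstration; the bit values follow the VMX definitions as I understand them (suppress-#VE is bit 63, RWX mask 0x7, WX misconfig 0x6):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define SUPPRESS_VE_BIT  (1ULL << 63) /* VMX_EPT_SUPPRESS_VE_BIT */
#define EPT_RWX_MASK     0x7ULL       /* read | write | exec */
#define EPT_MISCONFIG_WX 0x6ULL       /* write+exec, no read: misconfig */

/* Mirrors the predicate: a #VE is only possible when the suppress bit is
 * clear and the RWX bits are not the always-misconfig WX pattern. */
static bool ve_possible(uint64_t spte, bool suppress_ve_in_use)
{
    return suppress_ve_in_use &&
           !(spte & SUPPRESS_VE_BIT) &&
           (spte & EPT_RWX_MASK) != EPT_MISCONFIG_WX;
}

int main(void)
{
    /* An all-zero "non-present" SPTE is exactly the bug the WARNs catch. */
    printf("zero spte, no suppress bit: %d\n", ve_possible(0, true));
    printf("zero spte + suppress bit:   %d\n",
           ve_possible(SUPPRESS_VE_BIT, true));
    printf("WX misconfig pattern:       %d\n",
           ve_possible(EPT_MISCONFIG_WX, true));
    return 0;
}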
diff --git a/arch/x86/kvm/mmu/tdp_iter.h b/arch/x86/kvm/mmu/tdp_iter.h
index fae559559a80..2880fd392e0c 100644
--- a/arch/x86/kvm/mmu/tdp_iter.h
+++ b/arch/x86/kvm/mmu/tdp_iter.h
@@ -21,11 +21,13 @@ static inline u64 kvm_tdp_mmu_read_spte(tdp_ptep_t sptep)
static inline u64 kvm_tdp_mmu_write_spte_atomic(tdp_ptep_t sptep, u64 new_spte)
{
+ KVM_MMU_WARN_ON(is_ept_ve_possible(new_spte));
return xchg(rcu_dereference(sptep), new_spte);
}
static inline void __kvm_tdp_mmu_write_spte(tdp_ptep_t sptep, u64 new_spte)
{
+ KVM_MMU_WARN_ON(is_ept_ve_possible(new_spte));
WRITE_ONCE(*rcu_dereference(sptep), new_spte);
}
diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index 1259dd63defc..36539c1b36cd 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -626,7 +626,7 @@ static inline int tdp_mmu_zap_spte_atomic(struct kvm *kvm,
* SPTEs.
*/
handle_changed_spte(kvm, iter->as_id, iter->gfn, iter->old_spte,
- 0, iter->level, true);
+ SHADOW_NONPRESENT_VALUE, iter->level, true);
return 0;
}
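Annotation: the one-line fix above makes the bookkeeping call report the value that is actually written for a zapped SPTE. With EPT #VE in use, "non-present" is not literal 0: the entry must keep the suppress-#VE bit set. A sketch of that invariant; treating SHADOW_NONPRESENT_VALUE as just the suppress bit is an assumption here (the real definition lives in spte.h):

#include <stdint.h>
#include <stdio.h>

#define SUPPRESS_VE_BIT         (1ULL << 63)
#define SHADOW_NONPRESENT_VALUE SUPPRESS_VE_BIT /* assumption for the sketch */

static void zap(uint64_t *sptep)
{
    /* Writing (or reporting) literal 0 would describe an SPTE that could
     * deliver a #VE to the guest; the non-present value keeps it masked. */
    *sptep = SHADOW_NONPRESENT_VALUE;
}

int main(void)
{
    uint64_t spte = 0x7ULL | SUPPRESS_VE_BIT; /* present RWX entry */
    zap(&spte);
    printf("zapped spte = %#llx (suppress bit kept: %d)\n",
           (unsigned long long)spte, !!(spte & SUPPRESS_VE_BIT));
    return 0;
}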
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index 0623cfaa7bb0..95095a233a45 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -779,6 +779,14 @@ static int __sev_launch_update_vmsa(struct kvm *kvm, struct kvm_vcpu *vcpu,
*/
fpstate_set_confidential(&vcpu->arch.guest_fpu);
vcpu->arch.guest_state_protected = true;
+
+ /*
+ * An SEV-ES guest mandates that LBR Virtualization always be ON. Enable it
+ * only after setting guest_state_protected, because KVM_SET_MSRS allows
+ * dynamic toggling of LBRV (for performance reasons) on write access to
+ * MSR_IA32_DEBUGCTLMSR when guest_state_protected is not set.
+ */
+ svm_enable_lbrv(vcpu);
return 0;
}
@@ -2406,6 +2414,12 @@ void __init sev_hardware_setup(void)
if (!boot_cpu_has(X86_FEATURE_SEV_ES))
goto out;
+ if (!lbrv) {
+ WARN_ONCE(!boot_cpu_has(X86_FEATURE_LBRV),
+ "LBRV must be present for SEV-ES support");
+ goto out;
+ }
+
/* Has the system been allocated ASIDs for SEV-ES? */
if (min_sev_asid == 1)
goto out;
@@ -3216,7 +3230,6 @@ static void sev_es_init_vmcb(struct vcpu_svm *svm)
struct kvm_vcpu *vcpu = &svm->vcpu;
svm->vmcb->control.nested_ctl |= SVM_NESTED_CTL_SEV_ES_ENABLE;
- svm->vmcb->control.virt_ext |= LBR_CTL_ENABLE_MASK;
/*
* An SEV-ES guest requires a VMSA area that is a separate from the
@@ -3268,10 +3281,6 @@ static void sev_es_init_vmcb(struct vcpu_svm *svm)
/* Clear intercepts on selected MSRs */
set_msr_interception(vcpu, svm->msrpm, MSR_EFER, 1, 1);
set_msr_interception(vcpu, svm->msrpm, MSR_IA32_CR_PAT, 1, 1);
- set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTBRANCHFROMIP, 1, 1);
- set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTBRANCHTOIP, 1, 1);
- set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTINTFROMIP, 1, 1);
- set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTINTTOIP, 1, 1);
}
void sev_init_vmcb(struct vcpu_svm *svm)
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index c8dc25886c16..c95d3900fe56 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -99,6 +99,7 @@ static const struct svm_direct_access_msrs {
{ .index = MSR_IA32_SPEC_CTRL, .always = false },
{ .index = MSR_IA32_PRED_CMD, .always = false },
{ .index = MSR_IA32_FLUSH_CMD, .always = false },
+ { .index = MSR_IA32_DEBUGCTLMSR, .always = false },
{ .index = MSR_IA32_LASTBRANCHFROMIP, .always = false },
{ .index = MSR_IA32_LASTBRANCHTOIP, .always = false },
{ .index = MSR_IA32_LASTINTFROMIP, .always = false },
@@ -215,7 +216,7 @@ int vgif = true;
module_param(vgif, int, 0444);
/* enable/disable LBR virtualization */
-static int lbrv = true;
+int lbrv = true;
module_param(lbrv, int, 0444);
static int tsc_scaling = true;
@@ -990,7 +991,7 @@ void svm_copy_lbrs(struct vmcb *to_vmcb, struct vmcb *from_vmcb)
vmcb_mark_dirty(to_vmcb, VMCB_LBR);
}
-static void svm_enable_lbrv(struct kvm_vcpu *vcpu)
+void svm_enable_lbrv(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
@@ -1000,6 +1001,9 @@ static void svm_enable_lbrv(struct kvm_vcpu *vcpu)
set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTINTFROMIP, 1, 1);
set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTINTTOIP, 1, 1);
+ if (sev_es_guest(vcpu->kvm))
+ set_msr_interception(vcpu, svm->msrpm, MSR_IA32_DEBUGCTLMSR, 1, 1);
+
/* Move the LBR msrs to the vmcb02 so that the guest can see them. */
if (is_guest_mode(vcpu))
svm_copy_lbrs(svm->vmcb, svm->vmcb01.ptr);
@@ -1009,6 +1013,8 @@ static void svm_disable_lbrv(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
+ KVM_BUG_ON(sev_es_guest(vcpu->kvm), vcpu->kvm);
+
svm->vmcb->control.virt_ext &= ~LBR_CTL_ENABLE_MASK;
set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTBRANCHFROMIP, 0, 0);
set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTBRANCHTOIP, 0, 0);
@@ -2822,10 +2828,24 @@ static int svm_get_msr_feature(struct kvm_msr_entry *msr)
return 0;
}
+static bool
+sev_es_prevent_msr_access(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
+{
+ return sev_es_guest(vcpu->kvm) &&
+ vcpu->arch.guest_state_protected &&
+ svm_msrpm_offset(msr_info->index) != MSR_INVALID &&
+ !msr_write_intercepted(vcpu, msr_info->index);
+}
+
static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
{
struct vcpu_svm *svm = to_svm(vcpu);
+ if (sev_es_prevent_msr_access(vcpu, msr_info)) {
+ msr_info->data = 0;
+ return vcpu->kvm->arch.has_protected_state ? -EINVAL : 0;
+ }
+
switch (msr_info->index) {
case MSR_AMD64_TSC_RATIO:
if (!msr_info->host_initiated &&
@@ -2976,6 +2996,10 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
u32 ecx = msr->index;
u64 data = msr->data;
+
+ if (sev_es_prevent_msr_access(vcpu, msr))
+ return vcpu->kvm->arch.has_protected_state ? -EINVAL : 0;
+
switch (ecx) {
case MSR_AMD64_TSC_RATIO:
@@ -3846,16 +3870,27 @@ static void svm_enable_nmi_window(struct kvm_vcpu *vcpu)
struct vcpu_svm *svm = to_svm(vcpu);
/*
- * KVM should never request an NMI window when vNMI is enabled, as KVM
- * allows at most one to-be-injected NMI and one pending NMI, i.e. if
- * two NMIs arrive simultaneously, KVM will inject one and set
- * V_NMI_PENDING for the other. WARN, but continue with the standard
- * single-step approach to try and salvage the pending NMI.
+ * If NMIs are outright masked, i.e. the vCPU is already handling an
+ * NMI, and KVM has not yet intercepted an IRET, then there is nothing
+ * more to do at this time as KVM has already enabled IRET intercepts.
+ * If KVM has already intercepted IRET, then single-step over the IRET,
+ * as NMIs aren't architecturally unmasked until the IRET completes.
+ *
+ * If vNMI is enabled, KVM should never request an NMI window if NMIs
+ * are masked, as KVM allows at most one to-be-injected NMI and one
+ * pending NMI. If two NMIs arrive simultaneously, KVM will inject one
+ * NMI and set V_NMI_PENDING for the other, but if and only if NMIs are
+ * unmasked. KVM _will_ request an NMI window in some situations, e.g.
+ * if the vCPU is in an STI shadow or if GIF=0, where KVM can't immediately
+ * inject the NMI. In those situations, KVM needs to single-step over
+ * the STI shadow or intercept STGI.
*/
- WARN_ON_ONCE(is_vnmi_enabled(svm));
+ if (svm_get_nmi_mask(vcpu)) {
+ WARN_ON_ONCE(is_vnmi_enabled(svm));
- if (svm_get_nmi_mask(vcpu) && !svm->awaiting_iret_completion)
- return; /* IRET will cause a vm exit */
+ if (!svm->awaiting_iret_completion)
+ return; /* IRET will cause a vm exit */
+ }
/*
* SEV-ES guests are responsible for signaling when a vCPU is ready to
@@ -5265,6 +5300,12 @@ static __init int svm_hardware_setup(void)
nrips = nrips && boot_cpu_has(X86_FEATURE_NRIPS);
+ if (lbrv) {
+ if (!boot_cpu_has(X86_FEATURE_LBRV))
+ lbrv = false;
+ else
+ pr_info("LBR virtualization supported\n");
+ }
/*
* Note, SEV setup consumes npt_enabled and enable_mmio_caching (which
* may be modified by svm_adjust_mmio_mask()), as well as nrips.
@@ -5318,14 +5359,6 @@ static __init int svm_hardware_setup(void)
svm_x86_ops.set_vnmi_pending = NULL;
}
-
- if (lbrv) {
- if (!boot_cpu_has(X86_FEATURE_LBRV))
- lbrv = false;
- else
- pr_info("LBR virtualization supported\n");
- }
-
if (!enable_pmu)
pr_info("PMU virtualization is disabled\n");
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index be57213cd295..0f1472690b59 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -30,7 +30,7 @@
#define IOPM_SIZE PAGE_SIZE * 3
#define MSRPM_SIZE PAGE_SIZE * 2
-#define MAX_DIRECT_ACCESS_MSRS 47
+#define MAX_DIRECT_ACCESS_MSRS 48
#define MSRPM_OFFSETS 32
extern u32 msrpm_offsets[MSRPM_OFFSETS] __read_mostly;
extern bool npt_enabled;
@@ -39,6 +39,7 @@ extern int vgif;
extern bool intercept_smi;
extern bool x2avic_enabled;
extern bool vnmi;
+extern int lbrv;
/*
* Clean bits in VMCB.
@@ -552,6 +553,7 @@ u32 *svm_vcpu_alloc_msrpm(void);
void svm_vcpu_init_msrpm(struct kvm_vcpu *vcpu, u32 *msrpm);
void svm_vcpu_free_msrpm(u32 *msrpm);
void svm_copy_lbrs(struct vmcb *to_vmcb, struct vmcb *from_vmcb);
+void svm_enable_lbrv(struct kvm_vcpu *vcpu);
void svm_update_lbrv(struct kvm_vcpu *vcpu);
int svm_set_efer(struct kvm_vcpu *vcpu, u64 efer);
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index d5b832126e34..643935a0f70a 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -2242,6 +2242,9 @@ static void prepare_vmcs02_constant_state(struct vcpu_vmx *vmx)
vmcs_write64(EPT_POINTER,
construct_eptp(&vmx->vcpu, 0, PT64_ROOT_4LEVEL));
+ if (vmx->ve_info)
+ vmcs_write64(VE_INFORMATION_ADDRESS, __pa(vmx->ve_info));
+
/* All VMFUNCs are currently emulated through L0 vmexits. */
if (cpu_has_vmx_vmfunc())
vmcs_write64(VM_FUNCTION_CONTROL, 0);
@@ -6230,6 +6233,8 @@ static bool nested_vmx_l0_wants_exit(struct kvm_vcpu *vcpu,
else if (is_alignment_check(intr_info) &&
!vmx_guest_inject_ac(vcpu))
return true;
+ else if (is_ve_fault(intr_info))
+ return true;
return false;
case EXIT_REASON_EXTERNAL_INTERRUPT:
return true;
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 6051fad5945f..b3c83c06f826 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -5218,8 +5218,15 @@ static int handle_exception_nmi(struct kvm_vcpu *vcpu)
if (is_invalid_opcode(intr_info))
return handle_ud(vcpu);
- if (KVM_BUG_ON(is_ve_fault(intr_info), vcpu->kvm))
- return -EIO;
+ if (WARN_ON_ONCE(is_ve_fault(intr_info))) {
+ struct vmx_ve_information *ve_info = vmx->ve_info;
+
+ WARN_ONCE(ve_info->exit_reason != EXIT_REASON_EPT_VIOLATION,
+ "Unexpected #VE on VM-Exit reason 0x%x", ve_info->exit_reason);
+ dump_vmcs(vcpu);
+ kvm_mmu_print_sptes(vcpu, ve_info->guest_physical_address, "#VE");
+ return 1;
+ }
error_code = 0;
if (intr_info & INTR_INFO_DELIVER_CODE_MASK)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 082ac6d95a3a..0763a0f72a06 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -164,15 +164,6 @@ module_param(kvmclock_periodic_sync, bool, 0444);
static u32 __read_mostly tsc_tolerance_ppm = 250;
module_param(tsc_tolerance_ppm, uint, 0644);
-/*
- * lapic timer advance (tscdeadline mode only) in nanoseconds. '-1' enables
- * adaptive tuning starting from default advancement of 1000ns. '0' disables
- * advancement entirely. Any other value is used as-is and disables adaptive
- * tuning, i.e. allows privileged userspace to set an exact advancement time.
- */
-static int __read_mostly lapic_timer_advance_ns = -1;
-module_param(lapic_timer_advance_ns, int, 0644);
-
static bool __read_mostly vector_hashing = true;
module_param(vector_hashing, bool, 0444);
@@ -10727,13 +10718,12 @@ static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu)
bitmap_zero(vcpu->arch.ioapic_handled_vectors, 256);
+ static_call_cond(kvm_x86_sync_pir_to_irr)(vcpu);
+
if (irqchip_split(vcpu->kvm))
kvm_scan_ioapic_routes(vcpu, vcpu->arch.ioapic_handled_vectors);
- else {
- static_call_cond(kvm_x86_sync_pir_to_irr)(vcpu);
- if (ioapic_in_kernel(vcpu->kvm))
- kvm_ioapic_scan_entry(vcpu, vcpu->arch.ioapic_handled_vectors);
- }
+ else if (ioapic_in_kernel(vcpu->kvm))
+ kvm_ioapic_scan_entry(vcpu, vcpu->arch.ioapic_handled_vectors);
if (is_guest_mode(vcpu))
vcpu->arch.load_eoi_exitmap_pending = true;
@@ -12169,7 +12159,7 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
if (r < 0)
return r;
- r = kvm_create_lapic(vcpu, lapic_timer_advance_ns);
+ r = kvm_create_lapic(vcpu);
if (r < 0)
goto fail_mmu_destroy;
diff --git a/arch/x86/lib/getuser.S b/arch/x86/lib/getuser.S
index 10d5ed8b5990..a1cb3a4e6742 100644
--- a/arch/x86/lib/getuser.S
+++ b/arch/x86/lib/getuser.S
@@ -44,7 +44,11 @@
or %rdx, %rax
.else
cmp $TASK_SIZE_MAX-\size+1, %eax
+.if \size != 8
jae .Lbad_get_user
+.else
+ jae .Lbad_get_user_8
+.endif
sbb %edx, %edx /* array_index_mask_nospec() */
and %edx, %eax
.endif
@@ -154,7 +158,7 @@ SYM_CODE_END(__get_user_handle_exception)
#ifdef CONFIG_X86_32
SYM_CODE_START_LOCAL(__get_user_8_handle_exception)
ASM_CLAC
-bad_get_user_8:
+.Lbad_get_user_8:
xor %edx,%edx
xor %ecx,%ecx
mov $(-EFAULT),%_ASM_AX
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index ce84ba86e69e..6ce10e3c6228 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -493,7 +493,7 @@ static void __init numa_clear_kernel_node_hotplug(void)
for_each_reserved_mem_region(mb_region) {
int nid = memblock_get_region_node(mb_region);
- if (nid != MAX_NUMNODES)
+ if (nid != NUMA_NO_NODE)
node_set(nid, reserved_nodemask);
}
@@ -614,9 +614,9 @@ static int __init numa_init(int (*init_func)(void))
nodes_clear(node_online_map);
memset(&numa_meminfo, 0, sizeof(numa_meminfo));
WARN_ON(memblock_set_node(0, ULLONG_MAX, &memblock.memory,
- MAX_NUMNODES));
+ NUMA_NO_NODE));
WARN_ON(memblock_set_node(0, ULLONG_MAX, &memblock.reserved,
- MAX_NUMNODES));
+ NUMA_NO_NODE));
/* In case that parsing SRAT failed. */
WARN_ON(memblock_clear_hotplug(0, ULLONG_MAX));
numa_reset_distance();
diff --git a/arch/x86/platform/efi/memmap.c b/arch/x86/platform/efi/memmap.c
index 4ef20b49eb5e..6ed1935504b9 100644
--- a/arch/x86/platform/efi/memmap.c
+++ b/arch/x86/platform/efi/memmap.c
@@ -92,12 +92,22 @@ int __init efi_memmap_alloc(unsigned int num_entries,
*/
int __init efi_memmap_install(struct efi_memory_map_data *data)
{
+ unsigned long size = efi.memmap.desc_size * efi.memmap.nr_map;
+ unsigned long flags = efi.memmap.flags;
+ u64 phys = efi.memmap.phys_map;
+ int ret;
+
efi_memmap_unmap();
if (efi_enabled(EFI_PARAVIRT))
return 0;
- return __efi_memmap_init(data);
+ ret = __efi_memmap_init(data);
+ if (ret)
+ return ret;
+
+ __efi_memmap_free(phys, size, flags);
+ return 0;
}
/**
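Annotation: the efi_memmap_install() rework above snapshots the old map's phys/size/flags, installs the new map first, and frees the old region only after __efi_memmap_init() succeeds, so a failed init cannot leave efi.memmap pointing at freed memory. A generic userspace sketch of the swap-then-free pattern; the struct and names are illustrative:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct map { char *buf; size_t len; };

/* Swap-then-free: keep the old buffer alive until the replacement is
 * fully initialized, mirroring the ordering in the hunk above. */
static int map_install(struct map *m, const char *data, size_t len)
{
    char *old = m->buf;
    char *nbuf = malloc(len);
    if (!nbuf)
        return -1; /* old map stays valid on failure */
    memcpy(nbuf, data, len);
    m->buf = nbuf;
    m->len = len;
    free(old); /* only freed once the new map is live */
    return 0;
}

int main(void)
{
    struct map m = { 0 };
    map_install(&m, "v1", 2);
    map_install(&m, "v2!", 3);
    printf("map len=%zu\n", m.len);
    free(m.buf);
    return 0;
}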