summaryrefslogtreecommitdiff
path: root/arch/arm64
diff options
context:
space:
mode:
Diffstat (limited to 'arch/arm64')
-rw-r--r--arch/arm64/Kconfig26
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3328-rock-pi-e.dts2
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi36
-rw-r--r--arch/arm64/boot/dts/rockchip/rk356x.dtsi7
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3588-base.dtsi6
-rw-r--r--arch/arm64/crypto/poly1305-armv8.pl6
-rw-r--r--arch/arm64/include/asm/arm_pmuv3.h53
-rw-r--r--arch/arm64/include/asm/cpufeature.h6
-rw-r--r--arch/arm64/include/asm/cputype.h2
-rw-r--r--arch/arm64/include/asm/el2_setup.h25
-rw-r--r--arch/arm64/include/asm/esr.h93
-rw-r--r--arch/arm64/include/asm/fpsimd.h2
-rw-r--r--arch/arm64/include/asm/hwcap.h1
-rw-r--r--arch/arm64/include/asm/hypervisor.h11
-rw-r--r--arch/arm64/include/asm/io.h4
-rw-r--r--arch/arm64/include/asm/kvm_arm.h1
-rw-r--r--arch/arm64/include/asm/kvm_asm.h6
-rw-r--r--arch/arm64/include/asm/kvm_host.h34
-rw-r--r--arch/arm64/include/asm/kvm_mmu.h6
-rw-r--r--arch/arm64/include/asm/kvm_nested.h40
-rw-r--r--arch/arm64/include/asm/kvm_pgtable.h42
-rw-r--r--arch/arm64/include/asm/mem_encrypt.h15
-rw-r--r--arch/arm64/include/asm/memory.h2
-rw-r--r--arch/arm64/include/asm/mman.h10
-rw-r--r--arch/arm64/include/asm/mmu.h2
-rw-r--r--arch/arm64/include/asm/mmu_context.h46
-rw-r--r--arch/arm64/include/asm/pgtable-hwdef.h20
-rw-r--r--arch/arm64/include/asm/pgtable-prot.h8
-rw-r--r--arch/arm64/include/asm/pgtable.h34
-rw-r--r--arch/arm64/include/asm/pkeys.h106
-rw-r--r--arch/arm64/include/asm/por.h33
-rw-r--r--arch/arm64/include/asm/processor.h6
-rw-r--r--arch/arm64/include/asm/ptdump.h43
-rw-r--r--arch/arm64/include/asm/set_memory.h1
-rw-r--r--arch/arm64/include/asm/sysreg.h26
-rw-r--r--arch/arm64/include/asm/thread_info.h2
-rw-r--r--arch/arm64/include/asm/traps.h1
-rw-r--r--arch/arm64/include/asm/vncr_mapping.h1
-rw-r--r--arch/arm64/include/uapi/asm/hwcap.h1
-rw-r--r--arch/arm64/include/uapi/asm/mman.h9
-rw-r--r--arch/arm64/include/uapi/asm/sigcontext.h13
-rw-r--r--arch/arm64/kernel/cpu_errata.c10
-rw-r--r--arch/arm64/kernel/cpufeature.c23
-rw-r--r--arch/arm64/kernel/cpuinfo.c3
-rw-r--r--arch/arm64/kernel/hibernate.c2
-rw-r--r--arch/arm64/kernel/pci.c191
-rw-r--r--arch/arm64/kernel/process.c97
-rw-r--r--arch/arm64/kernel/ptrace.c46
-rw-r--r--arch/arm64/kernel/signal.c62
-rw-r--r--arch/arm64/kernel/smp.c160
-rw-r--r--arch/arm64/kernel/traps.c26
-rw-r--r--arch/arm64/kvm/Kconfig17
-rw-r--r--arch/arm64/kvm/Makefile3
-rw-r--r--arch/arm64/kvm/arm.c15
-rw-r--r--arch/arm64/kvm/at.c1101
-rw-r--r--arch/arm64/kvm/emulate-nested.c81
-rw-r--r--arch/arm64/kvm/fpsimd.c5
-rw-r--r--arch/arm64/kvm/guest.c6
-rw-r--r--arch/arm64/kvm/hyp/include/hyp/fault.h5
-rw-r--r--arch/arm64/kvm/hyp/include/hyp/switch.h3
-rw-r--r--arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h27
-rw-r--r--arch/arm64/kvm/hyp/nvhe/ffa.c21
-rw-r--r--arch/arm64/kvm/hyp/nvhe/hyp-init.S2
-rw-r--r--arch/arm64/kvm/hyp/nvhe/hyp-main.c9
-rw-r--r--arch/arm64/kvm/hyp/nvhe/switch.c9
-rw-r--r--arch/arm64/kvm/hyp/nvhe/tlb.c6
-rw-r--r--arch/arm64/kvm/hyp/pgtable.c48
-rw-r--r--arch/arm64/kvm/hyp/vgic-v3-sr.c97
-rw-r--r--arch/arm64/kvm/hyp/vhe/switch.c3
-rw-r--r--arch/arm64/kvm/nested.c55
-rw-r--r--arch/arm64/kvm/pmu-emul.c14
-rw-r--r--arch/arm64/kvm/pmu.c87
-rw-r--r--arch/arm64/kvm/ptdump.c268
-rw-r--r--arch/arm64/kvm/sys_regs.c422
-rw-r--r--arch/arm64/kvm/sys_regs.h23
-rw-r--r--arch/arm64/kvm/vgic/vgic-v3.c12
-rw-r--r--arch/arm64/kvm/vgic/vgic.c14
-rw-r--r--arch/arm64/kvm/vgic/vgic.h6
-rw-r--r--arch/arm64/mm/Makefile2
-rw-r--r--arch/arm64/mm/contpte.c6
-rw-r--r--arch/arm64/mm/fault.c55
-rw-r--r--arch/arm64/mm/init.c12
-rw-r--r--arch/arm64/mm/ioremap.c23
-rw-r--r--arch/arm64/mm/mem_encrypt.c50
-rw-r--r--arch/arm64/mm/mmap.c11
-rw-r--r--arch/arm64/mm/mmu.c45
-rw-r--r--arch/arm64/mm/proc.S4
-rw-r--r--arch/arm64/mm/ptdump.c70
-rw-r--r--arch/arm64/mm/trans_pgd.c6
-rw-r--r--arch/arm64/tools/cpucaps1
-rw-r--r--arch/arm64/tools/sysreg30
91 files changed, 3299 insertions, 783 deletions
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index a2f8ff354ca6..ed15b876fa74 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -34,6 +34,7 @@ config ARM64
select ARCH_HAS_KERNEL_FPU_SUPPORT if KERNEL_MODE_NEON
select ARCH_HAS_KEEPINITRD
select ARCH_HAS_MEMBARRIER_SYNC_CORE
+ select ARCH_HAS_MEM_ENCRYPT
select ARCH_HAS_NMI_SAFE_THIS_CPU_OPS
select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
select ARCH_HAS_PTE_DEVMAP
@@ -423,7 +424,7 @@ config AMPERE_ERRATUM_AC03_CPU_38
default y
help
This option adds an alternative code sequence to work around Ampere
- erratum AC03_CPU_38 on AmpereOne.
+ errata AC03_CPU_38 and AC04_CPU_10 on AmpereOne.
The affected design reports FEAT_HAFDBS as not implemented in
ID_AA64MMFR1_EL1.HAFDBS, but (V)TCR_ELx.{HA,HD} are not RES0
@@ -2137,6 +2138,29 @@ config ARM64_EPAN
if the cpu does not implement the feature.
endmenu # "ARMv8.7 architectural features"
+menu "ARMv8.9 architectural features"
+
+config ARM64_POE
+ prompt "Permission Overlay Extension"
+ def_bool y
+ select ARCH_USES_HIGH_VMA_FLAGS
+ select ARCH_HAS_PKEYS
+ help
+ The Permission Overlay Extension is used to implement Memory
+ Protection Keys. Memory Protection Keys provides a mechanism for
+ enforcing page-based protections, but without requiring modification
+ of the page tables when an application changes protection domains.
+
+ For details, see Documentation/core-api/protection-keys.rst
+
+ If unsure, say y.
+
+config ARCH_PKEY_BITS
+ int
+ default 3
+
+endmenu # "ARMv8.9 architectural features"
+
config ARM64_SVE
bool "ARM Scalable Vector Extension support"
default y
diff --git a/arch/arm64/boot/dts/rockchip/rk3328-rock-pi-e.dts b/arch/arm64/boot/dts/rockchip/rk3328-rock-pi-e.dts
index a608a219543e..3e08e2fd0a78 100644
--- a/arch/arm64/boot/dts/rockchip/rk3328-rock-pi-e.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3328-rock-pi-e.dts
@@ -387,7 +387,7 @@
pmic {
pmic_int_l: pmic-int-l {
- rockchip,pins = <2 RK_PA6 RK_FUNC_GPIO &pcfg_pull_up>;
+ rockchip,pins = <0 RK_PA2 RK_FUNC_GPIO &pcfg_pull_up>;
};
};
diff --git a/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi
index ccbe3a7a1d2c..d24444cdf54a 100644
--- a/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi
@@ -154,6 +154,22 @@
};
};
+&gpio3 {
+ /*
+ * The Qseven BIOS_DISABLE signal on the RK3399-Q7 keeps the on-module
+ * eMMC and SPI flash powered-down initially (in fact it keeps the
+ * reset signal asserted). BIOS_DISABLE_OVERRIDE pin allows to override
+ * that signal so that eMMC and SPI can be used regardless of the state
+ * of the signal.
+ */
+ bios-disable-override-hog {
+ gpios = <RK_PD5 GPIO_ACTIVE_LOW>;
+ gpio-hog;
+ line-name = "bios_disable_override";
+ output-high;
+ };
+};
+
&gmac {
assigned-clocks = <&cru SCLK_RMII_SRC>;
assigned-clock-parents = <&clkin_gmac>;
@@ -409,6 +425,7 @@
&i2s0 {
pinctrl-0 = <&i2s0_2ch_bus>;
+ pinctrl-1 = <&i2s0_2ch_bus_bclk_off>;
rockchip,playback-channels = <2>;
rockchip,capture-channels = <2>;
status = "okay";
@@ -417,8 +434,8 @@
/*
* As Q7 does not specify neither a global nor a RX clock for I2S these
* signals are not used. Furthermore I2S0_LRCK_RX is used as GPIO.
- * Therefore we have to redefine the i2s0_2ch_bus definition to prevent
- * conflicts.
+ * Therefore we have to redefine the i2s0_2ch_bus and i2s0_2ch_bus_bclk_off
+ * definitions to prevent conflicts.
*/
&i2s0_2ch_bus {
rockchip,pins =
@@ -428,6 +445,14 @@
<3 RK_PD7 1 &pcfg_pull_none>;
};
+&i2s0_2ch_bus_bclk_off {
+ rockchip,pins =
+ <3 RK_PD0 RK_FUNC_GPIO &pcfg_pull_none>,
+ <3 RK_PD2 1 &pcfg_pull_none>,
+ <3 RK_PD3 1 &pcfg_pull_none>,
+ <3 RK_PD7 1 &pcfg_pull_none>;
+};
+
&io_domains {
status = "okay";
bt656-supply = <&vcc_1v8>;
@@ -449,9 +474,14 @@
&pinctrl {
pinctrl-names = "default";
- pinctrl-0 = <&q7_thermal_pin>;
+ pinctrl-0 = <&q7_thermal_pin &bios_disable_override_hog_pin>;
gpios {
+ bios_disable_override_hog_pin: bios-disable-override-hog-pin {
+ rockchip,pins =
+ <3 RK_PD5 RK_FUNC_GPIO &pcfg_pull_down>;
+ };
+
q7_thermal_pin: q7-thermal-pin {
rockchip,pins =
<0 RK_PA3 RK_FUNC_GPIO &pcfg_pull_up>;
diff --git a/arch/arm64/boot/dts/rockchip/rk356x.dtsi b/arch/arm64/boot/dts/rockchip/rk356x.dtsi
index 4690be841a1c..c72b3a608edd 100644
--- a/arch/arm64/boot/dts/rockchip/rk356x.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk356x.dtsi
@@ -1592,10 +1592,9 @@
<&cru SRST_TSADCPHY>;
rockchip,grf = <&grf>;
rockchip,hw-tshut-temp = <95000>;
- pinctrl-names = "init", "default", "sleep";
- pinctrl-0 = <&tsadc_pin>;
- pinctrl-1 = <&tsadc_shutorg>;
- pinctrl-2 = <&tsadc_pin>;
+ pinctrl-names = "default", "sleep";
+ pinctrl-0 = <&tsadc_shutorg>;
+ pinctrl-1 = <&tsadc_pin>;
#thermal-sensor-cells = <1>;
status = "disabled";
};
diff --git a/arch/arm64/boot/dts/rockchip/rk3588-base.dtsi b/arch/arm64/boot/dts/rockchip/rk3588-base.dtsi
index b6e4df180f0b..ee99166ebd46 100644
--- a/arch/arm64/boot/dts/rockchip/rk3588-base.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3588-base.dtsi
@@ -582,14 +582,14 @@
};
vo0_grf: syscon@fd5a6000 {
- compatible = "rockchip,rk3588-vo-grf", "syscon";
+ compatible = "rockchip,rk3588-vo0-grf", "syscon";
reg = <0x0 0xfd5a6000 0x0 0x2000>;
clocks = <&cru PCLK_VO0GRF>;
};
vo1_grf: syscon@fd5a8000 {
- compatible = "rockchip,rk3588-vo-grf", "syscon";
- reg = <0x0 0xfd5a8000 0x0 0x100>;
+ compatible = "rockchip,rk3588-vo1-grf", "syscon";
+ reg = <0x0 0xfd5a8000 0x0 0x4000>;
clocks = <&cru PCLK_VO1GRF>;
};
diff --git a/arch/arm64/crypto/poly1305-armv8.pl b/arch/arm64/crypto/poly1305-armv8.pl
index cbc980fb02e3..22c9069c0650 100644
--- a/arch/arm64/crypto/poly1305-armv8.pl
+++ b/arch/arm64/crypto/poly1305-armv8.pl
@@ -473,7 +473,8 @@ poly1305_blocks_neon:
subs $len,$len,#64
ldp x9,x13,[$inp,#48]
add $in2,$inp,#96
- adr $zeros,.Lzeros
+ adrp $zeros,.Lzeros
+ add $zeros,$zeros,#:lo12:.Lzeros
lsl $padbit,$padbit,#24
add x15,$ctx,#48
@@ -885,10 +886,13 @@ poly1305_blocks_neon:
ret
.size poly1305_blocks_neon,.-poly1305_blocks_neon
+.pushsection .rodata
.align 5
.Lzeros:
.long 0,0,0,0,0,0,0,0
.asciz "Poly1305 for ARMv8, CRYPTOGAMS by \@dot-asm"
+.popsection
+
.align 2
#if !defined(__KERNEL__) && !defined(_WIN64)
.comm OPENSSL_armcap_P,4,4
diff --git a/arch/arm64/include/asm/arm_pmuv3.h b/arch/arm64/include/asm/arm_pmuv3.h
index a4697a0b6835..468a049bc63b 100644
--- a/arch/arm64/include/asm/arm_pmuv3.h
+++ b/arch/arm64/include/asm/arm_pmuv3.h
@@ -33,6 +33,14 @@ static inline void write_pmevtypern(int n, unsigned long val)
PMEVN_SWITCH(n, WRITE_PMEVTYPERN);
}
+#define RETURN_READ_PMEVTYPERN(n) \
+ return read_sysreg(pmevtyper##n##_el0)
+static inline unsigned long read_pmevtypern(int n)
+{
+ PMEVN_SWITCH(n, RETURN_READ_PMEVTYPERN);
+ return 0;
+}
+
static inline unsigned long read_pmmir(void)
{
return read_cpuid(PMMIR_EL1);
@@ -46,6 +54,14 @@ static inline u32 read_pmuver(void)
ID_AA64DFR0_EL1_PMUVer_SHIFT);
}
+static inline bool pmuv3_has_icntr(void)
+{
+ u64 dfr1 = read_sysreg(id_aa64dfr1_el1);
+
+ return !!cpuid_feature_extract_unsigned_field(dfr1,
+ ID_AA64DFR1_EL1_PMICNTR_SHIFT);
+}
+
static inline void write_pmcr(u64 val)
{
write_sysreg(val, pmcr_el0);
@@ -71,22 +87,32 @@ static inline u64 read_pmccntr(void)
return read_sysreg(pmccntr_el0);
}
-static inline void write_pmcntenset(u32 val)
+static inline void write_pmicntr(u64 val)
+{
+ write_sysreg_s(val, SYS_PMICNTR_EL0);
+}
+
+static inline u64 read_pmicntr(void)
+{
+ return read_sysreg_s(SYS_PMICNTR_EL0);
+}
+
+static inline void write_pmcntenset(u64 val)
{
write_sysreg(val, pmcntenset_el0);
}
-static inline void write_pmcntenclr(u32 val)
+static inline void write_pmcntenclr(u64 val)
{
write_sysreg(val, pmcntenclr_el0);
}
-static inline void write_pmintenset(u32 val)
+static inline void write_pmintenset(u64 val)
{
write_sysreg(val, pmintenset_el1);
}
-static inline void write_pmintenclr(u32 val)
+static inline void write_pmintenclr(u64 val)
{
write_sysreg(val, pmintenclr_el1);
}
@@ -96,12 +122,27 @@ static inline void write_pmccfiltr(u64 val)
write_sysreg(val, pmccfiltr_el0);
}
-static inline void write_pmovsclr(u32 val)
+static inline u64 read_pmccfiltr(void)
+{
+ return read_sysreg(pmccfiltr_el0);
+}
+
+static inline void write_pmicfiltr(u64 val)
+{
+ write_sysreg_s(val, SYS_PMICFILTR_EL0);
+}
+
+static inline u64 read_pmicfiltr(void)
+{
+ return read_sysreg_s(SYS_PMICFILTR_EL0);
+}
+
+static inline void write_pmovsclr(u64 val)
{
write_sysreg(val, pmovsclr_el0);
}
-static inline u32 read_pmovsclr(void)
+static inline u64 read_pmovsclr(void)
{
return read_sysreg(pmovsclr_el0);
}
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index 558434267271..3d261cc123c1 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -832,6 +832,12 @@ static inline bool system_supports_lpa2(void)
return cpus_have_final_cap(ARM64_HAS_LPA2);
}
+static inline bool system_supports_poe(void)
+{
+ return IS_ENABLED(CONFIG_ARM64_POE) &&
+ alternative_has_cap_unlikely(ARM64_HAS_S1POE);
+}
+
int do_emulate_mrs(struct pt_regs *regs, u32 sys_reg, u32 rt);
bool try_emulate_mrs(struct pt_regs *regs, u32 isn);
diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h
index 5fd7caea4419..5a7dfeb8e8eb 100644
--- a/arch/arm64/include/asm/cputype.h
+++ b/arch/arm64/include/asm/cputype.h
@@ -143,6 +143,7 @@
#define APPLE_CPU_PART_M2_AVALANCHE_MAX 0x039
#define AMPERE_CPU_PART_AMPERE1 0xAC3
+#define AMPERE_CPU_PART_AMPERE1A 0xAC4
#define MICROSOFT_CPU_PART_AZURE_COBALT_100 0xD49 /* Based on r0p0 of ARM Neoverse N2 */
@@ -212,6 +213,7 @@
#define MIDR_APPLE_M2_BLIZZARD_MAX MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_BLIZZARD_MAX)
#define MIDR_APPLE_M2_AVALANCHE_MAX MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_AVALANCHE_MAX)
#define MIDR_AMPERE1 MIDR_CPU_MODEL(ARM_CPU_IMP_AMPERE, AMPERE_CPU_PART_AMPERE1)
+#define MIDR_AMPERE1A MIDR_CPU_MODEL(ARM_CPU_IMP_AMPERE, AMPERE_CPU_PART_AMPERE1A)
#define MIDR_MICROSOFT_AZURE_COBALT_100 MIDR_CPU_MODEL(ARM_CPU_IMP_MICROSOFT, MICROSOFT_CPU_PART_AZURE_COBALT_100)
/* Fujitsu Erratum 010001 affects A64FX 1.0 and 1.1, (v0r0 and v1r0) */
diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h
index fd87c4b8f984..e0ffdf13a18b 100644
--- a/arch/arm64/include/asm/el2_setup.h
+++ b/arch/arm64/include/asm/el2_setup.h
@@ -165,42 +165,53 @@
mrs x1, id_aa64dfr0_el1
ubfx x1, x1, #ID_AA64DFR0_EL1_PMSVer_SHIFT, #4
cmp x1, #3
- b.lt .Lset_debug_fgt_\@
+ b.lt .Lskip_spe_fgt_\@
/* Disable PMSNEVFR_EL1 read and write traps */
orr x0, x0, #(1 << 62)
-.Lset_debug_fgt_\@:
+.Lskip_spe_fgt_\@:
msr_s SYS_HDFGRTR_EL2, x0
msr_s SYS_HDFGWTR_EL2, x0
mov x0, xzr
mrs x1, id_aa64pfr1_el1
ubfx x1, x1, #ID_AA64PFR1_EL1_SME_SHIFT, #4
- cbz x1, .Lset_pie_fgt_\@
+ cbz x1, .Lskip_debug_fgt_\@
/* Disable nVHE traps of TPIDR2 and SMPRI */
orr x0, x0, #HFGxTR_EL2_nSMPRI_EL1_MASK
orr x0, x0, #HFGxTR_EL2_nTPIDR2_EL0_MASK
-.Lset_pie_fgt_\@:
+.Lskip_debug_fgt_\@:
mrs_s x1, SYS_ID_AA64MMFR3_EL1
ubfx x1, x1, #ID_AA64MMFR3_EL1_S1PIE_SHIFT, #4
- cbz x1, .Lset_fgt_\@
+ cbz x1, .Lskip_pie_fgt_\@
/* Disable trapping of PIR_EL1 / PIRE0_EL1 */
orr x0, x0, #HFGxTR_EL2_nPIR_EL1
orr x0, x0, #HFGxTR_EL2_nPIRE0_EL1
-.Lset_fgt_\@:
+.Lskip_pie_fgt_\@:
+ mrs_s x1, SYS_ID_AA64MMFR3_EL1
+ ubfx x1, x1, #ID_AA64MMFR3_EL1_S1POE_SHIFT, #4
+ cbz x1, .Lskip_poe_fgt_\@
+
+ /* Disable trapping of POR_EL0 */
+ orr x0, x0, #HFGxTR_EL2_nPOR_EL0
+
+.Lskip_poe_fgt_\@:
msr_s SYS_HFGRTR_EL2, x0
msr_s SYS_HFGWTR_EL2, x0
msr_s SYS_HFGITR_EL2, xzr
mrs x1, id_aa64pfr0_el1 // AMU traps UNDEF without AMU
ubfx x1, x1, #ID_AA64PFR0_EL1_AMU_SHIFT, #4
- cbz x1, .Lskip_fgt_\@
+ cbz x1, .Lskip_amu_fgt_\@
msr_s SYS_HAFGRTR_EL2, xzr
+
+.Lskip_amu_fgt_\@:
+
.Lskip_fgt_\@:
.endm
diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h
index 56c148890daf..da6d2c1c0b03 100644
--- a/arch/arm64/include/asm/esr.h
+++ b/arch/arm64/include/asm/esr.h
@@ -10,63 +10,63 @@
#include <asm/memory.h>
#include <asm/sysreg.h>
-#define ESR_ELx_EC_UNKNOWN (0x00)
-#define ESR_ELx_EC_WFx (0x01)
+#define ESR_ELx_EC_UNKNOWN UL(0x00)
+#define ESR_ELx_EC_WFx UL(0x01)
/* Unallocated EC: 0x02 */
-#define ESR_ELx_EC_CP15_32 (0x03)
-#define ESR_ELx_EC_CP15_64 (0x04)
-#define ESR_ELx_EC_CP14_MR (0x05)
-#define ESR_ELx_EC_CP14_LS (0x06)
-#define ESR_ELx_EC_FP_ASIMD (0x07)
-#define ESR_ELx_EC_CP10_ID (0x08) /* EL2 only */
-#define ESR_ELx_EC_PAC (0x09) /* EL2 and above */
+#define ESR_ELx_EC_CP15_32 UL(0x03)
+#define ESR_ELx_EC_CP15_64 UL(0x04)
+#define ESR_ELx_EC_CP14_MR UL(0x05)
+#define ESR_ELx_EC_CP14_LS UL(0x06)
+#define ESR_ELx_EC_FP_ASIMD UL(0x07)
+#define ESR_ELx_EC_CP10_ID UL(0x08) /* EL2 only */
+#define ESR_ELx_EC_PAC UL(0x09) /* EL2 and above */
/* Unallocated EC: 0x0A - 0x0B */
-#define ESR_ELx_EC_CP14_64 (0x0C)
-#define ESR_ELx_EC_BTI (0x0D)
-#define ESR_ELx_EC_ILL (0x0E)
+#define ESR_ELx_EC_CP14_64 UL(0x0C)
+#define ESR_ELx_EC_BTI UL(0x0D)
+#define ESR_ELx_EC_ILL UL(0x0E)
/* Unallocated EC: 0x0F - 0x10 */
-#define ESR_ELx_EC_SVC32 (0x11)
-#define ESR_ELx_EC_HVC32 (0x12) /* EL2 only */
-#define ESR_ELx_EC_SMC32 (0x13) /* EL2 and above */
+#define ESR_ELx_EC_SVC32 UL(0x11)
+#define ESR_ELx_EC_HVC32 UL(0x12) /* EL2 only */
+#define ESR_ELx_EC_SMC32 UL(0x13) /* EL2 and above */
/* Unallocated EC: 0x14 */
-#define ESR_ELx_EC_SVC64 (0x15)
-#define ESR_ELx_EC_HVC64 (0x16) /* EL2 and above */
-#define ESR_ELx_EC_SMC64 (0x17) /* EL2 and above */
-#define ESR_ELx_EC_SYS64 (0x18)
-#define ESR_ELx_EC_SVE (0x19)
-#define ESR_ELx_EC_ERET (0x1a) /* EL2 only */
+#define ESR_ELx_EC_SVC64 UL(0x15)
+#define ESR_ELx_EC_HVC64 UL(0x16) /* EL2 and above */
+#define ESR_ELx_EC_SMC64 UL(0x17) /* EL2 and above */
+#define ESR_ELx_EC_SYS64 UL(0x18)
+#define ESR_ELx_EC_SVE UL(0x19)
+#define ESR_ELx_EC_ERET UL(0x1a) /* EL2 only */
/* Unallocated EC: 0x1B */
-#define ESR_ELx_EC_FPAC (0x1C) /* EL1 and above */
-#define ESR_ELx_EC_SME (0x1D)
+#define ESR_ELx_EC_FPAC UL(0x1C) /* EL1 and above */
+#define ESR_ELx_EC_SME UL(0x1D)
/* Unallocated EC: 0x1E */
-#define ESR_ELx_EC_IMP_DEF (0x1f) /* EL3 only */
-#define ESR_ELx_EC_IABT_LOW (0x20)
-#define ESR_ELx_EC_IABT_CUR (0x21)
-#define ESR_ELx_EC_PC_ALIGN (0x22)
+#define ESR_ELx_EC_IMP_DEF UL(0x1f) /* EL3 only */
+#define ESR_ELx_EC_IABT_LOW UL(0x20)
+#define ESR_ELx_EC_IABT_CUR UL(0x21)
+#define ESR_ELx_EC_PC_ALIGN UL(0x22)
/* Unallocated EC: 0x23 */
-#define ESR_ELx_EC_DABT_LOW (0x24)
-#define ESR_ELx_EC_DABT_CUR (0x25)
-#define ESR_ELx_EC_SP_ALIGN (0x26)
-#define ESR_ELx_EC_MOPS (0x27)
-#define ESR_ELx_EC_FP_EXC32 (0x28)
+#define ESR_ELx_EC_DABT_LOW UL(0x24)
+#define ESR_ELx_EC_DABT_CUR UL(0x25)
+#define ESR_ELx_EC_SP_ALIGN UL(0x26)
+#define ESR_ELx_EC_MOPS UL(0x27)
+#define ESR_ELx_EC_FP_EXC32 UL(0x28)
/* Unallocated EC: 0x29 - 0x2B */
-#define ESR_ELx_EC_FP_EXC64 (0x2C)
+#define ESR_ELx_EC_FP_EXC64 UL(0x2C)
/* Unallocated EC: 0x2D - 0x2E */
-#define ESR_ELx_EC_SERROR (0x2F)
-#define ESR_ELx_EC_BREAKPT_LOW (0x30)
-#define ESR_ELx_EC_BREAKPT_CUR (0x31)
-#define ESR_ELx_EC_SOFTSTP_LOW (0x32)
-#define ESR_ELx_EC_SOFTSTP_CUR (0x33)
-#define ESR_ELx_EC_WATCHPT_LOW (0x34)
-#define ESR_ELx_EC_WATCHPT_CUR (0x35)
+#define ESR_ELx_EC_SERROR UL(0x2F)
+#define ESR_ELx_EC_BREAKPT_LOW UL(0x30)
+#define ESR_ELx_EC_BREAKPT_CUR UL(0x31)
+#define ESR_ELx_EC_SOFTSTP_LOW UL(0x32)
+#define ESR_ELx_EC_SOFTSTP_CUR UL(0x33)
+#define ESR_ELx_EC_WATCHPT_LOW UL(0x34)
+#define ESR_ELx_EC_WATCHPT_CUR UL(0x35)
/* Unallocated EC: 0x36 - 0x37 */
-#define ESR_ELx_EC_BKPT32 (0x38)
+#define ESR_ELx_EC_BKPT32 UL(0x38)
/* Unallocated EC: 0x39 */
-#define ESR_ELx_EC_VECTOR32 (0x3A) /* EL2 only */
+#define ESR_ELx_EC_VECTOR32 UL(0x3A) /* EL2 only */
/* Unallocated EC: 0x3B */
-#define ESR_ELx_EC_BRK64 (0x3C)
+#define ESR_ELx_EC_BRK64 UL(0x3C)
/* Unallocated EC: 0x3D - 0x3F */
-#define ESR_ELx_EC_MAX (0x3F)
+#define ESR_ELx_EC_MAX UL(0x3F)
#define ESR_ELx_EC_SHIFT (26)
#define ESR_ELx_EC_WIDTH (6)
@@ -122,8 +122,8 @@
#define ESR_ELx_FSC_SECC_TTW(n) (0x1c + (n))
/* Status codes for individual page table levels */
-#define ESR_ELx_FSC_ACCESS_L(n) (ESR_ELx_FSC_ACCESS + n)
-#define ESR_ELx_FSC_PERM_L(n) (ESR_ELx_FSC_PERM + n)
+#define ESR_ELx_FSC_ACCESS_L(n) (ESR_ELx_FSC_ACCESS + (n))
+#define ESR_ELx_FSC_PERM_L(n) (ESR_ELx_FSC_PERM + (n))
#define ESR_ELx_FSC_FAULT_nL (0x2C)
#define ESR_ELx_FSC_FAULT_L(n) (((n) < 0 ? ESR_ELx_FSC_FAULT_nL : \
@@ -161,6 +161,7 @@
/* ISS field definitions for exceptions taken in to Hyp */
#define ESR_ELx_FSC_ADDRSZ (0x00)
+#define ESR_ELx_FSC_ADDRSZ_L(n) (ESR_ELx_FSC_ADDRSZ + (n))
#define ESR_ELx_CV (UL(1) << 24)
#define ESR_ELx_COND_SHIFT (20)
#define ESR_ELx_COND_MASK (UL(0xF) << ESR_ELx_COND_SHIFT)
diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h
index bc69ac368d73..f2a84efc3618 100644
--- a/arch/arm64/include/asm/fpsimd.h
+++ b/arch/arm64/include/asm/fpsimd.h
@@ -155,8 +155,6 @@ extern void cpu_enable_sme2(const struct arm64_cpu_capabilities *__unused);
extern void cpu_enable_fa64(const struct arm64_cpu_capabilities *__unused);
extern void cpu_enable_fpmr(const struct arm64_cpu_capabilities *__unused);
-extern u64 read_smcr_features(void);
-
/*
* Helpers to translate bit indices in sve_vq_map to VQ values (and
* vice versa). This allows find_next_bit() to be used to find the
diff --git a/arch/arm64/include/asm/hwcap.h b/arch/arm64/include/asm/hwcap.h
index 4edd3b61df11..a775adddecf2 100644
--- a/arch/arm64/include/asm/hwcap.h
+++ b/arch/arm64/include/asm/hwcap.h
@@ -157,6 +157,7 @@
#define KERNEL_HWCAP_SME_SF8FMA __khwcap2_feature(SME_SF8FMA)
#define KERNEL_HWCAP_SME_SF8DP4 __khwcap2_feature(SME_SF8DP4)
#define KERNEL_HWCAP_SME_SF8DP2 __khwcap2_feature(SME_SF8DP2)
+#define KERNEL_HWCAP_POE __khwcap2_feature(POE)
/*
* This yields a mask that user programs can use to figure out what
diff --git a/arch/arm64/include/asm/hypervisor.h b/arch/arm64/include/asm/hypervisor.h
index 0ae427f352c8..409e239834d1 100644
--- a/arch/arm64/include/asm/hypervisor.h
+++ b/arch/arm64/include/asm/hypervisor.h
@@ -7,4 +7,15 @@
void kvm_init_hyp_services(void);
bool kvm_arm_hyp_service_available(u32 func_id);
+#ifdef CONFIG_ARM_PKVM_GUEST
+void pkvm_init_hyp_services(void);
+#else
+static inline void pkvm_init_hyp_services(void) { };
+#endif
+
+static inline void kvm_arch_init_hyp_services(void)
+{
+ pkvm_init_hyp_services();
+};
+
#endif
diff --git a/arch/arm64/include/asm/io.h b/arch/arm64/include/asm/io.h
index 41fd90895dfc..1ada23a6ec19 100644
--- a/arch/arm64/include/asm/io.h
+++ b/arch/arm64/include/asm/io.h
@@ -271,6 +271,10 @@ __iowrite64_copy(void __iomem *to, const void *from, size_t count)
* I/O memory mapping functions.
*/
+typedef int (*ioremap_prot_hook_t)(phys_addr_t phys_addr, size_t size,
+ pgprot_t *prot);
+int arm64_ioremap_prot_hook_register(const ioremap_prot_hook_t hook);
+
#define ioremap_prot ioremap_prot
#define _PAGE_IOREMAP PROT_DEVICE_nGnRE
diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index d81cc746e0eb..109a85ee6910 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -107,6 +107,7 @@
/* TCR_EL2 Registers bits */
#define TCR_EL2_DS (1UL << 32)
#define TCR_EL2_RES1 ((1U << 31) | (1 << 23))
+#define TCR_EL2_HPD (1 << 24)
#define TCR_EL2_TBI (1 << 20)
#define TCR_EL2_PS_SHIFT 16
#define TCR_EL2_PS_MASK (7 << TCR_EL2_PS_SHIFT)
diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index 2181a11b9d92..b36a3b6cc011 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -10,6 +10,7 @@
#include <asm/hyp_image.h>
#include <asm/insn.h>
#include <asm/virt.h>
+#include <asm/sysreg.h>
#define ARM_EXIT_WITH_SERROR_BIT 31
#define ARM_EXCEPTION_CODE(x) ((x) & ~(1U << ARM_EXIT_WITH_SERROR_BIT))
@@ -235,6 +236,9 @@ extern void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu);
extern int __kvm_tlbi_s1e2(struct kvm_s2_mmu *mmu, u64 va, u64 sys_encoding);
extern void __kvm_timer_set_cntvoff(u64 cntvoff);
+extern void __kvm_at_s1e01(struct kvm_vcpu *vcpu, u32 op, u64 vaddr);
+extern void __kvm_at_s1e2(struct kvm_vcpu *vcpu, u32 op, u64 vaddr);
+extern void __kvm_at_s12(struct kvm_vcpu *vcpu, u32 op, u64 vaddr);
extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu);
@@ -259,7 +263,7 @@ extern u64 __kvm_get_mdcr_el2(void);
asm volatile( \
" mrs %1, spsr_el2\n" \
" mrs %2, elr_el2\n" \
- "1: at "at_op", %3\n" \
+ "1: " __msr_s(at_op, "%3") "\n" \
" isb\n" \
" b 9f\n" \
"2: msr spsr_el2, %1\n" \
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index a33f5996ca9f..329619c6fa96 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -446,6 +446,12 @@ enum vcpu_sysreg {
GCR_EL1, /* Tag Control Register */
TFSRE0_EL1, /* Tag Fault Status Register (EL0) */
+ POR_EL0, /* Permission Overlay Register 0 (EL0) */
+
+ /* FP/SIMD/SVE */
+ SVCR,
+ FPMR,
+
/* 32bit specific registers. */
DACR32_EL2, /* Domain Access Control Register */
IFSR32_EL2, /* Instruction Fault Status Register */
@@ -517,6 +523,8 @@ enum vcpu_sysreg {
VNCR(PIR_EL1), /* Permission Indirection Register 1 (EL1) */
VNCR(PIRE0_EL1), /* Permission Indirection Register 0 (EL1) */
+ VNCR(POR_EL1), /* Permission Overlay Register 1 (EL1) */
+
VNCR(HFGRTR_EL2),
VNCR(HFGWTR_EL2),
VNCR(HFGITR_EL2),
@@ -530,6 +538,8 @@ enum vcpu_sysreg {
VNCR(CNTP_CVAL_EL0),
VNCR(CNTP_CTL_EL0),
+ VNCR(ICH_HCR_EL2),
+
NR_SYS_REGS /* Nothing after this line! */
};
@@ -595,6 +605,16 @@ struct kvm_host_data {
struct cpu_sve_state *sve_state;
};
+ union {
+ /* HYP VA pointer to the host storage for FPMR */
+ u64 *fpmr_ptr;
+ /*
+ * Used by pKVM only, as it needs to provide storage
+ * for the host
+ */
+ u64 fpmr;
+ };
+
/* Ownership of the FP regs */
enum {
FP_STATE_FREE,
@@ -664,8 +684,6 @@ struct kvm_vcpu_arch {
void *sve_state;
enum fp_type fp_type;
unsigned int sve_max_vl;
- u64 svcr;
- u64 fpmr;
/* Stage 2 paging state used by the hardware on next switch */
struct kvm_s2_mmu *hw_mmu;
@@ -1330,12 +1348,12 @@ void kvm_arch_vcpu_load_debug_state_flags(struct kvm_vcpu *vcpu);
void kvm_arch_vcpu_put_debug_state_flags(struct kvm_vcpu *vcpu);
#ifdef CONFIG_KVM
-void kvm_set_pmu_events(u32 set, struct perf_event_attr *attr);
-void kvm_clr_pmu_events(u32 clr);
+void kvm_set_pmu_events(u64 set, struct perf_event_attr *attr);
+void kvm_clr_pmu_events(u64 clr);
bool kvm_set_pmuserenr(u64 val);
#else
-static inline void kvm_set_pmu_events(u32 set, struct perf_event_attr *attr) {}
-static inline void kvm_clr_pmu_events(u32 clr) {}
+static inline void kvm_set_pmu_events(u64 set, struct perf_event_attr *attr) {}
+static inline void kvm_clr_pmu_events(u64 clr) {}
static inline bool kvm_set_pmuserenr(u64 val)
{
return false;
@@ -1473,4 +1491,8 @@ void kvm_set_vm_id_reg(struct kvm *kvm, u32 reg, u64 val);
(pa + pi + pa3) == 1; \
})
+#define kvm_has_fpmr(k) \
+ (system_supports_fpmr() && \
+ kvm_has_feat((k), ID_AA64PFR2_EL1, FPMR, IMP))
+
#endif /* __ARM64_KVM_HOST_H__ */
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index 216ca424bb16..cd4087fbda9a 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -352,5 +352,11 @@ static inline bool kvm_is_nested_s2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu)
return &kvm->arch.mmu != mmu;
}
+#ifdef CONFIG_PTDUMP_STAGE2_DEBUGFS
+void kvm_s2_ptdump_create_debugfs(struct kvm *kvm);
+#else
+static inline void kvm_s2_ptdump_create_debugfs(struct kvm *kvm) {}
+#endif /* CONFIG_PTDUMP_STAGE2_DEBUGFS */
+
#endif /* __ASSEMBLY__ */
#endif /* __ARM64_KVM_MMU_H__ */
diff --git a/arch/arm64/include/asm/kvm_nested.h b/arch/arm64/include/asm/kvm_nested.h
index 5b06c31035a2..e8bc6d67aba2 100644
--- a/arch/arm64/include/asm/kvm_nested.h
+++ b/arch/arm64/include/asm/kvm_nested.h
@@ -85,7 +85,7 @@ struct kvm_s2_trans {
bool readable;
int level;
u32 esr;
- u64 upper_attr;
+ u64 desc;
};
static inline phys_addr_t kvm_s2_trans_output(struct kvm_s2_trans *trans)
@@ -115,7 +115,7 @@ static inline bool kvm_s2_trans_writable(struct kvm_s2_trans *trans)
static inline bool kvm_s2_trans_executable(struct kvm_s2_trans *trans)
{
- return !(trans->upper_attr & BIT(54));
+ return !(trans->desc & BIT(54));
}
extern int kvm_walk_nested_s2(struct kvm_vcpu *vcpu, phys_addr_t gipa,
@@ -205,4 +205,40 @@ static inline u64 kvm_encode_nested_level(struct kvm_s2_trans *trans)
return FIELD_PREP(KVM_NV_GUEST_MAP_SZ, trans->level);
}
+/* Adjust alignment for the contiguous bit as per StageOA() */
+#define contiguous_bit_shift(d, wi, l) \
+ ({ \
+ u8 shift = 0; \
+ \
+ if ((d) & PTE_CONT) { \
+ switch (BIT((wi)->pgshift)) { \
+ case SZ_4K: \
+ shift = 4; \
+ break; \
+ case SZ_16K: \
+ shift = (l) == 2 ? 5 : 7; \
+ break; \
+ case SZ_64K: \
+ shift = 5; \
+ break; \
+ } \
+ } \
+ \
+ shift; \
+ })
+
+static inline unsigned int ps_to_output_size(unsigned int ps)
+{
+ switch (ps) {
+ case 0: return 32;
+ case 1: return 36;
+ case 2: return 40;
+ case 3: return 42;
+ case 4: return 44;
+ case 5:
+ default:
+ return 48;
+ }
+}
+
#endif /* __ARM64_KVM_NESTED_H */
diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h
index 19278dfe7978..03f4c3d7839c 100644
--- a/arch/arm64/include/asm/kvm_pgtable.h
+++ b/arch/arm64/include/asm/kvm_pgtable.h
@@ -59,6 +59,48 @@ typedef u64 kvm_pte_t;
#define KVM_PHYS_INVALID (-1ULL)
+#define KVM_PTE_LEAF_ATTR_LO GENMASK(11, 2)
+
+#define KVM_PTE_LEAF_ATTR_LO_S1_ATTRIDX GENMASK(4, 2)
+#define KVM_PTE_LEAF_ATTR_LO_S1_AP GENMASK(7, 6)
+#define KVM_PTE_LEAF_ATTR_LO_S1_AP_RO \
+ ({ cpus_have_final_cap(ARM64_KVM_HVHE) ? 2 : 3; })
+#define KVM_PTE_LEAF_ATTR_LO_S1_AP_RW \
+ ({ cpus_have_final_cap(ARM64_KVM_HVHE) ? 0 : 1; })
+#define KVM_PTE_LEAF_ATTR_LO_S1_SH GENMASK(9, 8)
+#define KVM_PTE_LEAF_ATTR_LO_S1_SH_IS 3
+#define KVM_PTE_LEAF_ATTR_LO_S1_AF BIT(10)
+
+#define KVM_PTE_LEAF_ATTR_LO_S2_MEMATTR GENMASK(5, 2)
+#define KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R BIT(6)
+#define KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W BIT(7)
+#define KVM_PTE_LEAF_ATTR_LO_S2_SH GENMASK(9, 8)
+#define KVM_PTE_LEAF_ATTR_LO_S2_SH_IS 3
+#define KVM_PTE_LEAF_ATTR_LO_S2_AF BIT(10)
+
+#define KVM_PTE_LEAF_ATTR_HI GENMASK(63, 50)
+
+#define KVM_PTE_LEAF_ATTR_HI_SW GENMASK(58, 55)
+
+#define KVM_PTE_LEAF_ATTR_HI_S1_XN BIT(54)
+
+#define KVM_PTE_LEAF_ATTR_HI_S2_XN BIT(54)
+
+#define KVM_PTE_LEAF_ATTR_HI_S1_GP BIT(50)
+
+#define KVM_PTE_LEAF_ATTR_S2_PERMS (KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R | \
+ KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W | \
+ KVM_PTE_LEAF_ATTR_HI_S2_XN)
+
+#define KVM_INVALID_PTE_OWNER_MASK GENMASK(9, 2)
+#define KVM_MAX_OWNER_ID 1
+
+/*
+ * Used to indicate a pte for which a 'break-before-make' sequence is in
+ * progress.
+ */
+#define KVM_INVALID_PTE_LOCKED BIT(10)
+
static inline bool kvm_pte_valid(kvm_pte_t pte)
{
return pte & KVM_PTE_VALID;
diff --git a/arch/arm64/include/asm/mem_encrypt.h b/arch/arm64/include/asm/mem_encrypt.h
new file mode 100644
index 000000000000..b0c9a86b13a4
--- /dev/null
+++ b/arch/arm64/include/asm/mem_encrypt.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef __ASM_MEM_ENCRYPT_H
+#define __ASM_MEM_ENCRYPT_H
+
+struct arm64_mem_crypt_ops {
+ int (*encrypt)(unsigned long addr, int numpages);
+ int (*decrypt)(unsigned long addr, int numpages);
+};
+
+int arm64_mem_crypt_ops_register(const struct arm64_mem_crypt_ops *ops);
+
+int set_memory_encrypted(unsigned long addr, int numpages);
+int set_memory_decrypted(unsigned long addr, int numpages);
+
+#endif /* __ASM_MEM_ENCRYPT_H */
diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
index 54fb014eba05..0480c61dbb4f 100644
--- a/arch/arm64/include/asm/memory.h
+++ b/arch/arm64/include/asm/memory.h
@@ -110,6 +110,8 @@
#define PAGE_END (_PAGE_END(VA_BITS_MIN))
#endif /* CONFIG_KASAN */
+#define PHYSMEM_END __pa(PAGE_END - 1)
+
#define MIN_THREAD_SHIFT (14 + KASAN_THREAD_SHIFT)
/*
diff --git a/arch/arm64/include/asm/mman.h b/arch/arm64/include/asm/mman.h
index 5966ee4a6154..52791715f6e6 100644
--- a/arch/arm64/include/asm/mman.h
+++ b/arch/arm64/include/asm/mman.h
@@ -7,7 +7,7 @@
#include <uapi/asm/mman.h>
static inline unsigned long arch_calc_vm_prot_bits(unsigned long prot,
- unsigned long pkey __always_unused)
+ unsigned long pkey)
{
unsigned long ret = 0;
@@ -17,6 +17,14 @@ static inline unsigned long arch_calc_vm_prot_bits(unsigned long prot,
if (system_supports_mte() && (prot & PROT_MTE))
ret |= VM_MTE;
+#ifdef CONFIG_ARCH_HAS_PKEYS
+ if (system_supports_poe()) {
+ ret |= pkey & BIT(0) ? VM_PKEY_BIT0 : 0;
+ ret |= pkey & BIT(1) ? VM_PKEY_BIT1 : 0;
+ ret |= pkey & BIT(2) ? VM_PKEY_BIT2 : 0;
+ }
+#endif
+
return ret;
}
#define arch_calc_vm_prot_bits(prot, pkey) arch_calc_vm_prot_bits(prot, pkey)
diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h
index 65977c7783c5..2ec96d91acc6 100644
--- a/arch/arm64/include/asm/mmu.h
+++ b/arch/arm64/include/asm/mmu.h
@@ -25,6 +25,7 @@ typedef struct {
refcount_t pinned;
void *vdso;
unsigned long flags;
+ u8 pkey_allocation_map;
} mm_context_t;
/*
@@ -63,7 +64,6 @@ static inline bool arm64_kernel_unmapped_at_el0(void)
extern void arm64_memblock_init(void);
extern void paging_init(void);
extern void bootmem_init(void);
-extern void __iomem *early_io_map(phys_addr_t phys, unsigned long virt);
extern void create_mapping_noalloc(phys_addr_t phys, unsigned long virt,
phys_addr_t size, pgprot_t prot);
extern void create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys,
diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h
index bd19f4c758b7..7c09d47e09cb 100644
--- a/arch/arm64/include/asm/mmu_context.h
+++ b/arch/arm64/include/asm/mmu_context.h
@@ -15,12 +15,12 @@
#include <linux/sched/hotplug.h>
#include <linux/mm_types.h>
#include <linux/pgtable.h>
+#include <linux/pkeys.h>
#include <asm/cacheflush.h>
#include <asm/cpufeature.h>
#include <asm/daifflags.h>
#include <asm/proc-fns.h>
-#include <asm-generic/mm_hooks.h>
#include <asm/cputype.h>
#include <asm/sysreg.h>
#include <asm/tlbflush.h>
@@ -175,9 +175,36 @@ init_new_context(struct task_struct *tsk, struct mm_struct *mm)
{
atomic64_set(&mm->context.id, 0);
refcount_set(&mm->context.pinned, 0);
+
+ /* pkey 0 is the default, so always reserve it. */
+ mm->context.pkey_allocation_map = BIT(0);
+
+ return 0;
+}
+
+static inline void arch_dup_pkeys(struct mm_struct *oldmm,
+ struct mm_struct *mm)
+{
+ /* Duplicate the oldmm pkey state in mm: */
+ mm->context.pkey_allocation_map = oldmm->context.pkey_allocation_map;
+}
+
+static inline int arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
+{
+ arch_dup_pkeys(oldmm, mm);
+
return 0;
}
+static inline void arch_exit_mmap(struct mm_struct *mm)
+{
+}
+
+static inline void arch_unmap(struct mm_struct *mm,
+ unsigned long start, unsigned long end)
+{
+}
+
#ifdef CONFIG_ARM64_SW_TTBR0_PAN
static inline void update_saved_ttbr0(struct task_struct *tsk,
struct mm_struct *mm)
@@ -267,6 +294,23 @@ static inline unsigned long mm_untag_mask(struct mm_struct *mm)
return -1UL >> 8;
}
+/*
+ * Only enforce protection keys on the current process, because there is no
+ * user context to access POR_EL0 for another address space.
+ */
+static inline bool arch_vma_access_permitted(struct vm_area_struct *vma,
+ bool write, bool execute, bool foreign)
+{
+ if (!system_supports_poe())
+ return true;
+
+ /* allow access if the VMA is not one from this process */
+ if (foreign || vma_is_foreign(vma))
+ return true;
+
+ return por_el0_allows_pkey(vma_pkey(vma), write, execute);
+}
+
#include <asm-generic/mmu_context.h>
#endif /* !__ASSEMBLY__ */
diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h
index 1f60aa1bc750..fd330c1db289 100644
--- a/arch/arm64/include/asm/pgtable-hwdef.h
+++ b/arch/arm64/include/asm/pgtable-hwdef.h
@@ -135,7 +135,6 @@
/*
* Section
*/
-#define PMD_SECT_VALID (_AT(pmdval_t, 1) << 0)
#define PMD_SECT_USER (_AT(pmdval_t, 1) << 6) /* AP[1] */
#define PMD_SECT_RDONLY (_AT(pmdval_t, 1) << 7) /* AP[2] */
#define PMD_SECT_S (_AT(pmdval_t, 3) << 8)
@@ -200,11 +199,26 @@
#define PTE_PI_IDX_3 54 /* UXN */
/*
+ * POIndex[2:0] encoding (Permission Overlay Extension)
+ */
+#define PTE_PO_IDX_0 (_AT(pteval_t, 1) << 60)
+#define PTE_PO_IDX_1 (_AT(pteval_t, 1) << 61)
+#define PTE_PO_IDX_2 (_AT(pteval_t, 1) << 62)
+
+#define PTE_PO_IDX_MASK GENMASK_ULL(62, 60)
+
+
+/*
* Memory Attribute override for Stage-2 (MemAttr[3:0])
*/
#define PTE_S2_MEMATTR(t) (_AT(pteval_t, (t)) << 2)
/*
+ * Hierarchical permission for Stage-1 tables
+ */
+#define S1_TABLE_AP (_AT(pmdval_t, 3) << 61)
+
+/*
* Highest possible physical address supported.
*/
#define PHYS_MASK_SHIFT (CONFIG_ARM64_PA_BITS)
@@ -298,6 +312,10 @@
#define TCR_TBI1 (UL(1) << 38)
#define TCR_HA (UL(1) << 39)
#define TCR_HD (UL(1) << 40)
+#define TCR_HPD0_SHIFT 41
+#define TCR_HPD0 (UL(1) << TCR_HPD0_SHIFT)
+#define TCR_HPD1_SHIFT 42
+#define TCR_HPD1 (UL(1) << TCR_HPD1_SHIFT)
#define TCR_TBID0 (UL(1) << 51)
#define TCR_TBID1 (UL(1) << 52)
#define TCR_NFD0 (UL(1) << 53)
diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h
index b11cfb9fdd37..2a11d0c10760 100644
--- a/arch/arm64/include/asm/pgtable-prot.h
+++ b/arch/arm64/include/asm/pgtable-prot.h
@@ -154,10 +154,10 @@ static inline bool __pure lpa2_is_enabled(void)
#define PIE_E0 ( \
PIRx_ELx_PERM(pte_pi_index(_PAGE_EXECONLY), PIE_X_O) | \
- PIRx_ELx_PERM(pte_pi_index(_PAGE_READONLY_EXEC), PIE_RX) | \
- PIRx_ELx_PERM(pte_pi_index(_PAGE_SHARED_EXEC), PIE_RWX) | \
- PIRx_ELx_PERM(pte_pi_index(_PAGE_READONLY), PIE_R) | \
- PIRx_ELx_PERM(pte_pi_index(_PAGE_SHARED), PIE_RW))
+ PIRx_ELx_PERM(pte_pi_index(_PAGE_READONLY_EXEC), PIE_RX_O) | \
+ PIRx_ELx_PERM(pte_pi_index(_PAGE_SHARED_EXEC), PIE_RWX_O) | \
+ PIRx_ELx_PERM(pte_pi_index(_PAGE_READONLY), PIE_R_O) | \
+ PIRx_ELx_PERM(pte_pi_index(_PAGE_SHARED), PIE_RW_O))
#define PIE_E1 ( \
PIRx_ELx_PERM(pte_pi_index(_PAGE_EXECONLY), PIE_NONE_O) | \
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 7a4f5604be3f..96c2b0b07c4c 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -34,6 +34,7 @@
#include <asm/cmpxchg.h>
#include <asm/fixmap.h>
+#include <asm/por.h>
#include <linux/mmdebug.h>
#include <linux/mm_types.h>
#include <linux/sched.h>
@@ -149,6 +150,24 @@ static inline pteval_t __phys_to_pte_val(phys_addr_t phys)
#define pte_accessible(mm, pte) \
(mm_tlb_flush_pending(mm) ? pte_present(pte) : pte_valid(pte))
+static inline bool por_el0_allows_pkey(u8 pkey, bool write, bool execute)
+{
+ u64 por;
+
+ if (!system_supports_poe())
+ return true;
+
+ por = read_sysreg_s(SYS_POR_EL0);
+
+ if (write)
+ return por_elx_allows_write(por, pkey);
+
+ if (execute)
+ return por_elx_allows_exec(por, pkey);
+
+ return por_elx_allows_read(por, pkey);
+}
+
/*
* p??_access_permitted() is true for valid user mappings (PTE_USER
* bit set, subject to the write permission check). For execute-only
@@ -156,8 +175,11 @@ static inline pteval_t __phys_to_pte_val(phys_addr_t phys)
* not set) must return false. PROT_NONE mappings do not have the
* PTE_VALID bit set.
*/
-#define pte_access_permitted(pte, write) \
+#define pte_access_permitted_no_overlay(pte, write) \
(((pte_val(pte) & (PTE_VALID | PTE_USER)) == (PTE_VALID | PTE_USER)) && (!(write) || pte_write(pte)))
+#define pte_access_permitted(pte, write) \
+ (pte_access_permitted_no_overlay(pte, write) && \
+ por_el0_allows_pkey(FIELD_GET(PTE_PO_IDX_MASK, pte_val(pte)), write, false))
#define pmd_access_permitted(pmd, write) \
(pte_access_permitted(pmd_pte(pmd), (write)))
#define pud_access_permitted(pud, write) \
@@ -373,10 +395,11 @@ static inline void __sync_cache_and_tags(pte_t pte, unsigned int nr_pages)
/*
* If the PTE would provide user space access to the tags associated
* with it then ensure that the MTE tags are synchronised. Although
- * pte_access_permitted() returns false for exec only mappings, they
- * don't expose tags (instruction fetches don't check tags).
+ * pte_access_permitted_no_overlay() returns false for exec only
+ * mappings, they don't expose tags (instruction fetches don't check
+ * tags).
*/
- if (system_supports_mte() && pte_access_permitted(pte, false) &&
+ if (system_supports_mte() && pte_access_permitted_no_overlay(pte, false) &&
!pte_special(pte) && pte_tagged(pte))
mte_sync_tags(pte, nr_pages);
}
@@ -1103,7 +1126,8 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
*/
const pteval_t mask = PTE_USER | PTE_PXN | PTE_UXN | PTE_RDONLY |
PTE_PRESENT_INVALID | PTE_VALID | PTE_WRITE |
- PTE_GP | PTE_ATTRINDX_MASK;
+ PTE_GP | PTE_ATTRINDX_MASK | PTE_PO_IDX_MASK;
+
/* preserve the hardware dirty information */
if (pte_hw_dirty(pte))
pte = set_pte_bit(pte, __pgprot(PTE_DIRTY));
diff --git a/arch/arm64/include/asm/pkeys.h b/arch/arm64/include/asm/pkeys.h
new file mode 100644
index 000000000000..0ca5f83ce148
--- /dev/null
+++ b/arch/arm64/include/asm/pkeys.h
@@ -0,0 +1,106 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2023 Arm Ltd.
+ *
+ * Based on arch/x86/include/asm/pkeys.h
+ */
+
+#ifndef _ASM_ARM64_PKEYS_H
+#define _ASM_ARM64_PKEYS_H
+
+#define ARCH_VM_PKEY_FLAGS (VM_PKEY_BIT0 | VM_PKEY_BIT1 | VM_PKEY_BIT2)
+
+#define arch_max_pkey() 8
+
+int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
+ unsigned long init_val);
+
+static inline bool arch_pkeys_enabled(void)
+{
+ return system_supports_poe();
+}
+
+static inline int vma_pkey(struct vm_area_struct *vma)
+{
+ return (vma->vm_flags & ARCH_VM_PKEY_FLAGS) >> VM_PKEY_SHIFT;
+}
+
+static inline int arch_override_mprotect_pkey(struct vm_area_struct *vma,
+ int prot, int pkey)
+{
+ if (pkey != -1)
+ return pkey;
+
+ return vma_pkey(vma);
+}
+
+static inline int execute_only_pkey(struct mm_struct *mm)
+{
+ // Execute-only mappings are handled by EPAN/FEAT_PAN3.
+ return -1;
+}
+
+#define mm_pkey_allocation_map(mm) (mm)->context.pkey_allocation_map
+#define mm_set_pkey_allocated(mm, pkey) do { \
+ mm_pkey_allocation_map(mm) |= (1U << pkey); \
+} while (0)
+#define mm_set_pkey_free(mm, pkey) do { \
+ mm_pkey_allocation_map(mm) &= ~(1U << pkey); \
+} while (0)
+
+static inline bool mm_pkey_is_allocated(struct mm_struct *mm, int pkey)
+{
+ /*
+ * "Allocated" pkeys are those that have been returned
+ * from pkey_alloc() or pkey 0 which is allocated
+ * implicitly when the mm is created.
+ */
+ if (pkey < 0 || pkey >= arch_max_pkey())
+ return false;
+
+ return mm_pkey_allocation_map(mm) & (1U << pkey);
+}
+
+/*
+ * Returns a positive, 3-bit key on success, or -1 on failure.
+ */
+static inline int mm_pkey_alloc(struct mm_struct *mm)
+{
+ /*
+ * Note: this is the one and only place we make sure
+ * that the pkey is valid as far as the hardware is
+ * concerned. The rest of the kernel trusts that
+ * only good, valid pkeys come out of here.
+ */
+ u8 all_pkeys_mask = GENMASK(arch_max_pkey() - 1, 0);
+ int ret;
+
+ if (!arch_pkeys_enabled())
+ return -1;
+
+ /*
+ * Are we out of pkeys? We must handle this specially
+ * because ffz() behavior is undefined if there are no
+ * zeros.
+ */
+ if (mm_pkey_allocation_map(mm) == all_pkeys_mask)
+ return -1;
+
+ ret = ffz(mm_pkey_allocation_map(mm));
+
+ mm_set_pkey_allocated(mm, ret);
+
+ return ret;
+}
+
+static inline int mm_pkey_free(struct mm_struct *mm, int pkey)
+{
+ if (!mm_pkey_is_allocated(mm, pkey))
+ return -EINVAL;
+
+ mm_set_pkey_free(mm, pkey);
+
+ return 0;
+}
+
+#endif /* _ASM_ARM64_PKEYS_H */
diff --git a/arch/arm64/include/asm/por.h b/arch/arm64/include/asm/por.h
new file mode 100644
index 000000000000..e06e9f473675
--- /dev/null
+++ b/arch/arm64/include/asm/por.h
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2023 Arm Ltd.
+ */
+
+#ifndef _ASM_ARM64_POR_H
+#define _ASM_ARM64_POR_H
+
+#define POR_BITS_PER_PKEY 4
+#define POR_ELx_IDX(por_elx, idx) (((por_elx) >> ((idx) * POR_BITS_PER_PKEY)) & 0xf)
+
+static inline bool por_elx_allows_read(u64 por, u8 pkey)
+{
+ u8 perm = POR_ELx_IDX(por, pkey);
+
+ return perm & POE_R;
+}
+
+static inline bool por_elx_allows_write(u64 por, u8 pkey)
+{
+ u8 perm = POR_ELx_IDX(por, pkey);
+
+ return perm & POE_W;
+}
+
+static inline bool por_elx_allows_exec(u64 por, u8 pkey)
+{
+ u8 perm = POR_ELx_IDX(por, pkey);
+
+ return perm & POE_X;
+}
+
+#endif /* _ASM_ARM64_POR_H */
diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
index f77371232d8c..1438424f0064 100644
--- a/arch/arm64/include/asm/processor.h
+++ b/arch/arm64/include/asm/processor.h
@@ -184,6 +184,7 @@ struct thread_struct {
u64 sctlr_user;
u64 svcr;
u64 tpidr2_el0;
+ u64 por_el0;
};
static inline unsigned int thread_get_vl(struct thread_struct *thread,
@@ -402,5 +403,10 @@ long get_tagged_addr_ctrl(struct task_struct *task);
#define GET_TAGGED_ADDR_CTRL() get_tagged_addr_ctrl(current)
#endif
+int get_tsc_mode(unsigned long adr);
+int set_tsc_mode(unsigned int val);
+#define GET_TSC_CTL(adr) get_tsc_mode((adr))
+#define SET_TSC_CTL(val) set_tsc_mode((val))
+
#endif /* __ASSEMBLY__ */
#endif /* __ASM_PROCESSOR_H */
diff --git a/arch/arm64/include/asm/ptdump.h b/arch/arm64/include/asm/ptdump.h
index 5b1701c76d1c..6cf4aae05219 100644
--- a/arch/arm64/include/asm/ptdump.h
+++ b/arch/arm64/include/asm/ptdump.h
@@ -5,6 +5,8 @@
#ifndef __ASM_PTDUMP_H
#define __ASM_PTDUMP_H
+#include <linux/ptdump.h>
+
#ifdef CONFIG_PTDUMP_CORE
#include <linux/mm_types.h>
@@ -21,14 +23,53 @@ struct ptdump_info {
unsigned long base_addr;
};
+struct ptdump_prot_bits {
+ u64 mask;
+ u64 val;
+ const char *set;
+ const char *clear;
+};
+
+struct ptdump_pg_level {
+ const struct ptdump_prot_bits *bits;
+ char name[4];
+ int num;
+ u64 mask;
+};
+
+/*
+ * The page dumper groups page table entries of the same type into a single
+ * description. It uses pg_state to track the range information while
+ * iterating over the pte entries. When the continuity is broken it then
+ * dumps out a description of the range.
+ */
+struct ptdump_pg_state {
+ struct ptdump_state ptdump;
+ struct ptdump_pg_level *pg_level;
+ struct seq_file *seq;
+ const struct addr_marker *marker;
+ const struct mm_struct *mm;
+ unsigned long start_address;
+ int level;
+ u64 current_prot;
+ bool check_wx;
+ unsigned long wx_pages;
+ unsigned long uxn_pages;
+};
+
void ptdump_walk(struct seq_file *s, struct ptdump_info *info);
+void note_page(struct ptdump_state *pt_st, unsigned long addr, int level,
+ u64 val);
#ifdef CONFIG_PTDUMP_DEBUGFS
#define EFI_RUNTIME_MAP_END DEFAULT_MAP_WINDOW_64
void __init ptdump_debugfs_register(struct ptdump_info *info, const char *name);
#else
static inline void ptdump_debugfs_register(struct ptdump_info *info,
const char *name) { }
-#endif
+#endif /* CONFIG_PTDUMP_DEBUGFS */
+#else
+static inline void note_page(struct ptdump_state *pt_st, unsigned long addr,
+ int level, u64 val) { }
#endif /* CONFIG_PTDUMP_CORE */
#endif /* __ASM_PTDUMP_H */
diff --git a/arch/arm64/include/asm/set_memory.h b/arch/arm64/include/asm/set_memory.h
index 0f740b781187..917761feeffd 100644
--- a/arch/arm64/include/asm/set_memory.h
+++ b/arch/arm64/include/asm/set_memory.h
@@ -3,6 +3,7 @@
#ifndef _ASM_ARM64_SET_MEMORY_H
#define _ASM_ARM64_SET_MEMORY_H
+#include <asm/mem_encrypt.h>
#include <asm-generic/set_memory.h>
bool can_set_direct_map(void);
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index 4a9ea103817e..9ea97dddefc4 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -109,6 +109,9 @@
#define set_pstate_ssbs(x) asm volatile(SET_PSTATE_SSBS(x))
#define set_pstate_dit(x) asm volatile(SET_PSTATE_DIT(x))
+/* Register-based PAN access, for save/restore purposes */
+#define SYS_PSTATE_PAN sys_reg(3, 0, 4, 2, 3)
+
#define __SYS_BARRIER_INSN(CRm, op2, Rt) \
__emit_inst(0xd5000000 | sys_insn(0, 3, 3, (CRm), (op2)) | ((Rt) & 0x1f))
@@ -325,7 +328,25 @@
#define SYS_PAR_EL1 sys_reg(3, 0, 7, 4, 0)
#define SYS_PAR_EL1_F BIT(0)
+/* When PAR_EL1.F == 1 */
#define SYS_PAR_EL1_FST GENMASK(6, 1)
+#define SYS_PAR_EL1_PTW BIT(8)
+#define SYS_PAR_EL1_S BIT(9)
+#define SYS_PAR_EL1_AssuredOnly BIT(12)
+#define SYS_PAR_EL1_TopLevel BIT(13)
+#define SYS_PAR_EL1_Overlay BIT(14)
+#define SYS_PAR_EL1_DirtyBit BIT(15)
+#define SYS_PAR_EL1_F1_IMPDEF GENMASK_ULL(63, 48)
+#define SYS_PAR_EL1_F1_RES0 (BIT(7) | BIT(10) | GENMASK_ULL(47, 16))
+#define SYS_PAR_EL1_RES1 BIT(11)
+/* When PAR_EL1.F == 0 */
+#define SYS_PAR_EL1_SH GENMASK_ULL(8, 7)
+#define SYS_PAR_EL1_NS BIT(9)
+#define SYS_PAR_EL1_F0_IMPDEF BIT(10)
+#define SYS_PAR_EL1_NSE BIT(11)
+#define SYS_PAR_EL1_PA GENMASK_ULL(51, 12)
+#define SYS_PAR_EL1_ATTR GENMASK_ULL(63, 56)
+#define SYS_PAR_EL1_F0_RES0 (GENMASK_ULL(6, 1) | GENMASK_ULL(55, 52))
/*** Statistical Profiling Extension ***/
#define PMSEVFR_EL1_RES0_IMP \
@@ -403,7 +424,6 @@
#define SYS_PMCNTENCLR_EL0 sys_reg(3, 3, 9, 12, 2)
#define SYS_PMOVSCLR_EL0 sys_reg(3, 3, 9, 12, 3)
#define SYS_PMSWINC_EL0 sys_reg(3, 3, 9, 12, 4)
-#define SYS_PMSELR_EL0 sys_reg(3, 3, 9, 12, 5)
#define SYS_PMCEID0_EL0 sys_reg(3, 3, 9, 12, 6)
#define SYS_PMCEID1_EL0 sys_reg(3, 3, 9, 12, 7)
#define SYS_PMCCNTR_EL0 sys_reg(3, 3, 9, 13, 0)
@@ -652,6 +672,7 @@
#define OP_AT_S12E1W sys_insn(AT_Op0, 4, AT_CRn, 8, 5)
#define OP_AT_S12E0R sys_insn(AT_Op0, 4, AT_CRn, 8, 6)
#define OP_AT_S12E0W sys_insn(AT_Op0, 4, AT_CRn, 8, 7)
+#define OP_AT_S1E2A sys_insn(AT_Op0, 4, AT_CRn, 9, 2)
/* TLBI instructions */
#define TLBI_Op0 1
@@ -1077,6 +1098,9 @@
#define POE_RXW UL(0x7)
#define POE_MASK UL(0xf)
+/* Initial value for Permission Overlay Extension for EL0 */
+#define POR_EL0_INIT POE_RXW
+
#define ARM64_FEATURE_FIELD_BITS 4
/* Defined for compatibility only, do not add new users. */
diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h
index e72a3bf9e563..1114c1c3300a 100644
--- a/arch/arm64/include/asm/thread_info.h
+++ b/arch/arm64/include/asm/thread_info.h
@@ -81,6 +81,7 @@ void arch_setup_new_exec(void);
#define TIF_SME 27 /* SME in use */
#define TIF_SME_VL_INHERIT 28 /* Inherit SME vl_onexec across exec */
#define TIF_KERNEL_FPSTATE 29 /* Task is in a kernel mode FPSIMD section */
+#define TIF_TSC_SIGSEGV 30 /* SIGSEGV on counter-timer access */
#define _TIF_SIGPENDING (1 << TIF_SIGPENDING)
#define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED)
@@ -97,6 +98,7 @@ void arch_setup_new_exec(void);
#define _TIF_SVE (1 << TIF_SVE)
#define _TIF_MTE_ASYNC_FAULT (1 << TIF_MTE_ASYNC_FAULT)
#define _TIF_NOTIFY_SIGNAL (1 << TIF_NOTIFY_SIGNAL)
+#define _TIF_TSC_SIGSEGV (1 << TIF_TSC_SIGSEGV)
#define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \
_TIF_NOTIFY_RESUME | _TIF_FOREIGN_FPSTATE | \
diff --git a/arch/arm64/include/asm/traps.h b/arch/arm64/include/asm/traps.h
index eefe766d6161..d780d1bd2eac 100644
--- a/arch/arm64/include/asm/traps.h
+++ b/arch/arm64/include/asm/traps.h
@@ -25,6 +25,7 @@ try_emulate_armv8_deprecated(struct pt_regs *regs, u32 insn)
void force_signal_inject(int signal, int code, unsigned long address, unsigned long err);
void arm64_notify_segfault(unsigned long addr);
void arm64_force_sig_fault(int signo, int code, unsigned long far, const char *str);
+void arm64_force_sig_fault_pkey(unsigned long far, const char *str, int pkey);
void arm64_force_sig_mceerr(int code, unsigned long far, short lsb, const char *str);
void arm64_force_sig_ptrace_errno_trap(int errno, unsigned long far, const char *str);
diff --git a/arch/arm64/include/asm/vncr_mapping.h b/arch/arm64/include/asm/vncr_mapping.h
index df2c47c55972..06f8ec0906a6 100644
--- a/arch/arm64/include/asm/vncr_mapping.h
+++ b/arch/arm64/include/asm/vncr_mapping.h
@@ -52,6 +52,7 @@
#define VNCR_PIRE0_EL1 0x290
#define VNCR_PIRE0_EL2 0x298
#define VNCR_PIR_EL1 0x2A0
+#define VNCR_POR_EL1 0x2A8
#define VNCR_ICH_LR0_EL2 0x400
#define VNCR_ICH_LR1_EL2 0x408
#define VNCR_ICH_LR2_EL2 0x410
diff --git a/arch/arm64/include/uapi/asm/hwcap.h b/arch/arm64/include/uapi/asm/hwcap.h
index 285610e626f5..055381b2c615 100644
--- a/arch/arm64/include/uapi/asm/hwcap.h
+++ b/arch/arm64/include/uapi/asm/hwcap.h
@@ -122,5 +122,6 @@
#define HWCAP2_SME_SF8FMA (1UL << 60)
#define HWCAP2_SME_SF8DP4 (1UL << 61)
#define HWCAP2_SME_SF8DP2 (1UL << 62)
+#define HWCAP2_POE (1UL << 63)
#endif /* _UAPI__ASM_HWCAP_H */
diff --git a/arch/arm64/include/uapi/asm/mman.h b/arch/arm64/include/uapi/asm/mman.h
index 1e6482a838e1..e7e0c8216243 100644
--- a/arch/arm64/include/uapi/asm/mman.h
+++ b/arch/arm64/include/uapi/asm/mman.h
@@ -7,4 +7,13 @@
#define PROT_BTI 0x10 /* BTI guarded page */
#define PROT_MTE 0x20 /* Normal Tagged mapping */
+/* Override any generic PKEY permission defines */
+#define PKEY_DISABLE_EXECUTE 0x4
+#define PKEY_DISABLE_READ 0x8
+#undef PKEY_ACCESS_MASK
+#define PKEY_ACCESS_MASK (PKEY_DISABLE_ACCESS |\
+ PKEY_DISABLE_WRITE |\
+ PKEY_DISABLE_READ |\
+ PKEY_DISABLE_EXECUTE)
+
#endif /* ! _UAPI__ASM_MMAN_H */
diff --git a/arch/arm64/include/uapi/asm/sigcontext.h b/arch/arm64/include/uapi/asm/sigcontext.h
index 8a45b7a411e0..bb7af77a30a7 100644
--- a/arch/arm64/include/uapi/asm/sigcontext.h
+++ b/arch/arm64/include/uapi/asm/sigcontext.h
@@ -98,6 +98,13 @@ struct esr_context {
__u64 esr;
};
+#define POE_MAGIC 0x504f4530
+
+struct poe_context {
+ struct _aarch64_ctx head;
+ __u64 por_el0;
+};
+
/*
* extra_context: describes extra space in the signal frame for
* additional structures that don't fit in sigcontext.__reserved[].
@@ -320,10 +327,10 @@ struct zt_context {
((sizeof(struct za_context) + (__SVE_VQ_BYTES - 1)) \
/ __SVE_VQ_BYTES * __SVE_VQ_BYTES)
-#define ZA_SIG_REGS_SIZE(vq) ((vq * __SVE_VQ_BYTES) * (vq * __SVE_VQ_BYTES))
+#define ZA_SIG_REGS_SIZE(vq) (((vq) * __SVE_VQ_BYTES) * ((vq) * __SVE_VQ_BYTES))
#define ZA_SIG_ZAV_OFFSET(vq, n) (ZA_SIG_REGS_OFFSET + \
- (SVE_SIG_ZREG_SIZE(vq) * n))
+ (SVE_SIG_ZREG_SIZE(vq) * (n)))
#define ZA_SIG_CONTEXT_SIZE(vq) \
(ZA_SIG_REGS_OFFSET + ZA_SIG_REGS_SIZE(vq))
@@ -334,7 +341,7 @@ struct zt_context {
#define ZT_SIG_REGS_OFFSET sizeof(struct zt_context)
-#define ZT_SIG_REGS_SIZE(n) (ZT_SIG_REG_BYTES * n)
+#define ZT_SIG_REGS_SIZE(n) (ZT_SIG_REG_BYTES * (n))
#define ZT_SIG_CONTEXT_SIZE(n) \
(sizeof(struct zt_context) + ZT_SIG_REGS_SIZE(n))
diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index f6b6b4507357..dfefbdf4073a 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -456,6 +456,14 @@ static const struct midr_range erratum_spec_ssbs_list[] = {
};
#endif
+#ifdef CONFIG_AMPERE_ERRATUM_AC03_CPU_38
+static const struct midr_range erratum_ac03_cpu_38_list[] = {
+ MIDR_ALL_VERSIONS(MIDR_AMPERE1),
+ MIDR_ALL_VERSIONS(MIDR_AMPERE1A),
+ {},
+};
+#endif
+
const struct arm64_cpu_capabilities arm64_errata[] = {
#ifdef CONFIG_ARM64_WORKAROUND_CLEAN_CACHE
{
@@ -772,7 +780,7 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
{
.desc = "AmpereOne erratum AC03_CPU_38",
.capability = ARM64_WORKAROUND_AMPERE_AC03_CPU_38,
- ERRATA_MIDR_ALL_VERSIONS(MIDR_AMPERE1),
+ ERRATA_MIDR_RANGE_LIST(erratum_ac03_cpu_38_list),
},
#endif
{
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 646ecd3069fd..718728a85430 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -466,6 +466,8 @@ static const struct arm64_ftr_bits ftr_id_aa64mmfr2[] = {
};
static const struct arm64_ftr_bits ftr_id_aa64mmfr3[] = {
+ ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_POE),
+ FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR3_EL1_S1POE_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR3_EL1_S1PIE_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR3_EL1_TCRX_SHIFT, 4, 0),
ARM64_FTR_END,
@@ -2348,6 +2350,14 @@ static void cpu_enable_mops(const struct arm64_cpu_capabilities *__unused)
sysreg_clear_set(sctlr_el1, 0, SCTLR_EL1_MSCEn);
}
+#ifdef CONFIG_ARM64_POE
+static void cpu_enable_poe(const struct arm64_cpu_capabilities *__unused)
+{
+ sysreg_clear_set(REG_TCR2_EL1, 0, TCR2_EL1x_E0POE);
+ sysreg_clear_set(CPACR_EL1, 0, CPACR_ELx_E0POE);
+}
+#endif
+
/* Internal helper functions to match cpu capability type */
static bool
cpucap_late_cpu_optional(const struct arm64_cpu_capabilities *cap)
@@ -2870,6 +2880,16 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.matches = has_nv1,
ARM64_CPUID_FIELDS_NEG(ID_AA64MMFR4_EL1, E2H0, NI_NV1)
},
+#ifdef CONFIG_ARM64_POE
+ {
+ .desc = "Stage-1 Permission Overlay Extension (S1POE)",
+ .capability = ARM64_HAS_S1POE,
+ .type = ARM64_CPUCAP_BOOT_CPU_FEATURE,
+ .matches = has_cpuid_feature,
+ .cpu_enable = cpu_enable_poe,
+ ARM64_CPUID_FIELDS(ID_AA64MMFR3_EL1, S1POE, IMP)
+ },
+#endif
{},
};
@@ -3034,6 +3054,9 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = {
HWCAP_CAP(ID_AA64FPFR0_EL1, F8DP2, IMP, CAP_HWCAP, KERNEL_HWCAP_F8DP2),
HWCAP_CAP(ID_AA64FPFR0_EL1, F8E4M3, IMP, CAP_HWCAP, KERNEL_HWCAP_F8E4M3),
HWCAP_CAP(ID_AA64FPFR0_EL1, F8E5M2, IMP, CAP_HWCAP, KERNEL_HWCAP_F8E5M2),
+#ifdef CONFIG_ARM64_POE
+ HWCAP_CAP(ID_AA64MMFR3_EL1, S1POE, IMP, CAP_HWCAP, KERNEL_HWCAP_POE),
+#endif
{},
};
diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c
index 09eeaa24d456..44718d0482b3 100644
--- a/arch/arm64/kernel/cpuinfo.c
+++ b/arch/arm64/kernel/cpuinfo.c
@@ -143,6 +143,7 @@ static const char *const hwcap_str[] = {
[KERNEL_HWCAP_SME_SF8FMA] = "smesf8fma",
[KERNEL_HWCAP_SME_SF8DP4] = "smesf8dp4",
[KERNEL_HWCAP_SME_SF8DP2] = "smesf8dp2",
+ [KERNEL_HWCAP_POE] = "poe",
};
#ifdef CONFIG_COMPAT
@@ -280,7 +281,7 @@ const struct seq_operations cpuinfo_op = {
};
-static struct kobj_type cpuregs_kobj_type = {
+static const struct kobj_type cpuregs_kobj_type = {
.sysfs_ops = &kobj_sysfs_ops,
};
diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c
index 02870beb271e..7b11d84f533c 100644
--- a/arch/arm64/kernel/hibernate.c
+++ b/arch/arm64/kernel/hibernate.c
@@ -407,7 +407,7 @@ int swsusp_arch_resume(void)
void *, phys_addr_t, phys_addr_t);
struct trans_pgd_info trans_info = {
.trans_alloc_page = hibernate_page_alloc,
- .trans_alloc_arg = (void *)GFP_ATOMIC,
+ .trans_alloc_arg = (__force void *)GFP_ATOMIC,
};
/*
diff --git a/arch/arm64/kernel/pci.c b/arch/arm64/kernel/pci.c
index f872c57e9909..fd9a7bed83ce 100644
--- a/arch/arm64/kernel/pci.c
+++ b/arch/arm64/kernel/pci.c
@@ -6,28 +6,7 @@
* Copyright (C) 2014 ARM Ltd.
*/
-#include <linux/acpi.h>
-#include <linux/init.h>
-#include <linux/io.h>
-#include <linux/kernel.h>
-#include <linux/mm.h>
#include <linux/pci.h>
-#include <linux/pci-acpi.h>
-#include <linux/pci-ecam.h>
-#include <linux/slab.h>
-
-#ifdef CONFIG_ACPI
-/*
- * Try to assign the IRQ number when probing a new device
- */
-int pcibios_alloc_irq(struct pci_dev *dev)
-{
- if (!acpi_disabled)
- acpi_pci_irq_enable(dev);
-
- return 0;
-}
-#endif
/*
* raw_pci_read/write - Platform-specific PCI config space access.
@@ -61,173 +40,3 @@ int pcibus_to_node(struct pci_bus *bus)
EXPORT_SYMBOL(pcibus_to_node);
#endif
-
-#ifdef CONFIG_ACPI
-
-struct acpi_pci_generic_root_info {
- struct acpi_pci_root_info common;
- struct pci_config_window *cfg; /* config space mapping */
-};
-
-int acpi_pci_bus_find_domain_nr(struct pci_bus *bus)
-{
- struct pci_config_window *cfg = bus->sysdata;
- struct acpi_device *adev = to_acpi_device(cfg->parent);
- struct acpi_pci_root *root = acpi_driver_data(adev);
-
- return root->segment;
-}
-
-int pcibios_root_bridge_prepare(struct pci_host_bridge *bridge)
-{
- struct pci_config_window *cfg;
- struct acpi_device *adev;
- struct device *bus_dev;
-
- if (acpi_disabled)
- return 0;
-
- cfg = bridge->bus->sysdata;
-
- /*
- * On Hyper-V there is no corresponding ACPI device for a root bridge,
- * therefore ->parent is set as NULL by the driver. And set 'adev' as
- * NULL in this case because there is no proper ACPI device.
- */
- if (!cfg->parent)
- adev = NULL;
- else
- adev = to_acpi_device(cfg->parent);
-
- bus_dev = &bridge->bus->dev;
-
- ACPI_COMPANION_SET(&bridge->dev, adev);
- set_dev_node(bus_dev, acpi_get_node(acpi_device_handle(adev)));
-
- return 0;
-}
-
-static int pci_acpi_root_prepare_resources(struct acpi_pci_root_info *ci)
-{
- struct resource_entry *entry, *tmp;
- int status;
-
- status = acpi_pci_probe_root_resources(ci);
- resource_list_for_each_entry_safe(entry, tmp, &ci->resources) {
- if (!(entry->res->flags & IORESOURCE_WINDOW))
- resource_list_destroy_entry(entry);
- }
- return status;
-}
-
-/*
- * Lookup the bus range for the domain in MCFG, and set up config space
- * mapping.
- */
-static struct pci_config_window *
-pci_acpi_setup_ecam_mapping(struct acpi_pci_root *root)
-{
- struct device *dev = &root->device->dev;
- struct resource *bus_res = &root->secondary;
- u16 seg = root->segment;
- const struct pci_ecam_ops *ecam_ops;
- struct resource cfgres;
- struct acpi_device *adev;
- struct pci_config_window *cfg;
- int ret;
-
- ret = pci_mcfg_lookup(root, &cfgres, &ecam_ops);
- if (ret) {
- dev_err(dev, "%04x:%pR ECAM region not found\n", seg, bus_res);
- return NULL;
- }
-
- adev = acpi_resource_consumer(&cfgres);
- if (adev)
- dev_info(dev, "ECAM area %pR reserved by %s\n", &cfgres,
- dev_name(&adev->dev));
- else
- dev_warn(dev, FW_BUG "ECAM area %pR not reserved in ACPI namespace\n",
- &cfgres);
-
- cfg = pci_ecam_create(dev, &cfgres, bus_res, ecam_ops);
- if (IS_ERR(cfg)) {
- dev_err(dev, "%04x:%pR error %ld mapping ECAM\n", seg, bus_res,
- PTR_ERR(cfg));
- return NULL;
- }
-
- return cfg;
-}
-
-/* release_info: free resources allocated by init_info */
-static void pci_acpi_generic_release_info(struct acpi_pci_root_info *ci)
-{
- struct acpi_pci_generic_root_info *ri;
-
- ri = container_of(ci, struct acpi_pci_generic_root_info, common);
- pci_ecam_free(ri->cfg);
- kfree(ci->ops);
- kfree(ri);
-}
-
-/* Interface called from ACPI code to setup PCI host controller */
-struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root)
-{
- struct acpi_pci_generic_root_info *ri;
- struct pci_bus *bus, *child;
- struct acpi_pci_root_ops *root_ops;
- struct pci_host_bridge *host;
-
- ri = kzalloc(sizeof(*ri), GFP_KERNEL);
- if (!ri)
- return NULL;
-
- root_ops = kzalloc(sizeof(*root_ops), GFP_KERNEL);
- if (!root_ops) {
- kfree(ri);
- return NULL;
- }
-
- ri->cfg = pci_acpi_setup_ecam_mapping(root);
- if (!ri->cfg) {
- kfree(ri);
- kfree(root_ops);
- return NULL;
- }
-
- root_ops->release_info = pci_acpi_generic_release_info;
- root_ops->prepare_resources = pci_acpi_root_prepare_resources;
- root_ops->pci_ops = (struct pci_ops *)&ri->cfg->ops->pci_ops;
- bus = acpi_pci_root_create(root, root_ops, &ri->common, ri->cfg);
- if (!bus)
- return NULL;
-
- /* If we must preserve the resource configuration, claim now */
- host = pci_find_host_bridge(bus);
- if (host->preserve_config)
- pci_bus_claim_resources(bus);
-
- /*
- * Assign whatever was left unassigned. If we didn't claim above,
- * this will reassign everything.
- */
- pci_assign_unassigned_root_bus_resources(bus);
-
- list_for_each_entry(child, &bus->children, node)
- pcie_bus_configure_settings(child);
-
- return bus;
-}
-
-void pcibios_add_bus(struct pci_bus *bus)
-{
- acpi_pci_add_bus(bus);
-}
-
-void pcibios_remove_bus(struct pci_bus *bus)
-{
- acpi_pci_remove_bus(bus);
-}
-
-#endif
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index 4ae31b7af6c3..0540653fbf38 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -43,6 +43,7 @@
#include <linux/stacktrace.h>
#include <asm/alternative.h>
+#include <asm/arch_timer.h>
#include <asm/compat.h>
#include <asm/cpufeature.h>
#include <asm/cacheflush.h>
@@ -271,12 +272,21 @@ static void flush_tagged_addr_state(void)
clear_thread_flag(TIF_TAGGED_ADDR);
}
+static void flush_poe(void)
+{
+ if (!system_supports_poe())
+ return;
+
+ write_sysreg_s(POR_EL0_INIT, SYS_POR_EL0);
+}
+
void flush_thread(void)
{
fpsimd_flush_thread();
tls_thread_flush();
flush_ptrace_hw_breakpoint(current);
flush_tagged_addr_state();
+ flush_poe();
}
void arch_release_task_struct(struct task_struct *tsk)
@@ -371,6 +381,9 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
if (system_supports_tpidr2())
p->thread.tpidr2_el0 = read_sysreg_s(SYS_TPIDR2_EL0);
+ if (system_supports_poe())
+ p->thread.por_el0 = read_sysreg_s(SYS_POR_EL0);
+
if (stack_start) {
if (is_compat_thread(task_thread_info(p)))
childregs->compat_sp = stack_start;
@@ -472,27 +485,63 @@ static void entry_task_switch(struct task_struct *next)
}
/*
- * ARM erratum 1418040 handling, affecting the 32bit view of CNTVCT.
- * Ensure access is disabled when switching to a 32bit task, ensure
- * access is enabled when switching to a 64bit task.
+ * Handle sysreg updates for ARM erratum 1418040 which affects the 32bit view of
+ * CNTVCT, various other errata which require trapping all CNTVCT{,_EL0}
+ * accesses and prctl(PR_SET_TSC). Ensure access is disabled iff a workaround is
+ * required or PR_TSC_SIGSEGV is set.
*/
-static void erratum_1418040_thread_switch(struct task_struct *next)
+static void update_cntkctl_el1(struct task_struct *next)
{
- if (!IS_ENABLED(CONFIG_ARM64_ERRATUM_1418040) ||
- !this_cpu_has_cap(ARM64_WORKAROUND_1418040))
- return;
+ struct thread_info *ti = task_thread_info(next);
- if (is_compat_thread(task_thread_info(next)))
+ if (test_ti_thread_flag(ti, TIF_TSC_SIGSEGV) ||
+ has_erratum_handler(read_cntvct_el0) ||
+ (IS_ENABLED(CONFIG_ARM64_ERRATUM_1418040) &&
+ this_cpu_has_cap(ARM64_WORKAROUND_1418040) &&
+ is_compat_thread(ti)))
sysreg_clear_set(cntkctl_el1, ARCH_TIMER_USR_VCT_ACCESS_EN, 0);
else
sysreg_clear_set(cntkctl_el1, 0, ARCH_TIMER_USR_VCT_ACCESS_EN);
}
-static void erratum_1418040_new_exec(void)
+static void cntkctl_thread_switch(struct task_struct *prev,
+ struct task_struct *next)
+{
+ if ((read_ti_thread_flags(task_thread_info(prev)) &
+ (_TIF_32BIT | _TIF_TSC_SIGSEGV)) !=
+ (read_ti_thread_flags(task_thread_info(next)) &
+ (_TIF_32BIT | _TIF_TSC_SIGSEGV)))
+ update_cntkctl_el1(next);
+}
+
+static int do_set_tsc_mode(unsigned int val)
{
+ bool tsc_sigsegv;
+
+ if (val == PR_TSC_SIGSEGV)
+ tsc_sigsegv = true;
+ else if (val == PR_TSC_ENABLE)
+ tsc_sigsegv = false;
+ else
+ return -EINVAL;
+
preempt_disable();
- erratum_1418040_thread_switch(current);
+ update_thread_flag(TIF_TSC_SIGSEGV, tsc_sigsegv);
+ update_cntkctl_el1(current);
preempt_enable();
+
+ return 0;
+}
+
+static void permission_overlay_switch(struct task_struct *next)
+{
+ if (!system_supports_poe())
+ return;
+
+ current->thread.por_el0 = read_sysreg_s(SYS_POR_EL0);
+ if (current->thread.por_el0 != next->thread.por_el0) {
+ write_sysreg_s(next->thread.por_el0, SYS_POR_EL0);
+ }
}
/*
@@ -528,8 +577,9 @@ struct task_struct *__switch_to(struct task_struct *prev,
contextidr_thread_switch(next);
entry_task_switch(next);
ssbs_thread_switch(next);
- erratum_1418040_thread_switch(next);
+ cntkctl_thread_switch(prev, next);
ptrauth_thread_switch_user(next);
+ permission_overlay_switch(next);
/*
* Complete any pending TLB or cache maintenance on this CPU in case
@@ -645,7 +695,7 @@ void arch_setup_new_exec(void)
current->mm->context.flags = mmflags;
ptrauth_thread_init_user();
mte_thread_init_user();
- erratum_1418040_new_exec();
+ do_set_tsc_mode(PR_TSC_ENABLE);
if (task_spec_ssb_noexec(current)) {
arch_prctl_spec_ctrl_set(current, PR_SPEC_STORE_BYPASS,
@@ -754,3 +804,26 @@ int arch_elf_adjust_prot(int prot, const struct arch_elf_state *state,
return prot;
}
#endif
+
+int get_tsc_mode(unsigned long adr)
+{
+ unsigned int val;
+
+ if (is_compat_task())
+ return -EINVAL;
+
+ if (test_thread_flag(TIF_TSC_SIGSEGV))
+ val = PR_TSC_SIGSEGV;
+ else
+ val = PR_TSC_ENABLE;
+
+ return put_user(val, (unsigned int __user *)adr);
+}
+
+int set_tsc_mode(unsigned int val)
+{
+ if (is_compat_task())
+ return -EINVAL;
+
+ return do_set_tsc_mode(val);
+}
diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c
index 0d022599eb61..b756578aeaee 100644
--- a/arch/arm64/kernel/ptrace.c
+++ b/arch/arm64/kernel/ptrace.c
@@ -1440,6 +1440,39 @@ static int tagged_addr_ctrl_set(struct task_struct *target, const struct
}
#endif
+#ifdef CONFIG_ARM64_POE
+static int poe_get(struct task_struct *target,
+ const struct user_regset *regset,
+ struct membuf to)
+{
+ if (!system_supports_poe())
+ return -EINVAL;
+
+ return membuf_write(&to, &target->thread.por_el0,
+ sizeof(target->thread.por_el0));
+}
+
+static int poe_set(struct task_struct *target, const struct
+ user_regset *regset, unsigned int pos,
+ unsigned int count, const void *kbuf, const
+ void __user *ubuf)
+{
+ int ret;
+ long ctrl;
+
+ if (!system_supports_poe())
+ return -EINVAL;
+
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &ctrl, 0, -1);
+ if (ret)
+ return ret;
+
+ target->thread.por_el0 = ctrl;
+
+ return 0;
+}
+#endif
+
enum aarch64_regset {
REGSET_GPR,
REGSET_FPR,
@@ -1469,6 +1502,9 @@ enum aarch64_regset {
#ifdef CONFIG_ARM64_TAGGED_ADDR_ABI
REGSET_TAGGED_ADDR_CTRL,
#endif
+#ifdef CONFIG_ARM64_POE
+ REGSET_POE
+#endif
};
static const struct user_regset aarch64_regsets[] = {
@@ -1628,6 +1664,16 @@ static const struct user_regset aarch64_regsets[] = {
.set = tagged_addr_ctrl_set,
},
#endif
+#ifdef CONFIG_ARM64_POE
+ [REGSET_POE] = {
+ .core_note_type = NT_ARM_POE,
+ .n = 1,
+ .size = sizeof(long),
+ .align = sizeof(long),
+ .regset_get = poe_get,
+ .set = poe_set,
+ },
+#endif
};
static const struct user_regset_view user_aarch64_view = {
diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c
index 4a77f4976e11..561986947530 100644
--- a/arch/arm64/kernel/signal.c
+++ b/arch/arm64/kernel/signal.c
@@ -61,6 +61,7 @@ struct rt_sigframe_user_layout {
unsigned long za_offset;
unsigned long zt_offset;
unsigned long fpmr_offset;
+ unsigned long poe_offset;
unsigned long extra_offset;
unsigned long end_offset;
};
@@ -185,6 +186,8 @@ struct user_ctxs {
u32 zt_size;
struct fpmr_context __user *fpmr;
u32 fpmr_size;
+ struct poe_context __user *poe;
+ u32 poe_size;
};
static int preserve_fpsimd_context(struct fpsimd_context __user *ctx)
@@ -258,6 +261,32 @@ static int restore_fpmr_context(struct user_ctxs *user)
return err;
}
+static int preserve_poe_context(struct poe_context __user *ctx)
+{
+ int err = 0;
+
+ __put_user_error(POE_MAGIC, &ctx->head.magic, err);
+ __put_user_error(sizeof(*ctx), &ctx->head.size, err);
+ __put_user_error(read_sysreg_s(SYS_POR_EL0), &ctx->por_el0, err);
+
+ return err;
+}
+
+static int restore_poe_context(struct user_ctxs *user)
+{
+ u64 por_el0;
+ int err = 0;
+
+ if (user->poe_size != sizeof(*user->poe))
+ return -EINVAL;
+
+ __get_user_error(por_el0, &(user->poe->por_el0), err);
+ if (!err)
+ write_sysreg_s(por_el0, SYS_POR_EL0);
+
+ return err;
+}
+
#ifdef CONFIG_ARM64_SVE
static int preserve_sve_context(struct sve_context __user *ctx)
@@ -621,6 +650,7 @@ static int parse_user_sigframe(struct user_ctxs *user,
user->za = NULL;
user->zt = NULL;
user->fpmr = NULL;
+ user->poe = NULL;
if (!IS_ALIGNED((unsigned long)base, 16))
goto invalid;
@@ -671,6 +701,17 @@ static int parse_user_sigframe(struct user_ctxs *user,
/* ignore */
break;
+ case POE_MAGIC:
+ if (!system_supports_poe())
+ goto invalid;
+
+ if (user->poe)
+ goto invalid;
+
+ user->poe = (struct poe_context __user *)head;
+ user->poe_size = size;
+ break;
+
case SVE_MAGIC:
if (!system_supports_sve() && !system_supports_sme())
goto invalid;
@@ -857,6 +898,9 @@ static int restore_sigframe(struct pt_regs *regs,
if (err == 0 && system_supports_sme2() && user.zt)
err = restore_zt_context(&user);
+ if (err == 0 && system_supports_poe() && user.poe)
+ err = restore_poe_context(&user);
+
return err;
}
@@ -980,6 +1024,13 @@ static int setup_sigframe_layout(struct rt_sigframe_user_layout *user,
return err;
}
+ if (system_supports_poe()) {
+ err = sigframe_alloc(user, &user->poe_offset,
+ sizeof(struct poe_context));
+ if (err)
+ return err;
+ }
+
return sigframe_alloc_end(user);
}
@@ -1042,6 +1093,14 @@ static int setup_sigframe(struct rt_sigframe_user_layout *user,
err |= preserve_fpmr_context(fpmr_ctx);
}
+ if (system_supports_poe() && err == 0 && user->poe_offset) {
+ struct poe_context __user *poe_ctx =
+ apply_user_offset(user, user->poe_offset);
+
+ err |= preserve_poe_context(poe_ctx);
+ }
+
+
/* ZA state if present */
if (system_supports_sme() && err == 0 && user->za_offset) {
struct za_context __user *za_ctx =
@@ -1178,6 +1237,9 @@ static void setup_return(struct pt_regs *regs, struct k_sigaction *ka,
sme_smstop();
}
+ if (system_supports_poe())
+ write_sysreg_s(POR_EL0_INIT, SYS_POR_EL0);
+
if (ka->sa.sa_flags & SA_RESTORER)
sigtramp = ka->sa.sa_restorer;
else
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index f01f0fd7b7fe..3b3f6b56e733 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -68,7 +68,7 @@ enum ipi_msg_type {
IPI_RESCHEDULE,
IPI_CALL_FUNC,
IPI_CPU_STOP,
- IPI_CPU_CRASH_STOP,
+ IPI_CPU_STOP_NMI,
IPI_TIMER,
IPI_IRQ_WORK,
NR_IPI,
@@ -85,6 +85,8 @@ static int ipi_irq_base __ro_after_init;
static int nr_ipi __ro_after_init = NR_IPI;
static struct irq_desc *ipi_desc[MAX_IPI] __ro_after_init;
+static bool crash_stop;
+
static void ipi_setup(int cpu);
#ifdef CONFIG_HOTPLUG_CPU
@@ -823,7 +825,7 @@ static const char *ipi_types[MAX_IPI] __tracepoint_string = {
[IPI_RESCHEDULE] = "Rescheduling interrupts",
[IPI_CALL_FUNC] = "Function call interrupts",
[IPI_CPU_STOP] = "CPU stop interrupts",
- [IPI_CPU_CRASH_STOP] = "CPU stop (for crash dump) interrupts",
+ [IPI_CPU_STOP_NMI] = "CPU stop NMIs",
[IPI_TIMER] = "Timer broadcast interrupts",
[IPI_IRQ_WORK] = "IRQ work interrupts",
[IPI_CPU_BACKTRACE] = "CPU backtrace interrupts",
@@ -867,9 +869,9 @@ void arch_irq_work_raise(void)
}
#endif
-static void __noreturn local_cpu_stop(void)
+static void __noreturn local_cpu_stop(unsigned int cpu)
{
- set_cpu_online(smp_processor_id(), false);
+ set_cpu_online(cpu, false);
local_daif_mask();
sdei_mask_local_cpu();
@@ -883,21 +885,26 @@ static void __noreturn local_cpu_stop(void)
*/
void __noreturn panic_smp_self_stop(void)
{
- local_cpu_stop();
+ local_cpu_stop(smp_processor_id());
}
-#ifdef CONFIG_KEXEC_CORE
-static atomic_t waiting_for_crash_ipi = ATOMIC_INIT(0);
-#endif
-
static void __noreturn ipi_cpu_crash_stop(unsigned int cpu, struct pt_regs *regs)
{
#ifdef CONFIG_KEXEC_CORE
+ /*
+ * Use local_daif_mask() instead of local_irq_disable() to make sure
+ * that pseudo-NMIs are disabled. The "crash stop" code starts with
+ * an IRQ and falls back to NMI (which might be pseudo). If the IRQ
+ * finally goes through right as we're timing out then the NMI could
+ * interrupt us. It's better to prevent the NMI and let the IRQ
+ * finish since the pt_regs will be better.
+ */
+ local_daif_mask();
+
crash_save_cpu(regs, cpu);
- atomic_dec(&waiting_for_crash_ipi);
+ set_cpu_online(cpu, false);
- local_irq_disable();
sdei_mask_local_cpu();
if (IS_ENABLED(CONFIG_HOTPLUG_CPU))
@@ -962,14 +969,12 @@ static void do_handle_IPI(int ipinr)
break;
case IPI_CPU_STOP:
- local_cpu_stop();
- break;
-
- case IPI_CPU_CRASH_STOP:
- if (IS_ENABLED(CONFIG_KEXEC_CORE)) {
+ case IPI_CPU_STOP_NMI:
+ if (IS_ENABLED(CONFIG_KEXEC_CORE) && crash_stop) {
ipi_cpu_crash_stop(cpu, get_irq_regs());
-
unreachable();
+ } else {
+ local_cpu_stop(cpu);
}
break;
@@ -1024,8 +1029,7 @@ static bool ipi_should_be_nmi(enum ipi_msg_type ipi)
return false;
switch (ipi) {
- case IPI_CPU_STOP:
- case IPI_CPU_CRASH_STOP:
+ case IPI_CPU_STOP_NMI:
case IPI_CPU_BACKTRACE:
case IPI_KGDB_ROUNDUP:
return true;
@@ -1138,79 +1142,109 @@ static inline unsigned int num_other_online_cpus(void)
void smp_send_stop(void)
{
+ static unsigned long stop_in_progress;
+ cpumask_t mask;
unsigned long timeout;
- if (num_other_online_cpus()) {
- cpumask_t mask;
+ /*
+ * If this cpu is the only one alive at this point in time, online or
+ * not, there are no stop messages to be sent around, so just back out.
+ */
+ if (num_other_online_cpus() == 0)
+ goto skip_ipi;
- cpumask_copy(&mask, cpu_online_mask);
- cpumask_clear_cpu(smp_processor_id(), &mask);
+ /* Only proceed if this is the first CPU to reach this code */
+ if (test_and_set_bit(0, &stop_in_progress))
+ return;
- if (system_state <= SYSTEM_RUNNING)
- pr_crit("SMP: stopping secondary CPUs\n");
- smp_cross_call(&mask, IPI_CPU_STOP);
- }
+ /*
+ * Send an IPI to all currently online CPUs except the CPU running
+ * this code.
+ *
+ * NOTE: we don't do anything here to prevent other CPUs from coming
+ * online after we snapshot `cpu_online_mask`. Ideally, the calling code
+ * should do something to prevent other CPUs from coming up. This code
+ * can be called in the panic path and thus it doesn't seem wise to
+ * grab the CPU hotplug mutex ourselves. Worst case:
+ * - If a CPU comes online as we're running, we'll likely notice it
+ * during the 1 second wait below and then we'll catch it when we try
+ * with an NMI (assuming NMIs are enabled) since we re-snapshot the
+ * mask before sending an NMI.
+ * - If we leave the function and see that CPUs are still online we'll
+ * at least print a warning. Especially without NMIs this function
+ * isn't foolproof anyway so calling code will just have to accept
+ * the fact that there could be cases where a CPU can't be stopped.
+ */
+ cpumask_copy(&mask, cpu_online_mask);
+ cpumask_clear_cpu(smp_processor_id(), &mask);
- /* Wait up to one second for other CPUs to stop */
+ if (system_state <= SYSTEM_RUNNING)
+ pr_crit("SMP: stopping secondary CPUs\n");
+
+ /*
+ * Start with a normal IPI and wait up to one second for other CPUs to
+ * stop. We do this first because it gives other processors a chance
+ * to exit critical sections / drop locks and makes the rest of the
+ * stop process (especially console flush) more robust.
+ */
+ smp_cross_call(&mask, IPI_CPU_STOP);
timeout = USEC_PER_SEC;
while (num_other_online_cpus() && timeout--)
udelay(1);
- if (num_other_online_cpus())
+ /*
+ * If CPUs are still online, try an NMI. There's no excuse for this to
+ * be slow, so we only give them an extra 10 ms to respond.
+ */
+ if (num_other_online_cpus() && ipi_should_be_nmi(IPI_CPU_STOP_NMI)) {
+ smp_rmb();
+ cpumask_copy(&mask, cpu_online_mask);
+ cpumask_clear_cpu(smp_processor_id(), &mask);
+
+ pr_info("SMP: retry stop with NMI for CPUs %*pbl\n",
+ cpumask_pr_args(&mask));
+
+ smp_cross_call(&mask, IPI_CPU_STOP_NMI);
+ timeout = USEC_PER_MSEC * 10;
+ while (num_other_online_cpus() && timeout--)
+ udelay(1);
+ }
+
+ if (num_other_online_cpus()) {
+ smp_rmb();
+ cpumask_copy(&mask, cpu_online_mask);
+ cpumask_clear_cpu(smp_processor_id(), &mask);
+
pr_warn("SMP: failed to stop secondary CPUs %*pbl\n",
- cpumask_pr_args(cpu_online_mask));
+ cpumask_pr_args(&mask));
+ }
+skip_ipi:
sdei_mask_local_cpu();
}
#ifdef CONFIG_KEXEC_CORE
void crash_smp_send_stop(void)
{
- static int cpus_stopped;
- cpumask_t mask;
- unsigned long timeout;
-
/*
* This function can be called twice in panic path, but obviously
* we execute this only once.
+ *
+ * We use this same boolean to tell whether the IPI we send was a
+ * stop or a "crash stop".
*/
- if (cpus_stopped)
+ if (crash_stop)
return;
+ crash_stop = 1;
- cpus_stopped = 1;
+ smp_send_stop();
- /*
- * If this cpu is the only one alive at this point in time, online or
- * not, there are no stop messages to be sent around, so just back out.
- */
- if (num_other_online_cpus() == 0)
- goto skip_ipi;
-
- cpumask_copy(&mask, cpu_online_mask);
- cpumask_clear_cpu(smp_processor_id(), &mask);
-
- atomic_set(&waiting_for_crash_ipi, num_other_online_cpus());
-
- pr_crit("SMP: stopping secondary CPUs\n");
- smp_cross_call(&mask, IPI_CPU_CRASH_STOP);
-
- /* Wait up to one second for other CPUs to stop */
- timeout = USEC_PER_SEC;
- while ((atomic_read(&waiting_for_crash_ipi) > 0) && timeout--)
- udelay(1);
-
- if (atomic_read(&waiting_for_crash_ipi) > 0)
- pr_warn("SMP: failed to stop secondary CPUs %*pbl\n",
- cpumask_pr_args(&mask));
-
-skip_ipi:
- sdei_mask_local_cpu();
sdei_handler_abort();
}
bool smp_crash_stop_failed(void)
{
- return (atomic_read(&waiting_for_crash_ipi) > 0);
+ return num_other_online_cpus() != 0;
}
#endif
diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
index 9e22683aa921..563cbce11126 100644
--- a/arch/arm64/kernel/traps.c
+++ b/arch/arm64/kernel/traps.c
@@ -273,6 +273,12 @@ void arm64_force_sig_fault(int signo, int code, unsigned long far,
force_sig_fault(signo, code, (void __user *)far);
}
+void arm64_force_sig_fault_pkey(unsigned long far, const char *str, int pkey)
+{
+ arm64_show_signal(SIGSEGV, str);
+ force_sig_pkuerr((void __user *)far, pkey);
+}
+
void arm64_force_sig_mceerr(int code, unsigned long far, short lsb,
const char *str)
{
@@ -601,18 +607,26 @@ static void ctr_read_handler(unsigned long esr, struct pt_regs *regs)
static void cntvct_read_handler(unsigned long esr, struct pt_regs *regs)
{
- int rt = ESR_ELx_SYS64_ISS_RT(esr);
+ if (test_thread_flag(TIF_TSC_SIGSEGV)) {
+ force_sig(SIGSEGV);
+ } else {
+ int rt = ESR_ELx_SYS64_ISS_RT(esr);
- pt_regs_write_reg(regs, rt, arch_timer_read_counter());
- arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE);
+ pt_regs_write_reg(regs, rt, arch_timer_read_counter());
+ arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE);
+ }
}
static void cntfrq_read_handler(unsigned long esr, struct pt_regs *regs)
{
- int rt = ESR_ELx_SYS64_ISS_RT(esr);
+ if (test_thread_flag(TIF_TSC_SIGSEGV)) {
+ force_sig(SIGSEGV);
+ } else {
+ int rt = ESR_ELx_SYS64_ISS_RT(esr);
- pt_regs_write_reg(regs, rt, arch_timer_get_rate());
- arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE);
+ pt_regs_write_reg(regs, rt, arch_timer_get_rate());
+ arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE);
+ }
}
static void mrs_handler(unsigned long esr, struct pt_regs *regs)
diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig
index 8304eb342be9..ead632ad01b4 100644
--- a/arch/arm64/kvm/Kconfig
+++ b/arch/arm64/kvm/Kconfig
@@ -66,4 +66,21 @@ config PROTECTED_NVHE_STACKTRACE
If unsure, or not using protected nVHE (pKVM), say N.
+config PTDUMP_STAGE2_DEBUGFS
+ bool "Present the stage-2 pagetables to debugfs"
+ depends on KVM
+ depends on DEBUG_KERNEL
+ depends on DEBUG_FS
+ depends on GENERIC_PTDUMP
+ select PTDUMP_CORE
+ default n
+ help
+ Say Y here if you want to show the stage-2 kernel pagetables
+ layout in a debugfs file. This information is only useful for kernel developers
+ who are working in architecture specific areas of the kernel.
+ It is probably not a good idea to enable this feature in a production
+ kernel.
+
+ If in doubt, say N.
+
endif # VIRTUALIZATION
diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile
index 86a629aaf0a1..3cf7adb2b503 100644
--- a/arch/arm64/kvm/Makefile
+++ b/arch/arm64/kvm/Makefile
@@ -17,7 +17,7 @@ kvm-y += arm.o mmu.o mmio.o psci.o hypercalls.o pvtime.o \
inject_fault.o va_layout.o handle_exit.o \
guest.o debug.o reset.o sys_regs.o stacktrace.o \
vgic-sys-reg-v3.o fpsimd.o pkvm.o \
- arch_timer.o trng.o vmid.o emulate-nested.o nested.o \
+ arch_timer.o trng.o vmid.o emulate-nested.o nested.o at.o \
vgic/vgic.o vgic/vgic-init.o \
vgic/vgic-irqfd.o vgic/vgic-v2.o \
vgic/vgic-v3.o vgic/vgic-v4.o \
@@ -27,6 +27,7 @@ kvm-y += arm.o mmu.o mmio.o psci.o hypercalls.o pvtime.o \
kvm-$(CONFIG_HW_PERF_EVENTS) += pmu-emul.o pmu.o
kvm-$(CONFIG_ARM64_PTR_AUTH) += pauth.o
+kvm-$(CONFIG_PTDUMP_STAGE2_DEBUGFS) += ptdump.o
always-y := hyp_constants.h hyp-constants.s
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index 9bef7638342e..fe0764173cd0 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -46,6 +46,8 @@
#include <kvm/arm_pmu.h>
#include <kvm/arm_psci.h>
+#include "sys_regs.h"
+
static enum kvm_mode kvm_mode = KVM_MODE_DEFAULT;
enum kvm_wfx_trap_policy {
@@ -228,6 +230,7 @@ vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
void kvm_arch_create_vm_debugfs(struct kvm *kvm)
{
kvm_sys_regs_create_debugfs(kvm);
+ kvm_s2_ptdump_create_debugfs(kvm);
}
static void kvm_destroy_mpidr_data(struct kvm *kvm)
@@ -821,15 +824,13 @@ int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu)
return ret;
}
- if (vcpu_has_nv(vcpu)) {
- ret = kvm_init_nv_sysregs(vcpu->kvm);
- if (ret)
- return ret;
- }
+ ret = kvm_finalize_sys_regs(vcpu);
+ if (ret)
+ return ret;
/*
- * This needs to happen after NV has imposed its own restrictions on
- * the feature set
+ * This needs to happen after any restriction has been applied
+ * to the feature set.
*/
kvm_calculate_traps(vcpu);
diff --git a/arch/arm64/kvm/at.c b/arch/arm64/kvm/at.c
new file mode 100644
index 000000000000..39f0e87a340e
--- /dev/null
+++ b/arch/arm64/kvm/at.c
@@ -0,0 +1,1101 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2017 - Linaro Ltd
+ * Author: Jintack Lim <jintack.lim@linaro.org>
+ */
+
+#include <linux/kvm_host.h>
+
+#include <asm/esr.h>
+#include <asm/kvm_hyp.h>
+#include <asm/kvm_mmu.h>
+
+enum trans_regime {
+ TR_EL10,
+ TR_EL20,
+ TR_EL2,
+};
+
+struct s1_walk_info {
+ u64 baddr;
+ enum trans_regime regime;
+ unsigned int max_oa_bits;
+ unsigned int pgshift;
+ unsigned int txsz;
+ int sl;
+ bool hpd;
+ bool be;
+ bool s2;
+};
+
+struct s1_walk_result {
+ union {
+ struct {
+ u64 desc;
+ u64 pa;
+ s8 level;
+ u8 APTable;
+ bool UXNTable;
+ bool PXNTable;
+ };
+ struct {
+ u8 fst;
+ bool ptw;
+ bool s2;
+ };
+ };
+ bool failed;
+};
+
+static void fail_s1_walk(struct s1_walk_result *wr, u8 fst, bool ptw, bool s2)
+{
+ wr->fst = fst;
+ wr->ptw = ptw;
+ wr->s2 = s2;
+ wr->failed = true;
+}
+
+#define S1_MMU_DISABLED (-127)
+
+static int get_ia_size(struct s1_walk_info *wi)
+{
+ return 64 - wi->txsz;
+}
+
+/* Return true if the IPA is out of the OA range */
+static bool check_output_size(u64 ipa, struct s1_walk_info *wi)
+{
+ return wi->max_oa_bits < 48 && (ipa & GENMASK_ULL(47, wi->max_oa_bits));
+}
+
+/* Return the translation regime that applies to an AT instruction */
+static enum trans_regime compute_translation_regime(struct kvm_vcpu *vcpu, u32 op)
+{
+ /*
+ * We only get here from guest EL2, so the translation
+ * regime AT applies to is solely defined by {E2H,TGE}.
+ */
+ switch (op) {
+ case OP_AT_S1E2R:
+ case OP_AT_S1E2W:
+ case OP_AT_S1E2A:
+ return vcpu_el2_e2h_is_set(vcpu) ? TR_EL20 : TR_EL2;
+ break;
+ default:
+ return (vcpu_el2_e2h_is_set(vcpu) &&
+ vcpu_el2_tge_is_set(vcpu)) ? TR_EL20 : TR_EL10;
+ }
+}
+
+static int setup_s1_walk(struct kvm_vcpu *vcpu, u32 op, struct s1_walk_info *wi,
+ struct s1_walk_result *wr, u64 va)
+{
+ u64 hcr, sctlr, tcr, tg, ps, ia_bits, ttbr;
+ unsigned int stride, x;
+ bool va55, tbi, lva, as_el0;
+
+ hcr = __vcpu_sys_reg(vcpu, HCR_EL2);
+
+ wi->regime = compute_translation_regime(vcpu, op);
+ as_el0 = (op == OP_AT_S1E0R || op == OP_AT_S1E0W);
+
+ va55 = va & BIT(55);
+
+ if (wi->regime == TR_EL2 && va55)
+ goto addrsz;
+
+ wi->s2 = wi->regime == TR_EL10 && (hcr & (HCR_VM | HCR_DC));
+
+ switch (wi->regime) {
+ case TR_EL10:
+ sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL1);
+ tcr = vcpu_read_sys_reg(vcpu, TCR_EL1);
+ ttbr = (va55 ?
+ vcpu_read_sys_reg(vcpu, TTBR1_EL1) :
+ vcpu_read_sys_reg(vcpu, TTBR0_EL1));
+ break;
+ case TR_EL2:
+ case TR_EL20:
+ sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL2);
+ tcr = vcpu_read_sys_reg(vcpu, TCR_EL2);
+ ttbr = (va55 ?
+ vcpu_read_sys_reg(vcpu, TTBR1_EL2) :
+ vcpu_read_sys_reg(vcpu, TTBR0_EL2));
+ break;
+ default:
+ BUG();
+ }
+
+ tbi = (wi->regime == TR_EL2 ?
+ FIELD_GET(TCR_EL2_TBI, tcr) :
+ (va55 ?
+ FIELD_GET(TCR_TBI1, tcr) :
+ FIELD_GET(TCR_TBI0, tcr)));
+
+ if (!tbi && (u64)sign_extend64(va, 55) != va)
+ goto addrsz;
+
+ va = (u64)sign_extend64(va, 55);
+
+ /* Let's put the MMU disabled case aside immediately */
+ switch (wi->regime) {
+ case TR_EL10:
+ /*
+ * If dealing with the EL1&0 translation regime, 3 things
+ * can disable the S1 translation:
+ *
+ * - HCR_EL2.DC = 1
+ * - HCR_EL2.{E2H,TGE} = {0,1}
+ * - SCTLR_EL1.M = 0
+ *
+ * The TGE part is interesting. If we have decided that this
+ * is EL1&0, then it means that either {E2H,TGE} == {1,0} or
+ * {0,x}, and we only need to test for TGE == 1.
+ */
+ if (hcr & (HCR_DC | HCR_TGE)) {
+ wr->level = S1_MMU_DISABLED;
+ break;
+ }
+ fallthrough;
+ case TR_EL2:
+ case TR_EL20:
+ if (!(sctlr & SCTLR_ELx_M))
+ wr->level = S1_MMU_DISABLED;
+ break;
+ }
+
+ if (wr->level == S1_MMU_DISABLED) {
+ if (va >= BIT(kvm_get_pa_bits(vcpu->kvm)))
+ goto addrsz;
+
+ wr->pa = va;
+ return 0;
+ }
+
+ wi->be = sctlr & SCTLR_ELx_EE;
+
+ wi->hpd = kvm_has_feat(vcpu->kvm, ID_AA64MMFR1_EL1, HPDS, IMP);
+ wi->hpd &= (wi->regime == TR_EL2 ?
+ FIELD_GET(TCR_EL2_HPD, tcr) :
+ (va55 ?
+ FIELD_GET(TCR_HPD1, tcr) :
+ FIELD_GET(TCR_HPD0, tcr)));
+
+ /* Someone was silly enough to encode TG0/TG1 differently */
+ if (va55) {
+ wi->txsz = FIELD_GET(TCR_T1SZ_MASK, tcr);
+ tg = FIELD_GET(TCR_TG1_MASK, tcr);
+
+ switch (tg << TCR_TG1_SHIFT) {
+ case TCR_TG1_4K:
+ wi->pgshift = 12; break;
+ case TCR_TG1_16K:
+ wi->pgshift = 14; break;
+ case TCR_TG1_64K:
+ default: /* IMPDEF: treat any other value as 64k */
+ wi->pgshift = 16; break;
+ }
+ } else {
+ wi->txsz = FIELD_GET(TCR_T0SZ_MASK, tcr);
+ tg = FIELD_GET(TCR_TG0_MASK, tcr);
+
+ switch (tg << TCR_TG0_SHIFT) {
+ case TCR_TG0_4K:
+ wi->pgshift = 12; break;
+ case TCR_TG0_16K:
+ wi->pgshift = 14; break;
+ case TCR_TG0_64K:
+ default: /* IMPDEF: treat any other value as 64k */
+ wi->pgshift = 16; break;
+ }
+ }
+
+ /* R_PLCGL, R_YXNYW */
+ if (!kvm_has_feat_enum(vcpu->kvm, ID_AA64MMFR2_EL1, ST, 48_47)) {
+ if (wi->txsz > 39)
+ goto transfault_l0;
+ } else {
+ if (wi->txsz > 48 || (BIT(wi->pgshift) == SZ_64K && wi->txsz > 47))
+ goto transfault_l0;
+ }
+
+ /* R_GTJBY, R_SXWGM */
+ switch (BIT(wi->pgshift)) {
+ case SZ_4K:
+ lva = kvm_has_feat(vcpu->kvm, ID_AA64MMFR0_EL1, TGRAN4, 52_BIT);
+ lva &= tcr & (wi->regime == TR_EL2 ? TCR_EL2_DS : TCR_DS);
+ break;
+ case SZ_16K:
+ lva = kvm_has_feat(vcpu->kvm, ID_AA64MMFR0_EL1, TGRAN16, 52_BIT);
+ lva &= tcr & (wi->regime == TR_EL2 ? TCR_EL2_DS : TCR_DS);
+ break;
+ case SZ_64K:
+ lva = kvm_has_feat(vcpu->kvm, ID_AA64MMFR2_EL1, VARange, 52);
+ break;
+ }
+
+ if ((lva && wi->txsz < 12) || (!lva && wi->txsz < 16))
+ goto transfault_l0;
+
+ ia_bits = get_ia_size(wi);
+
+ /* R_YYVYV, I_THCZK */
+ if ((!va55 && va > GENMASK(ia_bits - 1, 0)) ||
+ (va55 && va < GENMASK(63, ia_bits)))
+ goto transfault_l0;
+
+ /* I_ZFSYQ */
+ if (wi->regime != TR_EL2 &&
+ (tcr & (va55 ? TCR_EPD1_MASK : TCR_EPD0_MASK)))
+ goto transfault_l0;
+
+ /* R_BNDVG and following statements */
+ if (kvm_has_feat(vcpu->kvm, ID_AA64MMFR2_EL1, E0PD, IMP) &&
+ as_el0 && (tcr & (va55 ? TCR_E0PD1 : TCR_E0PD0)))
+ goto transfault_l0;
+
+ /* AArch64.S1StartLevel() */
+ stride = wi->pgshift - 3;
+ wi->sl = 3 - (((ia_bits - 1) - wi->pgshift) / stride);
+
+ ps = (wi->regime == TR_EL2 ?
+ FIELD_GET(TCR_EL2_PS_MASK, tcr) : FIELD_GET(TCR_IPS_MASK, tcr));
+
+ wi->max_oa_bits = min(get_kvm_ipa_limit(), ps_to_output_size(ps));
+
+ /* Compute minimal alignment */
+ x = 3 + ia_bits - ((3 - wi->sl) * stride + wi->pgshift);
+
+ wi->baddr = ttbr & TTBRx_EL1_BADDR;
+
+ /* R_VPBBF */
+ if (check_output_size(wi->baddr, wi))
+ goto addrsz;
+
+ wi->baddr &= GENMASK_ULL(wi->max_oa_bits - 1, x);
+
+ return 0;
+
+addrsz: /* Address Size Fault level 0 */
+ fail_s1_walk(wr, ESR_ELx_FSC_ADDRSZ_L(0), false, false);
+ return -EFAULT;
+
+transfault_l0: /* Translation Fault level 0 */
+ fail_s1_walk(wr, ESR_ELx_FSC_FAULT_L(0), false, false);
+ return -EFAULT;
+}
+
+static int walk_s1(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
+ struct s1_walk_result *wr, u64 va)
+{
+ u64 va_top, va_bottom, baddr, desc;
+ int level, stride, ret;
+
+ level = wi->sl;
+ stride = wi->pgshift - 3;
+ baddr = wi->baddr;
+
+ va_top = get_ia_size(wi) - 1;
+
+ while (1) {
+ u64 index, ipa;
+
+ va_bottom = (3 - level) * stride + wi->pgshift;
+ index = (va & GENMASK_ULL(va_top, va_bottom)) >> (va_bottom - 3);
+
+ ipa = baddr | index;
+
+ if (wi->s2) {
+ struct kvm_s2_trans s2_trans = {};
+
+ ret = kvm_walk_nested_s2(vcpu, ipa, &s2_trans);
+ if (ret) {
+ fail_s1_walk(wr,
+ (s2_trans.esr & ~ESR_ELx_FSC_LEVEL) | level,
+ true, true);
+ return ret;
+ }
+
+ if (!kvm_s2_trans_readable(&s2_trans)) {
+ fail_s1_walk(wr, ESR_ELx_FSC_PERM_L(level),
+ true, true);
+
+ return -EPERM;
+ }
+
+ ipa = kvm_s2_trans_output(&s2_trans);
+ }
+
+ ret = kvm_read_guest(vcpu->kvm, ipa, &desc, sizeof(desc));
+ if (ret) {
+ fail_s1_walk(wr, ESR_ELx_FSC_SEA_TTW(level),
+ true, false);
+ return ret;
+ }
+
+ if (wi->be)
+ desc = be64_to_cpu((__force __be64)desc);
+ else
+ desc = le64_to_cpu((__force __le64)desc);
+
+ /* Invalid descriptor */
+ if (!(desc & BIT(0)))
+ goto transfault;
+
+ /* Block mapping, check validity down the line */
+ if (!(desc & BIT(1)))
+ break;
+
+ /* Page mapping */
+ if (level == 3)
+ break;
+
+ /* Table handling */
+ if (!wi->hpd) {
+ wr->APTable |= FIELD_GET(S1_TABLE_AP, desc);
+ wr->UXNTable |= FIELD_GET(PMD_TABLE_UXN, desc);
+ wr->PXNTable |= FIELD_GET(PMD_TABLE_PXN, desc);
+ }
+
+ baddr = desc & GENMASK_ULL(47, wi->pgshift);
+
+ /* Check for out-of-range OA */
+ if (check_output_size(baddr, wi))
+ goto addrsz;
+
+ /* Prepare for next round */
+ va_top = va_bottom - 1;
+ level++;
+ }
+
+ /* Block mapping, check the validity of the level */
+ if (!(desc & BIT(1))) {
+ bool valid_block = false;
+
+ switch (BIT(wi->pgshift)) {
+ case SZ_4K:
+ valid_block = level == 1 || level == 2;
+ break;
+ case SZ_16K:
+ case SZ_64K:
+ valid_block = level == 2;
+ break;
+ }
+
+ if (!valid_block)
+ goto transfault;
+ }
+
+ if (check_output_size(desc & GENMASK(47, va_bottom), wi))
+ goto addrsz;
+
+ va_bottom += contiguous_bit_shift(desc, wi, level);
+
+ wr->failed = false;
+ wr->level = level;
+ wr->desc = desc;
+ wr->pa = desc & GENMASK(47, va_bottom);
+ wr->pa |= va & GENMASK_ULL(va_bottom - 1, 0);
+
+ return 0;
+
+addrsz:
+ fail_s1_walk(wr, ESR_ELx_FSC_ADDRSZ_L(level), true, false);
+ return -EINVAL;
+transfault:
+ fail_s1_walk(wr, ESR_ELx_FSC_FAULT_L(level), true, false);
+ return -ENOENT;
+}
+
+struct mmu_config {
+ u64 ttbr0;
+ u64 ttbr1;
+ u64 tcr;
+ u64 mair;
+ u64 sctlr;
+ u64 vttbr;
+ u64 vtcr;
+ u64 hcr;
+};
+
+static void __mmu_config_save(struct mmu_config *config)
+{
+ config->ttbr0 = read_sysreg_el1(SYS_TTBR0);
+ config->ttbr1 = read_sysreg_el1(SYS_TTBR1);
+ config->tcr = read_sysreg_el1(SYS_TCR);
+ config->mair = read_sysreg_el1(SYS_MAIR);
+ config->sctlr = read_sysreg_el1(SYS_SCTLR);
+ config->vttbr = read_sysreg(vttbr_el2);
+ config->vtcr = read_sysreg(vtcr_el2);
+ config->hcr = read_sysreg(hcr_el2);
+}
+
+static void __mmu_config_restore(struct mmu_config *config)
+{
+ write_sysreg(config->hcr, hcr_el2);
+
+ /*
+ * ARM errata 1165522 and 1530923 require TGE to be 1 before
+ * we update the guest state.
+ */
+ asm(ALTERNATIVE("nop", "isb", ARM64_WORKAROUND_SPECULATIVE_AT));
+
+ write_sysreg_el1(config->ttbr0, SYS_TTBR0);
+ write_sysreg_el1(config->ttbr1, SYS_TTBR1);
+ write_sysreg_el1(config->tcr, SYS_TCR);
+ write_sysreg_el1(config->mair, SYS_MAIR);
+ write_sysreg_el1(config->sctlr, SYS_SCTLR);
+ write_sysreg(config->vttbr, vttbr_el2);
+ write_sysreg(config->vtcr, vtcr_el2);
+}
+
+static bool at_s1e1p_fast(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
+{
+ u64 host_pan;
+ bool fail;
+
+ host_pan = read_sysreg_s(SYS_PSTATE_PAN);
+ write_sysreg_s(*vcpu_cpsr(vcpu) & PSTATE_PAN, SYS_PSTATE_PAN);
+
+ switch (op) {
+ case OP_AT_S1E1RP:
+ fail = __kvm_at(OP_AT_S1E1RP, vaddr);
+ break;
+ case OP_AT_S1E1WP:
+ fail = __kvm_at(OP_AT_S1E1WP, vaddr);
+ break;
+ }
+
+ write_sysreg_s(host_pan, SYS_PSTATE_PAN);
+
+ return fail;
+}
+
+#define MEMATTR(ic, oc) (MEMATTR_##oc << 4 | MEMATTR_##ic)
+#define MEMATTR_NC 0b0100
+#define MEMATTR_Wt 0b1000
+#define MEMATTR_Wb 0b1100
+#define MEMATTR_WbRaWa 0b1111
+
+#define MEMATTR_IS_DEVICE(m) (((m) & GENMASK(7, 4)) == 0)
+
+static u8 s2_memattr_to_attr(u8 memattr)
+{
+ memattr &= 0b1111;
+
+ switch (memattr) {
+ case 0b0000:
+ case 0b0001:
+ case 0b0010:
+ case 0b0011:
+ return memattr << 2;
+ case 0b0100:
+ return MEMATTR(Wb, Wb);
+ case 0b0101:
+ return MEMATTR(NC, NC);
+ case 0b0110:
+ return MEMATTR(Wt, NC);
+ case 0b0111:
+ return MEMATTR(Wb, NC);
+ case 0b1000:
+ /* Reserved, assume NC */
+ return MEMATTR(NC, NC);
+ case 0b1001:
+ return MEMATTR(NC, Wt);
+ case 0b1010:
+ return MEMATTR(Wt, Wt);
+ case 0b1011:
+ return MEMATTR(Wb, Wt);
+ case 0b1100:
+ /* Reserved, assume NC */
+ return MEMATTR(NC, NC);
+ case 0b1101:
+ return MEMATTR(NC, Wb);
+ case 0b1110:
+ return MEMATTR(Wt, Wb);
+ case 0b1111:
+ return MEMATTR(Wb, Wb);
+ default:
+ unreachable();
+ }
+}
+
+static u8 combine_s1_s2_attr(u8 s1, u8 s2)
+{
+ bool transient;
+ u8 final = 0;
+
+ /* Upgrade transient s1 to non-transient to simplify things */
+ switch (s1) {
+ case 0b0001 ... 0b0011: /* Normal, Write-Through Transient */
+ transient = true;
+ s1 = MEMATTR_Wt | (s1 & GENMASK(1,0));
+ break;
+ case 0b0101 ... 0b0111: /* Normal, Write-Back Transient */
+ transient = true;
+ s1 = MEMATTR_Wb | (s1 & GENMASK(1,0));
+ break;
+ default:
+ transient = false;
+ }
+
+ /* S2CombineS1AttrHints() */
+ if ((s1 & GENMASK(3, 2)) == MEMATTR_NC ||
+ (s2 & GENMASK(3, 2)) == MEMATTR_NC)
+ final = MEMATTR_NC;
+ else if ((s1 & GENMASK(3, 2)) == MEMATTR_Wt ||
+ (s2 & GENMASK(3, 2)) == MEMATTR_Wt)
+ final = MEMATTR_Wt;
+ else
+ final = MEMATTR_Wb;
+
+ if (final != MEMATTR_NC) {
+ /* Inherit RaWa hints form S1 */
+ if (transient) {
+ switch (s1 & GENMASK(3, 2)) {
+ case MEMATTR_Wt:
+ final = 0;
+ break;
+ case MEMATTR_Wb:
+ final = MEMATTR_NC;
+ break;
+ }
+ }
+
+ final |= s1 & GENMASK(1, 0);
+ }
+
+ return final;
+}
+
+#define ATTR_NSH 0b00
+#define ATTR_RSV 0b01
+#define ATTR_OSH 0b10
+#define ATTR_ISH 0b11
+
+static u8 compute_sh(u8 attr, u64 desc)
+{
+ u8 sh;
+
+ /* Any form of device, as well as NC has SH[1:0]=0b10 */
+ if (MEMATTR_IS_DEVICE(attr) || attr == MEMATTR(NC, NC))
+ return ATTR_OSH;
+
+ sh = FIELD_GET(PTE_SHARED, desc);
+ if (sh == ATTR_RSV) /* Reserved, mapped to NSH */
+ sh = ATTR_NSH;
+
+ return sh;
+}
+
+static u8 combine_sh(u8 s1_sh, u8 s2_sh)
+{
+ if (s1_sh == ATTR_OSH || s2_sh == ATTR_OSH)
+ return ATTR_OSH;
+ if (s1_sh == ATTR_ISH || s2_sh == ATTR_ISH)
+ return ATTR_ISH;
+
+ return ATTR_NSH;
+}
+
+static u64 compute_par_s12(struct kvm_vcpu *vcpu, u64 s1_par,
+ struct kvm_s2_trans *tr)
+{
+ u8 s1_parattr, s2_memattr, final_attr;
+ u64 par;
+
+ /* If S2 has failed to translate, report the damage */
+ if (tr->esr) {
+ par = SYS_PAR_EL1_RES1;
+ par |= SYS_PAR_EL1_F;
+ par |= SYS_PAR_EL1_S;
+ par |= FIELD_PREP(SYS_PAR_EL1_FST, tr->esr);
+ return par;
+ }
+
+ s1_parattr = FIELD_GET(SYS_PAR_EL1_ATTR, s1_par);
+ s2_memattr = FIELD_GET(GENMASK(5, 2), tr->desc);
+
+ if (__vcpu_sys_reg(vcpu, HCR_EL2) & HCR_FWB) {
+ if (!kvm_has_feat(vcpu->kvm, ID_AA64PFR2_EL1, MTEPERM, IMP))
+ s2_memattr &= ~BIT(3);
+
+ /* Combination of R_VRJSW and R_RHWZM */
+ switch (s2_memattr) {
+ case 0b0101:
+ if (MEMATTR_IS_DEVICE(s1_parattr))
+ final_attr = s1_parattr;
+ else
+ final_attr = MEMATTR(NC, NC);
+ break;
+ case 0b0110:
+ case 0b1110:
+ final_attr = MEMATTR(WbRaWa, WbRaWa);
+ break;
+ case 0b0111:
+ case 0b1111:
+ /* Preserve S1 attribute */
+ final_attr = s1_parattr;
+ break;
+ case 0b0100:
+ case 0b1100:
+ case 0b1101:
+ /* Reserved, do something non-silly */
+ final_attr = s1_parattr;
+ break;
+ default:
+ /* MemAttr[2]=0, Device from S2 */
+ final_attr = s2_memattr & GENMASK(1,0) << 2;
+ }
+ } else {
+ /* Combination of R_HMNDG, R_TNHFM and R_GQFSF */
+ u8 s2_parattr = s2_memattr_to_attr(s2_memattr);
+
+ if (MEMATTR_IS_DEVICE(s1_parattr) ||
+ MEMATTR_IS_DEVICE(s2_parattr)) {
+ final_attr = min(s1_parattr, s2_parattr);
+ } else {
+ /* At this stage, this is memory vs memory */
+ final_attr = combine_s1_s2_attr(s1_parattr & 0xf,
+ s2_parattr & 0xf);
+ final_attr |= combine_s1_s2_attr(s1_parattr >> 4,
+ s2_parattr >> 4) << 4;
+ }
+ }
+
+ if ((__vcpu_sys_reg(vcpu, HCR_EL2) & HCR_CD) &&
+ !MEMATTR_IS_DEVICE(final_attr))
+ final_attr = MEMATTR(NC, NC);
+
+ par = FIELD_PREP(SYS_PAR_EL1_ATTR, final_attr);
+ par |= tr->output & GENMASK(47, 12);
+ par |= FIELD_PREP(SYS_PAR_EL1_SH,
+ combine_sh(FIELD_GET(SYS_PAR_EL1_SH, s1_par),
+ compute_sh(final_attr, tr->desc)));
+
+ return par;
+}
+
+static u64 compute_par_s1(struct kvm_vcpu *vcpu, struct s1_walk_result *wr,
+ enum trans_regime regime)
+{
+ u64 par;
+
+ if (wr->failed) {
+ par = SYS_PAR_EL1_RES1;
+ par |= SYS_PAR_EL1_F;
+ par |= FIELD_PREP(SYS_PAR_EL1_FST, wr->fst);
+ par |= wr->ptw ? SYS_PAR_EL1_PTW : 0;
+ par |= wr->s2 ? SYS_PAR_EL1_S : 0;
+ } else if (wr->level == S1_MMU_DISABLED) {
+ /* MMU off or HCR_EL2.DC == 1 */
+ par = SYS_PAR_EL1_NSE;
+ par |= wr->pa & GENMASK_ULL(47, 12);
+
+ if (regime == TR_EL10 &&
+ (__vcpu_sys_reg(vcpu, HCR_EL2) & HCR_DC)) {
+ par |= FIELD_PREP(SYS_PAR_EL1_ATTR,
+ MEMATTR(WbRaWa, WbRaWa));
+ par |= FIELD_PREP(SYS_PAR_EL1_SH, ATTR_NSH);
+ } else {
+ par |= FIELD_PREP(SYS_PAR_EL1_ATTR, 0); /* nGnRnE */
+ par |= FIELD_PREP(SYS_PAR_EL1_SH, ATTR_OSH);
+ }
+ } else {
+ u64 mair, sctlr;
+ u8 sh;
+
+ par = SYS_PAR_EL1_NSE;
+
+ mair = (regime == TR_EL10 ?
+ vcpu_read_sys_reg(vcpu, MAIR_EL1) :
+ vcpu_read_sys_reg(vcpu, MAIR_EL2));
+
+ mair >>= FIELD_GET(PTE_ATTRINDX_MASK, wr->desc) * 8;
+ mair &= 0xff;
+
+ sctlr = (regime == TR_EL10 ?
+ vcpu_read_sys_reg(vcpu, SCTLR_EL1) :
+ vcpu_read_sys_reg(vcpu, SCTLR_EL2));
+
+ /* Force NC for memory if SCTLR_ELx.C is clear */
+ if (!(sctlr & SCTLR_EL1_C) && !MEMATTR_IS_DEVICE(mair))
+ mair = MEMATTR(NC, NC);
+
+ par |= FIELD_PREP(SYS_PAR_EL1_ATTR, mair);
+ par |= wr->pa & GENMASK_ULL(47, 12);
+
+ sh = compute_sh(mair, wr->desc);
+ par |= FIELD_PREP(SYS_PAR_EL1_SH, sh);
+ }
+
+ return par;
+}
+
+static bool pan3_enabled(struct kvm_vcpu *vcpu, enum trans_regime regime)
+{
+ u64 sctlr;
+
+ if (!kvm_has_feat(vcpu->kvm, ID_AA64MMFR1_EL1, PAN, PAN3))
+ return false;
+
+ if (regime == TR_EL10)
+ sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL1);
+ else
+ sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL2);
+
+ return sctlr & SCTLR_EL1_EPAN;
+}
+
+static u64 handle_at_slow(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
+{
+ bool perm_fail, ur, uw, ux, pr, pw, px;
+ struct s1_walk_result wr = {};
+ struct s1_walk_info wi = {};
+ int ret, idx;
+
+ ret = setup_s1_walk(vcpu, op, &wi, &wr, vaddr);
+ if (ret)
+ goto compute_par;
+
+ if (wr.level == S1_MMU_DISABLED)
+ goto compute_par;
+
+ idx = srcu_read_lock(&vcpu->kvm->srcu);
+
+ ret = walk_s1(vcpu, &wi, &wr, vaddr);
+
+ srcu_read_unlock(&vcpu->kvm->srcu, idx);
+
+ if (ret)
+ goto compute_par;
+
+ /* FIXME: revisit when adding indirect permission support */
+ /* AArch64.S1DirectBasePermissions() */
+ if (wi.regime != TR_EL2) {
+ switch (FIELD_GET(PTE_USER | PTE_RDONLY, wr.desc)) {
+ case 0b00:
+ pr = pw = true;
+ ur = uw = false;
+ break;
+ case 0b01:
+ pr = pw = ur = uw = true;
+ break;
+ case 0b10:
+ pr = true;
+ pw = ur = uw = false;
+ break;
+ case 0b11:
+ pr = ur = true;
+ pw = uw = false;
+ break;
+ }
+
+ switch (wr.APTable) {
+ case 0b00:
+ break;
+ case 0b01:
+ ur = uw = false;
+ break;
+ case 0b10:
+ pw = uw = false;
+ break;
+ case 0b11:
+ pw = ur = uw = false;
+ break;
+ }
+
+ /* We don't use px for anything yet, but hey... */
+ px = !((wr.desc & PTE_PXN) || wr.PXNTable || uw);
+ ux = !((wr.desc & PTE_UXN) || wr.UXNTable);
+
+ if (op == OP_AT_S1E1RP || op == OP_AT_S1E1WP) {
+ bool pan;
+
+ pan = *vcpu_cpsr(vcpu) & PSR_PAN_BIT;
+ pan &= ur || uw || (pan3_enabled(vcpu, wi.regime) && ux);
+ pw &= !pan;
+ pr &= !pan;
+ }
+ } else {
+ ur = uw = ux = false;
+
+ if (!(wr.desc & PTE_RDONLY)) {
+ pr = pw = true;
+ } else {
+ pr = true;
+ pw = false;
+ }
+
+ if (wr.APTable & BIT(1))
+ pw = false;
+
+ /* XN maps to UXN */
+ px = !((wr.desc & PTE_UXN) || wr.UXNTable);
+ }
+
+ perm_fail = false;
+
+ switch (op) {
+ case OP_AT_S1E1RP:
+ case OP_AT_S1E1R:
+ case OP_AT_S1E2R:
+ perm_fail = !pr;
+ break;
+ case OP_AT_S1E1WP:
+ case OP_AT_S1E1W:
+ case OP_AT_S1E2W:
+ perm_fail = !pw;
+ break;
+ case OP_AT_S1E0R:
+ perm_fail = !ur;
+ break;
+ case OP_AT_S1E0W:
+ perm_fail = !uw;
+ break;
+ case OP_AT_S1E1A:
+ case OP_AT_S1E2A:
+ break;
+ default:
+ BUG();
+ }
+
+ if (perm_fail)
+ fail_s1_walk(&wr, ESR_ELx_FSC_PERM_L(wr.level), false, false);
+
+compute_par:
+ return compute_par_s1(vcpu, &wr, wi.regime);
+}
+
+/*
+ * Return the PAR_EL1 value as the result of a valid translation.
+ *
+ * If the translation is unsuccessful, the value may only contain
+ * PAR_EL1.F, and cannot be taken at face value. It isn't an
+ * indication of the translation having failed, only that the fast
+ * path did not succeed, *unless* it indicates a S1 permission fault.
+ */
+static u64 __kvm_at_s1e01_fast(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
+{
+ struct mmu_config config;
+ struct kvm_s2_mmu *mmu;
+ bool fail;
+ u64 par;
+
+ par = SYS_PAR_EL1_F;
+
+ /*
+ * We've trapped, so everything is live on the CPU. As we will
+ * be switching contexts behind everybody's back, disable
+ * interrupts while holding the mmu lock.
+ */
+ guard(write_lock_irqsave)(&vcpu->kvm->mmu_lock);
+
+ /*
+ * If HCR_EL2.{E2H,TGE} == {1,1}, the MMU context is already
+ * the right one (as we trapped from vEL2). If not, save the
+ * full MMU context.
+ */
+ if (vcpu_el2_e2h_is_set(vcpu) && vcpu_el2_tge_is_set(vcpu))
+ goto skip_mmu_switch;
+
+ /*
+ * Obtaining the S2 MMU for a L2 is horribly racy, and we may not
+ * find it (recycled by another vcpu, for example). When this
+ * happens, admit defeat immediately and use the SW (slow) path.
+ */
+ mmu = lookup_s2_mmu(vcpu);
+ if (!mmu)
+ return par;
+
+ __mmu_config_save(&config);
+
+ write_sysreg_el1(vcpu_read_sys_reg(vcpu, TTBR0_EL1), SYS_TTBR0);
+ write_sysreg_el1(vcpu_read_sys_reg(vcpu, TTBR1_EL1), SYS_TTBR1);
+ write_sysreg_el1(vcpu_read_sys_reg(vcpu, TCR_EL1), SYS_TCR);
+ write_sysreg_el1(vcpu_read_sys_reg(vcpu, MAIR_EL1), SYS_MAIR);
+ write_sysreg_el1(vcpu_read_sys_reg(vcpu, SCTLR_EL1), SYS_SCTLR);
+ __load_stage2(mmu, mmu->arch);
+
+skip_mmu_switch:
+ /* Clear TGE, enable S2 translation, we're rolling */
+ write_sysreg((config.hcr & ~HCR_TGE) | HCR_VM, hcr_el2);
+ isb();
+
+ switch (op) {
+ case OP_AT_S1E1RP:
+ case OP_AT_S1E1WP:
+ fail = at_s1e1p_fast(vcpu, op, vaddr);
+ break;
+ case OP_AT_S1E1R:
+ fail = __kvm_at(OP_AT_S1E1R, vaddr);
+ break;
+ case OP_AT_S1E1W:
+ fail = __kvm_at(OP_AT_S1E1W, vaddr);
+ break;
+ case OP_AT_S1E0R:
+ fail = __kvm_at(OP_AT_S1E0R, vaddr);
+ break;
+ case OP_AT_S1E0W:
+ fail = __kvm_at(OP_AT_S1E0W, vaddr);
+ break;
+ case OP_AT_S1E1A:
+ fail = __kvm_at(OP_AT_S1E1A, vaddr);
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ fail = true;
+ break;
+ }
+
+ if (!fail)
+ par = read_sysreg_par();
+
+ if (!(vcpu_el2_e2h_is_set(vcpu) && vcpu_el2_tge_is_set(vcpu)))
+ __mmu_config_restore(&config);
+
+ return par;
+}
+
+static bool par_check_s1_perm_fault(u64 par)
+{
+ u8 fst = FIELD_GET(SYS_PAR_EL1_FST, par);
+
+ return ((fst & ESR_ELx_FSC_TYPE) == ESR_ELx_FSC_PERM &&
+ !(par & SYS_PAR_EL1_S));
+}
+
+void __kvm_at_s1e01(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
+{
+ u64 par = __kvm_at_s1e01_fast(vcpu, op, vaddr);
+
+ /*
+ * If PAR_EL1 reports that AT failed on a S1 permission fault, we
+ * know for sure that the PTW was able to walk the S1 tables and
+ * there's nothing else to do.
+ *
+ * If AT failed for any other reason, then we must walk the guest S1
+ * to emulate the instruction.
+ */
+ if ((par & SYS_PAR_EL1_F) && !par_check_s1_perm_fault(par))
+ par = handle_at_slow(vcpu, op, vaddr);
+
+ vcpu_write_sys_reg(vcpu, par, PAR_EL1);
+}
+
+void __kvm_at_s1e2(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
+{
+ u64 par;
+
+ /*
+ * We've trapped, so everything is live on the CPU. As we will be
+ * switching context behind everybody's back, disable interrupts...
+ */
+ scoped_guard(write_lock_irqsave, &vcpu->kvm->mmu_lock) {
+ struct kvm_s2_mmu *mmu;
+ u64 val, hcr;
+ bool fail;
+
+ mmu = &vcpu->kvm->arch.mmu;
+
+ val = hcr = read_sysreg(hcr_el2);
+ val &= ~HCR_TGE;
+ val |= HCR_VM;
+
+ if (!vcpu_el2_e2h_is_set(vcpu))
+ val |= HCR_NV | HCR_NV1;
+
+ write_sysreg(val, hcr_el2);
+ isb();
+
+ par = SYS_PAR_EL1_F;
+
+ switch (op) {
+ case OP_AT_S1E2R:
+ fail = __kvm_at(OP_AT_S1E1R, vaddr);
+ break;
+ case OP_AT_S1E2W:
+ fail = __kvm_at(OP_AT_S1E1W, vaddr);
+ break;
+ case OP_AT_S1E2A:
+ fail = __kvm_at(OP_AT_S1E1A, vaddr);
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ fail = true;
+ }
+
+ isb();
+
+ if (!fail)
+ par = read_sysreg_par();
+
+ write_sysreg(hcr, hcr_el2);
+ isb();
+ }
+
+ /* We failed the translation, let's replay it in slow motion */
+ if ((par & SYS_PAR_EL1_F) && !par_check_s1_perm_fault(par))
+ par = handle_at_slow(vcpu, op, vaddr);
+
+ vcpu_write_sys_reg(vcpu, par, PAR_EL1);
+}
+
+void __kvm_at_s12(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
+{
+ struct kvm_s2_trans out = {};
+ u64 ipa, par;
+ bool write;
+ int ret;
+
+ /* Do the stage-1 translation */
+ switch (op) {
+ case OP_AT_S12E1R:
+ op = OP_AT_S1E1R;
+ write = false;
+ break;
+ case OP_AT_S12E1W:
+ op = OP_AT_S1E1W;
+ write = true;
+ break;
+ case OP_AT_S12E0R:
+ op = OP_AT_S1E0R;
+ write = false;
+ break;
+ case OP_AT_S12E0W:
+ op = OP_AT_S1E0W;
+ write = true;
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ return;
+ }
+
+ __kvm_at_s1e01(vcpu, op, vaddr);
+ par = vcpu_read_sys_reg(vcpu, PAR_EL1);
+ if (par & SYS_PAR_EL1_F)
+ return;
+
+ /*
+ * If we only have a single stage of translation (E2H=0 or
+ * TGE=1), exit early. Same thing if {VM,DC}=={0,0}.
+ */
+ if (!vcpu_el2_e2h_is_set(vcpu) || vcpu_el2_tge_is_set(vcpu) ||
+ !(vcpu_read_sys_reg(vcpu, HCR_EL2) & (HCR_VM | HCR_DC)))
+ return;
+
+ /* Do the stage-2 translation */
+ ipa = (par & GENMASK_ULL(47, 12)) | (vaddr & GENMASK_ULL(11, 0));
+ out.esr = 0;
+ ret = kvm_walk_nested_s2(vcpu, ipa, &out);
+ if (ret < 0)
+ return;
+
+ /* Check the access permission */
+ if (!out.esr &&
+ ((!write && !out.readable) || (write && !out.writable)))
+ out.esr = ESR_ELx_FSC_PERM_L(out.level & 0x3);
+
+ par = compute_par_s12(vcpu, par, &out);
+ vcpu_write_sys_reg(vcpu, par, PAR_EL1);
+}
diff --git a/arch/arm64/kvm/emulate-nested.c b/arch/arm64/kvm/emulate-nested.c
index 05166eccea0a..05b6435d02a9 100644
--- a/arch/arm64/kvm/emulate-nested.c
+++ b/arch/arm64/kvm/emulate-nested.c
@@ -83,14 +83,20 @@ enum cgt_group_id {
CGT_CPTR_TAM,
CGT_CPTR_TCPAC,
+ CGT_HCRX_EnFPM,
CGT_HCRX_TCR2En,
+ CGT_ICH_HCR_TC,
+ CGT_ICH_HCR_TALL0,
+ CGT_ICH_HCR_TALL1,
+ CGT_ICH_HCR_TDIR,
+
/*
* Anything after this point is a combination of coarse trap
* controls, which must all be evaluated to decide what to do.
*/
__MULTIPLE_CONTROL_BITS__,
- CGT_HCR_IMO_FMO = __MULTIPLE_CONTROL_BITS__,
+ CGT_HCR_IMO_FMO_ICH_HCR_TC = __MULTIPLE_CONTROL_BITS__,
CGT_HCR_TID2_TID4,
CGT_HCR_TTLB_TTLBIS,
CGT_HCR_TTLB_TTLBOS,
@@ -105,6 +111,8 @@ enum cgt_group_id {
CGT_MDCR_TDE_TDRA,
CGT_MDCR_TDCC_TDE_TDA,
+ CGT_ICH_HCR_TC_TDIR,
+
/*
* Anything after this point requires a callback evaluating a
* complex trap condition. Ugly stuff.
@@ -372,12 +380,42 @@ static const struct trap_bits coarse_trap_bits[] = {
.mask = CPTR_EL2_TCPAC,
.behaviour = BEHAVE_FORWARD_ANY,
},
+ [CGT_HCRX_EnFPM] = {
+ .index = HCRX_EL2,
+ .value = 0,
+ .mask = HCRX_EL2_EnFPM,
+ .behaviour = BEHAVE_FORWARD_ANY,
+ },
[CGT_HCRX_TCR2En] = {
.index = HCRX_EL2,
.value = 0,
.mask = HCRX_EL2_TCR2En,
.behaviour = BEHAVE_FORWARD_ANY,
},
+ [CGT_ICH_HCR_TC] = {
+ .index = ICH_HCR_EL2,
+ .value = ICH_HCR_TC,
+ .mask = ICH_HCR_TC,
+ .behaviour = BEHAVE_FORWARD_ANY,
+ },
+ [CGT_ICH_HCR_TALL0] = {
+ .index = ICH_HCR_EL2,
+ .value = ICH_HCR_TALL0,
+ .mask = ICH_HCR_TALL0,
+ .behaviour = BEHAVE_FORWARD_ANY,
+ },
+ [CGT_ICH_HCR_TALL1] = {
+ .index = ICH_HCR_EL2,
+ .value = ICH_HCR_TALL1,
+ .mask = ICH_HCR_TALL1,
+ .behaviour = BEHAVE_FORWARD_ANY,
+ },
+ [CGT_ICH_HCR_TDIR] = {
+ .index = ICH_HCR_EL2,
+ .value = ICH_HCR_TDIR,
+ .mask = ICH_HCR_TDIR,
+ .behaviour = BEHAVE_FORWARD_ANY,
+ },
};
#define MCB(id, ...) \
@@ -387,7 +425,6 @@ static const struct trap_bits coarse_trap_bits[] = {
}
static const enum cgt_group_id *coarse_control_combo[] = {
- MCB(CGT_HCR_IMO_FMO, CGT_HCR_IMO, CGT_HCR_FMO),
MCB(CGT_HCR_TID2_TID4, CGT_HCR_TID2, CGT_HCR_TID4),
MCB(CGT_HCR_TTLB_TTLBIS, CGT_HCR_TTLB, CGT_HCR_TTLBIS),
MCB(CGT_HCR_TTLB_TTLBOS, CGT_HCR_TTLB, CGT_HCR_TTLBOS),
@@ -402,6 +439,9 @@ static const enum cgt_group_id *coarse_control_combo[] = {
MCB(CGT_MDCR_TDE_TDOSA, CGT_MDCR_TDE, CGT_MDCR_TDOSA),
MCB(CGT_MDCR_TDE_TDRA, CGT_MDCR_TDE, CGT_MDCR_TDRA),
MCB(CGT_MDCR_TDCC_TDE_TDA, CGT_MDCR_TDCC, CGT_MDCR_TDE, CGT_MDCR_TDA),
+
+ MCB(CGT_HCR_IMO_FMO_ICH_HCR_TC, CGT_HCR_IMO, CGT_HCR_FMO, CGT_ICH_HCR_TC),
+ MCB(CGT_ICH_HCR_TC_TDIR, CGT_ICH_HCR_TC, CGT_ICH_HCR_TDIR),
};
typedef enum trap_behaviour (*complex_condition_check)(struct kvm_vcpu *);
@@ -536,9 +576,9 @@ static const struct encoding_to_trap_config encoding_to_cgt[] __initconst = {
SR_TRAP(SYS_CSSELR_EL1, CGT_HCR_TID2_TID4),
SR_RANGE_TRAP(SYS_ID_PFR0_EL1,
sys_reg(3, 0, 0, 7, 7), CGT_HCR_TID3),
- SR_TRAP(SYS_ICC_SGI0R_EL1, CGT_HCR_IMO_FMO),
- SR_TRAP(SYS_ICC_ASGI1R_EL1, CGT_HCR_IMO_FMO),
- SR_TRAP(SYS_ICC_SGI1R_EL1, CGT_HCR_IMO_FMO),
+ SR_TRAP(SYS_ICC_SGI0R_EL1, CGT_HCR_IMO_FMO_ICH_HCR_TC),
+ SR_TRAP(SYS_ICC_ASGI1R_EL1, CGT_HCR_IMO_FMO_ICH_HCR_TC),
+ SR_TRAP(SYS_ICC_SGI1R_EL1, CGT_HCR_IMO_FMO_ICH_HCR_TC),
SR_RANGE_TRAP(sys_reg(3, 0, 11, 0, 0),
sys_reg(3, 0, 11, 15, 7), CGT_HCR_TIDCP),
SR_RANGE_TRAP(sys_reg(3, 1, 11, 0, 0),
@@ -786,6 +826,7 @@ static const struct encoding_to_trap_config encoding_to_cgt[] __initconst = {
SR_TRAP(OP_AT_S12E1W, CGT_HCR_NV),
SR_TRAP(OP_AT_S12E0R, CGT_HCR_NV),
SR_TRAP(OP_AT_S12E0W, CGT_HCR_NV),
+ SR_TRAP(OP_AT_S1E2A, CGT_HCR_NV),
SR_TRAP(OP_TLBI_IPAS2E1, CGT_HCR_NV),
SR_TRAP(OP_TLBI_RIPAS2E1, CGT_HCR_NV),
SR_TRAP(OP_TLBI_IPAS2LE1, CGT_HCR_NV),
@@ -867,6 +908,7 @@ static const struct encoding_to_trap_config encoding_to_cgt[] __initconst = {
SR_TRAP(OP_AT_S1E0W, CGT_HCR_AT),
SR_TRAP(OP_AT_S1E1RP, CGT_HCR_AT),
SR_TRAP(OP_AT_S1E1WP, CGT_HCR_AT),
+ SR_TRAP(OP_AT_S1E1A, CGT_HCR_AT),
SR_TRAP(SYS_ERXPFGF_EL1, CGT_HCR_nFIEN),
SR_TRAP(SYS_ERXPFGCTL_EL1, CGT_HCR_nFIEN),
SR_TRAP(SYS_ERXPFGCDN_EL1, CGT_HCR_nFIEN),
@@ -1108,6 +1150,35 @@ static const struct encoding_to_trap_config encoding_to_cgt[] __initconst = {
SR_TRAP(SYS_CNTP_CTL_EL0, CGT_CNTHCTL_EL1PTEN),
SR_TRAP(SYS_CNTPCT_EL0, CGT_CNTHCTL_EL1PCTEN),
SR_TRAP(SYS_CNTPCTSS_EL0, CGT_CNTHCTL_EL1PCTEN),
+ SR_TRAP(SYS_FPMR, CGT_HCRX_EnFPM),
+ /*
+ * IMPDEF choice:
+ * We treat ICC_SRE_EL2.{SRE,Enable) and ICV_SRE_EL1.SRE as
+ * RAO/WI. We therefore never consider ICC_SRE_EL2.Enable for
+ * ICC_SRE_EL1 access, and always handle it locally.
+ */
+ SR_TRAP(SYS_ICC_AP0R0_EL1, CGT_ICH_HCR_TALL0),
+ SR_TRAP(SYS_ICC_AP0R1_EL1, CGT_ICH_HCR_TALL0),
+ SR_TRAP(SYS_ICC_AP0R2_EL1, CGT_ICH_HCR_TALL0),
+ SR_TRAP(SYS_ICC_AP0R3_EL1, CGT_ICH_HCR_TALL0),
+ SR_TRAP(SYS_ICC_AP1R0_EL1, CGT_ICH_HCR_TALL1),
+ SR_TRAP(SYS_ICC_AP1R1_EL1, CGT_ICH_HCR_TALL1),
+ SR_TRAP(SYS_ICC_AP1R2_EL1, CGT_ICH_HCR_TALL1),
+ SR_TRAP(SYS_ICC_AP1R3_EL1, CGT_ICH_HCR_TALL1),
+ SR_TRAP(SYS_ICC_BPR0_EL1, CGT_ICH_HCR_TALL0),
+ SR_TRAP(SYS_ICC_BPR1_EL1, CGT_ICH_HCR_TALL1),
+ SR_TRAP(SYS_ICC_CTLR_EL1, CGT_ICH_HCR_TC),
+ SR_TRAP(SYS_ICC_DIR_EL1, CGT_ICH_HCR_TC_TDIR),
+ SR_TRAP(SYS_ICC_EOIR0_EL1, CGT_ICH_HCR_TALL0),
+ SR_TRAP(SYS_ICC_EOIR1_EL1, CGT_ICH_HCR_TALL1),
+ SR_TRAP(SYS_ICC_HPPIR0_EL1, CGT_ICH_HCR_TALL0),
+ SR_TRAP(SYS_ICC_HPPIR1_EL1, CGT_ICH_HCR_TALL1),
+ SR_TRAP(SYS_ICC_IAR0_EL1, CGT_ICH_HCR_TALL0),
+ SR_TRAP(SYS_ICC_IAR1_EL1, CGT_ICH_HCR_TALL1),
+ SR_TRAP(SYS_ICC_IGRPEN0_EL1, CGT_ICH_HCR_TALL0),
+ SR_TRAP(SYS_ICC_IGRPEN1_EL1, CGT_ICH_HCR_TALL1),
+ SR_TRAP(SYS_ICC_PMR_EL1, CGT_ICH_HCR_TC),
+ SR_TRAP(SYS_ICC_RPR_EL1, CGT_ICH_HCR_TC),
};
static DEFINE_XARRAY(sr_forward_xa);
diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c
index c53e5b14038d..ea5484ce1f3b 100644
--- a/arch/arm64/kvm/fpsimd.c
+++ b/arch/arm64/kvm/fpsimd.c
@@ -63,6 +63,7 @@ void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu)
*/
*host_data_ptr(fp_owner) = FP_STATE_HOST_OWNED;
*host_data_ptr(fpsimd_state) = kern_hyp_va(&current->thread.uw.fpsimd_state);
+ *host_data_ptr(fpmr_ptr) = kern_hyp_va(&current->thread.uw.fpmr);
vcpu_clear_flag(vcpu, HOST_SVE_ENABLED);
if (read_sysreg(cpacr_el1) & CPACR_EL1_ZEN_EL0EN)
@@ -134,8 +135,8 @@ void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu)
fp_state.sve_state = vcpu->arch.sve_state;
fp_state.sve_vl = vcpu->arch.sve_max_vl;
fp_state.sme_state = NULL;
- fp_state.svcr = &vcpu->arch.svcr;
- fp_state.fpmr = &vcpu->arch.fpmr;
+ fp_state.svcr = &__vcpu_sys_reg(vcpu, SVCR);
+ fp_state.fpmr = &__vcpu_sys_reg(vcpu, FPMR);
fp_state.fp_type = &vcpu->arch.fp_type;
if (vcpu_has_sve(vcpu))
diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
index 11098eb7eb44..962f985977c2 100644
--- a/arch/arm64/kvm/guest.c
+++ b/arch/arm64/kvm/guest.c
@@ -1045,6 +1045,11 @@ int kvm_vm_ioctl_mte_copy_tags(struct kvm *kvm,
mutex_lock(&kvm->slots_lock);
+ if (write && atomic_read(&kvm->nr_memslots_dirty_logging)) {
+ ret = -EBUSY;
+ goto out;
+ }
+
while (length > 0) {
kvm_pfn_t pfn = gfn_to_pfn_prot(kvm, gfn, write, NULL);
void *maddr;
@@ -1059,6 +1064,7 @@ int kvm_vm_ioctl_mte_copy_tags(struct kvm *kvm,
page = pfn_to_online_page(pfn);
if (!page) {
/* Reject ZONE_DEVICE memory */
+ kvm_release_pfn_clean(pfn);
ret = -EFAULT;
goto out;
}
diff --git a/arch/arm64/kvm/hyp/include/hyp/fault.h b/arch/arm64/kvm/hyp/include/hyp/fault.h
index 9e13c1bc2ad5..17df94570f03 100644
--- a/arch/arm64/kvm/hyp/include/hyp/fault.h
+++ b/arch/arm64/kvm/hyp/include/hyp/fault.h
@@ -14,6 +14,7 @@
static inline bool __translate_far_to_hpfar(u64 far, u64 *hpfar)
{
+ int ret;
u64 par, tmp;
/*
@@ -27,7 +28,9 @@ static inline bool __translate_far_to_hpfar(u64 far, u64 *hpfar)
* saved the guest context yet, and we may return early...
*/
par = read_sysreg_par();
- if (!__kvm_at("s1e1r", far))
+ ret = system_supports_poe() ? __kvm_at(OP_AT_S1E1A, far) :
+ __kvm_at(OP_AT_S1E1R, far);
+ if (!ret)
tmp = read_sysreg_par();
else
tmp = SYS_PAR_EL1_F; /* back to the guest */
diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h
index 37ff87d782b6..46d52e8a3df3 100644
--- a/arch/arm64/kvm/hyp/include/hyp/switch.h
+++ b/arch/arm64/kvm/hyp/include/hyp/switch.h
@@ -403,6 +403,9 @@ static bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code)
else
__fpsimd_restore_state(&vcpu->arch.ctxt.fp_regs);
+ if (kvm_has_fpmr(kern_hyp_va(vcpu->kvm)))
+ write_sysreg_s(__vcpu_sys_reg(vcpu, FPMR), SYS_FPMR);
+
/* Skip restoring fpexc32 for AArch64 guests */
if (!(read_sysreg(hcr_el2) & HCR_RW))
write_sysreg(__vcpu_sys_reg(vcpu, FPEXC32_EL2), fpexc32_el2);
diff --git a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h
index 4c0fdabaf8ae..1579a3c08a36 100644
--- a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h
+++ b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h
@@ -16,9 +16,15 @@
#include <asm/kvm_hyp.h>
#include <asm/kvm_mmu.h>
+static inline bool ctxt_has_s1poe(struct kvm_cpu_context *ctxt);
+
static inline void __sysreg_save_common_state(struct kvm_cpu_context *ctxt)
{
ctxt_sys_reg(ctxt, MDSCR_EL1) = read_sysreg(mdscr_el1);
+
+ // POR_EL0 can affect uaccess, so must be saved/restored early.
+ if (ctxt_has_s1poe(ctxt))
+ ctxt_sys_reg(ctxt, POR_EL0) = read_sysreg_s(SYS_POR_EL0);
}
static inline void __sysreg_save_user_state(struct kvm_cpu_context *ctxt)
@@ -66,6 +72,17 @@ static inline bool ctxt_has_tcrx(struct kvm_cpu_context *ctxt)
return kvm_has_feat(kern_hyp_va(vcpu->kvm), ID_AA64MMFR3_EL1, TCRX, IMP);
}
+static inline bool ctxt_has_s1poe(struct kvm_cpu_context *ctxt)
+{
+ struct kvm_vcpu *vcpu;
+
+ if (!system_supports_poe())
+ return false;
+
+ vcpu = ctxt_to_vcpu(ctxt);
+ return kvm_has_feat(kern_hyp_va(vcpu->kvm), ID_AA64MMFR3_EL1, S1POE, IMP);
+}
+
static inline void __sysreg_save_el1_state(struct kvm_cpu_context *ctxt)
{
ctxt_sys_reg(ctxt, SCTLR_EL1) = read_sysreg_el1(SYS_SCTLR);
@@ -80,6 +97,9 @@ static inline void __sysreg_save_el1_state(struct kvm_cpu_context *ctxt)
ctxt_sys_reg(ctxt, PIR_EL1) = read_sysreg_el1(SYS_PIR);
ctxt_sys_reg(ctxt, PIRE0_EL1) = read_sysreg_el1(SYS_PIRE0);
}
+
+ if (ctxt_has_s1poe(ctxt))
+ ctxt_sys_reg(ctxt, POR_EL1) = read_sysreg_el1(SYS_POR);
}
ctxt_sys_reg(ctxt, ESR_EL1) = read_sysreg_el1(SYS_ESR);
ctxt_sys_reg(ctxt, AFSR0_EL1) = read_sysreg_el1(SYS_AFSR0);
@@ -120,6 +140,10 @@ static inline void __sysreg_save_el2_return_state(struct kvm_cpu_context *ctxt)
static inline void __sysreg_restore_common_state(struct kvm_cpu_context *ctxt)
{
write_sysreg(ctxt_sys_reg(ctxt, MDSCR_EL1), mdscr_el1);
+
+ // POR_EL0 can affect uaccess, so must be saved/restored early.
+ if (ctxt_has_s1poe(ctxt))
+ write_sysreg_s(ctxt_sys_reg(ctxt, POR_EL0), SYS_POR_EL0);
}
static inline void __sysreg_restore_user_state(struct kvm_cpu_context *ctxt)
@@ -158,6 +182,9 @@ static inline void __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt)
write_sysreg_el1(ctxt_sys_reg(ctxt, PIR_EL1), SYS_PIR);
write_sysreg_el1(ctxt_sys_reg(ctxt, PIRE0_EL1), SYS_PIRE0);
}
+
+ if (ctxt_has_s1poe(ctxt))
+ write_sysreg_el1(ctxt_sys_reg(ctxt, POR_EL1), SYS_POR);
}
write_sysreg_el1(ctxt_sys_reg(ctxt, ESR_EL1), SYS_ESR);
write_sysreg_el1(ctxt_sys_reg(ctxt, AFSR0_EL1), SYS_AFSR0);
diff --git a/arch/arm64/kvm/hyp/nvhe/ffa.c b/arch/arm64/kvm/hyp/nvhe/ffa.c
index e715c157c2c4..e433dfab882a 100644
--- a/arch/arm64/kvm/hyp/nvhe/ffa.c
+++ b/arch/arm64/kvm/hyp/nvhe/ffa.c
@@ -426,9 +426,9 @@ out:
return;
}
-static __always_inline void do_ffa_mem_xfer(const u64 func_id,
- struct arm_smccc_res *res,
- struct kvm_cpu_context *ctxt)
+static void __do_ffa_mem_xfer(const u64 func_id,
+ struct arm_smccc_res *res,
+ struct kvm_cpu_context *ctxt)
{
DECLARE_REG(u32, len, ctxt, 1);
DECLARE_REG(u32, fraglen, ctxt, 2);
@@ -440,9 +440,6 @@ static __always_inline void do_ffa_mem_xfer(const u64 func_id,
u32 offset, nr_ranges;
int ret = 0;
- BUILD_BUG_ON(func_id != FFA_FN64_MEM_SHARE &&
- func_id != FFA_FN64_MEM_LEND);
-
if (addr_mbz || npages_mbz || fraglen > len ||
fraglen > KVM_FFA_MBOX_NR_PAGES * PAGE_SIZE) {
ret = FFA_RET_INVALID_PARAMETERS;
@@ -461,6 +458,11 @@ static __always_inline void do_ffa_mem_xfer(const u64 func_id,
goto out_unlock;
}
+ if (len > ffa_desc_buf.len) {
+ ret = FFA_RET_NO_MEMORY;
+ goto out_unlock;
+ }
+
buf = hyp_buffers.tx;
memcpy(buf, host_buffers.tx, fraglen);
@@ -512,6 +514,13 @@ err_unshare:
goto out_unlock;
}
+#define do_ffa_mem_xfer(fid, res, ctxt) \
+ do { \
+ BUILD_BUG_ON((fid) != FFA_FN64_MEM_SHARE && \
+ (fid) != FFA_FN64_MEM_LEND); \
+ __do_ffa_mem_xfer((fid), (res), (ctxt)); \
+ } while (0);
+
static void do_ffa_mem_reclaim(struct arm_smccc_res *res,
struct kvm_cpu_context *ctxt)
{
diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-init.S b/arch/arm64/kvm/hyp/nvhe/hyp-init.S
index 07120b37da35..401af1835be6 100644
--- a/arch/arm64/kvm/hyp/nvhe/hyp-init.S
+++ b/arch/arm64/kvm/hyp/nvhe/hyp-init.S
@@ -130,7 +130,7 @@ alternative_else_nop_endif
/* Invalidate the stale TLBs from Bootloader */
tlbi alle2
- tlbi vmalls12e1
+ tlbi alle1
dsb sy
mov_q x0, INIT_SCTLR_EL2_MMU_ON
diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
index f43d845f3c4e..87692b566d90 100644
--- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c
+++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
@@ -62,6 +62,8 @@ static void fpsimd_sve_flush(void)
static void fpsimd_sve_sync(struct kvm_vcpu *vcpu)
{
+ bool has_fpmr;
+
if (!guest_owns_fp_regs())
return;
@@ -73,11 +75,18 @@ static void fpsimd_sve_sync(struct kvm_vcpu *vcpu)
else
__fpsimd_save_state(&vcpu->arch.ctxt.fp_regs);
+ has_fpmr = kvm_has_fpmr(kern_hyp_va(vcpu->kvm));
+ if (has_fpmr)
+ __vcpu_sys_reg(vcpu, FPMR) = read_sysreg_s(SYS_FPMR);
+
if (system_supports_sve())
__hyp_sve_restore_host();
else
__fpsimd_restore_state(*host_data_ptr(fpsimd_state));
+ if (has_fpmr)
+ write_sysreg_s(*host_data_ptr(fpmr), SYS_FPMR);
+
*host_data_ptr(fp_owner) = FP_STATE_HOST_OWNED;
}
diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c
index 8f5c56d5b1cd..cc69106734ca 100644
--- a/arch/arm64/kvm/hyp/nvhe/switch.c
+++ b/arch/arm64/kvm/hyp/nvhe/switch.c
@@ -197,6 +197,15 @@ static void kvm_hyp_save_fpsimd_host(struct kvm_vcpu *vcpu)
} else {
__fpsimd_save_state(*host_data_ptr(fpsimd_state));
}
+
+ if (kvm_has_fpmr(kern_hyp_va(vcpu->kvm))) {
+ u64 val = read_sysreg_s(SYS_FPMR);
+
+ if (unlikely(is_protected_kvm_enabled()))
+ *host_data_ptr(fpmr) = val;
+ else
+ **host_data_ptr(fpmr_ptr) = val;
+ }
}
static const exit_handler_fn hyp_exit_handlers[] = {
diff --git a/arch/arm64/kvm/hyp/nvhe/tlb.c b/arch/arm64/kvm/hyp/nvhe/tlb.c
index ca3c09df8d7c..48da9ca9763f 100644
--- a/arch/arm64/kvm/hyp/nvhe/tlb.c
+++ b/arch/arm64/kvm/hyp/nvhe/tlb.c
@@ -132,10 +132,10 @@ static void exit_vmid_context(struct tlb_inv_context *cxt)
else
__load_host_stage2();
- if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) {
- /* Ensure write of the old VMID */
- isb();
+ /* Ensure write of the old VMID */
+ isb();
+ if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) {
if (!(cxt->sctlr & SCTLR_ELx_M)) {
write_sysreg_el1(cxt->sctlr, SYS_SCTLR);
isb();
diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
index 9e2bbee77491..b11bcebac908 100644
--- a/arch/arm64/kvm/hyp/pgtable.c
+++ b/arch/arm64/kvm/hyp/pgtable.c
@@ -17,48 +17,6 @@
#define KVM_PTE_TYPE_PAGE 1
#define KVM_PTE_TYPE_TABLE 1
-#define KVM_PTE_LEAF_ATTR_LO GENMASK(11, 2)
-
-#define KVM_PTE_LEAF_ATTR_LO_S1_ATTRIDX GENMASK(4, 2)
-#define KVM_PTE_LEAF_ATTR_LO_S1_AP GENMASK(7, 6)
-#define KVM_PTE_LEAF_ATTR_LO_S1_AP_RO \
- ({ cpus_have_final_cap(ARM64_KVM_HVHE) ? 2 : 3; })
-#define KVM_PTE_LEAF_ATTR_LO_S1_AP_RW \
- ({ cpus_have_final_cap(ARM64_KVM_HVHE) ? 0 : 1; })
-#define KVM_PTE_LEAF_ATTR_LO_S1_SH GENMASK(9, 8)
-#define KVM_PTE_LEAF_ATTR_LO_S1_SH_IS 3
-#define KVM_PTE_LEAF_ATTR_LO_S1_AF BIT(10)
-
-#define KVM_PTE_LEAF_ATTR_LO_S2_MEMATTR GENMASK(5, 2)
-#define KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R BIT(6)
-#define KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W BIT(7)
-#define KVM_PTE_LEAF_ATTR_LO_S2_SH GENMASK(9, 8)
-#define KVM_PTE_LEAF_ATTR_LO_S2_SH_IS 3
-#define KVM_PTE_LEAF_ATTR_LO_S2_AF BIT(10)
-
-#define KVM_PTE_LEAF_ATTR_HI GENMASK(63, 50)
-
-#define KVM_PTE_LEAF_ATTR_HI_SW GENMASK(58, 55)
-
-#define KVM_PTE_LEAF_ATTR_HI_S1_XN BIT(54)
-
-#define KVM_PTE_LEAF_ATTR_HI_S2_XN BIT(54)
-
-#define KVM_PTE_LEAF_ATTR_HI_S1_GP BIT(50)
-
-#define KVM_PTE_LEAF_ATTR_S2_PERMS (KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R | \
- KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W | \
- KVM_PTE_LEAF_ATTR_HI_S2_XN)
-
-#define KVM_INVALID_PTE_OWNER_MASK GENMASK(9, 2)
-#define KVM_MAX_OWNER_ID 1
-
-/*
- * Used to indicate a pte for which a 'break-before-make' sequence is in
- * progress.
- */
-#define KVM_INVALID_PTE_LOCKED BIT(10)
-
struct kvm_pgtable_walk_data {
struct kvm_pgtable_walker *walker;
@@ -1547,7 +1505,6 @@ static int stage2_split_walker(const struct kvm_pgtable_visit_ctx *ctx,
*/
new = kvm_init_table_pte(childp, mm_ops);
stage2_make_pte(ctx, new);
- dsb(ishst);
return 0;
}
@@ -1559,8 +1516,11 @@ int kvm_pgtable_stage2_split(struct kvm_pgtable *pgt, u64 addr, u64 size,
.flags = KVM_PGTABLE_WALK_LEAF,
.arg = mc,
};
+ int ret;
- return kvm_pgtable_walk(pgt, addr, size, &walker);
+ ret = kvm_pgtable_walk(pgt, addr, size, &walker);
+ dsb(ishst);
+ return ret;
}
int __kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu,
diff --git a/arch/arm64/kvm/hyp/vgic-v3-sr.c b/arch/arm64/kvm/hyp/vgic-v3-sr.c
index 7b397fad26f2..18d4677002b1 100644
--- a/arch/arm64/kvm/hyp/vgic-v3-sr.c
+++ b/arch/arm64/kvm/hyp/vgic-v3-sr.c
@@ -268,8 +268,16 @@ void __vgic_v3_activate_traps(struct vgic_v3_cpu_if *cpu_if)
* starting to mess with the rest of the GIC, and VMCR_EL2 in
* particular. This logic must be called before
* __vgic_v3_restore_state().
+ *
+ * However, if the vgic is disabled (ICH_HCR_EL2.EN==0), no GIC is
+ * provisioned at all. In order to prevent illegal accesses to the
+ * system registers to trap to EL1 (duh), force ICC_SRE_EL1.SRE to 1
+ * so that the trap bits can take effect. Yes, we *loves* the GIC.
*/
- if (!cpu_if->vgic_sre) {
+ if (!(cpu_if->vgic_hcr & ICH_HCR_EN)) {
+ write_gicreg(ICC_SRE_EL1_SRE, ICC_SRE_EL1);
+ isb();
+ } else if (!cpu_if->vgic_sre) {
write_gicreg(0, ICC_SRE_EL1);
isb();
write_gicreg(cpu_if->vgic_vmcr, ICH_VMCR_EL2);
@@ -288,8 +296,9 @@ void __vgic_v3_activate_traps(struct vgic_v3_cpu_if *cpu_if)
}
/*
- * Prevent the guest from touching the GIC system registers if
- * SRE isn't enabled for GICv3 emulation.
+ * Prevent the guest from touching the ICC_SRE_EL1 system
+ * register. Note that this may not have any effect, as
+ * ICC_SRE_EL2.Enable being RAO/WI is a valid implementation.
*/
write_gicreg(read_gicreg(ICC_SRE_EL2) & ~ICC_SRE_EL2_ENABLE,
ICC_SRE_EL2);
@@ -297,10 +306,11 @@ void __vgic_v3_activate_traps(struct vgic_v3_cpu_if *cpu_if)
/*
* If we need to trap system registers, we must write
* ICH_HCR_EL2 anyway, even if no interrupts are being
- * injected,
+ * injected. Note that this also applies if we don't expect
+ * any system register access (no vgic at all).
*/
if (static_branch_unlikely(&vgic_v3_cpuif_trap) ||
- cpu_if->its_vpe.its_vm)
+ cpu_if->its_vpe.its_vm || !cpu_if->vgic_sre)
write_gicreg(cpu_if->vgic_hcr, ICH_HCR_EL2);
}
@@ -326,7 +336,7 @@ void __vgic_v3_deactivate_traps(struct vgic_v3_cpu_if *cpu_if)
* no interrupts were being injected, and we disable it again here.
*/
if (static_branch_unlikely(&vgic_v3_cpuif_trap) ||
- cpu_if->its_vpe.its_vm)
+ cpu_if->its_vpe.its_vm || !cpu_if->vgic_sre)
write_gicreg(0, ICH_HCR_EL2);
}
@@ -1032,6 +1042,75 @@ static void __vgic_v3_write_ctlr(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
write_gicreg(vmcr, ICH_VMCR_EL2);
}
+static bool __vgic_v3_check_trap_forwarding(struct kvm_vcpu *vcpu,
+ u32 sysreg, bool is_read)
+{
+ u64 ich_hcr;
+
+ if (!vcpu_has_nv(vcpu) || is_hyp_ctxt(vcpu))
+ return false;
+
+ ich_hcr = __vcpu_sys_reg(vcpu, ICH_HCR_EL2);
+
+ switch (sysreg) {
+ case SYS_ICC_IGRPEN0_EL1:
+ if (is_read &&
+ (__vcpu_sys_reg(vcpu, HFGRTR_EL2) & HFGxTR_EL2_ICC_IGRPENn_EL1))
+ return true;
+
+ if (!is_read &&
+ (__vcpu_sys_reg(vcpu, HFGWTR_EL2) & HFGxTR_EL2_ICC_IGRPENn_EL1))
+ return true;
+
+ fallthrough;
+
+ case SYS_ICC_AP0Rn_EL1(0):
+ case SYS_ICC_AP0Rn_EL1(1):
+ case SYS_ICC_AP0Rn_EL1(2):
+ case SYS_ICC_AP0Rn_EL1(3):
+ case SYS_ICC_BPR0_EL1:
+ case SYS_ICC_EOIR0_EL1:
+ case SYS_ICC_HPPIR0_EL1:
+ case SYS_ICC_IAR0_EL1:
+ return ich_hcr & ICH_HCR_TALL0;
+
+ case SYS_ICC_IGRPEN1_EL1:
+ if (is_read &&
+ (__vcpu_sys_reg(vcpu, HFGRTR_EL2) & HFGxTR_EL2_ICC_IGRPENn_EL1))
+ return true;
+
+ if (!is_read &&
+ (__vcpu_sys_reg(vcpu, HFGWTR_EL2) & HFGxTR_EL2_ICC_IGRPENn_EL1))
+ return true;
+
+ fallthrough;
+
+ case SYS_ICC_AP1Rn_EL1(0):
+ case SYS_ICC_AP1Rn_EL1(1):
+ case SYS_ICC_AP1Rn_EL1(2):
+ case SYS_ICC_AP1Rn_EL1(3):
+ case SYS_ICC_BPR1_EL1:
+ case SYS_ICC_EOIR1_EL1:
+ case SYS_ICC_HPPIR1_EL1:
+ case SYS_ICC_IAR1_EL1:
+ return ich_hcr & ICH_HCR_TALL1;
+
+ case SYS_ICC_DIR_EL1:
+ if (ich_hcr & ICH_HCR_TDIR)
+ return true;
+
+ fallthrough;
+
+ case SYS_ICC_RPR_EL1:
+ case SYS_ICC_CTLR_EL1:
+ case SYS_ICC_PMR_EL1:
+ return ich_hcr & ICH_HCR_TC;
+
+ default:
+ return false;
+ }
+}
+
int __vgic_v3_perform_cpuif_access(struct kvm_vcpu *vcpu)
{
int rt;
@@ -1041,6 +1120,9 @@ int __vgic_v3_perform_cpuif_access(struct kvm_vcpu *vcpu)
bool is_read;
u32 sysreg;
+ if (kern_hyp_va(vcpu->kvm)->arch.vgic.vgic_model != KVM_DEV_TYPE_ARM_VGIC_V3)
+ return 0;
+
esr = kvm_vcpu_get_esr(vcpu);
if (vcpu_mode_is_32bit(vcpu)) {
if (!kvm_condition_valid(vcpu)) {
@@ -1055,6 +1137,9 @@ int __vgic_v3_perform_cpuif_access(struct kvm_vcpu *vcpu)
is_read = (esr & ESR_ELx_SYS64_ISS_DIR_MASK) == ESR_ELx_SYS64_ISS_DIR_READ;
+ if (__vgic_v3_check_trap_forwarding(vcpu, sysreg, is_read))
+ return 0;
+
switch (sysreg) {
case SYS_ICC_IAR0_EL1:
case SYS_ICC_IAR1_EL1:
diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c
index 77010b76c150..80581b1c3995 100644
--- a/arch/arm64/kvm/hyp/vhe/switch.c
+++ b/arch/arm64/kvm/hyp/vhe/switch.c
@@ -312,6 +312,9 @@ static bool kvm_hyp_handle_eret(struct kvm_vcpu *vcpu, u64 *exit_code)
static void kvm_hyp_save_fpsimd_host(struct kvm_vcpu *vcpu)
{
__fpsimd_save_state(*host_data_ptr(fpsimd_state));
+
+ if (kvm_has_fpmr(vcpu->kvm))
+ **host_data_ptr(fpmr_ptr) = read_sysreg_s(SYS_FPMR);
}
static bool kvm_hyp_handle_tlbi_el2(struct kvm_vcpu *vcpu, u64 *exit_code)
diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c
index bab27f9d8cc6..638b6205526f 100644
--- a/arch/arm64/kvm/nested.c
+++ b/arch/arm64/kvm/nested.c
@@ -103,20 +103,6 @@ struct s2_walk_info {
bool be;
};
-static unsigned int ps_to_output_size(unsigned int ps)
-{
- switch (ps) {
- case 0: return 32;
- case 1: return 36;
- case 2: return 40;
- case 3: return 42;
- case 4: return 44;
- case 5:
- default:
- return 48;
- }
-}
-
static u32 compute_fsc(int level, u32 fsc)
{
return fsc | (level & 0x3);
@@ -256,7 +242,7 @@ static int walk_nested_s2_pgd(phys_addr_t ipa,
/* Check for valid descriptor at this point */
if (!(desc & 1) || ((desc & 3) == 1 && level == 3)) {
out->esr = compute_fsc(level, ESR_ELx_FSC_FAULT);
- out->upper_attr = desc;
+ out->desc = desc;
return 1;
}
@@ -266,7 +252,7 @@ static int walk_nested_s2_pgd(phys_addr_t ipa,
if (check_output_size(wi, desc)) {
out->esr = compute_fsc(level, ESR_ELx_FSC_ADDRSZ);
- out->upper_attr = desc;
+ out->desc = desc;
return 1;
}
@@ -278,27 +264,24 @@ static int walk_nested_s2_pgd(phys_addr_t ipa,
if (level < first_block_level) {
out->esr = compute_fsc(level, ESR_ELx_FSC_FAULT);
- out->upper_attr = desc;
+ out->desc = desc;
return 1;
}
- /*
- * We don't use the contiguous bit in the stage-2 ptes, so skip check
- * for misprogramming of the contiguous bit.
- */
-
if (check_output_size(wi, desc)) {
out->esr = compute_fsc(level, ESR_ELx_FSC_ADDRSZ);
- out->upper_attr = desc;
+ out->desc = desc;
return 1;
}
if (!(desc & BIT(10))) {
out->esr = compute_fsc(level, ESR_ELx_FSC_ACCESS);
- out->upper_attr = desc;
+ out->desc = desc;
return 1;
}
+ addr_bottom += contiguous_bit_shift(desc, wi, level);
+
/* Calculate and return the result */
paddr = (desc & GENMASK_ULL(47, addr_bottom)) |
(ipa & GENMASK_ULL(addr_bottom - 1, 0));
@@ -307,7 +290,7 @@ static int walk_nested_s2_pgd(phys_addr_t ipa,
out->readable = desc & (0b01 << 6);
out->writable = desc & (0b10 << 6);
out->level = level;
- out->upper_attr = desc & GENMASK_ULL(63, 52);
+ out->desc = desc;
return 0;
}
@@ -954,19 +937,16 @@ static void set_sysreg_masks(struct kvm *kvm, int sr, u64 res0, u64 res1)
int kvm_init_nv_sysregs(struct kvm *kvm)
{
u64 res0, res1;
- int ret = 0;
- mutex_lock(&kvm->arch.config_lock);
+ lockdep_assert_held(&kvm->arch.config_lock);
if (kvm->arch.sysreg_masks)
- goto out;
+ return 0;
kvm->arch.sysreg_masks = kzalloc(sizeof(*(kvm->arch.sysreg_masks)),
GFP_KERNEL_ACCOUNT);
- if (!kvm->arch.sysreg_masks) {
- ret = -ENOMEM;
- goto out;
- }
+ if (!kvm->arch.sysreg_masks)
+ return -ENOMEM;
limit_nv_id_regs(kvm);
@@ -1195,8 +1175,13 @@ int kvm_init_nv_sysregs(struct kvm *kvm)
if (!kvm_has_feat(kvm, ID_AA64PFR0_EL1, AMU, V1P1))
res0 |= ~(res0 | res1);
set_sysreg_masks(kvm, HAFGRTR_EL2, res0, res1);
-out:
- mutex_unlock(&kvm->arch.config_lock);
- return ret;
+ /* SCTLR_EL1 */
+ res0 = SCTLR_EL1_RES0;
+ res1 = SCTLR_EL1_RES1;
+ if (!kvm_has_feat(kvm, ID_AA64MMFR1_EL1, PAN, PAN3))
+ res0 |= SCTLR_EL1_EPAN;
+ set_sysreg_masks(kvm, SCTLR_EL1, res0, res1);
+
+ return 0;
}
diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c
index 82a2a003259c..ac36c438b8c1 100644
--- a/arch/arm64/kvm/pmu-emul.c
+++ b/arch/arm64/kvm/pmu-emul.c
@@ -233,7 +233,7 @@ void kvm_pmu_vcpu_init(struct kvm_vcpu *vcpu)
int i;
struct kvm_pmu *pmu = &vcpu->arch.pmu;
- for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++)
+ for (i = 0; i < KVM_ARMV8_PMU_MAX_COUNTERS; i++)
pmu->pmc[i].idx = i;
}
@@ -260,7 +260,7 @@ void kvm_pmu_vcpu_destroy(struct kvm_vcpu *vcpu)
{
int i;
- for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++)
+ for (i = 0; i < KVM_ARMV8_PMU_MAX_COUNTERS; i++)
kvm_pmu_release_perf_event(kvm_vcpu_idx_to_pmc(vcpu, i));
irq_work_sync(&vcpu->arch.pmu.overflow_work);
}
@@ -291,7 +291,7 @@ void kvm_pmu_enable_counter_mask(struct kvm_vcpu *vcpu, u64 val)
if (!(kvm_vcpu_read_pmcr(vcpu) & ARMV8_PMU_PMCR_E) || !val)
return;
- for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) {
+ for (i = 0; i < KVM_ARMV8_PMU_MAX_COUNTERS; i++) {
struct kvm_pmc *pmc;
if (!(val & BIT(i)))
@@ -323,7 +323,7 @@ void kvm_pmu_disable_counter_mask(struct kvm_vcpu *vcpu, u64 val)
if (!kvm_vcpu_has_pmu(vcpu) || !val)
return;
- for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) {
+ for (i = 0; i < KVM_ARMV8_PMU_MAX_COUNTERS; i++) {
struct kvm_pmc *pmc;
if (!(val & BIT(i)))
@@ -910,10 +910,10 @@ u8 kvm_arm_pmu_get_max_counters(struct kvm *kvm)
struct arm_pmu *arm_pmu = kvm->arch.arm_pmu;
/*
- * The arm_pmu->num_events considers the cycle counter as well.
- * Ignore that and return only the general-purpose counters.
+ * The arm_pmu->cntr_mask considers the fixed counter(s) as well.
+ * Ignore those and return only the general-purpose counters.
*/
- return arm_pmu->num_events - 1;
+ return bitmap_weight(arm_pmu->cntr_mask, ARMV8_PMU_MAX_GENERAL_COUNTERS);
}
static void kvm_arm_set_pmu(struct kvm *kvm, struct arm_pmu *arm_pmu)
diff --git a/arch/arm64/kvm/pmu.c b/arch/arm64/kvm/pmu.c
index 329819806096..0b3adf3e17b4 100644
--- a/arch/arm64/kvm/pmu.c
+++ b/arch/arm64/kvm/pmu.c
@@ -5,6 +5,8 @@
*/
#include <linux/kvm_host.h>
#include <linux/perf_event.h>
+#include <linux/perf/arm_pmu.h>
+#include <linux/perf/arm_pmuv3.h>
static DEFINE_PER_CPU(struct kvm_pmu_events, kvm_pmu_events);
@@ -35,7 +37,7 @@ struct kvm_pmu_events *kvm_get_pmu_events(void)
* Add events to track that we may want to switch at guest entry/exit
* time.
*/
-void kvm_set_pmu_events(u32 set, struct perf_event_attr *attr)
+void kvm_set_pmu_events(u64 set, struct perf_event_attr *attr)
{
struct kvm_pmu_events *pmu = kvm_get_pmu_events();
@@ -51,7 +53,7 @@ void kvm_set_pmu_events(u32 set, struct perf_event_attr *attr)
/*
* Stop tracking events
*/
-void kvm_clr_pmu_events(u32 clr)
+void kvm_clr_pmu_events(u64 clr)
{
struct kvm_pmu_events *pmu = kvm_get_pmu_events();
@@ -62,79 +64,32 @@ void kvm_clr_pmu_events(u32 clr)
pmu->events_guest &= ~clr;
}
-#define PMEVTYPER_READ_CASE(idx) \
- case idx: \
- return read_sysreg(pmevtyper##idx##_el0)
-
-#define PMEVTYPER_WRITE_CASE(idx) \
- case idx: \
- write_sysreg(val, pmevtyper##idx##_el0); \
- break
-
-#define PMEVTYPER_CASES(readwrite) \
- PMEVTYPER_##readwrite##_CASE(0); \
- PMEVTYPER_##readwrite##_CASE(1); \
- PMEVTYPER_##readwrite##_CASE(2); \
- PMEVTYPER_##readwrite##_CASE(3); \
- PMEVTYPER_##readwrite##_CASE(4); \
- PMEVTYPER_##readwrite##_CASE(5); \
- PMEVTYPER_##readwrite##_CASE(6); \
- PMEVTYPER_##readwrite##_CASE(7); \
- PMEVTYPER_##readwrite##_CASE(8); \
- PMEVTYPER_##readwrite##_CASE(9); \
- PMEVTYPER_##readwrite##_CASE(10); \
- PMEVTYPER_##readwrite##_CASE(11); \
- PMEVTYPER_##readwrite##_CASE(12); \
- PMEVTYPER_##readwrite##_CASE(13); \
- PMEVTYPER_##readwrite##_CASE(14); \
- PMEVTYPER_##readwrite##_CASE(15); \
- PMEVTYPER_##readwrite##_CASE(16); \
- PMEVTYPER_##readwrite##_CASE(17); \
- PMEVTYPER_##readwrite##_CASE(18); \
- PMEVTYPER_##readwrite##_CASE(19); \
- PMEVTYPER_##readwrite##_CASE(20); \
- PMEVTYPER_##readwrite##_CASE(21); \
- PMEVTYPER_##readwrite##_CASE(22); \
- PMEVTYPER_##readwrite##_CASE(23); \
- PMEVTYPER_##readwrite##_CASE(24); \
- PMEVTYPER_##readwrite##_CASE(25); \
- PMEVTYPER_##readwrite##_CASE(26); \
- PMEVTYPER_##readwrite##_CASE(27); \
- PMEVTYPER_##readwrite##_CASE(28); \
- PMEVTYPER_##readwrite##_CASE(29); \
- PMEVTYPER_##readwrite##_CASE(30)
-
/*
* Read a value direct from PMEVTYPER<idx> where idx is 0-30
- * or PMCCFILTR_EL0 where idx is ARMV8_PMU_CYCLE_IDX (31).
+ * or PMxCFILTR_EL0 where idx is 31-32.
*/
static u64 kvm_vcpu_pmu_read_evtype_direct(int idx)
{
- switch (idx) {
- PMEVTYPER_CASES(READ);
- case ARMV8_PMU_CYCLE_IDX:
- return read_sysreg(pmccfiltr_el0);
- default:
- WARN_ON(1);
- }
+ if (idx == ARMV8_PMU_CYCLE_IDX)
+ return read_pmccfiltr();
+ else if (idx == ARMV8_PMU_INSTR_IDX)
+ return read_pmicfiltr();
- return 0;
+ return read_pmevtypern(idx);
}
/*
* Write a value direct to PMEVTYPER<idx> where idx is 0-30
- * or PMCCFILTR_EL0 where idx is ARMV8_PMU_CYCLE_IDX (31).
+ * or PMxCFILTR_EL0 where idx is 31-32.
*/
static void kvm_vcpu_pmu_write_evtype_direct(int idx, u32 val)
{
- switch (idx) {
- PMEVTYPER_CASES(WRITE);
- case ARMV8_PMU_CYCLE_IDX:
- write_sysreg(val, pmccfiltr_el0);
- break;
- default:
- WARN_ON(1);
- }
+ if (idx == ARMV8_PMU_CYCLE_IDX)
+ write_pmccfiltr(val);
+ else if (idx == ARMV8_PMU_INSTR_IDX)
+ write_pmicfiltr(val);
+ else
+ write_pmevtypern(idx, val);
}
/*
@@ -145,7 +100,7 @@ static void kvm_vcpu_pmu_enable_el0(unsigned long events)
u64 typer;
u32 counter;
- for_each_set_bit(counter, &events, 32) {
+ for_each_set_bit(counter, &events, ARMPMU_MAX_HWEVENTS) {
typer = kvm_vcpu_pmu_read_evtype_direct(counter);
typer &= ~ARMV8_PMU_EXCLUDE_EL0;
kvm_vcpu_pmu_write_evtype_direct(counter, typer);
@@ -160,7 +115,7 @@ static void kvm_vcpu_pmu_disable_el0(unsigned long events)
u64 typer;
u32 counter;
- for_each_set_bit(counter, &events, 32) {
+ for_each_set_bit(counter, &events, ARMPMU_MAX_HWEVENTS) {
typer = kvm_vcpu_pmu_read_evtype_direct(counter);
typer |= ARMV8_PMU_EXCLUDE_EL0;
kvm_vcpu_pmu_write_evtype_direct(counter, typer);
@@ -176,7 +131,7 @@ static void kvm_vcpu_pmu_disable_el0(unsigned long events)
void kvm_vcpu_pmu_restore_guest(struct kvm_vcpu *vcpu)
{
struct kvm_pmu_events *pmu;
- u32 events_guest, events_host;
+ u64 events_guest, events_host;
if (!kvm_arm_support_pmu_v3() || !has_vhe())
return;
@@ -197,7 +152,7 @@ void kvm_vcpu_pmu_restore_guest(struct kvm_vcpu *vcpu)
void kvm_vcpu_pmu_restore_host(struct kvm_vcpu *vcpu)
{
struct kvm_pmu_events *pmu;
- u32 events_guest, events_host;
+ u64 events_guest, events_host;
if (!kvm_arm_support_pmu_v3() || !has_vhe())
return;
diff --git a/arch/arm64/kvm/ptdump.c b/arch/arm64/kvm/ptdump.c
new file mode 100644
index 000000000000..e4a342e903e2
--- /dev/null
+++ b/arch/arm64/kvm/ptdump.c
@@ -0,0 +1,268 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Debug helper used to dump the stage-2 pagetables of the system and their
+ * associated permissions.
+ *
+ * Copyright (C) Google, 2024
+ * Author: Sebastian Ene <sebastianene@google.com>
+ */
+#include <linux/debugfs.h>
+#include <linux/kvm_host.h>
+#include <linux/seq_file.h>
+
+#include <asm/kvm_mmu.h>
+#include <asm/kvm_pgtable.h>
+#include <asm/ptdump.h>
+
+#define MARKERS_LEN 2
+#define KVM_PGTABLE_MAX_LEVELS (KVM_PGTABLE_LAST_LEVEL + 1)
+
+struct kvm_ptdump_guest_state {
+ struct kvm *kvm;
+ struct ptdump_pg_state parser_state;
+ struct addr_marker ipa_marker[MARKERS_LEN];
+ struct ptdump_pg_level level[KVM_PGTABLE_MAX_LEVELS];
+ struct ptdump_range range[MARKERS_LEN];
+};
+
+static const struct ptdump_prot_bits stage2_pte_bits[] = {
+ {
+ .mask = PTE_VALID,
+ .val = PTE_VALID,
+ .set = " ",
+ .clear = "F",
+ }, {
+ .mask = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R | PTE_VALID,
+ .val = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R | PTE_VALID,
+ .set = "R",
+ .clear = " ",
+ }, {
+ .mask = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W | PTE_VALID,
+ .val = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W | PTE_VALID,
+ .set = "W",
+ .clear = " ",
+ }, {
+ .mask = KVM_PTE_LEAF_ATTR_HI_S2_XN | PTE_VALID,
+ .val = PTE_VALID,
+ .set = " ",
+ .clear = "X",
+ }, {
+ .mask = KVM_PTE_LEAF_ATTR_LO_S2_AF | PTE_VALID,
+ .val = KVM_PTE_LEAF_ATTR_LO_S2_AF | PTE_VALID,
+ .set = "AF",
+ .clear = " ",
+ }, {
+ .mask = PTE_TABLE_BIT | PTE_VALID,
+ .val = PTE_VALID,
+ .set = "BLK",
+ .clear = " ",
+ },
+};
+
+static int kvm_ptdump_visitor(const struct kvm_pgtable_visit_ctx *ctx,
+ enum kvm_pgtable_walk_flags visit)
+{
+ struct ptdump_pg_state *st = ctx->arg;
+ struct ptdump_state *pt_st = &st->ptdump;
+
+ note_page(pt_st, ctx->addr, ctx->level, ctx->old);
+
+ return 0;
+}
+
+static int kvm_ptdump_build_levels(struct ptdump_pg_level *level, u32 start_lvl)
+{
+ u32 i;
+ u64 mask;
+
+ if (WARN_ON_ONCE(start_lvl >= KVM_PGTABLE_LAST_LEVEL))
+ return -EINVAL;
+
+ mask = 0;
+ for (i = 0; i < ARRAY_SIZE(stage2_pte_bits); i++)
+ mask |= stage2_pte_bits[i].mask;
+
+ for (i = start_lvl; i < KVM_PGTABLE_MAX_LEVELS; i++) {
+ snprintf(level[i].name, sizeof(level[i].name), "%u", i);
+
+ level[i].num = ARRAY_SIZE(stage2_pte_bits);
+ level[i].bits = stage2_pte_bits;
+ level[i].mask = mask;
+ }
+
+ return 0;
+}
+
+static struct kvm_ptdump_guest_state *kvm_ptdump_parser_create(struct kvm *kvm)
+{
+ struct kvm_ptdump_guest_state *st;
+ struct kvm_s2_mmu *mmu = &kvm->arch.mmu;
+ struct kvm_pgtable *pgtable = mmu->pgt;
+ int ret;
+
+ st = kzalloc(sizeof(struct kvm_ptdump_guest_state), GFP_KERNEL_ACCOUNT);
+ if (!st)
+ return ERR_PTR(-ENOMEM);
+
+ ret = kvm_ptdump_build_levels(&st->level[0], pgtable->start_level);
+ if (ret) {
+ kfree(st);
+ return ERR_PTR(ret);
+ }
+
+ st->ipa_marker[0].name = "Guest IPA";
+ st->ipa_marker[1].start_address = BIT(pgtable->ia_bits);
+ st->range[0].end = BIT(pgtable->ia_bits);
+
+ st->kvm = kvm;
+ st->parser_state = (struct ptdump_pg_state) {
+ .marker = &st->ipa_marker[0],
+ .level = -1,
+ .pg_level = &st->level[0],
+ .ptdump.range = &st->range[0],
+ .start_address = 0,
+ };
+
+ return st;
+}
+
+static int kvm_ptdump_guest_show(struct seq_file *m, void *unused)
+{
+ int ret;
+ struct kvm_ptdump_guest_state *st = m->private;
+ struct kvm *kvm = st->kvm;
+ struct kvm_s2_mmu *mmu = &kvm->arch.mmu;
+ struct ptdump_pg_state *parser_state = &st->parser_state;
+ struct kvm_pgtable_walker walker = (struct kvm_pgtable_walker) {
+ .cb = kvm_ptdump_visitor,
+ .arg = parser_state,
+ .flags = KVM_PGTABLE_WALK_LEAF,
+ };
+
+ parser_state->seq = m;
+
+ write_lock(&kvm->mmu_lock);
+ ret = kvm_pgtable_walk(mmu->pgt, 0, BIT(mmu->pgt->ia_bits), &walker);
+ write_unlock(&kvm->mmu_lock);
+
+ return ret;
+}
+
+static int kvm_ptdump_guest_open(struct inode *m, struct file *file)
+{
+ struct kvm *kvm = m->i_private;
+ struct kvm_ptdump_guest_state *st;
+ int ret;
+
+ if (!kvm_get_kvm_safe(kvm))
+ return -ENOENT;
+
+ st = kvm_ptdump_parser_create(kvm);
+ if (IS_ERR(st)) {
+ ret = PTR_ERR(st);
+ goto err_with_kvm_ref;
+ }
+
+ ret = single_open(file, kvm_ptdump_guest_show, st);
+ if (!ret)
+ return 0;
+
+ kfree(st);
+err_with_kvm_ref:
+ kvm_put_kvm(kvm);
+ return ret;
+}
+
+static int kvm_ptdump_guest_close(struct inode *m, struct file *file)
+{
+ struct kvm *kvm = m->i_private;
+ void *st = ((struct seq_file *)file->private_data)->private;
+
+ kfree(st);
+ kvm_put_kvm(kvm);
+
+ return single_release(m, file);
+}
+
+static const struct file_operations kvm_ptdump_guest_fops = {
+ .open = kvm_ptdump_guest_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = kvm_ptdump_guest_close,
+};
+
+static int kvm_pgtable_range_show(struct seq_file *m, void *unused)
+{
+ struct kvm_pgtable *pgtable = m->private;
+
+ seq_printf(m, "%2u\n", pgtable->ia_bits);
+ return 0;
+}
+
+static int kvm_pgtable_levels_show(struct seq_file *m, void *unused)
+{
+ struct kvm_pgtable *pgtable = m->private;
+
+ seq_printf(m, "%1d\n", KVM_PGTABLE_MAX_LEVELS - pgtable->start_level);
+ return 0;
+}
+
+static int kvm_pgtable_debugfs_open(struct inode *m, struct file *file,
+ int (*show)(struct seq_file *, void *))
+{
+ struct kvm *kvm = m->i_private;
+ struct kvm_pgtable *pgtable;
+ int ret;
+
+ if (!kvm_get_kvm_safe(kvm))
+ return -ENOENT;
+
+ pgtable = kvm->arch.mmu.pgt;
+
+ ret = single_open(file, show, pgtable);
+ if (ret < 0)
+ kvm_put_kvm(kvm);
+ return ret;
+}
+
+static int kvm_pgtable_range_open(struct inode *m, struct file *file)
+{
+ return kvm_pgtable_debugfs_open(m, file, kvm_pgtable_range_show);
+}
+
+static int kvm_pgtable_levels_open(struct inode *m, struct file *file)
+{
+ return kvm_pgtable_debugfs_open(m, file, kvm_pgtable_levels_show);
+}
+
+static int kvm_pgtable_debugfs_close(struct inode *m, struct file *file)
+{
+ struct kvm *kvm = m->i_private;
+
+ kvm_put_kvm(kvm);
+ return single_release(m, file);
+}
+
+static const struct file_operations kvm_pgtable_range_fops = {
+ .open = kvm_pgtable_range_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = kvm_pgtable_debugfs_close,
+};
+
+static const struct file_operations kvm_pgtable_levels_fops = {
+ .open = kvm_pgtable_levels_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = kvm_pgtable_debugfs_close,
+};
+
+void kvm_s2_ptdump_create_debugfs(struct kvm *kvm)
+{
+ debugfs_create_file("stage2_page_tables", 0400, kvm->debugfs_dentry,
+ kvm, &kvm_ptdump_guest_fops);
+ debugfs_create_file("ipa_range", 0400, kvm->debugfs_dentry, kvm,
+ &kvm_pgtable_range_fops);
+ debugfs_create_file("stage2_levels", 0400, kvm->debugfs_dentry,
+ kvm, &kvm_pgtable_levels_fops);
+}
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 31e49da867ff..dad88e31f953 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -18,6 +18,7 @@
#include <linux/printk.h>
#include <linux/uaccess.h>
+#include <asm/arm_pmuv3.h>
#include <asm/cacheflush.h>
#include <asm/cputype.h>
#include <asm/debug-monitors.h>
@@ -47,6 +48,13 @@ static u64 sys_reg_to_index(const struct sys_reg_desc *reg);
static int set_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
u64 val);
+static bool undef_access(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ kvm_inject_undefined(vcpu);
+ return false;
+}
+
static bool bad_trap(struct kvm_vcpu *vcpu,
struct sys_reg_params *params,
const struct sys_reg_desc *r,
@@ -54,8 +62,7 @@ static bool bad_trap(struct kvm_vcpu *vcpu,
{
WARN_ONCE(1, "Unexpected %s\n", msg);
print_sys_reg_instr(params);
- kvm_inject_undefined(vcpu);
- return false;
+ return undef_access(vcpu, params, r);
}
static bool read_from_write_only(struct kvm_vcpu *vcpu,
@@ -346,10 +353,8 @@ static bool access_dcgsw(struct kvm_vcpu *vcpu,
struct sys_reg_params *p,
const struct sys_reg_desc *r)
{
- if (!kvm_has_mte(vcpu->kvm)) {
- kvm_inject_undefined(vcpu);
- return false;
- }
+ if (!kvm_has_mte(vcpu->kvm))
+ return undef_access(vcpu, p, r);
/* Treat MTE S/W ops as we treat the classic ones: with contempt */
return access_dcsw(vcpu, p, r);
@@ -386,10 +391,8 @@ static bool access_vm_reg(struct kvm_vcpu *vcpu,
u64 val, mask, shift;
if (reg_to_encoding(r) == SYS_TCR2_EL1 &&
- !kvm_has_feat(vcpu->kvm, ID_AA64MMFR3_EL1, TCRX, IMP)) {
- kvm_inject_undefined(vcpu);
- return false;
- }
+ !kvm_has_feat(vcpu->kvm, ID_AA64MMFR3_EL1, TCRX, IMP))
+ return undef_access(vcpu, p, r);
BUG_ON(!p->is_write);
@@ -436,10 +439,8 @@ static bool access_gic_sgi(struct kvm_vcpu *vcpu,
{
bool g1;
- if (!kvm_has_gicv3(vcpu->kvm)) {
- kvm_inject_undefined(vcpu);
- return false;
- }
+ if (!kvm_has_gicv3(vcpu->kvm))
+ return undef_access(vcpu, p, r);
if (!p->is_write)
return read_from_write_only(vcpu, p, r);
@@ -484,6 +485,9 @@ static bool access_gic_sre(struct kvm_vcpu *vcpu,
struct sys_reg_params *p,
const struct sys_reg_desc *r)
{
+ if (!kvm_has_gicv3(vcpu->kvm))
+ return undef_access(vcpu, p, r);
+
if (p->is_write)
return ignore_write(vcpu, p);
@@ -501,14 +505,6 @@ static bool trap_raz_wi(struct kvm_vcpu *vcpu,
return read_zero(vcpu, p);
}
-static bool trap_undef(struct kvm_vcpu *vcpu,
- struct sys_reg_params *p,
- const struct sys_reg_desc *r)
-{
- kvm_inject_undefined(vcpu);
- return false;
-}
-
/*
* ARMv8.1 mandates at least a trivial LORegion implementation, where all the
* RW registers are RES0 (which we can implement as RAZ/WI). On an ARMv8.0
@@ -521,10 +517,8 @@ static bool trap_loregion(struct kvm_vcpu *vcpu,
{
u32 sr = reg_to_encoding(r);
- if (!kvm_has_feat(vcpu->kvm, ID_AA64MMFR1_EL1, LO, IMP)) {
- kvm_inject_undefined(vcpu);
- return false;
- }
+ if (!kvm_has_feat(vcpu->kvm, ID_AA64MMFR1_EL1, LO, IMP))
+ return undef_access(vcpu, p, r);
if (p->is_write && sr == SYS_LORID_EL1)
return write_to_read_only(vcpu, p, r);
@@ -893,7 +887,7 @@ static u64 reset_pmevtyper(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
static u64 reset_pmselr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
{
reset_unknown(vcpu, r);
- __vcpu_sys_reg(vcpu, r->reg) &= ARMV8_PMU_COUNTER_MASK;
+ __vcpu_sys_reg(vcpu, r->reg) &= PMSELR_EL0_SEL_MASK;
return __vcpu_sys_reg(vcpu, r->reg);
}
@@ -985,7 +979,7 @@ static bool access_pmselr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
else
/* return PMSELR.SEL field */
p->regval = __vcpu_sys_reg(vcpu, PMSELR_EL0)
- & ARMV8_PMU_COUNTER_MASK;
+ & PMSELR_EL0_SEL_MASK;
return true;
}
@@ -1053,8 +1047,8 @@ static bool access_pmu_evcntr(struct kvm_vcpu *vcpu,
if (pmu_access_event_counter_el0_disabled(vcpu))
return false;
- idx = __vcpu_sys_reg(vcpu, PMSELR_EL0)
- & ARMV8_PMU_COUNTER_MASK;
+ idx = SYS_FIELD_GET(PMSELR_EL0, SEL,
+ __vcpu_sys_reg(vcpu, PMSELR_EL0));
} else if (r->Op2 == 0) {
/* PMCCNTR_EL0 */
if (pmu_access_cycle_counter_el0_disabled(vcpu))
@@ -1104,7 +1098,7 @@ static bool access_pmu_evtyper(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
if (r->CRn == 9 && r->CRm == 13 && r->Op2 == 1) {
/* PMXEVTYPER_EL0 */
- idx = __vcpu_sys_reg(vcpu, PMSELR_EL0) & ARMV8_PMU_COUNTER_MASK;
+ idx = SYS_FIELD_GET(PMSELR_EL0, SEL, __vcpu_sys_reg(vcpu, PMSELR_EL0));
reg = PMEVTYPER0_EL0 + idx;
} else if (r->CRn == 14 && (r->CRm & 12) == 12) {
idx = ((r->CRm & 3) << 3) | (r->Op2 & 7);
@@ -1257,10 +1251,8 @@ static bool access_pmuserenr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
const struct sys_reg_desc *r)
{
if (p->is_write) {
- if (!vcpu_mode_priv(vcpu)) {
- kvm_inject_undefined(vcpu);
- return false;
- }
+ if (!vcpu_mode_priv(vcpu))
+ return undef_access(vcpu, p, r);
__vcpu_sys_reg(vcpu, PMUSERENR_EL0) =
p->regval & ARMV8_PMU_USERENR_MASK;
@@ -1344,14 +1336,6 @@ static int set_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r,
.reset = reset_pmevtyper, \
.access = access_pmu_evtyper, .reg = (PMEVTYPER0_EL0 + n), }
-static bool undef_access(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
- const struct sys_reg_desc *r)
-{
- kvm_inject_undefined(vcpu);
-
- return false;
-}
-
/* Macro to expand the AMU counter and type registers*/
#define AMU_AMEVCNTR0_EL0(n) { SYS_DESC(SYS_AMEVCNTR0_EL0(n)), undef_access }
#define AMU_AMEVTYPER0_EL0(n) { SYS_DESC(SYS_AMEVTYPER0_EL0(n)), undef_access }
@@ -1410,8 +1394,7 @@ static bool access_arch_timer(struct kvm_vcpu *vcpu,
break;
default:
print_sys_reg_msg(p, "%s", "Unhandled trapped timer register");
- kvm_inject_undefined(vcpu);
- return false;
+ return undef_access(vcpu, p, r);
}
if (p->is_write)
@@ -1545,6 +1528,10 @@ static u64 __kvm_read_sanitised_id_reg(const struct kvm_vcpu *vcpu,
val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_SME);
break;
+ case SYS_ID_AA64PFR2_EL1:
+ /* We only expose FPMR */
+ val &= ID_AA64PFR2_EL1_FPMR;
+ break;
case SYS_ID_AA64ISAR1_EL1:
if (!vcpu_has_ptrauth(vcpu))
val &= ~(ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_APA) |
@@ -1562,6 +1549,9 @@ static u64 __kvm_read_sanitised_id_reg(const struct kvm_vcpu *vcpu,
case SYS_ID_AA64MMFR2_EL1:
val &= ~ID_AA64MMFR2_EL1_CCIDX_MASK;
break;
+ case SYS_ID_AA64MMFR3_EL1:
+ val &= ID_AA64MMFR3_EL1_TCRX | ID_AA64MMFR3_EL1_S1POE;
+ break;
case SYS_ID_MMFR4_EL1:
val &= ~ARM64_FEATURE_MASK(ID_MMFR4_EL1_CCIDX);
break;
@@ -1675,6 +1665,24 @@ static unsigned int sve_visibility(const struct kvm_vcpu *vcpu,
return REG_HIDDEN;
}
+static unsigned int sme_visibility(const struct kvm_vcpu *vcpu,
+ const struct sys_reg_desc *rd)
+{
+ if (kvm_has_feat(vcpu->kvm, ID_AA64PFR1_EL1, SME, IMP))
+ return 0;
+
+ return REG_HIDDEN;
+}
+
+static unsigned int fp8_visibility(const struct kvm_vcpu *vcpu,
+ const struct sys_reg_desc *rd)
+{
+ if (kvm_has_fpmr(vcpu->kvm))
+ return 0;
+
+ return REG_HIDDEN;
+}
+
static u64 read_sanitised_id_aa64pfr0_el1(struct kvm_vcpu *vcpu,
const struct sys_reg_desc *rd)
{
@@ -2091,26 +2099,6 @@ static bool bad_redir_trap(struct kvm_vcpu *vcpu,
#define EL2_REG_REDIR(name, rst, v) EL2_REG(name, bad_redir_trap, rst, v)
/*
- * EL{0,1}2 registers are the EL2 view on an EL0 or EL1 register when
- * HCR_EL2.E2H==1, and only in the sysreg table for convenience of
- * handling traps. Given that, they are always hidden from userspace.
- */
-static unsigned int hidden_user_visibility(const struct kvm_vcpu *vcpu,
- const struct sys_reg_desc *rd)
-{
- return REG_HIDDEN_USER;
-}
-
-#define EL12_REG(name, acc, rst, v) { \
- SYS_DESC(SYS_##name##_EL12), \
- .access = acc, \
- .reset = rst, \
- .reg = name##_EL1, \
- .val = v, \
- .visibility = hidden_user_visibility, \
-}
-
-/*
* Since reset() callback and field val are not used for idregs, they will be
* used for specific purposes for idregs.
* The reset() would return KVM sanitised register value. The value would be the
@@ -2217,6 +2205,18 @@ static bool access_spsr(struct kvm_vcpu *vcpu,
return true;
}
+static bool access_cntkctl_el12(struct kvm_vcpu *vcpu,
+ struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ if (p->is_write)
+ __vcpu_sys_reg(vcpu, CNTKCTL_EL1) = p->regval;
+ else
+ p->regval = __vcpu_sys_reg(vcpu, CNTKCTL_EL1);
+
+ return true;
+}
+
static u64 reset_hcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
{
u64 val = r->val;
@@ -2261,6 +2261,15 @@ static bool access_zcr_el2(struct kvm_vcpu *vcpu,
return true;
}
+static unsigned int s1poe_visibility(const struct kvm_vcpu *vcpu,
+ const struct sys_reg_desc *rd)
+{
+ if (kvm_has_feat(vcpu->kvm, ID_AA64MMFR3_EL1, S1POE, IMP))
+ return 0;
+
+ return REG_HIDDEN;
+}
+
/*
* Architected system registers.
* Important: Must be sorted ascending by Op0, Op1, CRn, CRm, Op2
@@ -2307,7 +2316,7 @@ static const struct sys_reg_desc sys_reg_descs[] = {
// DBGDTR[TR]X_EL0 share the same encoding
{ SYS_DESC(SYS_DBGDTRTX_EL0), trap_raz_wi },
- { SYS_DESC(SYS_DBGVCR32_EL2), trap_undef, reset_val, DBGVCR32_EL2, 0 },
+ { SYS_DESC(SYS_DBGVCR32_EL2), undef_access, reset_val, DBGVCR32_EL2, 0 },
{ SYS_DESC(SYS_MPIDR_EL1), NULL, reset_mpidr, MPIDR_EL1 },
@@ -2365,16 +2374,15 @@ static const struct sys_reg_desc sys_reg_descs[] = {
ID_AA64PFR0_EL1_MPAM |
ID_AA64PFR0_EL1_SVE |
ID_AA64PFR0_EL1_RAS |
- ID_AA64PFR0_EL1_GIC |
ID_AA64PFR0_EL1_AdvSIMD |
ID_AA64PFR0_EL1_FP), },
ID_SANITISED(ID_AA64PFR1_EL1),
- ID_UNALLOCATED(4,2),
+ ID_WRITABLE(ID_AA64PFR2_EL1, ID_AA64PFR2_EL1_FPMR),
ID_UNALLOCATED(4,3),
ID_WRITABLE(ID_AA64ZFR0_EL1, ~ID_AA64ZFR0_EL1_RES0),
ID_HIDDEN(ID_AA64SMFR0_EL1),
ID_UNALLOCATED(4,6),
- ID_UNALLOCATED(4,7),
+ ID_WRITABLE(ID_AA64FPFR0_EL1, ~ID_AA64FPFR0_EL1_RES0),
/* CRm=5 */
{ SYS_DESC(SYS_ID_AA64DFR0_EL1),
@@ -2424,7 +2432,8 @@ static const struct sys_reg_desc sys_reg_descs[] = {
ID_AA64MMFR2_EL1_IDS |
ID_AA64MMFR2_EL1_NV |
ID_AA64MMFR2_EL1_CCIDX)),
- ID_SANITISED(ID_AA64MMFR3_EL1),
+ ID_WRITABLE(ID_AA64MMFR3_EL1, (ID_AA64MMFR3_EL1_TCRX |
+ ID_AA64MMFR3_EL1_S1POE)),
ID_SANITISED(ID_AA64MMFR4_EL1),
ID_UNALLOCATED(7,5),
ID_UNALLOCATED(7,6),
@@ -2455,6 +2464,8 @@ static const struct sys_reg_desc sys_reg_descs[] = {
{ SYS_DESC(SYS_SPSR_EL1), access_spsr},
{ SYS_DESC(SYS_ELR_EL1), access_elr},
+ { SYS_DESC(SYS_ICC_PMR_EL1), undef_access },
+
{ SYS_DESC(SYS_AFSR0_EL1), access_vm_reg, reset_unknown, AFSR0_EL1 },
{ SYS_DESC(SYS_AFSR1_EL1), access_vm_reg, reset_unknown, AFSR1_EL1 },
{ SYS_DESC(SYS_ESR_EL1), access_vm_reg, reset_unknown, ESR_EL1 },
@@ -2498,6 +2509,8 @@ static const struct sys_reg_desc sys_reg_descs[] = {
{ SYS_DESC(SYS_MAIR_EL1), access_vm_reg, reset_unknown, MAIR_EL1 },
{ SYS_DESC(SYS_PIRE0_EL1), NULL, reset_unknown, PIRE0_EL1 },
{ SYS_DESC(SYS_PIR_EL1), NULL, reset_unknown, PIR_EL1 },
+ { SYS_DESC(SYS_POR_EL1), NULL, reset_unknown, POR_EL1,
+ .visibility = s1poe_visibility },
{ SYS_DESC(SYS_AMAIR_EL1), access_vm_reg, reset_amair_el1, AMAIR_EL1 },
{ SYS_DESC(SYS_LORSA_EL1), trap_loregion },
@@ -2509,18 +2522,31 @@ static const struct sys_reg_desc sys_reg_descs[] = {
{ SYS_DESC(SYS_VBAR_EL1), access_rw, reset_val, VBAR_EL1, 0 },
{ SYS_DESC(SYS_DISR_EL1), NULL, reset_val, DISR_EL1, 0 },
- { SYS_DESC(SYS_ICC_IAR0_EL1), write_to_read_only },
- { SYS_DESC(SYS_ICC_EOIR0_EL1), read_from_write_only },
- { SYS_DESC(SYS_ICC_HPPIR0_EL1), write_to_read_only },
- { SYS_DESC(SYS_ICC_DIR_EL1), read_from_write_only },
- { SYS_DESC(SYS_ICC_RPR_EL1), write_to_read_only },
+ { SYS_DESC(SYS_ICC_IAR0_EL1), undef_access },
+ { SYS_DESC(SYS_ICC_EOIR0_EL1), undef_access },
+ { SYS_DESC(SYS_ICC_HPPIR0_EL1), undef_access },
+ { SYS_DESC(SYS_ICC_BPR0_EL1), undef_access },
+ { SYS_DESC(SYS_ICC_AP0R0_EL1), undef_access },
+ { SYS_DESC(SYS_ICC_AP0R1_EL1), undef_access },
+ { SYS_DESC(SYS_ICC_AP0R2_EL1), undef_access },
+ { SYS_DESC(SYS_ICC_AP0R3_EL1), undef_access },
+ { SYS_DESC(SYS_ICC_AP1R0_EL1), undef_access },
+ { SYS_DESC(SYS_ICC_AP1R1_EL1), undef_access },
+ { SYS_DESC(SYS_ICC_AP1R2_EL1), undef_access },
+ { SYS_DESC(SYS_ICC_AP1R3_EL1), undef_access },
+ { SYS_DESC(SYS_ICC_DIR_EL1), undef_access },
+ { SYS_DESC(SYS_ICC_RPR_EL1), undef_access },
{ SYS_DESC(SYS_ICC_SGI1R_EL1), access_gic_sgi },
{ SYS_DESC(SYS_ICC_ASGI1R_EL1), access_gic_sgi },
{ SYS_DESC(SYS_ICC_SGI0R_EL1), access_gic_sgi },
- { SYS_DESC(SYS_ICC_IAR1_EL1), write_to_read_only },
- { SYS_DESC(SYS_ICC_EOIR1_EL1), read_from_write_only },
- { SYS_DESC(SYS_ICC_HPPIR1_EL1), write_to_read_only },
+ { SYS_DESC(SYS_ICC_IAR1_EL1), undef_access },
+ { SYS_DESC(SYS_ICC_EOIR1_EL1), undef_access },
+ { SYS_DESC(SYS_ICC_HPPIR1_EL1), undef_access },
+ { SYS_DESC(SYS_ICC_BPR1_EL1), undef_access },
+ { SYS_DESC(SYS_ICC_CTLR_EL1), undef_access },
{ SYS_DESC(SYS_ICC_SRE_EL1), access_gic_sre },
+ { SYS_DESC(SYS_ICC_IGRPEN0_EL1), undef_access },
+ { SYS_DESC(SYS_ICC_IGRPEN1_EL1), undef_access },
{ SYS_DESC(SYS_CONTEXTIDR_EL1), access_vm_reg, reset_val, CONTEXTIDR_EL1, 0 },
{ SYS_DESC(SYS_TPIDR_EL1), NULL, reset_unknown, TPIDR_EL1 },
@@ -2541,7 +2567,8 @@ static const struct sys_reg_desc sys_reg_descs[] = {
CTR_EL0_IDC_MASK |
CTR_EL0_DminLine_MASK |
CTR_EL0_IminLine_MASK),
- { SYS_DESC(SYS_SVCR), undef_access },
+ { SYS_DESC(SYS_SVCR), undef_access, reset_val, SVCR, 0, .visibility = sme_visibility },
+ { SYS_DESC(SYS_FPMR), undef_access, reset_val, FPMR, 0, .visibility = fp8_visibility },
{ PMU_SYS_REG(PMCR_EL0), .access = access_pmcr, .reset = reset_pmcr,
.reg = PMCR_EL0, .get_user = get_pmcr, .set_user = set_pmcr },
@@ -2584,6 +2611,8 @@ static const struct sys_reg_desc sys_reg_descs[] = {
.access = access_pmovs, .reg = PMOVSSET_EL0,
.get_user = get_pmreg, .set_user = set_pmreg },
+ { SYS_DESC(SYS_POR_EL0), NULL, reset_unknown, POR_EL0,
+ .visibility = s1poe_visibility },
{ SYS_DESC(SYS_TPIDR_EL0), NULL, reset_unknown, TPIDR_EL0 },
{ SYS_DESC(SYS_TPIDRRO_EL0), NULL, reset_unknown, TPIDRRO_EL0 },
{ SYS_DESC(SYS_TPIDR2_EL0), undef_access },
@@ -2764,7 +2793,7 @@ static const struct sys_reg_desc sys_reg_descs[] = {
EL2_REG_VNCR(VTTBR_EL2, reset_val, 0),
EL2_REG_VNCR(VTCR_EL2, reset_val, 0),
- { SYS_DESC(SYS_DACR32_EL2), trap_undef, reset_unknown, DACR32_EL2 },
+ { SYS_DESC(SYS_DACR32_EL2), undef_access, reset_unknown, DACR32_EL2 },
EL2_REG_VNCR(HDFGRTR_EL2, reset_val, 0),
EL2_REG_VNCR(HDFGWTR_EL2, reset_val, 0),
EL2_REG_VNCR(HAFGRTR_EL2, reset_val, 0),
@@ -2773,20 +2802,16 @@ static const struct sys_reg_desc sys_reg_descs[] = {
{ SYS_DESC(SYS_SP_EL1), access_sp_el1},
/* AArch32 SPSR_* are RES0 if trapped from a NV guest */
- { SYS_DESC(SYS_SPSR_irq), .access = trap_raz_wi,
- .visibility = hidden_user_visibility },
- { SYS_DESC(SYS_SPSR_abt), .access = trap_raz_wi,
- .visibility = hidden_user_visibility },
- { SYS_DESC(SYS_SPSR_und), .access = trap_raz_wi,
- .visibility = hidden_user_visibility },
- { SYS_DESC(SYS_SPSR_fiq), .access = trap_raz_wi,
- .visibility = hidden_user_visibility },
-
- { SYS_DESC(SYS_IFSR32_EL2), trap_undef, reset_unknown, IFSR32_EL2 },
+ { SYS_DESC(SYS_SPSR_irq), .access = trap_raz_wi },
+ { SYS_DESC(SYS_SPSR_abt), .access = trap_raz_wi },
+ { SYS_DESC(SYS_SPSR_und), .access = trap_raz_wi },
+ { SYS_DESC(SYS_SPSR_fiq), .access = trap_raz_wi },
+
+ { SYS_DESC(SYS_IFSR32_EL2), undef_access, reset_unknown, IFSR32_EL2 },
EL2_REG(AFSR0_EL2, access_rw, reset_val, 0),
EL2_REG(AFSR1_EL2, access_rw, reset_val, 0),
EL2_REG_REDIR(ESR_EL2, reset_val, 0),
- { SYS_DESC(SYS_FPEXC32_EL2), trap_undef, reset_val, FPEXC32_EL2, 0x700 },
+ { SYS_DESC(SYS_FPEXC32_EL2), undef_access, reset_val, FPEXC32_EL2, 0x700 },
EL2_REG_REDIR(FAR_EL2, reset_val, 0),
EL2_REG(HPFAR_EL2, access_rw, reset_val, 0),
@@ -2796,7 +2821,9 @@ static const struct sys_reg_desc sys_reg_descs[] = {
EL2_REG(VBAR_EL2, access_rw, reset_val, 0),
EL2_REG(RVBAR_EL2, access_rw, reset_val, 0),
- { SYS_DESC(SYS_RMR_EL2), trap_undef },
+ { SYS_DESC(SYS_RMR_EL2), undef_access },
+
+ EL2_REG_VNCR(ICH_HCR_EL2, reset_val, 0),
EL2_REG(CONTEXTIDR_EL2, access_rw, reset_val, 0),
EL2_REG(TPIDR_EL2, access_rw, reset_val, 0),
@@ -2804,11 +2831,48 @@ static const struct sys_reg_desc sys_reg_descs[] = {
EL2_REG_VNCR(CNTVOFF_EL2, reset_val, 0),
EL2_REG(CNTHCTL_EL2, access_rw, reset_val, 0),
- EL12_REG(CNTKCTL, access_rw, reset_val, 0),
+ { SYS_DESC(SYS_CNTKCTL_EL12), access_cntkctl_el12 },
EL2_REG(SP_EL2, NULL, reset_unknown, 0),
};
+static bool handle_at_s1e01(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ u32 op = sys_insn(p->Op0, p->Op1, p->CRn, p->CRm, p->Op2);
+
+ __kvm_at_s1e01(vcpu, op, p->regval);
+
+ return true;
+}
+
+static bool handle_at_s1e2(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ u32 op = sys_insn(p->Op0, p->Op1, p->CRn, p->CRm, p->Op2);
+
+ /* There is no FGT associated with AT S1E2A :-( */
+ if (op == OP_AT_S1E2A &&
+ !kvm_has_feat(vcpu->kvm, ID_AA64ISAR2_EL1, ATS1A, IMP)) {
+ kvm_inject_undefined(vcpu);
+ return false;
+ }
+
+ __kvm_at_s1e2(vcpu, op, p->regval);
+
+ return true;
+}
+
+static bool handle_at_s12(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ u32 op = sys_insn(p->Op0, p->Op1, p->CRn, p->CRm, p->Op2);
+
+ __kvm_at_s12(vcpu, op, p->regval);
+
+ return true;
+}
+
static bool kvm_supported_tlbi_s12_op(struct kvm_vcpu *vpcu, u32 instr)
{
struct kvm *kvm = vpcu->kvm;
@@ -2830,10 +2894,8 @@ static bool handle_alle1is(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
{
u32 sys_encoding = sys_insn(p->Op0, p->Op1, p->CRn, p->CRm, p->Op2);
- if (!kvm_supported_tlbi_s12_op(vcpu, sys_encoding)) {
- kvm_inject_undefined(vcpu);
- return false;
- }
+ if (!kvm_supported_tlbi_s12_op(vcpu, sys_encoding))
+ return undef_access(vcpu, p, r);
write_lock(&vcpu->kvm->mmu_lock);
@@ -2902,10 +2964,8 @@ static bool handle_vmalls12e1is(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
u32 sys_encoding = sys_insn(p->Op0, p->Op1, p->CRn, p->CRm, p->Op2);
u64 limit, vttbr;
- if (!kvm_supported_tlbi_s12_op(vcpu, sys_encoding)) {
- kvm_inject_undefined(vcpu);
- return false;
- }
+ if (!kvm_supported_tlbi_s12_op(vcpu, sys_encoding))
+ return undef_access(vcpu, p, r);
vttbr = vcpu_read_sys_reg(vcpu, VTTBR_EL2);
limit = BIT_ULL(kvm_get_pa_bits(vcpu->kvm));
@@ -2930,10 +2990,8 @@ static bool handle_ripas2e1is(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
u64 base, range, tg, num, scale;
int shift;
- if (!kvm_supported_tlbi_ipas2_op(vcpu, sys_encoding)) {
- kvm_inject_undefined(vcpu);
- return false;
- }
+ if (!kvm_supported_tlbi_ipas2_op(vcpu, sys_encoding))
+ return undef_access(vcpu, p, r);
/*
* Because the shadow S2 structure doesn't necessarily reflect that
@@ -3001,10 +3059,8 @@ static bool handle_ipas2e1is(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
u32 sys_encoding = sys_insn(p->Op0, p->Op1, p->CRn, p->CRm, p->Op2);
u64 vttbr = vcpu_read_sys_reg(vcpu, VTTBR_EL2);
- if (!kvm_supported_tlbi_ipas2_op(vcpu, sys_encoding)) {
- kvm_inject_undefined(vcpu);
- return false;
- }
+ if (!kvm_supported_tlbi_ipas2_op(vcpu, sys_encoding))
+ return undef_access(vcpu, p, r);
kvm_s2_mmu_iterate_by_vmid(vcpu->kvm, get_vmid(vttbr),
&(union tlbi_info) {
@@ -3044,10 +3100,8 @@ static bool handle_tlbi_el1(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
WARN_ON(!vcpu_is_el2(vcpu));
- if (!kvm_supported_tlbi_s1e1_op(vcpu, sys_encoding)) {
- kvm_inject_undefined(vcpu);
- return false;
- }
+ if (!kvm_supported_tlbi_s1e1_op(vcpu, sys_encoding))
+ return undef_access(vcpu, p, r);
kvm_s2_mmu_iterate_by_vmid(vcpu->kvm, get_vmid(vttbr),
&(union tlbi_info) {
@@ -3071,6 +3125,14 @@ static struct sys_reg_desc sys_insn_descs[] = {
{ SYS_DESC(SYS_DC_ISW), access_dcsw },
{ SYS_DESC(SYS_DC_IGSW), access_dcgsw },
{ SYS_DESC(SYS_DC_IGDSW), access_dcgsw },
+
+ SYS_INSN(AT_S1E1R, handle_at_s1e01),
+ SYS_INSN(AT_S1E1W, handle_at_s1e01),
+ SYS_INSN(AT_S1E0R, handle_at_s1e01),
+ SYS_INSN(AT_S1E0W, handle_at_s1e01),
+ SYS_INSN(AT_S1E1RP, handle_at_s1e01),
+ SYS_INSN(AT_S1E1WP, handle_at_s1e01),
+
{ SYS_DESC(SYS_DC_CSW), access_dcsw },
{ SYS_DESC(SYS_DC_CGSW), access_dcgsw },
{ SYS_DESC(SYS_DC_CGDSW), access_dcgsw },
@@ -3150,19 +3212,27 @@ static struct sys_reg_desc sys_insn_descs[] = {
SYS_INSN(TLBI_VALE1NXS, handle_tlbi_el1),
SYS_INSN(TLBI_VAALE1NXS, handle_tlbi_el1),
+ SYS_INSN(AT_S1E2R, handle_at_s1e2),
+ SYS_INSN(AT_S1E2W, handle_at_s1e2),
+ SYS_INSN(AT_S12E1R, handle_at_s12),
+ SYS_INSN(AT_S12E1W, handle_at_s12),
+ SYS_INSN(AT_S12E0R, handle_at_s12),
+ SYS_INSN(AT_S12E0W, handle_at_s12),
+ SYS_INSN(AT_S1E2A, handle_at_s1e2),
+
SYS_INSN(TLBI_IPAS2E1IS, handle_ipas2e1is),
SYS_INSN(TLBI_RIPAS2E1IS, handle_ripas2e1is),
SYS_INSN(TLBI_IPAS2LE1IS, handle_ipas2e1is),
SYS_INSN(TLBI_RIPAS2LE1IS, handle_ripas2e1is),
- SYS_INSN(TLBI_ALLE2OS, trap_undef),
- SYS_INSN(TLBI_VAE2OS, trap_undef),
+ SYS_INSN(TLBI_ALLE2OS, undef_access),
+ SYS_INSN(TLBI_VAE2OS, undef_access),
SYS_INSN(TLBI_ALLE1OS, handle_alle1is),
- SYS_INSN(TLBI_VALE2OS, trap_undef),
+ SYS_INSN(TLBI_VALE2OS, undef_access),
SYS_INSN(TLBI_VMALLS12E1OS, handle_vmalls12e1is),
- SYS_INSN(TLBI_RVAE2IS, trap_undef),
- SYS_INSN(TLBI_RVALE2IS, trap_undef),
+ SYS_INSN(TLBI_RVAE2IS, undef_access),
+ SYS_INSN(TLBI_RVALE2IS, undef_access),
SYS_INSN(TLBI_ALLE1IS, handle_alle1is),
SYS_INSN(TLBI_VMALLS12E1IS, handle_vmalls12e1is),
@@ -3174,10 +3244,10 @@ static struct sys_reg_desc sys_insn_descs[] = {
SYS_INSN(TLBI_IPAS2LE1, handle_ipas2e1is),
SYS_INSN(TLBI_RIPAS2LE1, handle_ripas2e1is),
SYS_INSN(TLBI_RIPAS2LE1OS, handle_ripas2e1is),
- SYS_INSN(TLBI_RVAE2OS, trap_undef),
- SYS_INSN(TLBI_RVALE2OS, trap_undef),
- SYS_INSN(TLBI_RVAE2, trap_undef),
- SYS_INSN(TLBI_RVALE2, trap_undef),
+ SYS_INSN(TLBI_RVAE2OS, undef_access),
+ SYS_INSN(TLBI_RVALE2OS, undef_access),
+ SYS_INSN(TLBI_RVAE2, undef_access),
+ SYS_INSN(TLBI_RVALE2, undef_access),
SYS_INSN(TLBI_ALLE1, handle_alle1is),
SYS_INSN(TLBI_VMALLS12E1, handle_vmalls12e1is),
@@ -3186,19 +3256,19 @@ static struct sys_reg_desc sys_insn_descs[] = {
SYS_INSN(TLBI_IPAS2LE1ISNXS, handle_ipas2e1is),
SYS_INSN(TLBI_RIPAS2LE1ISNXS, handle_ripas2e1is),
- SYS_INSN(TLBI_ALLE2OSNXS, trap_undef),
- SYS_INSN(TLBI_VAE2OSNXS, trap_undef),
+ SYS_INSN(TLBI_ALLE2OSNXS, undef_access),
+ SYS_INSN(TLBI_VAE2OSNXS, undef_access),
SYS_INSN(TLBI_ALLE1OSNXS, handle_alle1is),
- SYS_INSN(TLBI_VALE2OSNXS, trap_undef),
+ SYS_INSN(TLBI_VALE2OSNXS, undef_access),
SYS_INSN(TLBI_VMALLS12E1OSNXS, handle_vmalls12e1is),
- SYS_INSN(TLBI_RVAE2ISNXS, trap_undef),
- SYS_INSN(TLBI_RVALE2ISNXS, trap_undef),
- SYS_INSN(TLBI_ALLE2ISNXS, trap_undef),
- SYS_INSN(TLBI_VAE2ISNXS, trap_undef),
+ SYS_INSN(TLBI_RVAE2ISNXS, undef_access),
+ SYS_INSN(TLBI_RVALE2ISNXS, undef_access),
+ SYS_INSN(TLBI_ALLE2ISNXS, undef_access),
+ SYS_INSN(TLBI_VAE2ISNXS, undef_access),
SYS_INSN(TLBI_ALLE1ISNXS, handle_alle1is),
- SYS_INSN(TLBI_VALE2ISNXS, trap_undef),
+ SYS_INSN(TLBI_VALE2ISNXS, undef_access),
SYS_INSN(TLBI_VMALLS12E1ISNXS, handle_vmalls12e1is),
SYS_INSN(TLBI_IPAS2E1OSNXS, handle_ipas2e1is),
SYS_INSN(TLBI_IPAS2E1NXS, handle_ipas2e1is),
@@ -3208,14 +3278,14 @@ static struct sys_reg_desc sys_insn_descs[] = {
SYS_INSN(TLBI_IPAS2LE1NXS, handle_ipas2e1is),
SYS_INSN(TLBI_RIPAS2LE1NXS, handle_ripas2e1is),
SYS_INSN(TLBI_RIPAS2LE1OSNXS, handle_ripas2e1is),
- SYS_INSN(TLBI_RVAE2OSNXS, trap_undef),
- SYS_INSN(TLBI_RVALE2OSNXS, trap_undef),
- SYS_INSN(TLBI_RVAE2NXS, trap_undef),
- SYS_INSN(TLBI_RVALE2NXS, trap_undef),
- SYS_INSN(TLBI_ALLE2NXS, trap_undef),
- SYS_INSN(TLBI_VAE2NXS, trap_undef),
+ SYS_INSN(TLBI_RVAE2OSNXS, undef_access),
+ SYS_INSN(TLBI_RVALE2OSNXS, undef_access),
+ SYS_INSN(TLBI_RVAE2NXS, undef_access),
+ SYS_INSN(TLBI_RVALE2NXS, undef_access),
+ SYS_INSN(TLBI_ALLE2NXS, undef_access),
+ SYS_INSN(TLBI_VAE2NXS, undef_access),
SYS_INSN(TLBI_ALLE1NXS, handle_alle1is),
- SYS_INSN(TLBI_VALE2NXS, trap_undef),
+ SYS_INSN(TLBI_VALE2NXS, undef_access),
SYS_INSN(TLBI_VMALLS12E1NXS, handle_vmalls12e1is),
};
@@ -3393,6 +3463,7 @@ static const struct sys_reg_desc cp15_regs[] = {
/* TTBCR2 */
{ AA32(HI), Op1( 0), CRn( 2), CRm( 0), Op2( 3), access_vm_reg, NULL, TCR_EL1 },
{ Op1( 0), CRn( 3), CRm( 0), Op2( 0), access_vm_reg, NULL, DACR32_EL2 },
+ { CP15_SYS_DESC(SYS_ICC_PMR_EL1), undef_access },
/* DFSR */
{ Op1( 0), CRn( 5), CRm( 0), Op2( 0), access_vm_reg, NULL, ESR_EL1 },
{ Op1( 0), CRn( 5), CRm( 0), Op2( 1), access_vm_reg, NULL, IFSR32_EL2 },
@@ -3442,8 +3513,28 @@ static const struct sys_reg_desc cp15_regs[] = {
/* AMAIR1 */
{ AA32(HI), Op1( 0), CRn(10), CRm( 3), Op2( 1), access_vm_reg, NULL, AMAIR_EL1 },
- /* ICC_SRE */
- { Op1( 0), CRn(12), CRm(12), Op2( 5), access_gic_sre },
+ { CP15_SYS_DESC(SYS_ICC_IAR0_EL1), undef_access },
+ { CP15_SYS_DESC(SYS_ICC_EOIR0_EL1), undef_access },
+ { CP15_SYS_DESC(SYS_ICC_HPPIR0_EL1), undef_access },
+ { CP15_SYS_DESC(SYS_ICC_BPR0_EL1), undef_access },
+ { CP15_SYS_DESC(SYS_ICC_AP0R0_EL1), undef_access },
+ { CP15_SYS_DESC(SYS_ICC_AP0R1_EL1), undef_access },
+ { CP15_SYS_DESC(SYS_ICC_AP0R2_EL1), undef_access },
+ { CP15_SYS_DESC(SYS_ICC_AP0R3_EL1), undef_access },
+ { CP15_SYS_DESC(SYS_ICC_AP1R0_EL1), undef_access },
+ { CP15_SYS_DESC(SYS_ICC_AP1R1_EL1), undef_access },
+ { CP15_SYS_DESC(SYS_ICC_AP1R2_EL1), undef_access },
+ { CP15_SYS_DESC(SYS_ICC_AP1R3_EL1), undef_access },
+ { CP15_SYS_DESC(SYS_ICC_DIR_EL1), undef_access },
+ { CP15_SYS_DESC(SYS_ICC_RPR_EL1), undef_access },
+ { CP15_SYS_DESC(SYS_ICC_IAR1_EL1), undef_access },
+ { CP15_SYS_DESC(SYS_ICC_EOIR1_EL1), undef_access },
+ { CP15_SYS_DESC(SYS_ICC_HPPIR1_EL1), undef_access },
+ { CP15_SYS_DESC(SYS_ICC_BPR1_EL1), undef_access },
+ { CP15_SYS_DESC(SYS_ICC_CTLR_EL1), undef_access },
+ { CP15_SYS_DESC(SYS_ICC_SRE_EL1), access_gic_sre },
+ { CP15_SYS_DESC(SYS_ICC_IGRPEN0_EL1), undef_access },
+ { CP15_SYS_DESC(SYS_ICC_IGRPEN1_EL1), undef_access },
{ Op1( 0), CRn(13), CRm( 0), Op2( 1), access_vm_reg, NULL, CONTEXTIDR_EL1 },
@@ -4280,7 +4371,7 @@ int kvm_sys_reg_get_user(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg,
int ret;
r = id_to_sys_reg_desc(vcpu, reg->id, table, num);
- if (!r || sysreg_hidden_user(vcpu, r))
+ if (!r || sysreg_hidden(vcpu, r))
return -ENOENT;
if (r->get_user) {
@@ -4324,7 +4415,7 @@ int kvm_sys_reg_set_user(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg,
return -EFAULT;
r = id_to_sys_reg_desc(vcpu, reg->id, table, num);
- if (!r || sysreg_hidden_user(vcpu, r))
+ if (!r || sysreg_hidden(vcpu, r))
return -ENOENT;
if (sysreg_user_write_ignore(vcpu, r))
@@ -4410,7 +4501,7 @@ static int walk_one_sys_reg(const struct kvm_vcpu *vcpu,
if (!(rd->reg || rd->get_user))
return 0;
- if (sysreg_hidden_user(vcpu, rd))
+ if (sysreg_hidden(vcpu, rd))
return 0;
if (!copy_reg_to_user(rd, uind))
@@ -4551,6 +4642,7 @@ void kvm_calculate_traps(struct kvm_vcpu *vcpu)
mutex_lock(&kvm->arch.config_lock);
vcpu_set_hcr(vcpu);
+ vcpu_set_ich_hcr(vcpu);
if (cpus_have_final_cap(ARM64_HAS_HCX)) {
/*
@@ -4566,6 +4658,9 @@ void kvm_calculate_traps(struct kvm_vcpu *vcpu)
if (kvm_has_feat(kvm, ID_AA64MMFR3_EL1, TCRX, IMP))
vcpu->arch.hcrx_el2 |= HCRX_EL2_TCR2En;
+
+ if (kvm_has_fpmr(kvm))
+ vcpu->arch.hcrx_el2 |= HCRX_EL2_EnFPM;
}
if (test_bit(KVM_ARCH_FLAG_FGU_INITIALIZED, &kvm->arch.flags))
@@ -4574,8 +4669,6 @@ void kvm_calculate_traps(struct kvm_vcpu *vcpu)
kvm->arch.fgu[HFGxTR_GROUP] = (HFGxTR_EL2_nAMAIR2_EL1 |
HFGxTR_EL2_nMAIR2_EL1 |
HFGxTR_EL2_nS2POR_EL1 |
- HFGxTR_EL2_nPOR_EL1 |
- HFGxTR_EL2_nPOR_EL0 |
HFGxTR_EL2_nACCDATA_EL1 |
HFGxTR_EL2_nSMPRI_EL1_MASK |
HFGxTR_EL2_nTPIDR2_EL0_MASK);
@@ -4606,10 +4699,21 @@ void kvm_calculate_traps(struct kvm_vcpu *vcpu)
HFGITR_EL2_TLBIRVAAE1OS |
HFGITR_EL2_TLBIRVAE1OS);
+ if (!kvm_has_feat(kvm, ID_AA64ISAR2_EL1, ATS1A, IMP))
+ kvm->arch.fgu[HFGITR_GROUP] |= HFGITR_EL2_ATS1E1A;
+
+ if (!kvm_has_feat(kvm, ID_AA64MMFR1_EL1, PAN, PAN2))
+ kvm->arch.fgu[HFGITR_GROUP] |= (HFGITR_EL2_ATS1E1RP |
+ HFGITR_EL2_ATS1E1WP);
+
if (!kvm_has_feat(kvm, ID_AA64MMFR3_EL1, S1PIE, IMP))
kvm->arch.fgu[HFGxTR_GROUP] |= (HFGxTR_EL2_nPIRE0_EL1 |
HFGxTR_EL2_nPIR_EL1);
+ if (!kvm_has_feat(kvm, ID_AA64MMFR3_EL1, S1POE, IMP))
+ kvm->arch.fgu[HFGxTR_GROUP] |= (HFGxTR_EL2_nPOR_EL1 |
+ HFGxTR_EL2_nPOR_EL0);
+
if (!kvm_has_feat(kvm, ID_AA64PFR0_EL1, AMU, IMP))
kvm->arch.fgu[HAFGRTR_GROUP] |= ~(HAFGRTR_EL2_RES0 |
HAFGRTR_EL2_RES1);
@@ -4619,6 +4723,36 @@ out:
mutex_unlock(&kvm->arch.config_lock);
}
+/*
+ * Perform last adjustments to the ID registers that are implied by the
+ * configuration outside of the ID regs themselves, as well as any
+ * initialisation that directly depend on these ID registers (such as
+ * RES0/RES1 behaviours). This is not the place to configure traps though.
+ *
+ * Because this can be called once per CPU, changes must be idempotent.
+ */
+int kvm_finalize_sys_regs(struct kvm_vcpu *vcpu)
+{
+ struct kvm *kvm = vcpu->kvm;
+
+ guard(mutex)(&kvm->arch.config_lock);
+
+ if (!(static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif) &&
+ irqchip_in_kernel(kvm) &&
+ kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3)) {
+ kvm->arch.id_regs[IDREG_IDX(SYS_ID_AA64PFR0_EL1)] &= ~ID_AA64PFR0_EL1_GIC_MASK;
+ kvm->arch.id_regs[IDREG_IDX(SYS_ID_PFR1_EL1)] &= ~ID_PFR1_EL1_GIC_MASK;
+ }
+
+ if (vcpu_has_nv(vcpu)) {
+ int ret = kvm_init_nv_sysregs(kvm);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
int __init kvm_sys_reg_table_init(void)
{
bool valid = true;
diff --git a/arch/arm64/kvm/sys_regs.h b/arch/arm64/kvm/sys_regs.h
index 997eea21ba2a..1d94ed6efad2 100644
--- a/arch/arm64/kvm/sys_regs.h
+++ b/arch/arm64/kvm/sys_regs.h
@@ -95,9 +95,8 @@ struct sys_reg_desc {
};
#define REG_HIDDEN (1 << 0) /* hidden from userspace and guest */
-#define REG_HIDDEN_USER (1 << 1) /* hidden from userspace only */
-#define REG_RAZ (1 << 2) /* RAZ from userspace and guest */
-#define REG_USER_WI (1 << 3) /* WI from userspace only */
+#define REG_RAZ (1 << 1) /* RAZ from userspace and guest */
+#define REG_USER_WI (1 << 2) /* WI from userspace only */
static __printf(2, 3)
inline void print_sys_reg_msg(const struct sys_reg_params *p,
@@ -165,15 +164,6 @@ static inline bool sysreg_hidden(const struct kvm_vcpu *vcpu,
return sysreg_visibility(vcpu, r) & REG_HIDDEN;
}
-static inline bool sysreg_hidden_user(const struct kvm_vcpu *vcpu,
- const struct sys_reg_desc *r)
-{
- if (likely(!r->visibility))
- return false;
-
- return r->visibility(vcpu, r) & (REG_HIDDEN | REG_HIDDEN_USER);
-}
-
static inline bool sysreg_visible_as_raz(const struct kvm_vcpu *vcpu,
const struct sys_reg_desc *r)
{
@@ -235,6 +225,8 @@ int kvm_sys_reg_set_user(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg,
bool triage_sysreg_trap(struct kvm_vcpu *vcpu, int *sr_index);
+int kvm_finalize_sys_regs(struct kvm_vcpu *vcpu);
+
#define AA32(_x) .aarch32_map = AA32_##_x
#define Op0(_x) .Op0 = _x
#define Op1(_x) .Op1 = _x
@@ -248,4 +240,11 @@ bool triage_sysreg_trap(struct kvm_vcpu *vcpu, int *sr_index);
CRn(sys_reg_CRn(reg)), CRm(sys_reg_CRm(reg)), \
Op2(sys_reg_Op2(reg))
+#define CP15_SYS_DESC(reg) \
+ .name = #reg, \
+ .aarch32_map = AA32_DIRECT, \
+ Op0(0), Op1(sys_reg_Op1(reg)), \
+ CRn(sys_reg_CRn(reg)), CRm(sys_reg_CRm(reg)), \
+ Op2(sys_reg_Op2(reg))
+
#endif /* __ARM64_KVM_SYS_REGS_LOCAL_H__ */
diff --git a/arch/arm64/kvm/vgic/vgic-v3.c b/arch/arm64/kvm/vgic/vgic-v3.c
index 3eecdd2f4b8f..b217b256853c 100644
--- a/arch/arm64/kvm/vgic/vgic-v3.c
+++ b/arch/arm64/kvm/vgic/vgic-v3.c
@@ -292,6 +292,18 @@ void vgic_v3_enable(struct kvm_vcpu *vcpu)
/* Get the show on the road... */
vgic_v3->vgic_hcr = ICH_HCR_EN;
+}
+
+void vcpu_set_ich_hcr(struct kvm_vcpu *vcpu)
+{
+ struct vgic_v3_cpu_if *vgic_v3 = &vcpu->arch.vgic_cpu.vgic_v3;
+
+ /* Hide GICv3 sysreg if necessary */
+ if (!kvm_has_gicv3(vcpu->kvm)) {
+ vgic_v3->vgic_hcr |= ICH_HCR_TALL0 | ICH_HCR_TALL1 | ICH_HCR_TC;
+ return;
+ }
+
if (group0_trap)
vgic_v3->vgic_hcr |= ICH_HCR_TALL0;
if (group1_trap)
diff --git a/arch/arm64/kvm/vgic/vgic.c b/arch/arm64/kvm/vgic/vgic.c
index abe29c7d85d0..f50274fd5581 100644
--- a/arch/arm64/kvm/vgic/vgic.c
+++ b/arch/arm64/kvm/vgic/vgic.c
@@ -922,10 +922,13 @@ void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu)
void kvm_vgic_load(struct kvm_vcpu *vcpu)
{
- if (unlikely(!vgic_initialized(vcpu->kvm)))
+ if (unlikely(!irqchip_in_kernel(vcpu->kvm) || !vgic_initialized(vcpu->kvm))) {
+ if (has_vhe() && static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
+ __vgic_v3_activate_traps(&vcpu->arch.vgic_cpu.vgic_v3);
return;
+ }
- if (kvm_vgic_global_state.type == VGIC_V2)
+ if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
vgic_v2_load(vcpu);
else
vgic_v3_load(vcpu);
@@ -933,10 +936,13 @@ void kvm_vgic_load(struct kvm_vcpu *vcpu)
void kvm_vgic_put(struct kvm_vcpu *vcpu)
{
- if (unlikely(!vgic_initialized(vcpu->kvm)))
+ if (unlikely(!irqchip_in_kernel(vcpu->kvm) || !vgic_initialized(vcpu->kvm))) {
+ if (has_vhe() && static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
+ __vgic_v3_deactivate_traps(&vcpu->arch.vgic_cpu.vgic_v3);
return;
+ }
- if (kvm_vgic_global_state.type == VGIC_V2)
+ if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
vgic_v2_put(vcpu);
else
vgic_v3_put(vcpu);
diff --git a/arch/arm64/kvm/vgic/vgic.h b/arch/arm64/kvm/vgic/vgic.h
index 8532bfe3fed4..f2486b4d9f95 100644
--- a/arch/arm64/kvm/vgic/vgic.h
+++ b/arch/arm64/kvm/vgic/vgic.h
@@ -346,11 +346,11 @@ void vgic_v4_configure_vsgis(struct kvm *kvm);
void vgic_v4_get_vlpi_state(struct vgic_irq *irq, bool *val);
int vgic_v4_request_vpe_irq(struct kvm_vcpu *vcpu, int irq);
+void vcpu_set_ich_hcr(struct kvm_vcpu *vcpu);
+
static inline bool kvm_has_gicv3(struct kvm *kvm)
{
- return (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif) &&
- irqchip_in_kernel(kvm) &&
- kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3);
+ return kvm_has_feat(kvm, ID_AA64PFR0_EL1, GIC, IMP);
}
#endif
diff --git a/arch/arm64/mm/Makefile b/arch/arm64/mm/Makefile
index 60454256945b..2fc8c6dd0407 100644
--- a/arch/arm64/mm/Makefile
+++ b/arch/arm64/mm/Makefile
@@ -1,7 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
obj-y := dma-mapping.o extable.o fault.o init.o \
cache.o copypage.o flush.o \
- ioremap.o mmap.o pgd.o mmu.o \
+ ioremap.o mmap.o pgd.o mem_encrypt.o mmu.o \
context.o proc.o pageattr.o fixmap.o
obj-$(CONFIG_ARM64_CONTPTE) += contpte.o
obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
diff --git a/arch/arm64/mm/contpte.c b/arch/arm64/mm/contpte.c
index a3edced29ac1..55107d27d3f8 100644
--- a/arch/arm64/mm/contpte.c
+++ b/arch/arm64/mm/contpte.c
@@ -421,6 +421,12 @@ int contpte_ptep_set_access_flags(struct vm_area_struct *vma,
ptep = contpte_align_down(ptep);
start_addr = addr = ALIGN_DOWN(addr, CONT_PTE_SIZE);
+ /*
+ * We are not advancing entry because __ptep_set_access_flags()
+ * only consumes access flags from entry. And since we have checked
+ * for the whole contpte block and returned early, pte_same()
+ * within __ptep_set_access_flags() is likely false.
+ */
for (i = 0; i < CONT_PTES; i++, ptep++, addr += PAGE_SIZE)
__ptep_set_access_flags(vma, addr, ptep, entry, 0);
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index 451ba7cbd5ad..8b281cf308b3 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -23,6 +23,7 @@
#include <linux/sched/debug.h>
#include <linux/highmem.h>
#include <linux/perf_event.h>
+#include <linux/pkeys.h>
#include <linux/preempt.h>
#include <linux/hugetlb.h>
@@ -486,6 +487,23 @@ static void do_bad_area(unsigned long far, unsigned long esr,
}
}
+static bool fault_from_pkey(unsigned long esr, struct vm_area_struct *vma,
+ unsigned int mm_flags)
+{
+ unsigned long iss2 = ESR_ELx_ISS2(esr);
+
+ if (!system_supports_poe())
+ return false;
+
+ if (esr_fsc_is_permission_fault(esr) && (iss2 & ESR_ELx_Overlay))
+ return true;
+
+ return !arch_vma_access_permitted(vma,
+ mm_flags & FAULT_FLAG_WRITE,
+ mm_flags & FAULT_FLAG_INSTRUCTION,
+ false);
+}
+
static bool is_el0_instruction_abort(unsigned long esr)
{
return ESR_ELx_EC(esr) == ESR_ELx_EC_IABT_LOW;
@@ -511,6 +529,7 @@ static int __kprobes do_page_fault(unsigned long far, unsigned long esr,
unsigned long addr = untagged_addr(far);
struct vm_area_struct *vma;
int si_code;
+ int pkey = -1;
if (kprobe_page_fault(regs, esr))
return 0;
@@ -575,6 +594,16 @@ static int __kprobes do_page_fault(unsigned long far, unsigned long esr,
count_vm_vma_lock_event(VMA_LOCK_SUCCESS);
goto bad_area;
}
+
+ if (fault_from_pkey(esr, vma, mm_flags)) {
+ pkey = vma_pkey(vma);
+ vma_end_read(vma);
+ fault = 0;
+ si_code = SEGV_PKUERR;
+ count_vm_vma_lock_event(VMA_LOCK_SUCCESS);
+ goto bad_area;
+ }
+
fault = handle_mm_fault(vma, addr, mm_flags | FAULT_FLAG_VMA_LOCK, regs);
if (!(fault & (VM_FAULT_RETRY | VM_FAULT_COMPLETED)))
vma_end_read(vma);
@@ -610,7 +639,16 @@ retry:
goto bad_area;
}
+ if (fault_from_pkey(esr, vma, mm_flags)) {
+ pkey = vma_pkey(vma);
+ mmap_read_unlock(mm);
+ fault = 0;
+ si_code = SEGV_PKUERR;
+ goto bad_area;
+ }
+
fault = handle_mm_fault(vma, addr, mm_flags, regs);
+
/* Quick path to respond to signals */
if (fault_signal_pending(fault, regs)) {
if (!user_mode(regs))
@@ -669,8 +707,23 @@ bad_area:
arm64_force_sig_mceerr(BUS_MCEERR_AR, far, lsb, inf->name);
} else {
+ /*
+ * The pkey value that we return to userspace can be different
+ * from the pkey that caused the fault.
+ *
+ * 1. T1 : mprotect_key(foo, PAGE_SIZE, pkey=4);
+ * 2. T1 : set POR_EL0 to deny access to pkey=4, touches, page
+ * 3. T1 : faults...
+ * 4. T2: mprotect_key(foo, PAGE_SIZE, pkey=5);
+ * 5. T1 : enters fault handler, takes mmap_lock, etc...
+ * 6. T1 : reaches here, sees vma_pkey(vma)=5, when we really
+ * faulted on a pte with its pkey=4.
+ */
/* Something tried to access memory that out of memory map */
- arm64_force_sig_fault(SIGSEGV, si_code, far, inf->name);
+ if (si_code == SEGV_PKUERR)
+ arm64_force_sig_fault_pkey(far, inf->name, pkey);
+ else
+ arm64_force_sig_fault(SIGSEGV, si_code, far, inf->name);
}
return 0;
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 9b5ab6818f7f..a0400b9aa814 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -414,8 +414,16 @@ void __init mem_init(void)
void free_initmem(void)
{
- free_reserved_area(lm_alias(__init_begin),
- lm_alias(__init_end),
+ void *lm_init_begin = lm_alias(__init_begin);
+ void *lm_init_end = lm_alias(__init_end);
+
+ WARN_ON(!IS_ALIGNED((unsigned long)lm_init_begin, PAGE_SIZE));
+ WARN_ON(!IS_ALIGNED((unsigned long)lm_init_end, PAGE_SIZE));
+
+ /* Delete __init region from memblock.reserved. */
+ memblock_free(lm_init_begin, lm_init_end - lm_init_begin);
+
+ free_reserved_area(lm_init_begin, lm_init_end,
POISON_FREE_INITMEM, "unused kernel");
/*
* Unmap the __init region but leave the VM area in place. This
diff --git a/arch/arm64/mm/ioremap.c b/arch/arm64/mm/ioremap.c
index 269f2f63ab7d..6cc0b7e7eb03 100644
--- a/arch/arm64/mm/ioremap.c
+++ b/arch/arm64/mm/ioremap.c
@@ -3,10 +3,22 @@
#include <linux/mm.h>
#include <linux/io.h>
+static ioremap_prot_hook_t ioremap_prot_hook;
+
+int arm64_ioremap_prot_hook_register(ioremap_prot_hook_t hook)
+{
+ if (WARN_ON(ioremap_prot_hook))
+ return -EBUSY;
+
+ ioremap_prot_hook = hook;
+ return 0;
+}
+
void __iomem *ioremap_prot(phys_addr_t phys_addr, size_t size,
unsigned long prot)
{
unsigned long last_addr = phys_addr + size - 1;
+ pgprot_t pgprot = __pgprot(prot);
/* Don't allow outside PHYS_MASK */
if (last_addr & ~PHYS_MASK)
@@ -16,7 +28,16 @@ void __iomem *ioremap_prot(phys_addr_t phys_addr, size_t size,
if (WARN_ON(pfn_is_map_memory(__phys_to_pfn(phys_addr))))
return NULL;
- return generic_ioremap_prot(phys_addr, size, __pgprot(prot));
+ /*
+ * If a hook is registered (e.g. for confidential computing
+ * purposes), call that now and barf if it fails.
+ */
+ if (unlikely(ioremap_prot_hook) &&
+ WARN_ON(ioremap_prot_hook(phys_addr, size, &pgprot))) {
+ return NULL;
+ }
+
+ return generic_ioremap_prot(phys_addr, size, pgprot);
}
EXPORT_SYMBOL(ioremap_prot);
diff --git a/arch/arm64/mm/mem_encrypt.c b/arch/arm64/mm/mem_encrypt.c
new file mode 100644
index 000000000000..ee3c0ab04384
--- /dev/null
+++ b/arch/arm64/mm/mem_encrypt.c
@@ -0,0 +1,50 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Implementation of the memory encryption/decryption API.
+ *
+ * Since the low-level details of the operation depend on the
+ * Confidential Computing environment (e.g. pKVM, CCA, ...), this just
+ * acts as a top-level dispatcher to whatever hooks may have been
+ * registered.
+ *
+ * Author: Will Deacon <will@kernel.org>
+ * Copyright (C) 2024 Google LLC
+ *
+ * "Hello, boils and ghouls!"
+ */
+
+#include <linux/bug.h>
+#include <linux/compiler.h>
+#include <linux/err.h>
+#include <linux/mm.h>
+
+#include <asm/mem_encrypt.h>
+
+static const struct arm64_mem_crypt_ops *crypt_ops;
+
+int arm64_mem_crypt_ops_register(const struct arm64_mem_crypt_ops *ops)
+{
+ if (WARN_ON(crypt_ops))
+ return -EBUSY;
+
+ crypt_ops = ops;
+ return 0;
+}
+
+int set_memory_encrypted(unsigned long addr, int numpages)
+{
+ if (likely(!crypt_ops) || WARN_ON(!PAGE_ALIGNED(addr)))
+ return 0;
+
+ return crypt_ops->encrypt(addr, numpages);
+}
+EXPORT_SYMBOL_GPL(set_memory_encrypted);
+
+int set_memory_decrypted(unsigned long addr, int numpages)
+{
+ if (likely(!crypt_ops) || WARN_ON(!PAGE_ALIGNED(addr)))
+ return 0;
+
+ return crypt_ops->decrypt(addr, numpages);
+}
+EXPORT_SYMBOL_GPL(set_memory_decrypted);
diff --git a/arch/arm64/mm/mmap.c b/arch/arm64/mm/mmap.c
index 642bdf908b22..7e3ad97e27d8 100644
--- a/arch/arm64/mm/mmap.c
+++ b/arch/arm64/mm/mmap.c
@@ -102,6 +102,17 @@ pgprot_t vm_get_page_prot(unsigned long vm_flags)
if (vm_flags & VM_MTE)
prot |= PTE_ATTRINDX(MT_NORMAL_TAGGED);
+#ifdef CONFIG_ARCH_HAS_PKEYS
+ if (system_supports_poe()) {
+ if (vm_flags & VM_PKEY_BIT0)
+ prot |= PTE_PO_IDX_0;
+ if (vm_flags & VM_PKEY_BIT1)
+ prot |= PTE_PO_IDX_1;
+ if (vm_flags & VM_PKEY_BIT2)
+ prot |= PTE_PO_IDX_2;
+ }
+#endif
+
return __pgprot(prot);
}
EXPORT_SYMBOL(vm_get_page_prot);
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 353ea5dc32b8..e55b02fbddc8 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -25,6 +25,7 @@
#include <linux/vmalloc.h>
#include <linux/set_memory.h>
#include <linux/kfence.h>
+#include <linux/pkeys.h>
#include <asm/barrier.h>
#include <asm/cputype.h>
@@ -1549,3 +1550,47 @@ void __cpu_replace_ttbr1(pgd_t *pgdp, bool cnp)
cpu_uninstall_idmap();
}
+
+#ifdef CONFIG_ARCH_HAS_PKEYS
+int arch_set_user_pkey_access(struct task_struct *tsk, int pkey, unsigned long init_val)
+{
+ u64 new_por = POE_RXW;
+ u64 old_por;
+ u64 pkey_shift;
+
+ if (!system_supports_poe())
+ return -ENOSPC;
+
+ /*
+ * This code should only be called with valid 'pkey'
+ * values originating from in-kernel users. Complain
+ * if a bad value is observed.
+ */
+ if (WARN_ON_ONCE(pkey >= arch_max_pkey()))
+ return -EINVAL;
+
+ /* Set the bits we need in POR: */
+ new_por = POE_RXW;
+ if (init_val & PKEY_DISABLE_WRITE)
+ new_por &= ~POE_W;
+ if (init_val & PKEY_DISABLE_ACCESS)
+ new_por &= ~POE_RW;
+ if (init_val & PKEY_DISABLE_READ)
+ new_por &= ~POE_R;
+ if (init_val & PKEY_DISABLE_EXECUTE)
+ new_por &= ~POE_X;
+
+ /* Shift the bits in to the correct place in POR for pkey: */
+ pkey_shift = pkey * POR_BITS_PER_PKEY;
+ new_por <<= pkey_shift;
+
+ /* Get old POR and mask off any old bits in place: */
+ old_por = read_sysreg_s(SYS_POR_EL0);
+ old_por &= ~(POE_MASK << pkey_shift);
+
+ /* Write old part along with new part: */
+ write_sysreg_s(old_por | new_por, SYS_POR_EL0);
+
+ return 0;
+}
+#endif
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index f4bc6c5bac06..8abdc7fed321 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -36,8 +36,6 @@
#define TCR_KASLR_FLAGS 0
#endif
-#define TCR_SMP_FLAGS TCR_SHARED
-
/* PTWs cacheable, inner/outer WBWA */
#define TCR_CACHE_FLAGS TCR_IRGN_WBWA | TCR_ORGN_WBWA
@@ -469,7 +467,7 @@ SYM_FUNC_START(__cpu_setup)
tcr .req x16
mov_q mair, MAIR_EL1_SET
mov_q tcr, TCR_T0SZ(IDMAP_VA_BITS) | TCR_T1SZ(VA_BITS_MIN) | TCR_CACHE_FLAGS | \
- TCR_SMP_FLAGS | TCR_TG_FLAGS | TCR_KASLR_FLAGS | TCR_ASID16 | \
+ TCR_SHARED | TCR_TG_FLAGS | TCR_KASLR_FLAGS | TCR_ASID16 | \
TCR_TBI0 | TCR_A1 | TCR_KASAN_SW_FLAGS | TCR_MTE_FLAGS
tcr_clear_errata_bits tcr, x9, x5
diff --git a/arch/arm64/mm/ptdump.c b/arch/arm64/mm/ptdump.c
index 6986827e0d64..264c5f9b97d8 100644
--- a/arch/arm64/mm/ptdump.c
+++ b/arch/arm64/mm/ptdump.c
@@ -38,33 +38,7 @@
seq_printf(m, fmt); \
})
-/*
- * The page dumper groups page table entries of the same type into a single
- * description. It uses pg_state to track the range information while
- * iterating over the pte entries. When the continuity is broken it then
- * dumps out a description of the range.
- */
-struct pg_state {
- struct ptdump_state ptdump;
- struct seq_file *seq;
- const struct addr_marker *marker;
- const struct mm_struct *mm;
- unsigned long start_address;
- int level;
- u64 current_prot;
- bool check_wx;
- unsigned long wx_pages;
- unsigned long uxn_pages;
-};
-
-struct prot_bits {
- u64 mask;
- u64 val;
- const char *set;
- const char *clear;
-};
-
-static const struct prot_bits pte_bits[] = {
+static const struct ptdump_prot_bits pte_bits[] = {
{
.mask = PTE_VALID,
.val = PTE_VALID,
@@ -143,14 +117,7 @@ static const struct prot_bits pte_bits[] = {
}
};
-struct pg_level {
- const struct prot_bits *bits;
- char name[4];
- int num;
- u64 mask;
-};
-
-static struct pg_level pg_level[] __ro_after_init = {
+static struct ptdump_pg_level kernel_pg_levels[] __ro_after_init = {
{ /* pgd */
.name = "PGD",
.bits = pte_bits,
@@ -174,7 +141,7 @@ static struct pg_level pg_level[] __ro_after_init = {
},
};
-static void dump_prot(struct pg_state *st, const struct prot_bits *bits,
+static void dump_prot(struct ptdump_pg_state *st, const struct ptdump_prot_bits *bits,
size_t num)
{
unsigned i;
@@ -192,7 +159,7 @@ static void dump_prot(struct pg_state *st, const struct prot_bits *bits,
}
}
-static void note_prot_uxn(struct pg_state *st, unsigned long addr)
+static void note_prot_uxn(struct ptdump_pg_state *st, unsigned long addr)
{
if (!st->check_wx)
return;
@@ -206,7 +173,7 @@ static void note_prot_uxn(struct pg_state *st, unsigned long addr)
st->uxn_pages += (addr - st->start_address) / PAGE_SIZE;
}
-static void note_prot_wx(struct pg_state *st, unsigned long addr)
+static void note_prot_wx(struct ptdump_pg_state *st, unsigned long addr)
{
if (!st->check_wx)
return;
@@ -221,16 +188,17 @@ static void note_prot_wx(struct pg_state *st, unsigned long addr)
st->wx_pages += (addr - st->start_address) / PAGE_SIZE;
}
-static void note_page(struct ptdump_state *pt_st, unsigned long addr, int level,
- u64 val)
+void note_page(struct ptdump_state *pt_st, unsigned long addr, int level,
+ u64 val)
{
- struct pg_state *st = container_of(pt_st, struct pg_state, ptdump);
+ struct ptdump_pg_state *st = container_of(pt_st, struct ptdump_pg_state, ptdump);
+ struct ptdump_pg_level *pg_level = st->pg_level;
static const char units[] = "KMGTPE";
u64 prot = 0;
/* check if the current level has been folded dynamically */
- if ((level == 1 && mm_p4d_folded(st->mm)) ||
- (level == 2 && mm_pud_folded(st->mm)))
+ if (st->mm && ((level == 1 && mm_p4d_folded(st->mm)) ||
+ (level == 2 && mm_pud_folded(st->mm))))
level = 0;
if (level >= 0)
@@ -286,15 +254,16 @@ static void note_page(struct ptdump_state *pt_st, unsigned long addr, int level,
void ptdump_walk(struct seq_file *s, struct ptdump_info *info)
{
unsigned long end = ~0UL;
- struct pg_state st;
+ struct ptdump_pg_state st;
if (info->base_addr < TASK_SIZE_64)
end = TASK_SIZE_64;
- st = (struct pg_state){
+ st = (struct ptdump_pg_state){
.seq = s,
.marker = info->markers,
.mm = info->mm,
+ .pg_level = &kernel_pg_levels[0],
.level = -1,
.ptdump = {
.note_page = note_page,
@@ -312,10 +281,10 @@ static void __init ptdump_initialize(void)
{
unsigned i, j;
- for (i = 0; i < ARRAY_SIZE(pg_level); i++)
- if (pg_level[i].bits)
- for (j = 0; j < pg_level[i].num; j++)
- pg_level[i].mask |= pg_level[i].bits[j].mask;
+ for (i = 0; i < ARRAY_SIZE(kernel_pg_levels); i++)
+ if (kernel_pg_levels[i].bits)
+ for (j = 0; j < kernel_pg_levels[i].num; j++)
+ kernel_pg_levels[i].mask |= kernel_pg_levels[i].bits[j].mask;
}
static struct ptdump_info kernel_ptdump_info __ro_after_init = {
@@ -324,12 +293,13 @@ static struct ptdump_info kernel_ptdump_info __ro_after_init = {
bool ptdump_check_wx(void)
{
- struct pg_state st = {
+ struct ptdump_pg_state st = {
.seq = NULL,
.marker = (struct addr_marker[]) {
{ 0, NULL},
{ -1, NULL},
},
+ .pg_level = &kernel_pg_levels[0],
.level = -1,
.check_wx = true,
.ptdump = {
diff --git a/arch/arm64/mm/trans_pgd.c b/arch/arm64/mm/trans_pgd.c
index 5139a28130c0..0f7b484cb2ff 100644
--- a/arch/arm64/mm/trans_pgd.c
+++ b/arch/arm64/mm/trans_pgd.c
@@ -42,14 +42,16 @@ static void _copy_pte(pte_t *dst_ptep, pte_t *src_ptep, unsigned long addr)
* the temporary mappings we use during restore.
*/
__set_pte(dst_ptep, pte_mkwrite_novma(pte));
- } else if ((debug_pagealloc_enabled() ||
- is_kfence_address((void *)addr)) && !pte_none(pte)) {
+ } else if (!pte_none(pte)) {
/*
* debug_pagealloc will removed the PTE_VALID bit if
* the page isn't in use by the resume kernel. It may have
* been in use by the original kernel, in which case we need
* to put it back in our copy to do the restore.
*
+ * Other cases include kfence / vmalloc / memfd_secret which
+ * may call `set_direct_map_invalid_noflush()`.
+ *
* Before marking this entry valid, check the pfn should
* be mapped.
*/
diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps
index ac3429d892b9..eedb5acc21ed 100644
--- a/arch/arm64/tools/cpucaps
+++ b/arch/arm64/tools/cpucaps
@@ -45,6 +45,7 @@ HAS_MOPS
HAS_NESTED_VIRT
HAS_PAN
HAS_S1PIE
+HAS_S1POE
HAS_RAS_EXTN
HAS_RNG
HAS_SB
diff --git a/arch/arm64/tools/sysreg b/arch/arm64/tools/sysreg
index 7ceaa1e0b4bc..8d637ac4b7c6 100644
--- a/arch/arm64/tools/sysreg
+++ b/arch/arm64/tools/sysreg
@@ -2029,6 +2029,31 @@ Sysreg FAR_EL1 3 0 6 0 0
Field 63:0 ADDR
EndSysreg
+Sysreg PMICNTR_EL0 3 3 9 4 0
+Field 63:0 ICNT
+EndSysreg
+
+Sysreg PMICFILTR_EL0 3 3 9 6 0
+Res0 63:59
+Field 58 SYNC
+Field 57:56 VS
+Res0 55:32
+Field 31 P
+Field 30 U
+Field 29 NSK
+Field 28 NSU
+Field 27 NSH
+Field 26 M
+Res0 25
+Field 24 SH
+Field 23 T
+Field 22 RLK
+Field 21 RLU
+Field 20 RLH
+Res0 19:16
+Field 15:0 evtCount
+EndSysreg
+
Sysreg PMSCR_EL1 3 0 9 9 0
Res0 63:8
Field 7:6 PCT
@@ -2153,6 +2178,11 @@ Field 4 P
Field 3:0 ALIGN
EndSysreg
+Sysreg PMSELR_EL0 3 3 9 12 5
+Res0 63:5
+Field 4:0 SEL
+EndSysreg
+
SysregFields CONTEXTIDR_ELx
Res0 63:32
Field 31:0 PROCID