summaryrefslogtreecommitdiff
path: root/arch/riscv
diff options
context:
space:
mode:
Diffstat (limited to 'arch/riscv')
-rw-r--r--arch/riscv/Kconfig65
-rw-r--r--arch/riscv/Kconfig.socs8
-rw-r--r--arch/riscv/Makefile2
-rw-r--r--arch/riscv/boot/dts/Makefile1
-rw-r--r--arch/riscv/boot/dts/canaan/Makefile4
-rw-r--r--arch/riscv/boot/dts/canaan/k210.dtsi23
-rw-r--r--arch/riscv/boot/dts/canaan/sipeed_maix_bit.dts2
-rw-r--r--arch/riscv/boot/dts/canaan/sipeed_maix_dock.dts2
-rw-r--r--arch/riscv/boot/dts/canaan/sipeed_maix_go.dts2
-rw-r--r--arch/riscv/boot/dts/canaan/sipeed_maixduino.dts2
-rw-r--r--arch/riscv/boot/dts/microchip/microchip-mpfs-icicle-kit.dts4
-rw-r--r--arch/riscv/boot/dts/microchip/microchip-mpfs.dtsi60
-rw-r--r--arch/riscv/boot/dts/sifive/fu540-c000.dtsi40
-rw-r--r--arch/riscv/boot/dts/sifive/fu740-c000.dtsi14
-rw-r--r--arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts1
-rw-r--r--arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts118
-rw-r--r--arch/riscv/boot/dts/starfive/Makefile2
-rw-r--r--arch/riscv/boot/dts/starfive/jh7100-beaglev-starlight.dts164
-rw-r--r--arch/riscv/boot/dts/starfive/jh7100.dtsi230
-rw-r--r--arch/riscv/configs/defconfig15
-rw-r--r--arch/riscv/configs/nommu_k210_defconfig3
-rw-r--r--arch/riscv/configs/nommu_k210_sdcard_defconfig4
-rw-r--r--arch/riscv/configs/nommu_virt_defconfig3
-rw-r--r--arch/riscv/configs/rv32_defconfig8
-rw-r--r--arch/riscv/errata/alternative.c3
-rw-r--r--arch/riscv/include/asm/Kbuild1
-rw-r--r--arch/riscv/include/asm/asm-extable.h65
-rw-r--r--arch/riscv/include/asm/bitops.h1
-rw-r--r--arch/riscv/include/asm/cpu_ops.h2
-rw-r--r--arch/riscv/include/asm/cpu_ops_sbi.h25
-rw-r--r--arch/riscv/include/asm/csr.h3
-rw-r--r--arch/riscv/include/asm/efi.h1
-rw-r--r--arch/riscv/include/asm/extable.h48
-rw-r--r--arch/riscv/include/asm/fixmap.h1
-rw-r--r--arch/riscv/include/asm/futex.h30
-rw-r--r--arch/riscv/include/asm/gpr-num.h77
-rw-r--r--arch/riscv/include/asm/kasan.h11
-rw-r--r--arch/riscv/include/asm/kvm_host.h20
-rw-r--r--arch/riscv/include/asm/kvm_types.h2
-rw-r--r--arch/riscv/include/asm/kvm_vcpu_sbi.h33
-rw-r--r--arch/riscv/include/asm/page.h16
-rw-r--r--arch/riscv/include/asm/pgalloc.h40
-rw-r--r--arch/riscv/include/asm/pgtable-64.h108
-rw-r--r--arch/riscv/include/asm/pgtable-bits.h2
-rw-r--r--arch/riscv/include/asm/pgtable.h85
-rw-r--r--arch/riscv/include/asm/sbi.h52
-rw-r--r--arch/riscv/include/asm/smp.h10
-rw-r--r--arch/riscv/include/asm/sparsemem.h6
-rw-r--r--arch/riscv/include/asm/spinlock_types.h2
-rw-r--r--arch/riscv/include/asm/uaccess.h163
-rw-r--r--arch/riscv/kernel/Makefile3
-rw-r--r--arch/riscv/kernel/asm-offsets.c3
-rw-r--r--arch/riscv/kernel/cpu-hotplug.c8
-rw-r--r--arch/riscv/kernel/cpu.c23
-rw-r--r--arch/riscv/kernel/cpu_ops.c26
-rw-r--r--arch/riscv/kernel/cpu_ops_sbi.c26
-rw-r--r--arch/riscv/kernel/cpu_ops_spinwait.c27
-rw-r--r--arch/riscv/kernel/head.S51
-rw-r--r--arch/riscv/kernel/head.h6
-rw-r--r--arch/riscv/kernel/kexec_relocate.S20
-rw-r--r--arch/riscv/kernel/machine_kexec.c3
-rw-r--r--arch/riscv/kernel/perf_callchain.c10
-rw-r--r--arch/riscv/kernel/ptrace.c4
-rw-r--r--arch/riscv/kernel/sbi.c224
-rw-r--r--arch/riscv/kernel/smp.c10
-rw-r--r--arch/riscv/kernel/smpboot.c2
-rw-r--r--arch/riscv/kernel/stacktrace.c4
-rw-r--r--arch/riscv/kernel/traps.c2
-rw-r--r--arch/riscv/kernel/vmlinux-xip.lds.S1
-rw-r--r--arch/riscv/kernel/vmlinux.lds.S3
-rw-r--r--arch/riscv/kvm/Makefile10
-rw-r--r--arch/riscv/kvm/main.c8
-rw-r--r--arch/riscv/kvm/mmu.c112
-rw-r--r--arch/riscv/kvm/vcpu.c30
-rw-r--r--arch/riscv/kvm/vcpu_exit.c2
-rw-r--r--arch/riscv/kvm/vcpu_fp.c2
-rw-r--r--arch/riscv/kvm/vcpu_sbi.c215
-rw-r--r--arch/riscv/kvm/vcpu_sbi_base.c99
-rw-r--r--arch/riscv/kvm/vcpu_sbi_hsm.c105
-rw-r--r--arch/riscv/kvm/vcpu_sbi_replace.c132
-rw-r--r--arch/riscv/kvm/vcpu_sbi_v01.c123
-rw-r--r--arch/riscv/kvm/vm.c15
-rw-r--r--arch/riscv/kvm/vmid.c6
-rw-r--r--arch/riscv/lib/uaccess.S28
-rw-r--r--arch/riscv/mm/cacheflush.c5
-rw-r--r--arch/riscv/mm/context.c4
-rw-r--r--arch/riscv/mm/extable.c66
-rw-r--r--arch/riscv/mm/fault.c6
-rw-r--r--arch/riscv/mm/init.c398
-rw-r--r--arch/riscv/mm/kasan_init.c248
-rw-r--r--arch/riscv/mm/tlbflush.c9
-rw-r--r--arch/riscv/net/bpf_jit_comp32.c6
-rw-r--r--arch/riscv/net/bpf_jit_comp64.c18
93 files changed, 2625 insertions, 1062 deletions
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index 821252b65f89..5adcbd9b5e88 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -14,6 +14,7 @@ config RISCV
def_bool y
select ARCH_CLOCKSOURCE_INIT
select ARCH_ENABLE_HUGEPAGE_MIGRATION if HUGETLB_PAGE && MIGRATION
+ select ARCH_ENABLE_SPLIT_PMD_PTLOCK if PGTABLE_LEVELS > 2
select ARCH_HAS_BINFMT_FLAT
select ARCH_HAS_DEBUG_VM_PGTABLE
select ARCH_HAS_DEBUG_VIRTUAL if MMU
@@ -75,6 +76,7 @@ config RISCV
select HAVE_ARCH_SECCOMP_FILTER
select HAVE_ARCH_TRACEHOOK
select HAVE_ARCH_TRANSPARENT_HUGEPAGE if 64BIT && MMU
+ select ARCH_ENABLE_THP_MIGRATION if TRANSPARENT_HUGEPAGE
select HAVE_ARCH_THREAD_STRUCT_WHITELIST
select HAVE_ARCH_VMAP_STACK if MMU && 64BIT
select HAVE_ASM_MODVERSIONS
@@ -83,7 +85,6 @@ config RISCV
select HAVE_DMA_CONTIGUOUS if MMU
select HAVE_EBPF_JIT if MMU
select HAVE_FUNCTION_ERROR_INJECTION
- select HAVE_FUTEX_CMPXCHG if FUTEX
select HAVE_GCC_PLUGINS
select HAVE_GENERIC_VDSO if MMU && 64BIT
select HAVE_IRQ_TIME_ACCOUNTING
@@ -146,27 +147,16 @@ config MMU
Select if you want MMU-based virtualised addressing space
support by paged memory management. If unsure, say 'Y'.
-config VA_BITS
- int
- default 32 if 32BIT
- default 39 if 64BIT
-
-config PA_BITS
- int
- default 34 if 32BIT
- default 56 if 64BIT
-
config PAGE_OFFSET
hex
- default 0xC0000000 if 32BIT && MAXPHYSMEM_1GB
+ default 0xC0000000 if 32BIT
default 0x80000000 if 64BIT && !MMU
- default 0xffffffff80000000 if 64BIT && MAXPHYSMEM_2GB
- default 0xffffffe000000000 if 64BIT && MAXPHYSMEM_128GB
+ default 0xffffaf8000000000 if 64BIT
config KASAN_SHADOW_OFFSET
hex
depends on KASAN_GENERIC
- default 0xdfffffc800000000 if 64BIT
+ default 0xdfffffff00000000 if 64BIT
default 0xffffffff if 32BIT
config ARCH_FLATMEM_ENABLE
@@ -212,7 +202,7 @@ config FIX_EARLYCON_MEM
config PGTABLE_LEVELS
int
- default 3 if 64BIT
+ default 4 if 64BIT
default 2
config LOCKDEP_SUPPORT
@@ -270,24 +260,6 @@ config MODULE_SECTIONS
bool
select HAVE_MOD_ARCH_SPECIFIC
-choice
- prompt "Maximum Physical Memory"
- default MAXPHYSMEM_1GB if 32BIT
- default MAXPHYSMEM_2GB if 64BIT && CMODEL_MEDLOW
- default MAXPHYSMEM_128GB if 64BIT && CMODEL_MEDANY
-
- config MAXPHYSMEM_1GB
- depends on 32BIT
- bool "1GiB"
- config MAXPHYSMEM_2GB
- depends on 64BIT && CMODEL_MEDLOW
- bool "2GiB"
- config MAXPHYSMEM_128GB
- depends on 64BIT && CMODEL_MEDANY
- bool "128GiB"
-endchoice
-
-
config SMP
bool "Symmetric Multi-Processing"
help
@@ -334,6 +306,8 @@ config NUMA
select GENERIC_ARCH_NUMA
select OF_NUMA
select ARCH_SUPPORTS_NUMA_BALANCING
+ select USE_PERCPU_NUMA_NODE_ID
+ select NEED_PER_CPU_EMBED_FIRST_CHUNK
help
Enable NUMA (Non-Uniform Memory Access) support.
@@ -349,14 +323,6 @@ config NODES_SHIFT
Specify the maximum number of NUMA Nodes available on the target
system. Increases memory reserved to accommodate various tables.
-config USE_PERCPU_NUMA_NODE_ID
- def_bool y
- depends on NUMA
-
-config NEED_PER_CPU_EMBED_FIRST_CHUNK
- def_bool y
- depends on NUMA
-
config RISCV_ISA_C
bool "Emit compressed instructions when building Linux"
default y
@@ -397,12 +363,25 @@ source "kernel/Kconfig.hz"
config RISCV_SBI_V01
bool "SBI v0.1 support"
- default y
depends on RISCV_SBI
help
This config allows kernel to use SBI v0.1 APIs. This will be
deprecated in future once legacy M-mode software are no longer in use.
+config RISCV_BOOT_SPINWAIT
+ bool "Spinwait booting method"
+ depends on SMP
+ default y
+ help
+ This enables support for booting Linux via spinwait method. In the
+ spinwait method, all cores randomly jump to Linux. One of the cores
+ gets chosen via lottery and all other keep spinning on a percpu
+ variable. This method cannot support CPU hotplug and sparse hartid
+ scheme. It should be only enabled for M-mode Linux or platforms relying
+ on older firmware without SBI HSM extension. All other platforms should
+ rely on ordered booting via SBI HSM extension which gets chosen
+ dynamically at runtime if the firmware supports it.
+
config KEXEC
bool "Kexec system call"
select KEXEC_CORE
diff --git a/arch/riscv/Kconfig.socs b/arch/riscv/Kconfig.socs
index 30676ebb16eb..6ec44a22278a 100644
--- a/arch/riscv/Kconfig.socs
+++ b/arch/riscv/Kconfig.socs
@@ -19,6 +19,14 @@ config SOC_SIFIVE
help
This enables support for SiFive SoC platform hardware.
+config SOC_STARFIVE
+ bool "StarFive SoCs"
+ select PINCTRL
+ select RESET_CONTROLLER
+ select SIFIVE_PLIC
+ help
+ This enables support for StarFive SoC platform hardware.
+
config SOC_VIRT
bool "QEMU Virt Machine"
select CLINT_TIMER if RISCV_M_MODE
diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile
index 5927c94302b8..8a107ed18b0d 100644
--- a/arch/riscv/Makefile
+++ b/arch/riscv/Makefile
@@ -107,11 +107,13 @@ PHONY += vdso_install
vdso_install:
$(Q)$(MAKE) $(build)=arch/riscv/kernel/vdso $@
+ifeq ($(KBUILD_EXTMOD),)
ifeq ($(CONFIG_MMU),y)
prepare: vdso_prepare
vdso_prepare: prepare0
$(Q)$(MAKE) $(build)=arch/riscv/kernel/vdso include/generated/vdso-offsets.h
endif
+endif
ifneq ($(CONFIG_XIP_KERNEL),y)
ifeq ($(CONFIG_RISCV_M_MODE)$(CONFIG_SOC_CANAAN),yy)
diff --git a/arch/riscv/boot/dts/Makefile b/arch/riscv/boot/dts/Makefile
index fe996b88319e..ff174996cdfd 100644
--- a/arch/riscv/boot/dts/Makefile
+++ b/arch/riscv/boot/dts/Makefile
@@ -1,5 +1,6 @@
# SPDX-License-Identifier: GPL-2.0
subdir-y += sifive
+subdir-y += starfive
subdir-$(CONFIG_SOC_CANAAN_K210_DTB_BUILTIN) += canaan
subdir-y += microchip
diff --git a/arch/riscv/boot/dts/canaan/Makefile b/arch/riscv/boot/dts/canaan/Makefile
index 9ee7156c0c31..c61b08ac8554 100644
--- a/arch/riscv/boot/dts/canaan/Makefile
+++ b/arch/riscv/boot/dts/canaan/Makefile
@@ -1,5 +1,3 @@
# SPDX-License-Identifier: GPL-2.0
-ifneq ($(CONFIG_SOC_CANAAN_K210_DTB_SOURCE),"")
-dtb-y += $(strip $(shell echo $(CONFIG_SOC_CANAAN_K210_DTB_SOURCE))).dtb
+dtb-$(CONFIG_SOC_CANAAN_K210_DTB_BUILTIN) += $(addsuffix .dtb, $(CONFIG_SOC_CANAAN_K210_DTB_SOURCE))
obj-$(CONFIG_SOC_CANAAN_K210_DTB_BUILTIN) += $(addsuffix .o, $(dtb-y))
-endif
diff --git a/arch/riscv/boot/dts/canaan/k210.dtsi b/arch/riscv/boot/dts/canaan/k210.dtsi
index 5e8ca8142482..56f57118c633 100644
--- a/arch/riscv/boot/dts/canaan/k210.dtsi
+++ b/arch/riscv/boot/dts/canaan/k210.dtsi
@@ -103,8 +103,8 @@
clint0: timer@2000000 {
compatible = "canaan,k210-clint", "sifive,clint0";
reg = <0x2000000 0xC000>;
- interrupts-extended = <&cpu0_intc 3 &cpu0_intc 7
- &cpu1_intc 3 &cpu1_intc 7>;
+ interrupts-extended = <&cpu0_intc 3>, <&cpu0_intc 7>,
+ <&cpu1_intc 3>, <&cpu1_intc 7>;
};
plic0: interrupt-controller@c000000 {
@@ -113,7 +113,7 @@
compatible = "canaan,k210-plic", "sifive,plic-1.0.0";
reg = <0xC000000 0x4000000>;
interrupt-controller;
- interrupts-extended = <&cpu0_intc 11 &cpu1_intc 11>;
+ interrupts-extended = <&cpu0_intc 11>, <&cpu1_intc 11>;
riscv,ndev = <65>;
};
@@ -130,10 +130,11 @@
compatible = "canaan,k210-gpiohs", "sifive,gpio0";
reg = <0x38001000 0x1000>;
interrupt-controller;
- interrupts = <34 35 36 37 38 39 40 41
- 42 43 44 45 46 47 48 49
- 50 51 52 53 54 55 56 57
- 58 59 60 61 62 63 64 65>;
+ interrupts = <34>, <35>, <36>, <37>, <38>, <39>, <40>,
+ <41>, <42>, <43>, <44>, <45>, <46>, <47>,
+ <48>, <49>, <50>, <51>, <52>, <53>, <54>,
+ <55>, <56>, <57>, <58>, <59>, <60>, <61>,
+ <62>, <63>, <64>, <65>;
gpio-controller;
ngpios = <32>;
};
@@ -141,7 +142,7 @@
dmac0: dma-controller@50000000 {
compatible = "snps,axi-dma-1.01a";
reg = <0x50000000 0x1000>;
- interrupts = <27 28 29 30 31 32>;
+ interrupts = <27>, <28>, <29>, <30>, <31>, <32>;
#dma-cells = <1>;
clocks = <&sysclk K210_CLK_DMA>, <&sysclk K210_CLK_DMA>;
clock-names = "core-clk", "cfgr-clk";
@@ -316,7 +317,7 @@
timer0: timer@502d0000 {
compatible = "snps,dw-apb-timer";
reg = <0x502D0000 0x100>;
- interrupts = <14 15>;
+ interrupts = <14>, <15>;
clocks = <&sysclk K210_CLK_TIMER0>,
<&sysclk K210_CLK_APB0>;
clock-names = "timer", "pclk";
@@ -326,7 +327,7 @@
timer1: timer@502e0000 {
compatible = "snps,dw-apb-timer";
reg = <0x502E0000 0x100>;
- interrupts = <16 17>;
+ interrupts = <16>, <17>;
clocks = <&sysclk K210_CLK_TIMER1>,
<&sysclk K210_CLK_APB0>;
clock-names = "timer", "pclk";
@@ -336,7 +337,7 @@
timer2: timer@502f0000 {
compatible = "snps,dw-apb-timer";
reg = <0x502F0000 0x100>;
- interrupts = <18 19>;
+ interrupts = <18>, <19>;
clocks = <&sysclk K210_CLK_TIMER2>,
<&sysclk K210_CLK_APB0>;
clock-names = "timer", "pclk";
diff --git a/arch/riscv/boot/dts/canaan/sipeed_maix_bit.dts b/arch/riscv/boot/dts/canaan/sipeed_maix_bit.dts
index 0bcaf35045e7..984872f3d3a9 100644
--- a/arch/riscv/boot/dts/canaan/sipeed_maix_bit.dts
+++ b/arch/riscv/boot/dts/canaan/sipeed_maix_bit.dts
@@ -199,7 +199,7 @@
};
&spi3 {
- spi-flash@0 {
+ flash@0 {
compatible = "jedec,spi-nor";
reg = <0>;
spi-max-frequency = <50000000>;
diff --git a/arch/riscv/boot/dts/canaan/sipeed_maix_dock.dts b/arch/riscv/boot/dts/canaan/sipeed_maix_dock.dts
index ac8a03f5867a..7ba99b4da304 100644
--- a/arch/riscv/boot/dts/canaan/sipeed_maix_dock.dts
+++ b/arch/riscv/boot/dts/canaan/sipeed_maix_dock.dts
@@ -201,7 +201,7 @@
};
&spi3 {
- spi-flash@0 {
+ flash@0 {
compatible = "jedec,spi-nor";
reg = <0>;
spi-max-frequency = <50000000>;
diff --git a/arch/riscv/boot/dts/canaan/sipeed_maix_go.dts b/arch/riscv/boot/dts/canaan/sipeed_maix_go.dts
index 623998194bc1..be9b12c9b374 100644
--- a/arch/riscv/boot/dts/canaan/sipeed_maix_go.dts
+++ b/arch/riscv/boot/dts/canaan/sipeed_maix_go.dts
@@ -209,7 +209,7 @@
};
&spi3 {
- spi-flash@0 {
+ flash@0 {
compatible = "jedec,spi-nor";
reg = <0>;
spi-max-frequency = <50000000>;
diff --git a/arch/riscv/boot/dts/canaan/sipeed_maixduino.dts b/arch/riscv/boot/dts/canaan/sipeed_maixduino.dts
index cf605ba0d67e..031c0c28f819 100644
--- a/arch/riscv/boot/dts/canaan/sipeed_maixduino.dts
+++ b/arch/riscv/boot/dts/canaan/sipeed_maixduino.dts
@@ -174,7 +174,7 @@
};
&spi3 {
- spi-flash@0 {
+ flash@0 {
compatible = "jedec,spi-nor";
reg = <0>;
spi-max-frequency = <50000000>;
diff --git a/arch/riscv/boot/dts/microchip/microchip-mpfs-icicle-kit.dts b/arch/riscv/boot/dts/microchip/microchip-mpfs-icicle-kit.dts
index fc1e5869df1b..0c748ae1b006 100644
--- a/arch/riscv/boot/dts/microchip/microchip-mpfs-icicle-kit.dts
+++ b/arch/riscv/boot/dts/microchip/microchip-mpfs-icicle-kit.dts
@@ -35,6 +35,10 @@
};
};
+&refclk {
+ clock-frequency = <600000000>;
+};
+
&serial0 {
status = "okay";
};
diff --git a/arch/riscv/boot/dts/microchip/microchip-mpfs.dtsi b/arch/riscv/boot/dts/microchip/microchip-mpfs.dtsi
index c9f6d205d2ba..869aaf0d5c06 100644
--- a/arch/riscv/boot/dts/microchip/microchip-mpfs.dtsi
+++ b/arch/riscv/boot/dts/microchip/microchip-mpfs.dtsi
@@ -9,9 +9,6 @@
model = "Microchip PolarFire SoC";
compatible = "microchip,mpfs";
- chosen {
- };
-
cpus {
#address-cells = <1>;
#size-cells = <0>;
@@ -142,6 +139,11 @@
};
};
+ refclk: msspllclk {
+ compatible = "fixed-clock";
+ #clock-cells = <0>;
+ };
+
soc {
#address-cells = <2>;
#size-cells = <2>;
@@ -156,62 +158,48 @@
cache-size = <2097152>;
cache-unified;
interrupt-parent = <&plic>;
- interrupts = <1 2 3>;
+ interrupts = <1>, <2>, <3>;
reg = <0x0 0x2010000 0x0 0x1000>;
};
clint@2000000 {
compatible = "sifive,fu540-c000-clint", "sifive,clint0";
reg = <0x0 0x2000000 0x0 0xC000>;
- interrupts-extended = <&cpu0_intc 3 &cpu0_intc 7
- &cpu1_intc 3 &cpu1_intc 7
- &cpu2_intc 3 &cpu2_intc 7
- &cpu3_intc 3 &cpu3_intc 7
- &cpu4_intc 3 &cpu4_intc 7>;
+ interrupts-extended = <&cpu0_intc 3>, <&cpu0_intc 7>,
+ <&cpu1_intc 3>, <&cpu1_intc 7>,
+ <&cpu2_intc 3>, <&cpu2_intc 7>,
+ <&cpu3_intc 3>, <&cpu3_intc 7>,
+ <&cpu4_intc 3>, <&cpu4_intc 7>;
};
plic: interrupt-controller@c000000 {
- #interrupt-cells = <1>;
compatible = "sifive,fu540-c000-plic", "sifive,plic-1.0.0";
reg = <0x0 0xc000000 0x0 0x4000000>;
- riscv,ndev = <186>;
+ #address-cells = <0>;
+ #interrupt-cells = <1>;
interrupt-controller;
- interrupts-extended = <&cpu0_intc 11
- &cpu1_intc 11 &cpu1_intc 9
- &cpu2_intc 11 &cpu2_intc 9
- &cpu3_intc 11 &cpu3_intc 9
- &cpu4_intc 11 &cpu4_intc 9>;
+ interrupts-extended = <&cpu0_intc 11>,
+ <&cpu1_intc 11>, <&cpu1_intc 9>,
+ <&cpu2_intc 11>, <&cpu2_intc 9>,
+ <&cpu3_intc 11>, <&cpu3_intc 9>,
+ <&cpu4_intc 11>, <&cpu4_intc 9>;
+ riscv,ndev = <186>;
};
dma@3000000 {
compatible = "sifive,fu540-c000-pdma";
reg = <0x0 0x3000000 0x0 0x8000>;
interrupt-parent = <&plic>;
- interrupts = <23 24 25 26 27 28 29 30>;
+ interrupts = <23>, <24>, <25>, <26>, <27>, <28>, <29>,
+ <30>;
#dma-cells = <1>;
};
- refclk: refclk {
- compatible = "fixed-clock";
- #clock-cells = <0>;
- clock-frequency = <600000000>;
- clock-output-names = "msspllclk";
- };
-
clkcfg: clkcfg@20002000 {
compatible = "microchip,mpfs-clkcfg";
reg = <0x0 0x20002000 0x0 0x1000>;
- reg-names = "mss_sysreg";
clocks = <&refclk>;
#clock-cells = <1>;
- clock-output-names = "cpu", "axi", "ahb", "envm", /* 0-3 */
- "mac0", "mac1", "mmc", "timer", /* 4-7 */
- "mmuart0", "mmuart1", "mmuart2", "mmuart3", /* 8-11 */
- "mmuart4", "spi0", "spi1", "i2c0", /* 12-15 */
- "i2c1", "can0", "can1", "usb", /* 16-19 */
- "rsvd", "rtc", "qspi", "gpio0", /* 20-23 */
- "gpio1", "gpio2", "ddrc", "fic0", /* 24-27 */
- "fic1", "fic2", "fic3", "athena", "cfm"; /* 28-32 */
};
serial0: serial@20000000 {
@@ -267,7 +255,7 @@
compatible = "microchip,mpfs-sd4hc", "cdns,sd4hc";
reg = <0x0 0x20008000 0x0 0x1000>;
interrupt-parent = <&plic>;
- interrupts = <88 89>;
+ interrupts = <88>, <89>;
clocks = <&clkcfg 6>;
max-frequency = <200000000>;
status = "disabled";
@@ -277,7 +265,7 @@
compatible = "cdns,macb";
reg = <0x0 0x20110000 0x0 0x2000>;
interrupt-parent = <&plic>;
- interrupts = <64 65 66 67>;
+ interrupts = <64>, <65>, <66>, <67>;
local-mac-address = [00 00 00 00 00 00];
clocks = <&clkcfg 4>, <&clkcfg 2>;
clock-names = "pclk", "hclk";
@@ -290,7 +278,7 @@
compatible = "cdns,macb";
reg = <0x0 0x20112000 0x0 0x2000>;
interrupt-parent = <&plic>;
- interrupts = <70 71 72 73>;
+ interrupts = <70>, <71>, <72>, <73>;
local-mac-address = [00 00 00 00 00 00];
clocks = <&clkcfg 5>, <&clkcfg 2>;
status = "disabled";
diff --git a/arch/riscv/boot/dts/sifive/fu540-c000.dtsi b/arch/riscv/boot/dts/sifive/fu540-c000.dtsi
index 0655b5c4201d..3eef52b1a59b 100644
--- a/arch/riscv/boot/dts/sifive/fu540-c000.dtsi
+++ b/arch/riscv/boot/dts/sifive/fu540-c000.dtsi
@@ -137,20 +137,21 @@
soc {
#address-cells = <2>;
#size-cells = <2>;
- compatible = "sifive,fu540-c000", "sifive,fu540", "simple-bus";
+ compatible = "simple-bus";
ranges;
plic0: interrupt-controller@c000000 {
- #interrupt-cells = <1>;
compatible = "sifive,fu540-c000-plic", "sifive,plic-1.0.0";
reg = <0x0 0xc000000 0x0 0x4000000>;
- riscv,ndev = <53>;
+ #address-cells = <0>;
+ #interrupt-cells = <1>;
interrupt-controller;
- interrupts-extended = <
- &cpu0_intc 0xffffffff
- &cpu1_intc 0xffffffff &cpu1_intc 9
- &cpu2_intc 0xffffffff &cpu2_intc 9
- &cpu3_intc 0xffffffff &cpu3_intc 9
- &cpu4_intc 0xffffffff &cpu4_intc 9>;
+ interrupts-extended =
+ <&cpu0_intc 0xffffffff>,
+ <&cpu1_intc 0xffffffff>, <&cpu1_intc 9>,
+ <&cpu2_intc 0xffffffff>, <&cpu2_intc 9>,
+ <&cpu3_intc 0xffffffff>, <&cpu3_intc 9>,
+ <&cpu4_intc 0xffffffff>, <&cpu4_intc 9>;
+ riscv,ndev = <53>;
};
prci: clock-controller@10000000 {
compatible = "sifive,fu540-c000-prci";
@@ -170,7 +171,8 @@
compatible = "sifive,fu540-c000-pdma";
reg = <0x0 0x3000000 0x0 0x8000>;
interrupt-parent = <&plic0>;
- interrupts = <23 24 25 26 27 28 29 30>;
+ interrupts = <23>, <24>, <25>, <26>, <27>, <28>, <29>,
+ <30>;
#dma-cells = <1>;
};
uart1: serial@10011000 {
@@ -195,8 +197,8 @@
};
qspi0: spi@10040000 {
compatible = "sifive,fu540-c000-spi", "sifive,spi0";
- reg = <0x0 0x10040000 0x0 0x1000
- 0x0 0x20000000 0x0 0x10000000>;
+ reg = <0x0 0x10040000 0x0 0x1000>,
+ <0x0 0x20000000 0x0 0x10000000>;
interrupt-parent = <&plic0>;
interrupts = <51>;
clocks = <&prci PRCI_CLK_TLCLK>;
@@ -206,8 +208,8 @@
};
qspi1: spi@10041000 {
compatible = "sifive,fu540-c000-spi", "sifive,spi0";
- reg = <0x0 0x10041000 0x0 0x1000
- 0x0 0x30000000 0x0 0x10000000>;
+ reg = <0x0 0x10041000 0x0 0x1000>,
+ <0x0 0x30000000 0x0 0x10000000>;
interrupt-parent = <&plic0>;
interrupts = <52>;
clocks = <&prci PRCI_CLK_TLCLK>;
@@ -229,8 +231,8 @@
compatible = "sifive,fu540-c000-gem";
interrupt-parent = <&plic0>;
interrupts = <53>;
- reg = <0x0 0x10090000 0x0 0x2000
- 0x0 0x100a0000 0x0 0x1000>;
+ reg = <0x0 0x10090000 0x0 0x2000>,
+ <0x0 0x100a0000 0x0 0x1000>;
local-mac-address = [00 00 00 00 00 00];
clock-names = "pclk", "hclk";
clocks = <&prci PRCI_CLK_GEMGXLPLL>,
@@ -243,7 +245,7 @@
compatible = "sifive,fu540-c000-pwm", "sifive,pwm0";
reg = <0x0 0x10020000 0x0 0x1000>;
interrupt-parent = <&plic0>;
- interrupts = <42 43 44 45>;
+ interrupts = <42>, <43>, <44>, <45>;
clocks = <&prci PRCI_CLK_TLCLK>;
#pwm-cells = <3>;
status = "disabled";
@@ -252,7 +254,7 @@
compatible = "sifive,fu540-c000-pwm", "sifive,pwm0";
reg = <0x0 0x10021000 0x0 0x1000>;
interrupt-parent = <&plic0>;
- interrupts = <46 47 48 49>;
+ interrupts = <46>, <47>, <48>, <49>;
clocks = <&prci PRCI_CLK_TLCLK>;
#pwm-cells = <3>;
status = "disabled";
@@ -265,7 +267,7 @@
cache-size = <2097152>;
cache-unified;
interrupt-parent = <&plic0>;
- interrupts = <1 2 3>;
+ interrupts = <1>, <2>, <3>;
reg = <0x0 0x2010000 0x0 0x1000>;
};
gpio: gpio@10060000 {
diff --git a/arch/riscv/boot/dts/sifive/fu740-c000.dtsi b/arch/riscv/boot/dts/sifive/fu740-c000.dtsi
index abbb960f90a0..8464b0e3c887 100644
--- a/arch/riscv/boot/dts/sifive/fu740-c000.dtsi
+++ b/arch/riscv/boot/dts/sifive/fu740-c000.dtsi
@@ -147,12 +147,12 @@
reg = <0x0 0xc000000 0x0 0x4000000>;
riscv,ndev = <69>;
interrupt-controller;
- interrupts-extended = <
- &cpu0_intc 0xffffffff
- &cpu1_intc 0xffffffff &cpu1_intc 9
- &cpu2_intc 0xffffffff &cpu2_intc 9
- &cpu3_intc 0xffffffff &cpu3_intc 9
- &cpu4_intc 0xffffffff &cpu4_intc 9>;
+ interrupts-extended =
+ <&cpu0_intc 0xffffffff>,
+ <&cpu1_intc 0xffffffff>, <&cpu1_intc 9>,
+ <&cpu2_intc 0xffffffff>, <&cpu2_intc 9>,
+ <&cpu3_intc 0xffffffff>, <&cpu3_intc 9>,
+ <&cpu4_intc 0xffffffff>, <&cpu4_intc 9>;
};
prci: clock-controller@10000000 {
compatible = "sifive,fu740-c000-prci";
@@ -273,7 +273,7 @@
cache-size = <2097152>;
cache-unified;
interrupt-parent = <&plic0>;
- interrupts = <19 21 22 20>;
+ interrupts = <19>, <21>, <22>, <20>;
reg = <0x0 0x2010000 0x0 0x1000>;
};
gpio: gpio@10060000 {
diff --git a/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts b/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts
index ba304d4c455c..ced0d4e47938 100644
--- a/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts
+++ b/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts
@@ -76,6 +76,7 @@
spi-max-frequency = <20000000>;
voltage-ranges = <3300 3300>;
disable-wp;
+ gpios = <&gpio 11 GPIO_ACTIVE_LOW>;
};
};
diff --git a/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts b/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts
index 4f66919215f6..c4ed9efdff03 100644
--- a/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts
+++ b/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts
@@ -2,6 +2,7 @@
/* Copyright (c) 2020 SiFive, Inc */
#include "fu740-c000.dtsi"
+#include <dt-bindings/gpio/gpio.h>
#include <dt-bindings/interrupt-controller/irq.h>
/* Clock frequency (in Hz) of the PCB crystal for rtcclk */
@@ -38,6 +39,11 @@
clock-frequency = <RTCCLK_FREQ>;
clock-output-names = "rtcclk";
};
+
+ gpio-poweroff {
+ compatible = "gpio-poweroff";
+ gpios = <&gpio 2 GPIO_ACTIVE_LOW>;
+ };
};
&uart0 {
@@ -54,10 +60,21 @@
temperature-sensor@4c {
compatible = "ti,tmp451";
reg = <0x4c>;
+ vcc-supply = <&vdd_bpro>;
interrupt-parent = <&gpio>;
interrupts = <6 IRQ_TYPE_LEVEL_LOW>;
};
+ eeprom@54 {
+ compatible = "microchip,24c02", "atmel,24c02";
+ reg = <0x54>;
+ vcc-supply = <&vdd_bpro>;
+ label = "board-id";
+ pagesize = <16>;
+ read-only;
+ size = <256>;
+ };
+
pmic@58 {
compatible = "dlg,da9063";
reg = <0x58>;
@@ -65,48 +82,44 @@
interrupts = <1 IRQ_TYPE_LEVEL_LOW>;
interrupt-controller;
- regulators {
- vdd_bcore1: bcore1 {
- regulator-min-microvolt = <900000>;
- regulator-max-microvolt = <900000>;
- regulator-min-microamp = <5000000>;
- regulator-max-microamp = <5000000>;
- regulator-always-on;
- };
+ onkey {
+ compatible = "dlg,da9063-onkey";
+ };
+
+ rtc {
+ compatible = "dlg,da9063-rtc";
+ };
- vdd_bcore2: bcore2 {
- regulator-min-microvolt = <900000>;
- regulator-max-microvolt = <900000>;
- regulator-min-microamp = <5000000>;
- regulator-max-microamp = <5000000>;
+ wdt {
+ compatible = "dlg,da9063-watchdog";
+ };
+
+ regulators {
+ vdd_bcore: bcores-merged {
+ regulator-min-microvolt = <1050000>;
+ regulator-max-microvolt = <1050000>;
+ regulator-min-microamp = <4800000>;
+ regulator-max-microamp = <4800000>;
regulator-always-on;
};
vdd_bpro: bpro {
regulator-min-microvolt = <1800000>;
regulator-max-microvolt = <1800000>;
- regulator-min-microamp = <2500000>;
- regulator-max-microamp = <2500000>;
+ regulator-min-microamp = <2400000>;
+ regulator-max-microamp = <2400000>;
regulator-always-on;
};
vdd_bperi: bperi {
- regulator-min-microvolt = <1050000>;
- regulator-max-microvolt = <1050000>;
+ regulator-min-microvolt = <1060000>;
+ regulator-max-microvolt = <1060000>;
regulator-min-microamp = <1500000>;
regulator-max-microamp = <1500000>;
regulator-always-on;
};
- vdd_bmem: bmem {
- regulator-min-microvolt = <1200000>;
- regulator-max-microvolt = <1200000>;
- regulator-min-microamp = <3000000>;
- regulator-max-microamp = <3000000>;
- regulator-always-on;
- };
-
- vdd_bio: bio {
+ vdd_bmem_bio: bmem-bio-merged {
regulator-min-microvolt = <1200000>;
regulator-max-microvolt = <1200000>;
regulator-min-microamp = <3000000>;
@@ -117,86 +130,66 @@
vdd_ldo1: ldo1 {
regulator-min-microvolt = <1800000>;
regulator-max-microvolt = <1800000>;
- regulator-min-microamp = <100000>;
- regulator-max-microamp = <100000>;
regulator-always-on;
};
vdd_ldo2: ldo2 {
regulator-min-microvolt = <1800000>;
regulator-max-microvolt = <1800000>;
- regulator-min-microamp = <200000>;
- regulator-max-microamp = <200000>;
regulator-always-on;
};
vdd_ldo3: ldo3 {
- regulator-min-microvolt = <1800000>;
- regulator-max-microvolt = <1800000>;
- regulator-min-microamp = <200000>;
- regulator-max-microamp = <200000>;
+ regulator-min-microvolt = <3300000>;
+ regulator-max-microvolt = <3300000>;
regulator-always-on;
};
vdd_ldo4: ldo4 {
- regulator-min-microvolt = <1800000>;
- regulator-max-microvolt = <1800000>;
- regulator-min-microamp = <200000>;
- regulator-max-microamp = <200000>;
+ regulator-min-microvolt = <2500000>;
+ regulator-max-microvolt = <2500000>;
regulator-always-on;
};
vdd_ldo5: ldo5 {
- regulator-min-microvolt = <1800000>;
- regulator-max-microvolt = <1800000>;
- regulator-min-microamp = <100000>;
- regulator-max-microamp = <100000>;
+ regulator-min-microvolt = <3300000>;
+ regulator-max-microvolt = <3300000>;
regulator-always-on;
};
vdd_ldo6: ldo6 {
- regulator-min-microvolt = <3300000>;
- regulator-max-microvolt = <3300000>;
- regulator-min-microamp = <200000>;
- regulator-max-microamp = <200000>;
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <1800000>;
regulator-always-on;
};
vdd_ldo7: ldo7 {
- regulator-min-microvolt = <1800000>;
- regulator-max-microvolt = <1800000>;
- regulator-min-microamp = <200000>;
- regulator-max-microamp = <200000>;
+ regulator-min-microvolt = <3300000>;
+ regulator-max-microvolt = <3300000>;
regulator-always-on;
};
vdd_ldo8: ldo8 {
- regulator-min-microvolt = <1800000>;
- regulator-max-microvolt = <1800000>;
- regulator-min-microamp = <200000>;
- regulator-max-microamp = <200000>;
+ regulator-min-microvolt = <3300000>;
+ regulator-max-microvolt = <3300000>;
regulator-always-on;
};
vdd_ld09: ldo9 {
regulator-min-microvolt = <1050000>;
regulator-max-microvolt = <1050000>;
- regulator-min-microamp = <200000>;
- regulator-max-microamp = <200000>;
+ regulator-always-on;
};
vdd_ldo10: ldo10 {
regulator-min-microvolt = <1000000>;
regulator-max-microvolt = <1000000>;
- regulator-min-microamp = <300000>;
- regulator-max-microamp = <300000>;
+ regulator-always-on;
};
vdd_ldo11: ldo11 {
regulator-min-microvolt = <2500000>;
regulator-max-microvolt = <2500000>;
- regulator-min-microamp = <300000>;
- regulator-max-microamp = <300000>;
regulator-always-on;
};
};
@@ -223,6 +216,7 @@
spi-max-frequency = <20000000>;
voltage-ranges = <3300 3300>;
disable-wp;
+ gpios = <&gpio 15 GPIO_ACTIVE_LOW>;
};
};
@@ -245,4 +239,8 @@
&gpio {
status = "okay";
+ gpio-line-names = "J29.1", "PMICNTB", "PMICSHDN", "J8.1", "J8.3",
+ "PCIe_PWREN", "THERM", "UBRDG_RSTN", "PCIe_PERSTN",
+ "ULPI_RSTN", "J8.2", "UHUB_RSTN", "GEMGXL_RST", "J8.4",
+ "EN_VDD_SD", "SD_CD";
};
diff --git a/arch/riscv/boot/dts/starfive/Makefile b/arch/riscv/boot/dts/starfive/Makefile
new file mode 100644
index 000000000000..0ea1bc15ab30
--- /dev/null
+++ b/arch/riscv/boot/dts/starfive/Makefile
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0
+dtb-$(CONFIG_SOC_STARFIVE) += jh7100-beaglev-starlight.dtb
diff --git a/arch/riscv/boot/dts/starfive/jh7100-beaglev-starlight.dts b/arch/riscv/boot/dts/starfive/jh7100-beaglev-starlight.dts
new file mode 100644
index 000000000000..c9af67f7a0d2
--- /dev/null
+++ b/arch/riscv/boot/dts/starfive/jh7100-beaglev-starlight.dts
@@ -0,0 +1,164 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/*
+ * Copyright (C) 2021 StarFive Technology Co., Ltd.
+ * Copyright (C) 2021 Emil Renner Berthing <kernel@esmil.dk>
+ */
+
+/dts-v1/;
+#include "jh7100.dtsi"
+#include <dt-bindings/gpio/gpio.h>
+#include <dt-bindings/leds/common.h>
+#include <dt-bindings/pinctrl/pinctrl-starfive.h>
+
+/ {
+ model = "BeagleV Starlight Beta";
+ compatible = "beagle,beaglev-starlight-jh7100-r0", "starfive,jh7100";
+
+ aliases {
+ serial0 = &uart3;
+ };
+
+ chosen {
+ stdout-path = "serial0:115200n8";
+ };
+
+ cpus {
+ timebase-frequency = <6250000>;
+ };
+
+ memory@80000000 {
+ device_type = "memory";
+ reg = <0x0 0x80000000 0x2 0x0>;
+ };
+
+ leds {
+ compatible = "gpio-leds";
+
+ led-ack {
+ gpios = <&gpio 43 GPIO_ACTIVE_HIGH>;
+ color = <LED_COLOR_ID_GREEN>;
+ function = LED_FUNCTION_HEARTBEAT;
+ linux,default-trigger = "heartbeat";
+ label = "ack";
+ };
+ };
+};
+
+&gpio {
+ i2c0_pins: i2c0-0 {
+ i2c-pins {
+ pinmux = <GPIOMUX(62, GPO_LOW,
+ GPO_I2C0_PAD_SCK_OEN,
+ GPI_I2C0_PAD_SCK_IN)>,
+ <GPIOMUX(61, GPO_LOW,
+ GPO_I2C0_PAD_SDA_OEN,
+ GPI_I2C0_PAD_SDA_IN)>;
+ bias-disable; /* external pull-up */
+ input-enable;
+ input-schmitt-enable;
+ };
+ };
+
+ i2c1_pins: i2c1-0 {
+ i2c-pins {
+ pinmux = <GPIOMUX(47, GPO_LOW,
+ GPO_I2C1_PAD_SCK_OEN,
+ GPI_I2C1_PAD_SCK_IN)>,
+ <GPIOMUX(48, GPO_LOW,
+ GPO_I2C1_PAD_SDA_OEN,
+ GPI_I2C1_PAD_SDA_IN)>;
+ bias-pull-up;
+ input-enable;
+ input-schmitt-enable;
+ };
+ };
+
+ i2c2_pins: i2c2-0 {
+ i2c-pins {
+ pinmux = <GPIOMUX(60, GPO_LOW,
+ GPO_I2C2_PAD_SCK_OEN,
+ GPI_I2C2_PAD_SCK_IN)>,
+ <GPIOMUX(59, GPO_LOW,
+ GPO_I2C2_PAD_SDA_OEN,
+ GPI_I2C2_PAD_SDA_IN)>;
+ bias-disable; /* external pull-up */
+ input-enable;
+ input-schmitt-enable;
+ };
+ };
+
+ uart3_pins: uart3-0 {
+ rx-pins {
+ pinmux = <GPIOMUX(13, GPO_LOW, GPO_DISABLE,
+ GPI_UART3_PAD_SIN)>;
+ bias-pull-up;
+ drive-strength = <14>;
+ input-enable;
+ input-schmitt-enable;
+ slew-rate = <0>;
+ };
+ tx-pins {
+ pinmux = <GPIOMUX(14, GPO_UART3_PAD_SOUT,
+ GPO_ENABLE, GPI_NONE)>;
+ bias-disable;
+ drive-strength = <35>;
+ input-disable;
+ input-schmitt-disable;
+ slew-rate = <0>;
+ };
+ };
+};
+
+&i2c0 {
+ clock-frequency = <100000>;
+ i2c-sda-hold-time-ns = <300>;
+ i2c-sda-falling-time-ns = <500>;
+ i2c-scl-falling-time-ns = <500>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&i2c0_pins>;
+ status = "okay";
+
+ pmic@5e {
+ compatible = "ti,tps65086";
+ reg = <0x5e>;
+ gpio-controller;
+ #gpio-cells = <2>;
+
+ regulators {
+ };
+ };
+};
+
+&i2c1 {
+ clock-frequency = <400000>;
+ i2c-sda-hold-time-ns = <300>;
+ i2c-sda-falling-time-ns = <100>;
+ i2c-scl-falling-time-ns = <100>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&i2c1_pins>;
+ status = "okay";
+};
+
+&i2c2 {
+ clock-frequency = <100000>;
+ i2c-sda-hold-time-ns = <300>;
+ i2c-sda-falling-time-ns = <500>;
+ i2c-scl-falling-time-ns = <500>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&i2c2_pins>;
+ status = "okay";
+};
+
+&osc_sys {
+ clock-frequency = <25000000>;
+};
+
+&osc_aud {
+ clock-frequency = <27000000>;
+};
+
+&uart3 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&uart3_pins>;
+ status = "okay";
+};
diff --git a/arch/riscv/boot/dts/starfive/jh7100.dtsi b/arch/riscv/boot/dts/starfive/jh7100.dtsi
new file mode 100644
index 000000000000..69f22f9aad9d
--- /dev/null
+++ b/arch/riscv/boot/dts/starfive/jh7100.dtsi
@@ -0,0 +1,230 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/*
+ * Copyright (C) 2021 StarFive Technology Co., Ltd.
+ * Copyright (C) 2021 Emil Renner Berthing <kernel@esmil.dk>
+ */
+
+/dts-v1/;
+#include <dt-bindings/clock/starfive-jh7100.h>
+#include <dt-bindings/reset/starfive-jh7100.h>
+
+/ {
+ compatible = "starfive,jh7100";
+ #address-cells = <2>;
+ #size-cells = <2>;
+
+ cpus {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ cpu@0 {
+ compatible = "sifive,u74-mc", "riscv";
+ reg = <0>;
+ d-cache-block-size = <64>;
+ d-cache-sets = <64>;
+ d-cache-size = <32768>;
+ d-tlb-sets = <1>;
+ d-tlb-size = <32>;
+ device_type = "cpu";
+ i-cache-block-size = <64>;
+ i-cache-sets = <64>;
+ i-cache-size = <32768>;
+ i-tlb-sets = <1>;
+ i-tlb-size = <32>;
+ mmu-type = "riscv,sv39";
+ riscv,isa = "rv64imafdc";
+ tlb-split;
+
+ cpu0_intc: interrupt-controller {
+ compatible = "riscv,cpu-intc";
+ interrupt-controller;
+ #interrupt-cells = <1>;
+ };
+ };
+
+ cpu@1 {
+ compatible = "sifive,u74-mc", "riscv";
+ reg = <1>;
+ d-cache-block-size = <64>;
+ d-cache-sets = <64>;
+ d-cache-size = <32768>;
+ d-tlb-sets = <1>;
+ d-tlb-size = <32>;
+ device_type = "cpu";
+ i-cache-block-size = <64>;
+ i-cache-sets = <64>;
+ i-cache-size = <32768>;
+ i-tlb-sets = <1>;
+ i-tlb-size = <32>;
+ mmu-type = "riscv,sv39";
+ riscv,isa = "rv64imafdc";
+ tlb-split;
+
+ cpu1_intc: interrupt-controller {
+ compatible = "riscv,cpu-intc";
+ interrupt-controller;
+ #interrupt-cells = <1>;
+ };
+ };
+ };
+
+ osc_sys: osc_sys {
+ compatible = "fixed-clock";
+ #clock-cells = <0>;
+ /* This value must be overridden by the board */
+ clock-frequency = <0>;
+ };
+
+ osc_aud: osc_aud {
+ compatible = "fixed-clock";
+ #clock-cells = <0>;
+ /* This value must be overridden by the board */
+ clock-frequency = <0>;
+ };
+
+ gmac_rmii_ref: gmac_rmii_ref {
+ compatible = "fixed-clock";
+ #clock-cells = <0>;
+ /* Should be overridden by the board when needed */
+ clock-frequency = <0>;
+ };
+
+ gmac_gr_mii_rxclk: gmac_gr_mii_rxclk {
+ compatible = "fixed-clock";
+ #clock-cells = <0>;
+ /* Should be overridden by the board when needed */
+ clock-frequency = <0>;
+ };
+
+ soc {
+ compatible = "simple-bus";
+ interrupt-parent = <&plic>;
+ #address-cells = <2>;
+ #size-cells = <2>;
+ ranges;
+
+ clint: clint@2000000 {
+ compatible = "starfive,jh7100-clint", "sifive,clint0";
+ reg = <0x0 0x2000000 0x0 0x10000>;
+ interrupts-extended = <&cpu0_intc 3 &cpu0_intc 7
+ &cpu1_intc 3 &cpu1_intc 7>;
+ };
+
+ plic: interrupt-controller@c000000 {
+ compatible = "starfive,jh7100-plic", "sifive,plic-1.0.0";
+ reg = <0x0 0xc000000 0x0 0x4000000>;
+ interrupts-extended = <&cpu0_intc 11 &cpu0_intc 9
+ &cpu1_intc 11 &cpu1_intc 9>;
+ interrupt-controller;
+ #address-cells = <0>;
+ #interrupt-cells = <1>;
+ riscv,ndev = <127>;
+ };
+
+ clkgen: clock-controller@11800000 {
+ compatible = "starfive,jh7100-clkgen";
+ reg = <0x0 0x11800000 0x0 0x10000>;
+ clocks = <&osc_sys>, <&osc_aud>, <&gmac_rmii_ref>, <&gmac_gr_mii_rxclk>;
+ clock-names = "osc_sys", "osc_aud", "gmac_rmii_ref", "gmac_gr_mii_rxclk";
+ #clock-cells = <1>;
+ };
+
+ rstgen: reset-controller@11840000 {
+ compatible = "starfive,jh7100-reset";
+ reg = <0x0 0x11840000 0x0 0x10000>;
+ #reset-cells = <1>;
+ };
+
+ i2c0: i2c@118b0000 {
+ compatible = "snps,designware-i2c";
+ reg = <0x0 0x118b0000 0x0 0x10000>;
+ clocks = <&clkgen JH7100_CLK_I2C0_CORE>,
+ <&clkgen JH7100_CLK_I2C0_APB>;
+ clock-names = "ref", "pclk";
+ resets = <&rstgen JH7100_RSTN_I2C0_APB>;
+ interrupts = <96>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ status = "disabled";
+ };
+
+ i2c1: i2c@118c0000 {
+ compatible = "snps,designware-i2c";
+ reg = <0x0 0x118c0000 0x0 0x10000>;
+ clocks = <&clkgen JH7100_CLK_I2C1_CORE>,
+ <&clkgen JH7100_CLK_I2C1_APB>;
+ clock-names = "ref", "pclk";
+ resets = <&rstgen JH7100_RSTN_I2C1_APB>;
+ interrupts = <97>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ status = "disabled";
+ };
+
+ gpio: pinctrl@11910000 {
+ compatible = "starfive,jh7100-pinctrl";
+ reg = <0x0 0x11910000 0x0 0x10000>,
+ <0x0 0x11858000 0x0 0x1000>;
+ reg-names = "gpio", "padctl";
+ clocks = <&clkgen JH7100_CLK_GPIO_APB>;
+ resets = <&rstgen JH7100_RSTN_GPIO_APB>;
+ interrupts = <32>;
+ gpio-controller;
+ #gpio-cells = <2>;
+ interrupt-controller;
+ #interrupt-cells = <2>;
+ };
+
+ uart2: serial@12430000 {
+ compatible = "starfive,jh7100-uart", "snps,dw-apb-uart";
+ reg = <0x0 0x12430000 0x0 0x10000>;
+ clocks = <&clkgen JH7100_CLK_UART2_CORE>,
+ <&clkgen JH7100_CLK_UART2_APB>;
+ clock-names = "baudclk", "apb_pclk";
+ resets = <&rstgen JH7100_RSTN_UART2_APB>;
+ interrupts = <72>;
+ reg-io-width = <4>;
+ reg-shift = <2>;
+ status = "disabled";
+ };
+
+ uart3: serial@12440000 {
+ compatible = "starfive,jh7100-uart", "snps,dw-apb-uart";
+ reg = <0x0 0x12440000 0x0 0x10000>;
+ clocks = <&clkgen JH7100_CLK_UART3_CORE>,
+ <&clkgen JH7100_CLK_UART3_APB>;
+ clock-names = "baudclk", "apb_pclk";
+ resets = <&rstgen JH7100_RSTN_UART3_APB>;
+ interrupts = <73>;
+ reg-io-width = <4>;
+ reg-shift = <2>;
+ status = "disabled";
+ };
+
+ i2c2: i2c@12450000 {
+ compatible = "snps,designware-i2c";
+ reg = <0x0 0x12450000 0x0 0x10000>;
+ clocks = <&clkgen JH7100_CLK_I2C2_CORE>,
+ <&clkgen JH7100_CLK_I2C2_APB>;
+ clock-names = "ref", "pclk";
+ resets = <&rstgen JH7100_RSTN_I2C2_APB>;
+ interrupts = <74>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ status = "disabled";
+ };
+
+ i2c3: i2c@12460000 {
+ compatible = "snps,designware-i2c";
+ reg = <0x0 0x12460000 0x0 0x10000>;
+ clocks = <&clkgen JH7100_CLK_I2C3_CORE>,
+ <&clkgen JH7100_CLK_I2C3_APB>;
+ clock-names = "ref", "pclk";
+ resets = <&rstgen JH7100_RSTN_I2C3_APB>;
+ interrupts = <75>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ status = "disabled";
+ };
+ };
+};
diff --git a/arch/riscv/configs/defconfig b/arch/riscv/configs/defconfig
index c252fd5706d2..f120fcc43d0a 100644
--- a/arch/riscv/configs/defconfig
+++ b/arch/riscv/configs/defconfig
@@ -2,6 +2,7 @@ CONFIG_SYSVIPC=y
CONFIG_POSIX_MQUEUE=y
CONFIG_NO_HZ_IDLE=y
CONFIG_HIGH_RES_TIMERS=y
+CONFIG_BPF_SYSCALL=y
CONFIG_IKCONFIG=y
CONFIG_IKCONFIG_PROC=y
CONFIG_CGROUPS=y
@@ -13,12 +14,14 @@ CONFIG_USER_NS=y
CONFIG_CHECKPOINT_RESTORE=y
CONFIG_BLK_DEV_INITRD=y
CONFIG_EXPERT=y
-CONFIG_BPF_SYSCALL=y
+# CONFIG_SYSFS_SYSCALL is not set
+CONFIG_SOC_MICROCHIP_POLARFIRE=y
CONFIG_SOC_SIFIVE=y
CONFIG_SOC_VIRT=y
-CONFIG_SOC_MICROCHIP_POLARFIRE=y
CONFIG_SMP=y
CONFIG_HOTPLUG_CPU=y
+CONFIG_VIRTUALIZATION=y
+CONFIG_KVM=m
CONFIG_JUMP_LABEL=y
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
@@ -68,14 +71,14 @@ CONFIG_HW_RANDOM=y
CONFIG_HW_RANDOM_VIRTIO=y
CONFIG_SPI=y
CONFIG_SPI_SIFIVE=y
+# CONFIG_PTP_1588_CLOCK is not set
CONFIG_GPIOLIB=y
CONFIG_GPIO_SIFIVE=y
-# CONFIG_PTP_1588_CLOCK is not set
-CONFIG_POWER_RESET=y
CONFIG_DRM=m
CONFIG_DRM_RADEON=m
CONFIG_DRM_NOUVEAU=m
CONFIG_DRM_VIRTIO_GPU=m
+CONFIG_FB=y
CONFIG_FRAMEBUFFER_CONSOLE=y
CONFIG_USB=y
CONFIG_USB_XHCI_HCD=y
@@ -86,10 +89,10 @@ CONFIG_USB_OHCI_HCD=y
CONFIG_USB_OHCI_HCD_PLATFORM=y
CONFIG_USB_STORAGE=y
CONFIG_USB_UAS=y
+CONFIG_MMC=y
CONFIG_MMC_SDHCI=y
CONFIG_MMC_SDHCI_PLTFM=y
CONFIG_MMC_SDHCI_CADENCE=y
-CONFIG_MMC=y
CONFIG_MMC_SPI=y
CONFIG_RTC_CLASS=y
CONFIG_VIRTIO_PCI=y
@@ -140,5 +143,3 @@ CONFIG_RCU_EQS_DEBUG=y
# CONFIG_FTRACE is not set
# CONFIG_RUNTIME_TESTING_MENU is not set
CONFIG_MEMTEST=y
-# CONFIG_SYSFS_SYSCALL is not set
-CONFIG_EFI=y
diff --git a/arch/riscv/configs/nommu_k210_defconfig b/arch/riscv/configs/nommu_k210_defconfig
index b16a2a12c82a..3f42ed87dde8 100644
--- a/arch/riscv/configs/nommu_k210_defconfig
+++ b/arch/riscv/configs/nommu_k210_defconfig
@@ -29,8 +29,6 @@ CONFIG_EMBEDDED=y
CONFIG_SLOB=y
# CONFIG_MMU is not set
CONFIG_SOC_CANAAN=y
-CONFIG_SOC_CANAAN_K210_DTB_SOURCE="k210_generic"
-CONFIG_MAXPHYSMEM_2GB=y
CONFIG_SMP=y
CONFIG_NR_CPUS=2
CONFIG_CMDLINE="earlycon console=ttySIF0"
@@ -75,7 +73,6 @@ CONFIG_LEDS_GPIO=y
CONFIG_LEDS_USER=y
# CONFIG_VIRTIO_MENU is not set
# CONFIG_VHOST_MENU is not set
-# CONFIG_SURFACE_PLATFORMS is not set
# CONFIG_FILE_LOCKING is not set
# CONFIG_DNOTIFY is not set
# CONFIG_INOTIFY_USER is not set
diff --git a/arch/riscv/configs/nommu_k210_sdcard_defconfig b/arch/riscv/configs/nommu_k210_sdcard_defconfig
index 61f887f65419..2a82a3b2992b 100644
--- a/arch/riscv/configs/nommu_k210_sdcard_defconfig
+++ b/arch/riscv/configs/nommu_k210_sdcard_defconfig
@@ -21,8 +21,6 @@ CONFIG_EMBEDDED=y
CONFIG_SLOB=y
# CONFIG_MMU is not set
CONFIG_SOC_CANAAN=y
-CONFIG_SOC_CANAAN_K210_DTB_SOURCE="k210_generic"
-CONFIG_MAXPHYSMEM_2GB=y
CONFIG_SMP=y
CONFIG_NR_CPUS=2
CONFIG_CMDLINE="earlycon console=ttySIF0 rootdelay=2 root=/dev/mmcblk0p1 ro"
@@ -30,7 +28,6 @@ CONFIG_CMDLINE_FORCE=y
# CONFIG_SECCOMP is not set
# CONFIG_STACKPROTECTOR is not set
# CONFIG_GCC_PLUGINS is not set
-# CONFIG_BLK_DEV_BSG is not set
# CONFIG_MQ_IOSCHED_DEADLINE is not set
# CONFIG_MQ_IOSCHED_KYBER is not set
CONFIG_BINFMT_FLAT=y
@@ -72,7 +69,6 @@ CONFIG_LEDS_GPIO=y
CONFIG_LEDS_USER=y
# CONFIG_VIRTIO_MENU is not set
# CONFIG_VHOST_MENU is not set
-# CONFIG_SURFACE_PLATFORMS is not set
CONFIG_EXT2_FS=y
# CONFIG_FILE_LOCKING is not set
# CONFIG_DNOTIFY is not set
diff --git a/arch/riscv/configs/nommu_virt_defconfig b/arch/riscv/configs/nommu_virt_defconfig
index e046a0babde4..e1c9864b6237 100644
--- a/arch/riscv/configs/nommu_virt_defconfig
+++ b/arch/riscv/configs/nommu_virt_defconfig
@@ -24,15 +24,12 @@ CONFIG_EXPERT=y
# CONFIG_VM_EVENT_COUNTERS is not set
# CONFIG_COMPAT_BRK is not set
CONFIG_SLOB=y
-# CONFIG_SLAB_MERGE_DEFAULT is not set
# CONFIG_MMU is not set
CONFIG_SOC_VIRT=y
-CONFIG_MAXPHYSMEM_2GB=y
CONFIG_SMP=y
CONFIG_CMDLINE="root=/dev/vda rw earlycon=uart8250,mmio,0x10000000,115200n8 console=ttyS0"
CONFIG_CMDLINE_FORCE=y
CONFIG_JUMP_LABEL=y
-# CONFIG_BLK_DEV_BSG is not set
CONFIG_PARTITION_ADVANCED=y
# CONFIG_MSDOS_PARTITION is not set
# CONFIG_EFI_PARTITION is not set
diff --git a/arch/riscv/configs/rv32_defconfig b/arch/riscv/configs/rv32_defconfig
index 434ef5b64599..8b56a7f1eb06 100644
--- a/arch/riscv/configs/rv32_defconfig
+++ b/arch/riscv/configs/rv32_defconfig
@@ -2,6 +2,7 @@ CONFIG_SYSVIPC=y
CONFIG_POSIX_MQUEUE=y
CONFIG_NO_HZ_IDLE=y
CONFIG_HIGH_RES_TIMERS=y
+CONFIG_BPF_SYSCALL=y
CONFIG_IKCONFIG=y
CONFIG_IKCONFIG_PROC=y
CONFIG_CGROUPS=y
@@ -13,12 +14,14 @@ CONFIG_USER_NS=y
CONFIG_CHECKPOINT_RESTORE=y
CONFIG_BLK_DEV_INITRD=y
CONFIG_EXPERT=y
-CONFIG_BPF_SYSCALL=y
+# CONFIG_SYSFS_SYSCALL is not set
CONFIG_SOC_SIFIVE=y
CONFIG_SOC_VIRT=y
CONFIG_ARCH_RV32I=y
CONFIG_SMP=y
CONFIG_HOTPLUG_CPU=y
+CONFIG_VIRTUALIZATION=y
+CONFIG_KVM=m
CONFIG_JUMP_LABEL=y
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
@@ -67,10 +70,10 @@ CONFIG_HW_RANDOM_VIRTIO=y
CONFIG_SPI=y
CONFIG_SPI_SIFIVE=y
# CONFIG_PTP_1588_CLOCK is not set
-CONFIG_POWER_RESET=y
CONFIG_DRM=y
CONFIG_DRM_RADEON=y
CONFIG_DRM_VIRTIO_GPU=y
+CONFIG_FB=y
CONFIG_FRAMEBUFFER_CONSOLE=y
CONFIG_USB=y
CONFIG_USB_XHCI_HCD=y
@@ -130,4 +133,3 @@ CONFIG_RCU_EQS_DEBUG=y
# CONFIG_FTRACE is not set
# CONFIG_RUNTIME_TESTING_MENU is not set
CONFIG_MEMTEST=y
-# CONFIG_SYSFS_SYSCALL is not set
diff --git a/arch/riscv/errata/alternative.c b/arch/riscv/errata/alternative.c
index 3b15885db70b..e8b4a0fe488c 100644
--- a/arch/riscv/errata/alternative.c
+++ b/arch/riscv/errata/alternative.c
@@ -22,7 +22,8 @@ static struct cpu_manufacturer_info_t {
} cpu_mfr_info;
static void (*vendor_patch_func)(struct alt_entry *begin, struct alt_entry *end,
- unsigned long archid, unsigned long impid);
+ unsigned long archid,
+ unsigned long impid) __initdata;
static inline void __init riscv_fill_cpu_mfr_info(void)
{
diff --git a/arch/riscv/include/asm/Kbuild b/arch/riscv/include/asm/Kbuild
index 445ccc97305a..57b86fd9916c 100644
--- a/arch/riscv/include/asm/Kbuild
+++ b/arch/riscv/include/asm/Kbuild
@@ -1,6 +1,5 @@
# SPDX-License-Identifier: GPL-2.0
generic-y += early_ioremap.h
-generic-y += extable.h
generic-y += flat.h
generic-y += kvm_para.h
generic-y += user.h
diff --git a/arch/riscv/include/asm/asm-extable.h b/arch/riscv/include/asm/asm-extable.h
new file mode 100644
index 000000000000..14be0673f5b5
--- /dev/null
+++ b/arch/riscv/include/asm/asm-extable.h
@@ -0,0 +1,65 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef __ASM_ASM_EXTABLE_H
+#define __ASM_ASM_EXTABLE_H
+
+#define EX_TYPE_NONE 0
+#define EX_TYPE_FIXUP 1
+#define EX_TYPE_BPF 2
+#define EX_TYPE_UACCESS_ERR_ZERO 3
+
+#ifdef __ASSEMBLY__
+
+#define __ASM_EXTABLE_RAW(insn, fixup, type, data) \
+ .pushsection __ex_table, "a"; \
+ .balign 4; \
+ .long ((insn) - .); \
+ .long ((fixup) - .); \
+ .short (type); \
+ .short (data); \
+ .popsection;
+
+ .macro _asm_extable, insn, fixup
+ __ASM_EXTABLE_RAW(\insn, \fixup, EX_TYPE_FIXUP, 0)
+ .endm
+
+#else /* __ASSEMBLY__ */
+
+#include <linux/bits.h>
+#include <linux/stringify.h>
+#include <asm/gpr-num.h>
+
+#define __ASM_EXTABLE_RAW(insn, fixup, type, data) \
+ ".pushsection __ex_table, \"a\"\n" \
+ ".balign 4\n" \
+ ".long ((" insn ") - .)\n" \
+ ".long ((" fixup ") - .)\n" \
+ ".short (" type ")\n" \
+ ".short (" data ")\n" \
+ ".popsection\n"
+
+#define _ASM_EXTABLE(insn, fixup) \
+ __ASM_EXTABLE_RAW(#insn, #fixup, __stringify(EX_TYPE_FIXUP), "0")
+
+#define EX_DATA_REG_ERR_SHIFT 0
+#define EX_DATA_REG_ERR GENMASK(4, 0)
+#define EX_DATA_REG_ZERO_SHIFT 5
+#define EX_DATA_REG_ZERO GENMASK(9, 5)
+
+#define EX_DATA_REG(reg, gpr) \
+ "((.L__gpr_num_" #gpr ") << " __stringify(EX_DATA_REG_##reg##_SHIFT) ")"
+
+#define _ASM_EXTABLE_UACCESS_ERR_ZERO(insn, fixup, err, zero) \
+ __DEFINE_ASM_GPR_NUMS \
+ __ASM_EXTABLE_RAW(#insn, #fixup, \
+ __stringify(EX_TYPE_UACCESS_ERR_ZERO), \
+ "(" \
+ EX_DATA_REG(ERR, err) " | " \
+ EX_DATA_REG(ZERO, zero) \
+ ")")
+
+#define _ASM_EXTABLE_UACCESS_ERR(insn, fixup, err) \
+ _ASM_EXTABLE_UACCESS_ERR_ZERO(insn, fixup, err, zero)
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* __ASM_ASM_EXTABLE_H */
diff --git a/arch/riscv/include/asm/bitops.h b/arch/riscv/include/asm/bitops.h
index 396a3303c537..3540b690944b 100644
--- a/arch/riscv/include/asm/bitops.h
+++ b/arch/riscv/include/asm/bitops.h
@@ -20,7 +20,6 @@
#include <asm-generic/bitops/fls.h>
#include <asm-generic/bitops/__fls.h>
#include <asm-generic/bitops/fls64.h>
-#include <asm-generic/bitops/find.h>
#include <asm-generic/bitops/sched.h>
#include <asm-generic/bitops/ffs.h>
diff --git a/arch/riscv/include/asm/cpu_ops.h b/arch/riscv/include/asm/cpu_ops.h
index a8ec3c5c1bd2..134590f1b843 100644
--- a/arch/riscv/include/asm/cpu_ops.h
+++ b/arch/riscv/include/asm/cpu_ops.h
@@ -40,7 +40,5 @@ struct cpu_operations {
extern const struct cpu_operations *cpu_ops[NR_CPUS];
void __init cpu_set_ops(int cpu);
-void cpu_update_secondary_bootdata(unsigned int cpuid,
- struct task_struct *tidle);
#endif /* ifndef __ASM_CPU_OPS_H */
diff --git a/arch/riscv/include/asm/cpu_ops_sbi.h b/arch/riscv/include/asm/cpu_ops_sbi.h
new file mode 100644
index 000000000000..56e4b76d09ff
--- /dev/null
+++ b/arch/riscv/include/asm/cpu_ops_sbi.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2021 by Rivos Inc.
+ */
+#ifndef __ASM_CPU_OPS_SBI_H
+#define __ASM_CPU_OPS_SBI_H
+
+#ifndef __ASSEMBLY__
+#include <linux/init.h>
+#include <linux/sched.h>
+#include <linux/threads.h>
+
+/**
+ * struct sbi_hart_boot_data - Hart specific boot used during booting and
+ * cpu hotplug.
+ * @task_ptr: A pointer to the hart specific tp
+ * @stack_ptr: A pointer to the hart specific sp
+ */
+struct sbi_hart_boot_data {
+ void *task_ptr;
+ void *stack_ptr;
+};
+#endif
+
+#endif /* ifndef __ASM_CPU_OPS_SBI_H */
diff --git a/arch/riscv/include/asm/csr.h b/arch/riscv/include/asm/csr.h
index 5046f431645c..ae711692eec9 100644
--- a/arch/riscv/include/asm/csr.h
+++ b/arch/riscv/include/asm/csr.h
@@ -40,14 +40,13 @@
#ifndef CONFIG_64BIT
#define SATP_PPN _AC(0x003FFFFF, UL)
#define SATP_MODE_32 _AC(0x80000000, UL)
-#define SATP_MODE SATP_MODE_32
#define SATP_ASID_BITS 9
#define SATP_ASID_SHIFT 22
#define SATP_ASID_MASK _AC(0x1FF, UL)
#else
#define SATP_PPN _AC(0x00000FFFFFFFFFFF, UL)
#define SATP_MODE_39 _AC(0x8000000000000000, UL)
-#define SATP_MODE SATP_MODE_39
+#define SATP_MODE_48 _AC(0x9000000000000000, UL)
#define SATP_ASID_BITS 16
#define SATP_ASID_SHIFT 44
#define SATP_ASID_MASK _AC(0xFFFF, UL)
diff --git a/arch/riscv/include/asm/efi.h b/arch/riscv/include/asm/efi.h
index 49b398fe99f1..cc4f6787f937 100644
--- a/arch/riscv/include/asm/efi.h
+++ b/arch/riscv/include/asm/efi.h
@@ -13,7 +13,6 @@
#ifdef CONFIG_EFI
extern void efi_init(void);
-extern void efifb_setup_from_dmi(struct screen_info *si, const char *opt);
#else
#define efi_init()
#endif
diff --git a/arch/riscv/include/asm/extable.h b/arch/riscv/include/asm/extable.h
new file mode 100644
index 000000000000..512012d193dc
--- /dev/null
+++ b/arch/riscv/include/asm/extable.h
@@ -0,0 +1,48 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_RISCV_EXTABLE_H
+#define _ASM_RISCV_EXTABLE_H
+
+/*
+ * The exception table consists of pairs of relative offsets: the first
+ * is the relative offset to an instruction that is allowed to fault,
+ * and the second is the relative offset at which the program should
+ * continue. No registers are modified, so it is entirely up to the
+ * continuation code to figure out what to do.
+ *
+ * All the routines below use bits of fixup code that are out of line
+ * with the main instruction path. This means when everything is well,
+ * we don't even have to jump over them. Further, they do not intrude
+ * on our cache or tlb entries.
+ */
+
+struct exception_table_entry {
+ int insn, fixup;
+ short type, data;
+};
+
+#define ARCH_HAS_RELATIVE_EXTABLE
+
+#define swap_ex_entry_fixup(a, b, tmp, delta) \
+do { \
+ (a)->fixup = (b)->fixup + (delta); \
+ (b)->fixup = (tmp).fixup - (delta); \
+ (a)->type = (b)->type; \
+ (b)->type = (tmp).type; \
+ (a)->data = (b)->data; \
+ (b)->data = (tmp).data; \
+} while (0)
+
+bool fixup_exception(struct pt_regs *regs);
+
+#if defined(CONFIG_BPF_JIT) && defined(CONFIG_ARCH_RV64I)
+bool ex_handler_bpf(const struct exception_table_entry *ex, struct pt_regs *regs);
+#else
+static inline bool
+ex_handler_bpf(const struct exception_table_entry *ex,
+ struct pt_regs *regs)
+{
+ return false;
+}
+#endif
+
+#endif
diff --git a/arch/riscv/include/asm/fixmap.h b/arch/riscv/include/asm/fixmap.h
index 54cbf07fb4e9..58a718573ad6 100644
--- a/arch/riscv/include/asm/fixmap.h
+++ b/arch/riscv/include/asm/fixmap.h
@@ -24,6 +24,7 @@ enum fixed_addresses {
FIX_HOLE,
FIX_PTE,
FIX_PMD,
+ FIX_PUD,
FIX_TEXT_POKE1,
FIX_TEXT_POKE0,
FIX_EARLYCON_MEM_BASE,
diff --git a/arch/riscv/include/asm/futex.h b/arch/riscv/include/asm/futex.h
index 1b00badb9f87..fc8130f995c1 100644
--- a/arch/riscv/include/asm/futex.h
+++ b/arch/riscv/include/asm/futex.h
@@ -11,6 +11,7 @@
#include <linux/uaccess.h>
#include <linux/errno.h>
#include <asm/asm.h>
+#include <asm/asm-extable.h>
/* We don't even really need the extable code, but for now keep it simple */
#ifndef CONFIG_MMU
@@ -20,23 +21,14 @@
#define __futex_atomic_op(insn, ret, oldval, uaddr, oparg) \
{ \
- uintptr_t tmp; \
__enable_user_access(); \
__asm__ __volatile__ ( \
"1: " insn " \n" \
"2: \n" \
- " .section .fixup,\"ax\" \n" \
- " .balign 4 \n" \
- "3: li %[r],%[e] \n" \
- " jump 2b,%[t] \n" \
- " .previous \n" \
- " .section __ex_table,\"a\" \n" \
- " .balign " RISCV_SZPTR " \n" \
- " " RISCV_PTR " 1b, 3b \n" \
- " .previous \n" \
+ _ASM_EXTABLE_UACCESS_ERR(1b, 2b, %[r]) \
: [r] "+r" (ret), [ov] "=&r" (oldval), \
- [u] "+m" (*uaddr), [t] "=&r" (tmp) \
- : [op] "Jr" (oparg), [e] "i" (-EFAULT) \
+ [u] "+m" (*uaddr) \
+ : [op] "Jr" (oparg) \
: "memory"); \
__disable_user_access(); \
}
@@ -98,18 +90,10 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
"2: sc.w.aqrl %[t],%z[nv],%[u] \n"
" bnez %[t],1b \n"
"3: \n"
- " .section .fixup,\"ax\" \n"
- " .balign 4 \n"
- "4: li %[r],%[e] \n"
- " jump 3b,%[t] \n"
- " .previous \n"
- " .section __ex_table,\"a\" \n"
- " .balign " RISCV_SZPTR " \n"
- " " RISCV_PTR " 1b, 4b \n"
- " " RISCV_PTR " 2b, 4b \n"
- " .previous \n"
+ _ASM_EXTABLE_UACCESS_ERR(1b, 3b, %[r]) \
+ _ASM_EXTABLE_UACCESS_ERR(2b, 3b, %[r]) \
: [r] "+r" (ret), [v] "=&r" (val), [u] "+m" (*uaddr), [t] "=&r" (tmp)
- : [ov] "Jr" (oldval), [nv] "Jr" (newval), [e] "i" (-EFAULT)
+ : [ov] "Jr" (oldval), [nv] "Jr" (newval)
: "memory");
__disable_user_access();
diff --git a/arch/riscv/include/asm/gpr-num.h b/arch/riscv/include/asm/gpr-num.h
new file mode 100644
index 000000000000..dfee2829fc7c
--- /dev/null
+++ b/arch/riscv/include/asm/gpr-num.h
@@ -0,0 +1,77 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef __ASM_GPR_NUM_H
+#define __ASM_GPR_NUM_H
+
+#ifdef __ASSEMBLY__
+ .equ .L__gpr_num_zero, 0
+ .equ .L__gpr_num_ra, 1
+ .equ .L__gpr_num_sp, 2
+ .equ .L__gpr_num_gp, 3
+ .equ .L__gpr_num_tp, 4
+ .equ .L__gpr_num_t0, 5
+ .equ .L__gpr_num_t1, 6
+ .equ .L__gpr_num_t2, 7
+ .equ .L__gpr_num_s0, 8
+ .equ .L__gpr_num_s1, 9
+ .equ .L__gpr_num_a0, 10
+ .equ .L__gpr_num_a1, 11
+ .equ .L__gpr_num_a2, 12
+ .equ .L__gpr_num_a3, 13
+ .equ .L__gpr_num_a4, 14
+ .equ .L__gpr_num_a5, 15
+ .equ .L__gpr_num_a6, 16
+ .equ .L__gpr_num_a7, 17
+ .equ .L__gpr_num_s2, 18
+ .equ .L__gpr_num_s3, 19
+ .equ .L__gpr_num_s4, 20
+ .equ .L__gpr_num_s5, 21
+ .equ .L__gpr_num_s6, 22
+ .equ .L__gpr_num_s7, 23
+ .equ .L__gpr_num_s8, 24
+ .equ .L__gpr_num_s9, 25
+ .equ .L__gpr_num_s10, 26
+ .equ .L__gpr_num_s11, 27
+ .equ .L__gpr_num_t3, 28
+ .equ .L__gpr_num_t4, 29
+ .equ .L__gpr_num_t5, 30
+ .equ .L__gpr_num_t6, 31
+
+#else /* __ASSEMBLY__ */
+
+#define __DEFINE_ASM_GPR_NUMS \
+" .equ .L__gpr_num_zero, 0\n" \
+" .equ .L__gpr_num_ra, 1\n" \
+" .equ .L__gpr_num_sp, 2\n" \
+" .equ .L__gpr_num_gp, 3\n" \
+" .equ .L__gpr_num_tp, 4\n" \
+" .equ .L__gpr_num_t0, 5\n" \
+" .equ .L__gpr_num_t1, 6\n" \
+" .equ .L__gpr_num_t2, 7\n" \
+" .equ .L__gpr_num_s0, 8\n" \
+" .equ .L__gpr_num_s1, 9\n" \
+" .equ .L__gpr_num_a0, 10\n" \
+" .equ .L__gpr_num_a1, 11\n" \
+" .equ .L__gpr_num_a2, 12\n" \
+" .equ .L__gpr_num_a3, 13\n" \
+" .equ .L__gpr_num_a4, 14\n" \
+" .equ .L__gpr_num_a5, 15\n" \
+" .equ .L__gpr_num_a6, 16\n" \
+" .equ .L__gpr_num_a7, 17\n" \
+" .equ .L__gpr_num_s2, 18\n" \
+" .equ .L__gpr_num_s3, 19\n" \
+" .equ .L__gpr_num_s4, 20\n" \
+" .equ .L__gpr_num_s5, 21\n" \
+" .equ .L__gpr_num_s6, 22\n" \
+" .equ .L__gpr_num_s7, 23\n" \
+" .equ .L__gpr_num_s8, 24\n" \
+" .equ .L__gpr_num_s9, 25\n" \
+" .equ .L__gpr_num_s10, 26\n" \
+" .equ .L__gpr_num_s11, 27\n" \
+" .equ .L__gpr_num_t3, 28\n" \
+" .equ .L__gpr_num_t4, 29\n" \
+" .equ .L__gpr_num_t5, 30\n" \
+" .equ .L__gpr_num_t6, 31\n"
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* __ASM_GPR_NUM_H */
diff --git a/arch/riscv/include/asm/kasan.h b/arch/riscv/include/asm/kasan.h
index b00f503ec124..0b85e363e778 100644
--- a/arch/riscv/include/asm/kasan.h
+++ b/arch/riscv/include/asm/kasan.h
@@ -27,13 +27,18 @@
*/
#define KASAN_SHADOW_SCALE_SHIFT 3
-#define KASAN_SHADOW_SIZE (UL(1) << ((CONFIG_VA_BITS - 1) - KASAN_SHADOW_SCALE_SHIFT))
-#define KASAN_SHADOW_START KERN_VIRT_START
-#define KASAN_SHADOW_END (KASAN_SHADOW_START + KASAN_SHADOW_SIZE)
+#define KASAN_SHADOW_SIZE (UL(1) << ((VA_BITS - 1) - KASAN_SHADOW_SCALE_SHIFT))
+/*
+ * Depending on the size of the virtual address space, the region may not be
+ * aligned on PGDIR_SIZE, so force its alignment to ease its population.
+ */
+#define KASAN_SHADOW_START ((KASAN_SHADOW_END - KASAN_SHADOW_SIZE) & PGDIR_MASK)
+#define KASAN_SHADOW_END MODULES_LOWEST_VADDR
#define KASAN_SHADOW_OFFSET _AC(CONFIG_KASAN_SHADOW_OFFSET, UL)
void kasan_init(void);
asmlinkage void kasan_early_init(void);
+void kasan_swapper_init(void);
#endif
#endif
diff --git a/arch/riscv/include/asm/kvm_host.h b/arch/riscv/include/asm/kvm_host.h
index 25ba21f98504..99ef6a120617 100644
--- a/arch/riscv/include/asm/kvm_host.h
+++ b/arch/riscv/include/asm/kvm_host.h
@@ -12,14 +12,12 @@
#include <linux/types.h>
#include <linux/kvm.h>
#include <linux/kvm_types.h>
+#include <asm/csr.h>
#include <asm/kvm_vcpu_fp.h>
#include <asm/kvm_vcpu_timer.h>
-#ifdef CONFIG_64BIT
-#define KVM_MAX_VCPUS (1U << 16)
-#else
-#define KVM_MAX_VCPUS (1U << 9)
-#endif
+#define KVM_MAX_VCPUS \
+ ((HGATP_VMID_MASK >> HGATP_VMID_SHIFT) + 1)
#define KVM_HALT_POLL_NS_DEFAULT 500000
@@ -79,13 +77,6 @@ struct kvm_sbi_context {
int return_handled;
};
-#define KVM_MMU_PAGE_CACHE_NR_OBJS 32
-
-struct kvm_mmu_page_cache {
- int nobjs;
- void *objects[KVM_MMU_PAGE_CACHE_NR_OBJS];
-};
-
struct kvm_cpu_trap {
unsigned long sepc;
unsigned long scause;
@@ -195,7 +186,7 @@ struct kvm_vcpu_arch {
struct kvm_sbi_context sbi_context;
/* Cache pages needed to program page tables with spinlock held */
- struct kvm_mmu_page_cache mmu_page_cache;
+ struct kvm_mmu_memory_cache mmu_page_cache;
/* VCPU power-off state */
bool power_off;
@@ -210,7 +201,6 @@ struct kvm_vcpu_arch {
static inline void kvm_arch_hardware_unsetup(void) {}
static inline void kvm_arch_sync_events(struct kvm *kvm) {}
static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
-static inline void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) {}
#define KVM_ARCH_WANT_MMU_NOTIFIER
@@ -223,12 +213,12 @@ void __kvm_riscv_hfence_gvma_all(void);
int kvm_riscv_stage2_map(struct kvm_vcpu *vcpu,
struct kvm_memory_slot *memslot,
gpa_t gpa, unsigned long hva, bool is_write);
-void kvm_riscv_stage2_flush_cache(struct kvm_vcpu *vcpu);
int kvm_riscv_stage2_alloc_pgd(struct kvm *kvm);
void kvm_riscv_stage2_free_pgd(struct kvm *kvm);
void kvm_riscv_stage2_update_hgatp(struct kvm_vcpu *vcpu);
void kvm_riscv_stage2_mode_detect(void);
unsigned long kvm_riscv_stage2_mode(void);
+int kvm_riscv_stage2_gpa_bits(void);
void kvm_riscv_stage2_vmid_detect(void);
unsigned long kvm_riscv_stage2_vmid_bits(void);
diff --git a/arch/riscv/include/asm/kvm_types.h b/arch/riscv/include/asm/kvm_types.h
index e476b404eb67..e15765f98d7a 100644
--- a/arch/riscv/include/asm/kvm_types.h
+++ b/arch/riscv/include/asm/kvm_types.h
@@ -2,6 +2,6 @@
#ifndef _ASM_RISCV_KVM_TYPES_H
#define _ASM_RISCV_KVM_TYPES_H
-#define KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE 40
+#define KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE 32
#endif /* _ASM_RISCV_KVM_TYPES_H */
diff --git a/arch/riscv/include/asm/kvm_vcpu_sbi.h b/arch/riscv/include/asm/kvm_vcpu_sbi.h
new file mode 100644
index 000000000000..76e4e17a3e00
--- /dev/null
+++ b/arch/riscv/include/asm/kvm_vcpu_sbi.h
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/**
+ * Copyright (c) 2021 Western Digital Corporation or its affiliates.
+ *
+ * Authors:
+ * Atish Patra <atish.patra@wdc.com>
+ */
+
+#ifndef __RISCV_KVM_VCPU_SBI_H__
+#define __RISCV_KVM_VCPU_SBI_H__
+
+#define KVM_SBI_IMPID 3
+
+#define KVM_SBI_VERSION_MAJOR 0
+#define KVM_SBI_VERSION_MINOR 2
+
+struct kvm_vcpu_sbi_extension {
+ unsigned long extid_start;
+ unsigned long extid_end;
+ /**
+ * SBI extension handler. It can be defined for a given extension or group of
+ * extension. But it should always return linux error codes rather than SBI
+ * specific error codes.
+ */
+ int (*handler)(struct kvm_vcpu *vcpu, struct kvm_run *run,
+ unsigned long *out_val, struct kvm_cpu_trap *utrap,
+ bool *exit);
+};
+
+void kvm_riscv_vcpu_sbi_forward(struct kvm_vcpu *vcpu, struct kvm_run *run);
+const struct kvm_vcpu_sbi_extension *kvm_vcpu_sbi_find_ext(unsigned long extid);
+
+#endif /* __RISCV_KVM_VCPU_SBI_H__ */
diff --git a/arch/riscv/include/asm/page.h b/arch/riscv/include/asm/page.h
index b3e5ff0125fe..160e3a1e8f8b 100644
--- a/arch/riscv/include/asm/page.h
+++ b/arch/riscv/include/asm/page.h
@@ -31,9 +31,20 @@
* When not using MMU this corresponds to the first free page in
* physical memory (aligned on a page boundary).
*/
+#ifdef CONFIG_64BIT
+#ifdef CONFIG_MMU
+#define PAGE_OFFSET kernel_map.page_offset
+#else
#define PAGE_OFFSET _AC(CONFIG_PAGE_OFFSET, UL)
-
-#define KERN_VIRT_SIZE (-PAGE_OFFSET)
+#endif
+/*
+ * By default, CONFIG_PAGE_OFFSET value corresponds to SV48 address space so
+ * define the PAGE_OFFSET value for SV39.
+ */
+#define PAGE_OFFSET_L3 _AC(0xffffffd800000000, UL)
+#else
+#define PAGE_OFFSET _AC(CONFIG_PAGE_OFFSET, UL)
+#endif /* CONFIG_64BIT */
#ifndef __ASSEMBLY__
@@ -86,6 +97,7 @@ extern unsigned long riscv_pfn_base;
#endif /* CONFIG_MMU */
struct kernel_mapping {
+ unsigned long page_offset;
unsigned long virt_addr;
uintptr_t phys_addr;
uintptr_t size;
diff --git a/arch/riscv/include/asm/pgalloc.h b/arch/riscv/include/asm/pgalloc.h
index 0af6933a7100..11823004b87a 100644
--- a/arch/riscv/include/asm/pgalloc.h
+++ b/arch/riscv/include/asm/pgalloc.h
@@ -11,6 +11,8 @@
#include <asm/tlb.h>
#ifdef CONFIG_MMU
+#define __HAVE_ARCH_PUD_ALLOC_ONE
+#define __HAVE_ARCH_PUD_FREE
#include <asm-generic/pgalloc.h>
static inline void pmd_populate_kernel(struct mm_struct *mm,
@@ -36,6 +38,44 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
set_pud(pud, __pud((pfn << _PAGE_PFN_SHIFT) | _PAGE_TABLE));
}
+
+static inline void p4d_populate(struct mm_struct *mm, p4d_t *p4d, pud_t *pud)
+{
+ if (pgtable_l4_enabled) {
+ unsigned long pfn = virt_to_pfn(pud);
+
+ set_p4d(p4d, __p4d((pfn << _PAGE_PFN_SHIFT) | _PAGE_TABLE));
+ }
+}
+
+static inline void p4d_populate_safe(struct mm_struct *mm, p4d_t *p4d,
+ pud_t *pud)
+{
+ if (pgtable_l4_enabled) {
+ unsigned long pfn = virt_to_pfn(pud);
+
+ set_p4d_safe(p4d,
+ __p4d((pfn << _PAGE_PFN_SHIFT) | _PAGE_TABLE));
+ }
+}
+
+#define pud_alloc_one pud_alloc_one
+static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
+{
+ if (pgtable_l4_enabled)
+ return __pud_alloc_one(mm, addr);
+
+ return NULL;
+}
+
+#define pud_free pud_free
+static inline void pud_free(struct mm_struct *mm, pud_t *pud)
+{
+ if (pgtable_l4_enabled)
+ __pud_free(mm, pud);
+}
+
+#define __pud_free_tlb(tlb, pud, addr) pud_free((tlb)->mm, pud)
#endif /* __PAGETABLE_PMD_FOLDED */
static inline pgd_t *pgd_alloc(struct mm_struct *mm)
diff --git a/arch/riscv/include/asm/pgtable-64.h b/arch/riscv/include/asm/pgtable-64.h
index 228261aa9628..bbbdd66e5e2f 100644
--- a/arch/riscv/include/asm/pgtable-64.h
+++ b/arch/riscv/include/asm/pgtable-64.h
@@ -8,16 +8,36 @@
#include <linux/const.h>
-#define PGDIR_SHIFT 30
+extern bool pgtable_l4_enabled;
+
+#define PGDIR_SHIFT_L3 30
+#define PGDIR_SHIFT_L4 39
+#define PGDIR_SIZE_L3 (_AC(1, UL) << PGDIR_SHIFT_L3)
+
+#define PGDIR_SHIFT (pgtable_l4_enabled ? PGDIR_SHIFT_L4 : PGDIR_SHIFT_L3)
/* Size of region mapped by a page global directory */
#define PGDIR_SIZE (_AC(1, UL) << PGDIR_SHIFT)
#define PGDIR_MASK (~(PGDIR_SIZE - 1))
+/* pud is folded into pgd in case of 3-level page table */
+#define PUD_SHIFT 30
+#define PUD_SIZE (_AC(1, UL) << PUD_SHIFT)
+#define PUD_MASK (~(PUD_SIZE - 1))
+
#define PMD_SHIFT 21
/* Size of region mapped by a page middle directory */
#define PMD_SIZE (_AC(1, UL) << PMD_SHIFT)
#define PMD_MASK (~(PMD_SIZE - 1))
+/* Page Upper Directory entry */
+typedef struct {
+ unsigned long pud;
+} pud_t;
+
+#define pud_val(x) ((x).pud)
+#define __pud(x) ((pud_t) { (x) })
+#define PTRS_PER_PUD (PAGE_SIZE / sizeof(pud_t))
+
/* Page Middle Directory entry */
typedef struct {
unsigned long pmd;
@@ -59,6 +79,16 @@ static inline void pud_clear(pud_t *pudp)
set_pud(pudp, __pud(0));
}
+static inline pud_t pfn_pud(unsigned long pfn, pgprot_t prot)
+{
+ return __pud((pfn << _PAGE_PFN_SHIFT) | pgprot_val(prot));
+}
+
+static inline unsigned long _pud_pfn(pud_t pud)
+{
+ return pud_val(pud) >> _PAGE_PFN_SHIFT;
+}
+
static inline pmd_t *pud_pgtable(pud_t pud)
{
return (pmd_t *)pfn_to_virt(pud_val(pud) >> _PAGE_PFN_SHIFT);
@@ -69,6 +99,17 @@ static inline struct page *pud_page(pud_t pud)
return pfn_to_page(pud_val(pud) >> _PAGE_PFN_SHIFT);
}
+#define mm_pud_folded mm_pud_folded
+static inline bool mm_pud_folded(struct mm_struct *mm)
+{
+ if (pgtable_l4_enabled)
+ return false;
+
+ return true;
+}
+
+#define pmd_index(addr) (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1))
+
static inline pmd_t pfn_pmd(unsigned long pfn, pgprot_t prot)
{
return __pmd((pfn << _PAGE_PFN_SHIFT) | pgprot_val(prot));
@@ -84,4 +125,69 @@ static inline unsigned long _pmd_pfn(pmd_t pmd)
#define pmd_ERROR(e) \
pr_err("%s:%d: bad pmd %016lx.\n", __FILE__, __LINE__, pmd_val(e))
+#define pud_ERROR(e) \
+ pr_err("%s:%d: bad pud %016lx.\n", __FILE__, __LINE__, pud_val(e))
+
+static inline void set_p4d(p4d_t *p4dp, p4d_t p4d)
+{
+ if (pgtable_l4_enabled)
+ *p4dp = p4d;
+ else
+ set_pud((pud_t *)p4dp, (pud_t){ p4d_val(p4d) });
+}
+
+static inline int p4d_none(p4d_t p4d)
+{
+ if (pgtable_l4_enabled)
+ return (p4d_val(p4d) == 0);
+
+ return 0;
+}
+
+static inline int p4d_present(p4d_t p4d)
+{
+ if (pgtable_l4_enabled)
+ return (p4d_val(p4d) & _PAGE_PRESENT);
+
+ return 1;
+}
+
+static inline int p4d_bad(p4d_t p4d)
+{
+ if (pgtable_l4_enabled)
+ return !p4d_present(p4d);
+
+ return 0;
+}
+
+static inline void p4d_clear(p4d_t *p4d)
+{
+ if (pgtable_l4_enabled)
+ set_p4d(p4d, __p4d(0));
+}
+
+static inline pud_t *p4d_pgtable(p4d_t p4d)
+{
+ if (pgtable_l4_enabled)
+ return (pud_t *)pfn_to_virt(p4d_val(p4d) >> _PAGE_PFN_SHIFT);
+
+ return (pud_t *)pud_pgtable((pud_t) { p4d_val(p4d) });
+}
+
+static inline struct page *p4d_page(p4d_t p4d)
+{
+ return pfn_to_page(p4d_val(p4d) >> _PAGE_PFN_SHIFT);
+}
+
+#define pud_index(addr) (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1))
+
+#define pud_offset pud_offset
+static inline pud_t *pud_offset(p4d_t *p4d, unsigned long address)
+{
+ if (pgtable_l4_enabled)
+ return p4d_pgtable(*p4d) + pud_index(address);
+
+ return (pud_t *)p4d;
+}
+
#endif /* _ASM_RISCV_PGTABLE_64_H */
diff --git a/arch/riscv/include/asm/pgtable-bits.h b/arch/riscv/include/asm/pgtable-bits.h
index 2ee413912926..a6b0c89824c2 100644
--- a/arch/riscv/include/asm/pgtable-bits.h
+++ b/arch/riscv/include/asm/pgtable-bits.h
@@ -31,7 +31,7 @@
* _PAGE_PROT_NONE is set on not-present pages (and ignored by the hardware) to
* distinguish them from swapped out pages
*/
-#define _PAGE_PROT_NONE _PAGE_READ
+#define _PAGE_PROT_NONE _PAGE_GLOBAL
#define _PAGE_PFN_SHIFT 10
diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
index bf204e7c1f74..7e949f25c933 100644
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -24,8 +24,19 @@
#define KERNEL_LINK_ADDR PAGE_OFFSET
#endif
+/* Number of entries in the page global directory */
+#define PTRS_PER_PGD (PAGE_SIZE / sizeof(pgd_t))
+/* Number of entries in the page table */
+#define PTRS_PER_PTE (PAGE_SIZE / sizeof(pte_t))
+
+/*
+ * Half of the kernel address space (half of the entries of the page global
+ * directory) is for the direct mapping.
+ */
+#define KERN_VIRT_SIZE ((PTRS_PER_PGD / 2 * PGDIR_SIZE) / 2)
+
#define VMALLOC_SIZE (KERN_VIRT_SIZE >> 1)
-#define VMALLOC_END (PAGE_OFFSET - 1)
+#define VMALLOC_END PAGE_OFFSET
#define VMALLOC_START (PAGE_OFFSET - VMALLOC_SIZE)
#define BPF_JIT_REGION_SIZE (SZ_128M)
@@ -39,8 +50,10 @@
/* Modules always live before the kernel */
#ifdef CONFIG_64BIT
-#define MODULES_VADDR (PFN_ALIGN((unsigned long)&_end) - SZ_2G)
-#define MODULES_END (PFN_ALIGN((unsigned long)&_start))
+/* This is used to define the end of the KASAN shadow region */
+#define MODULES_LOWEST_VADDR (KERNEL_LINK_ADDR - SZ_2G)
+#define MODULES_VADDR (PFN_ALIGN((unsigned long)&_end) - SZ_2G)
+#define MODULES_END (PFN_ALIGN((unsigned long)&_start))
#endif
/*
@@ -48,10 +61,16 @@
* struct pages to map half the virtual address space. Then
* position vmemmap directly below the VMALLOC region.
*/
+#ifdef CONFIG_64BIT
+#define VA_BITS (pgtable_l4_enabled ? 48 : 39)
+#else
+#define VA_BITS 32
+#endif
+
#define VMEMMAP_SHIFT \
- (CONFIG_VA_BITS - PAGE_SHIFT - 1 + STRUCT_PAGE_MAX_SHIFT)
+ (VA_BITS - PAGE_SHIFT - 1 + STRUCT_PAGE_MAX_SHIFT)
#define VMEMMAP_SIZE BIT(VMEMMAP_SHIFT)
-#define VMEMMAP_END (VMALLOC_START - 1)
+#define VMEMMAP_END VMALLOC_START
#define VMEMMAP_START (VMALLOC_START - VMEMMAP_SIZE)
/*
@@ -83,8 +102,7 @@
#ifndef __ASSEMBLY__
-/* Page Upper Directory not used in RISC-V */
-#include <asm-generic/pgtable-nopud.h>
+#include <asm-generic/pgtable-nop4d.h>
#include <asm/page.h>
#include <asm/tlbflush.h>
#include <linux/mm_types.h>
@@ -107,19 +125,27 @@
#define XIP_FIXUP(addr) (addr)
#endif /* CONFIG_XIP_KERNEL */
-#ifdef CONFIG_MMU
-/* Number of entries in the page global directory */
-#define PTRS_PER_PGD (PAGE_SIZE / sizeof(pgd_t))
-/* Number of entries in the page table */
-#define PTRS_PER_PTE (PAGE_SIZE / sizeof(pte_t))
+struct pt_alloc_ops {
+ pte_t *(*get_pte_virt)(phys_addr_t pa);
+ phys_addr_t (*alloc_pte)(uintptr_t va);
+#ifndef __PAGETABLE_PMD_FOLDED
+ pmd_t *(*get_pmd_virt)(phys_addr_t pa);
+ phys_addr_t (*alloc_pmd)(uintptr_t va);
+ pud_t *(*get_pud_virt)(phys_addr_t pa);
+ phys_addr_t (*alloc_pud)(uintptr_t va);
+#endif
+};
+
+extern struct pt_alloc_ops pt_ops __initdata;
+#ifdef CONFIG_MMU
/* Number of PGD entries that a user-mode program can use */
#define USER_PTRS_PER_PGD (TASK_SIZE / PGDIR_SIZE)
/* Page protection bits */
#define _PAGE_BASE (_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_USER)
-#define PAGE_NONE __pgprot(_PAGE_PROT_NONE)
+#define PAGE_NONE __pgprot(_PAGE_PROT_NONE | _PAGE_READ)
#define PAGE_READ __pgprot(_PAGE_BASE | _PAGE_READ)
#define PAGE_WRITE __pgprot(_PAGE_BASE | _PAGE_READ | _PAGE_WRITE)
#define PAGE_EXEC __pgprot(_PAGE_BASE | _PAGE_EXEC)
@@ -628,11 +654,12 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma,
*
* Format of swap PTE:
* bit 0: _PAGE_PRESENT (zero)
- * bit 1: _PAGE_PROT_NONE (zero)
- * bits 2 to 6: swap type
- * bits 7 to XLEN-1: swap offset
+ * bit 1 to 3: _PAGE_LEAF (zero)
+ * bit 5: _PAGE_PROT_NONE (zero)
+ * bits 6 to 10: swap type
+ * bits 10 to XLEN-1: swap offset
*/
-#define __SWP_TYPE_SHIFT 2
+#define __SWP_TYPE_SHIFT 6
#define __SWP_TYPE_BITS 5
#define __SWP_TYPE_MASK ((1UL << __SWP_TYPE_BITS) - 1)
#define __SWP_OFFSET_SHIFT (__SWP_TYPE_BITS + __SWP_TYPE_SHIFT)
@@ -648,12 +675,17 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma,
#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
#define __swp_entry_to_pte(x) ((pte_t) { (x).val })
+#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
+#define __pmd_to_swp_entry(pmd) ((swp_entry_t) { pmd_val(pmd) })
+#define __swp_entry_to_pmd(swp) __pmd((swp).val)
+#endif /* CONFIG_ARCH_ENABLE_THP_MIGRATION */
+
/*
* In the RV64 Linux scheme, we give the user half of the virtual-address space
* and give the kernel the other (upper) half.
*/
#ifdef CONFIG_64BIT
-#define KERN_VIRT_START (-(BIT(CONFIG_VA_BITS)) + TASK_SIZE)
+#define KERN_VIRT_START (-(BIT(VA_BITS)) + TASK_SIZE)
#else
#define KERN_VIRT_START FIXADDR_START
#endif
@@ -661,11 +693,22 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma,
/*
* Task size is 0x4000000000 for RV64 or 0x9fc00000 for RV32.
* Note that PGDIR_SIZE must evenly divide TASK_SIZE.
+ * Task size is:
+ * - 0x9fc00000 (~2.5GB) for RV32.
+ * - 0x4000000000 ( 256GB) for RV64 using SV39 mmu
+ * - 0x800000000000 ( 128TB) for RV64 using SV48 mmu
+ *
+ * Note that PGDIR_SIZE must evenly divide TASK_SIZE since "RISC-V
+ * Instruction Set Manual Volume II: Privileged Architecture" states that
+ * "load and store effective addresses, which are 64bits, must have bits
+ * 63–48 all equal to bit 47, or else a page-fault exception will occur."
*/
#ifdef CONFIG_64BIT
-#define TASK_SIZE (PGDIR_SIZE * PTRS_PER_PGD / 2)
+#define TASK_SIZE (PGDIR_SIZE * PTRS_PER_PGD / 2)
+#define TASK_SIZE_MIN (PGDIR_SIZE_L3 * PTRS_PER_PGD / 2)
#else
-#define TASK_SIZE FIXADDR_START
+#define TASK_SIZE FIXADDR_START
+#define TASK_SIZE_MIN TASK_SIZE
#endif
#else /* CONFIG_MMU */
@@ -691,6 +734,8 @@ extern uintptr_t _dtb_early_pa;
#define dtb_early_va _dtb_early_va
#define dtb_early_pa _dtb_early_pa
#endif /* CONFIG_XIP_KERNEL */
+extern u64 satp_mode;
+extern bool pgtable_l4_enabled;
void paging_init(void);
void misc_mem_init(void);
diff --git a/arch/riscv/include/asm/sbi.h b/arch/riscv/include/asm/sbi.h
index 0d42693cb65e..d1c37479d828 100644
--- a/arch/riscv/include/asm/sbi.h
+++ b/arch/riscv/include/asm/sbi.h
@@ -8,6 +8,7 @@
#define _ASM_RISCV_SBI_H
#include <linux/types.h>
+#include <linux/cpumask.h>
#ifdef CONFIG_RISCV_SBI
enum sbi_ext_id {
@@ -27,6 +28,15 @@ enum sbi_ext_id {
SBI_EXT_IPI = 0x735049,
SBI_EXT_RFENCE = 0x52464E43,
SBI_EXT_HSM = 0x48534D,
+ SBI_EXT_SRST = 0x53525354,
+
+ /* Experimentals extensions must lie within this range */
+ SBI_EXT_EXPERIMENTAL_START = 0x08000000,
+ SBI_EXT_EXPERIMENTAL_END = 0x08FFFFFF,
+
+ /* Vendor extensions must lie within this range */
+ SBI_EXT_VENDOR_START = 0x09000000,
+ SBI_EXT_VENDOR_END = 0x09FFFFFF,
};
enum sbi_ext_base_fid {
@@ -70,6 +80,21 @@ enum sbi_hsm_hart_status {
SBI_HSM_HART_STATUS_STOP_PENDING,
};
+enum sbi_ext_srst_fid {
+ SBI_EXT_SRST_RESET = 0,
+};
+
+enum sbi_srst_reset_type {
+ SBI_SRST_RESET_TYPE_SHUTDOWN = 0,
+ SBI_SRST_RESET_TYPE_COLD_REBOOT,
+ SBI_SRST_RESET_TYPE_WARM_REBOOT,
+};
+
+enum sbi_srst_reset_reason {
+ SBI_SRST_RESET_REASON_NONE = 0,
+ SBI_SRST_RESET_REASON_SYS_FAILURE,
+};
+
#define SBI_SPEC_VERSION_DEFAULT 0x1
#define SBI_SPEC_VERSION_MAJOR_SHIFT 24
#define SBI_SPEC_VERSION_MAJOR_MASK 0x7f
@@ -82,6 +107,7 @@ enum sbi_hsm_hart_status {
#define SBI_ERR_INVALID_PARAM -3
#define SBI_ERR_DENIED -4
#define SBI_ERR_INVALID_ADDRESS -5
+#define SBI_ERR_ALREADY_AVAILABLE -6
extern unsigned long sbi_spec_version;
struct sbiret {
@@ -103,27 +129,27 @@ long sbi_get_mimpid(void);
void sbi_set_timer(uint64_t stime_value);
void sbi_shutdown(void);
void sbi_clear_ipi(void);
-int sbi_send_ipi(const unsigned long *hart_mask);
-int sbi_remote_fence_i(const unsigned long *hart_mask);
-int sbi_remote_sfence_vma(const unsigned long *hart_mask,
+int sbi_send_ipi(const struct cpumask *cpu_mask);
+int sbi_remote_fence_i(const struct cpumask *cpu_mask);
+int sbi_remote_sfence_vma(const struct cpumask *cpu_mask,
unsigned long start,
unsigned long size);
-int sbi_remote_sfence_vma_asid(const unsigned long *hart_mask,
+int sbi_remote_sfence_vma_asid(const struct cpumask *cpu_mask,
unsigned long start,
unsigned long size,
unsigned long asid);
-int sbi_remote_hfence_gvma(const unsigned long *hart_mask,
+int sbi_remote_hfence_gvma(const struct cpumask *cpu_mask,
unsigned long start,
unsigned long size);
-int sbi_remote_hfence_gvma_vmid(const unsigned long *hart_mask,
+int sbi_remote_hfence_gvma_vmid(const struct cpumask *cpu_mask,
unsigned long start,
unsigned long size,
unsigned long vmid);
-int sbi_remote_hfence_vvma(const unsigned long *hart_mask,
+int sbi_remote_hfence_vvma(const struct cpumask *cpu_mask,
unsigned long start,
unsigned long size);
-int sbi_remote_hfence_vvma_asid(const unsigned long *hart_mask,
+int sbi_remote_hfence_vvma_asid(const struct cpumask *cpu_mask,
unsigned long start,
unsigned long size,
unsigned long asid);
@@ -148,9 +174,17 @@ static inline unsigned long sbi_minor_version(void)
return sbi_spec_version & SBI_SPEC_VERSION_MINOR_MASK;
}
+/* Make SBI version */
+static inline unsigned long sbi_mk_version(unsigned long major,
+ unsigned long minor)
+{
+ return ((major & SBI_SPEC_VERSION_MAJOR_MASK) <<
+ SBI_SPEC_VERSION_MAJOR_SHIFT) | minor;
+}
+
int sbi_err_map_linux_errno(int err);
#else /* CONFIG_RISCV_SBI */
-static inline int sbi_remote_fence_i(const unsigned long *hart_mask) { return -1; }
+static inline int sbi_remote_fence_i(const struct cpumask *cpu_mask) { return -1; }
static inline void sbi_init(void) {}
#endif /* CONFIG_RISCV_SBI */
#endif /* _ASM_RISCV_SBI_H */
diff --git a/arch/riscv/include/asm/smp.h b/arch/riscv/include/asm/smp.h
index a7d2811f3536..23170c933d73 100644
--- a/arch/riscv/include/asm/smp.h
+++ b/arch/riscv/include/asm/smp.h
@@ -43,7 +43,6 @@ void arch_send_call_function_ipi_mask(struct cpumask *mask);
void arch_send_call_function_single_ipi(int cpu);
int riscv_hartid_to_cpuid(int hartid);
-void riscv_cpuid_to_hartid_mask(const struct cpumask *in, struct cpumask *out);
/* Set custom IPI operations */
void riscv_set_ipi_ops(const struct riscv_ipi_ops *ops);
@@ -63,8 +62,6 @@ asmlinkage void smp_callin(void);
#if defined CONFIG_HOTPLUG_CPU
int __cpu_disable(void);
void __cpu_die(unsigned int cpu);
-void cpu_stop(void);
-#else
#endif /* CONFIG_HOTPLUG_CPU */
#else
@@ -85,13 +82,6 @@ static inline unsigned long cpuid_to_hartid_map(int cpu)
return boot_cpu_hartid;
}
-static inline void riscv_cpuid_to_hartid_mask(const struct cpumask *in,
- struct cpumask *out)
-{
- cpumask_clear(out);
- cpumask_set_cpu(boot_cpu_hartid, out);
-}
-
static inline void riscv_set_ipi_ops(const struct riscv_ipi_ops *ops)
{
}
diff --git a/arch/riscv/include/asm/sparsemem.h b/arch/riscv/include/asm/sparsemem.h
index 45a7018a8118..63acaecc3374 100644
--- a/arch/riscv/include/asm/sparsemem.h
+++ b/arch/riscv/include/asm/sparsemem.h
@@ -4,7 +4,11 @@
#define _ASM_RISCV_SPARSEMEM_H
#ifdef CONFIG_SPARSEMEM
-#define MAX_PHYSMEM_BITS CONFIG_PA_BITS
+#ifdef CONFIG_64BIT
+#define MAX_PHYSMEM_BITS 56
+#else
+#define MAX_PHYSMEM_BITS 34
+#endif /* CONFIG_64BIT */
#define SECTION_SIZE_BITS 27
#endif /* CONFIG_SPARSEMEM */
diff --git a/arch/riscv/include/asm/spinlock_types.h b/arch/riscv/include/asm/spinlock_types.h
index f398e7638dd6..5a35a49505da 100644
--- a/arch/riscv/include/asm/spinlock_types.h
+++ b/arch/riscv/include/asm/spinlock_types.h
@@ -6,7 +6,7 @@
#ifndef _ASM_RISCV_SPINLOCK_TYPES_H
#define _ASM_RISCV_SPINLOCK_TYPES_H
-#ifndef __LINUX_SPINLOCK_TYPES_H
+#ifndef __LINUX_SPINLOCK_TYPES_RAW_H
# error "please don't include this file directly"
#endif
diff --git a/arch/riscv/include/asm/uaccess.h b/arch/riscv/include/asm/uaccess.h
index f314ff44c48d..c701a5e57a2b 100644
--- a/arch/riscv/include/asm/uaccess.h
+++ b/arch/riscv/include/asm/uaccess.h
@@ -8,6 +8,7 @@
#ifndef _ASM_RISCV_UACCESS_H
#define _ASM_RISCV_UACCESS_H
+#include <asm/asm-extable.h>
#include <asm/pgtable.h> /* for TASK_SIZE */
/*
@@ -80,25 +81,14 @@ static inline int __access_ok(unsigned long addr, unsigned long size)
#define __get_user_asm(insn, x, ptr, err) \
do { \
- uintptr_t __tmp; \
__typeof__(x) __x; \
__asm__ __volatile__ ( \
"1:\n" \
- " " insn " %1, %3\n" \
+ " " insn " %1, %2\n" \
"2:\n" \
- " .section .fixup,\"ax\"\n" \
- " .balign 4\n" \
- "3:\n" \
- " li %0, %4\n" \
- " li %1, 0\n" \
- " jump 2b, %2\n" \
- " .previous\n" \
- " .section __ex_table,\"a\"\n" \
- " .balign " RISCV_SZPTR "\n" \
- " " RISCV_PTR " 1b, 3b\n" \
- " .previous" \
- : "+r" (err), "=&r" (__x), "=r" (__tmp) \
- : "m" (*(ptr)), "i" (-EFAULT)); \
+ _ASM_EXTABLE_UACCESS_ERR_ZERO(1b, 2b, %0, %1) \
+ : "+r" (err), "=&r" (__x) \
+ : "m" (*(ptr))); \
(x) = __x; \
} while (0)
@@ -110,30 +100,18 @@ do { \
do { \
u32 __user *__ptr = (u32 __user *)(ptr); \
u32 __lo, __hi; \
- uintptr_t __tmp; \
__asm__ __volatile__ ( \
"1:\n" \
- " lw %1, %4\n" \
+ " lw %1, %3\n" \
"2:\n" \
- " lw %2, %5\n" \
+ " lw %2, %4\n" \
"3:\n" \
- " .section .fixup,\"ax\"\n" \
- " .balign 4\n" \
- "4:\n" \
- " li %0, %6\n" \
- " li %1, 0\n" \
- " li %2, 0\n" \
- " jump 3b, %3\n" \
- " .previous\n" \
- " .section __ex_table,\"a\"\n" \
- " .balign " RISCV_SZPTR "\n" \
- " " RISCV_PTR " 1b, 4b\n" \
- " " RISCV_PTR " 2b, 4b\n" \
- " .previous" \
- : "+r" (err), "=&r" (__lo), "=r" (__hi), \
- "=r" (__tmp) \
- : "m" (__ptr[__LSW]), "m" (__ptr[__MSW]), \
- "i" (-EFAULT)); \
+ _ASM_EXTABLE_UACCESS_ERR_ZERO(1b, 3b, %0, %1) \
+ _ASM_EXTABLE_UACCESS_ERR_ZERO(2b, 3b, %0, %1) \
+ : "+r" (err), "=&r" (__lo), "=r" (__hi) \
+ : "m" (__ptr[__LSW]), "m" (__ptr[__MSW])); \
+ if (err) \
+ __hi = 0; \
(x) = (__typeof__(x))((__typeof__((x)-(x)))( \
(((u64)__hi << 32) | __lo))); \
} while (0)
@@ -221,24 +199,14 @@ do { \
#define __put_user_asm(insn, x, ptr, err) \
do { \
- uintptr_t __tmp; \
__typeof__(*(ptr)) __x = x; \
__asm__ __volatile__ ( \
"1:\n" \
- " " insn " %z3, %2\n" \
+ " " insn " %z2, %1\n" \
"2:\n" \
- " .section .fixup,\"ax\"\n" \
- " .balign 4\n" \
- "3:\n" \
- " li %0, %4\n" \
- " jump 2b, %1\n" \
- " .previous\n" \
- " .section __ex_table,\"a\"\n" \
- " .balign " RISCV_SZPTR "\n" \
- " " RISCV_PTR " 1b, 3b\n" \
- " .previous" \
- : "+r" (err), "=r" (__tmp), "=m" (*(ptr)) \
- : "rJ" (__x), "i" (-EFAULT)); \
+ _ASM_EXTABLE_UACCESS_ERR(1b, 2b, %0) \
+ : "+r" (err), "=m" (*(ptr)) \
+ : "rJ" (__x)); \
} while (0)
#ifdef CONFIG_64BIT
@@ -249,28 +217,18 @@ do { \
do { \
u32 __user *__ptr = (u32 __user *)(ptr); \
u64 __x = (__typeof__((x)-(x)))(x); \
- uintptr_t __tmp; \
__asm__ __volatile__ ( \
"1:\n" \
- " sw %z4, %2\n" \
+ " sw %z3, %1\n" \
"2:\n" \
- " sw %z5, %3\n" \
+ " sw %z4, %2\n" \
"3:\n" \
- " .section .fixup,\"ax\"\n" \
- " .balign 4\n" \
- "4:\n" \
- " li %0, %6\n" \
- " jump 3b, %1\n" \
- " .previous\n" \
- " .section __ex_table,\"a\"\n" \
- " .balign " RISCV_SZPTR "\n" \
- " " RISCV_PTR " 1b, 4b\n" \
- " " RISCV_PTR " 2b, 4b\n" \
- " .previous" \
- : "+r" (err), "=r" (__tmp), \
+ _ASM_EXTABLE_UACCESS_ERR(1b, 3b, %0) \
+ _ASM_EXTABLE_UACCESS_ERR(2b, 3b, %0) \
+ : "+r" (err), \
"=m" (__ptr[__LSW]), \
"=m" (__ptr[__MSW]) \
- : "rJ" (__x), "rJ" (__x >> 32), "i" (-EFAULT)); \
+ : "rJ" (__x), "rJ" (__x >> 32)); \
} while (0)
#endif /* CONFIG_64BIT */
@@ -388,81 +346,6 @@ unsigned long __must_check clear_user(void __user *to, unsigned long n)
__clear_user(to, n) : n;
}
-/*
- * Atomic compare-and-exchange, but with a fixup for userspace faults. Faults
- * will set "err" to -EFAULT, while successful accesses return the previous
- * value.
- */
-#define __cmpxchg_user(ptr, old, new, err, size, lrb, scb) \
-({ \
- __typeof__(ptr) __ptr = (ptr); \
- __typeof__(*(ptr)) __old = (old); \
- __typeof__(*(ptr)) __new = (new); \
- __typeof__(*(ptr)) __ret; \
- __typeof__(err) __err = 0; \
- register unsigned int __rc; \
- __enable_user_access(); \
- switch (size) { \
- case 4: \
- __asm__ __volatile__ ( \
- "0:\n" \
- " lr.w" #scb " %[ret], %[ptr]\n" \
- " bne %[ret], %z[old], 1f\n" \
- " sc.w" #lrb " %[rc], %z[new], %[ptr]\n" \
- " bnez %[rc], 0b\n" \
- "1:\n" \
- ".section .fixup,\"ax\"\n" \
- ".balign 4\n" \
- "2:\n" \
- " li %[err], %[efault]\n" \
- " jump 1b, %[rc]\n" \
- ".previous\n" \
- ".section __ex_table,\"a\"\n" \
- ".balign " RISCV_SZPTR "\n" \
- " " RISCV_PTR " 1b, 2b\n" \
- ".previous\n" \
- : [ret] "=&r" (__ret), \
- [rc] "=&r" (__rc), \
- [ptr] "+A" (*__ptr), \
- [err] "=&r" (__err) \
- : [old] "rJ" (__old), \
- [new] "rJ" (__new), \
- [efault] "i" (-EFAULT)); \
- break; \
- case 8: \
- __asm__ __volatile__ ( \
- "0:\n" \
- " lr.d" #scb " %[ret], %[ptr]\n" \
- " bne %[ret], %z[old], 1f\n" \
- " sc.d" #lrb " %[rc], %z[new], %[ptr]\n" \
- " bnez %[rc], 0b\n" \
- "1:\n" \
- ".section .fixup,\"ax\"\n" \
- ".balign 4\n" \
- "2:\n" \
- " li %[err], %[efault]\n" \
- " jump 1b, %[rc]\n" \
- ".previous\n" \
- ".section __ex_table,\"a\"\n" \
- ".balign " RISCV_SZPTR "\n" \
- " " RISCV_PTR " 1b, 2b\n" \
- ".previous\n" \
- : [ret] "=&r" (__ret), \
- [rc] "=&r" (__rc), \
- [ptr] "+A" (*__ptr), \
- [err] "=&r" (__err) \
- : [old] "rJ" (__old), \
- [new] "rJ" (__new), \
- [efault] "i" (-EFAULT)); \
- break; \
- default: \
- BUILD_BUG(); \
- } \
- __disable_user_access(); \
- (err) = __err; \
- __ret; \
-})
-
#define HAVE_GET_KERNEL_NOFAULT
#define __get_kernel_nofault(dst, src, type, err_label) \
diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile
index 3397ddac1a30..612556faa527 100644
--- a/arch/riscv/kernel/Makefile
+++ b/arch/riscv/kernel/Makefile
@@ -43,7 +43,8 @@ obj-$(CONFIG_FPU) += fpu.o
obj-$(CONFIG_SMP) += smpboot.o
obj-$(CONFIG_SMP) += smp.o
obj-$(CONFIG_SMP) += cpu_ops.o
-obj-$(CONFIG_SMP) += cpu_ops_spinwait.o
+
+obj-$(CONFIG_RISCV_BOOT_SPINWAIT) += cpu_ops_spinwait.o
obj-$(CONFIG_MODULES) += module.o
obj-$(CONFIG_MODULE_SECTIONS) += module-sections.o
diff --git a/arch/riscv/kernel/asm-offsets.c b/arch/riscv/kernel/asm-offsets.c
index 253126e4beef..df0519a64eaf 100644
--- a/arch/riscv/kernel/asm-offsets.c
+++ b/arch/riscv/kernel/asm-offsets.c
@@ -12,6 +12,7 @@
#include <asm/kvm_host.h>
#include <asm/thread_info.h>
#include <asm/ptrace.h>
+#include <asm/cpu_ops_sbi.h>
void asm_offsets(void);
@@ -468,4 +469,6 @@ void asm_offsets(void)
DEFINE(PT_SIZE_ON_STACK, ALIGN(sizeof(struct pt_regs), STACK_ALIGN));
OFFSET(KERNEL_MAP_VIRT_ADDR, kernel_mapping, virt_addr);
+ OFFSET(SBI_HART_BOOT_TASK_PTR_OFFSET, sbi_hart_boot_data, task_ptr);
+ OFFSET(SBI_HART_BOOT_STACK_PTR_OFFSET, sbi_hart_boot_data, stack_ptr);
}
diff --git a/arch/riscv/kernel/cpu-hotplug.c b/arch/riscv/kernel/cpu-hotplug.c
index df84e0c13db1..be7f05b542bb 100644
--- a/arch/riscv/kernel/cpu-hotplug.c
+++ b/arch/riscv/kernel/cpu-hotplug.c
@@ -14,12 +14,6 @@
#include <asm/cpu_ops.h>
#include <asm/sbi.h>
-void cpu_stop(void);
-void arch_cpu_idle_dead(void)
-{
- cpu_stop();
-}
-
bool cpu_has_hotplug(unsigned int cpu)
{
if (cpu_ops[cpu]->cpu_stop)
@@ -75,7 +69,7 @@ void __cpu_die(unsigned int cpu)
/*
* Called from the idle thread for the CPU which has been shutdown.
*/
-void cpu_stop(void)
+void arch_cpu_idle_dead(void)
{
idle_task_exit();
diff --git a/arch/riscv/kernel/cpu.c b/arch/riscv/kernel/cpu.c
index f13b2c9ea912..ad0a7e9f828b 100644
--- a/arch/riscv/kernel/cpu.c
+++ b/arch/riscv/kernel/cpu.c
@@ -7,6 +7,7 @@
#include <linux/seq_file.h>
#include <linux/of.h>
#include <asm/smp.h>
+#include <asm/pgtable.h>
/*
* Returns the hart ID of the given device tree node, or -ENODEV if the node
@@ -71,18 +72,19 @@ static void print_isa(struct seq_file *f, const char *isa)
seq_puts(f, "\n");
}
-static void print_mmu(struct seq_file *f, const char *mmu_type)
+static void print_mmu(struct seq_file *f)
{
+ char sv_type[16];
+
#if defined(CONFIG_32BIT)
- if (strcmp(mmu_type, "riscv,sv32") != 0)
- return;
+ strncpy(sv_type, "sv32", 5);
#elif defined(CONFIG_64BIT)
- if (strcmp(mmu_type, "riscv,sv39") != 0 &&
- strcmp(mmu_type, "riscv,sv48") != 0)
- return;
+ if (pgtable_l4_enabled)
+ strncpy(sv_type, "sv48", 5);
+ else
+ strncpy(sv_type, "sv39", 5);
#endif
-
- seq_printf(f, "mmu\t\t: %s\n", mmu_type+6);
+ seq_printf(f, "mmu\t\t: %s\n", sv_type);
}
static void *c_start(struct seq_file *m, loff_t *pos)
@@ -107,14 +109,13 @@ static int c_show(struct seq_file *m, void *v)
{
unsigned long cpu_id = (unsigned long)v - 1;
struct device_node *node = of_get_cpu_node(cpu_id, NULL);
- const char *compat, *isa, *mmu;
+ const char *compat, *isa;
seq_printf(m, "processor\t: %lu\n", cpu_id);
seq_printf(m, "hart\t\t: %lu\n", cpuid_to_hartid_map(cpu_id));
if (!of_property_read_string(node, "riscv,isa", &isa))
print_isa(m, isa);
- if (!of_property_read_string(node, "mmu-type", &mmu))
- print_mmu(m, mmu);
+ print_mmu(m);
if (!of_property_read_string(node, "compatible", &compat)
&& strcmp(compat, "riscv"))
seq_printf(m, "uarch\t\t: %s\n", compat);
diff --git a/arch/riscv/kernel/cpu_ops.c b/arch/riscv/kernel/cpu_ops.c
index 1985884fe829..170d07e57721 100644
--- a/arch/riscv/kernel/cpu_ops.c
+++ b/arch/riscv/kernel/cpu_ops.c
@@ -8,37 +8,29 @@
#include <linux/of.h>
#include <linux/string.h>
#include <linux/sched.h>
-#include <linux/sched/task_stack.h>
#include <asm/cpu_ops.h>
#include <asm/sbi.h>
#include <asm/smp.h>
const struct cpu_operations *cpu_ops[NR_CPUS] __ro_after_init;
-void *__cpu_up_stack_pointer[NR_CPUS] __section(".data");
-void *__cpu_up_task_pointer[NR_CPUS] __section(".data");
-
extern const struct cpu_operations cpu_ops_sbi;
+#ifdef CONFIG_RISCV_BOOT_SPINWAIT
extern const struct cpu_operations cpu_ops_spinwait;
-
-void cpu_update_secondary_bootdata(unsigned int cpuid,
- struct task_struct *tidle)
-{
- int hartid = cpuid_to_hartid_map(cpuid);
-
- /* Make sure tidle is updated */
- smp_mb();
- WRITE_ONCE(__cpu_up_stack_pointer[hartid],
- task_stack_page(tidle) + THREAD_SIZE);
- WRITE_ONCE(__cpu_up_task_pointer[hartid], tidle);
-}
+#else
+const struct cpu_operations cpu_ops_spinwait = {
+ .name = "",
+ .cpu_prepare = NULL,
+ .cpu_start = NULL,
+};
+#endif
void __init cpu_set_ops(int cpuid)
{
#if IS_ENABLED(CONFIG_RISCV_SBI)
if (sbi_probe_extension(SBI_EXT_HSM) > 0) {
if (!cpuid)
- pr_info("SBI v0.2 HSM extension detected\n");
+ pr_info("SBI HSM extension detected\n");
cpu_ops[cpuid] = &cpu_ops_sbi;
} else
#endif
diff --git a/arch/riscv/kernel/cpu_ops_sbi.c b/arch/riscv/kernel/cpu_ops_sbi.c
index 685fae72b7f5..dae29cbfe550 100644
--- a/arch/riscv/kernel/cpu_ops_sbi.c
+++ b/arch/riscv/kernel/cpu_ops_sbi.c
@@ -7,13 +7,22 @@
#include <linux/init.h>
#include <linux/mm.h>
+#include <linux/sched/task_stack.h>
#include <asm/cpu_ops.h>
+#include <asm/cpu_ops_sbi.h>
#include <asm/sbi.h>
#include <asm/smp.h>
extern char secondary_start_sbi[];
const struct cpu_operations cpu_ops_sbi;
+/*
+ * Ordered booting via HSM brings one cpu at a time. However, cpu hotplug can
+ * be invoked from multiple threads in parallel. Define a per cpu data
+ * to handle that.
+ */
+DEFINE_PER_CPU(struct sbi_hart_boot_data, boot_data);
+
static int sbi_hsm_hart_start(unsigned long hartid, unsigned long saddr,
unsigned long priv)
{
@@ -55,14 +64,19 @@ static int sbi_hsm_hart_get_status(unsigned long hartid)
static int sbi_cpu_start(unsigned int cpuid, struct task_struct *tidle)
{
- int rc;
unsigned long boot_addr = __pa_symbol(secondary_start_sbi);
int hartid = cpuid_to_hartid_map(cpuid);
-
- cpu_update_secondary_bootdata(cpuid, tidle);
- rc = sbi_hsm_hart_start(hartid, boot_addr, 0);
-
- return rc;
+ unsigned long hsm_data;
+ struct sbi_hart_boot_data *bdata = &per_cpu(boot_data, cpuid);
+
+ /* Make sure tidle is updated */
+ smp_mb();
+ bdata->task_ptr = tidle;
+ bdata->stack_ptr = task_stack_page(tidle) + THREAD_SIZE;
+ /* Make sure boot data is updated */
+ smp_mb();
+ hsm_data = __pa(bdata);
+ return sbi_hsm_hart_start(hartid, boot_addr, hsm_data);
}
static int sbi_cpu_prepare(unsigned int cpuid)
diff --git a/arch/riscv/kernel/cpu_ops_spinwait.c b/arch/riscv/kernel/cpu_ops_spinwait.c
index b2c957bb68c1..346847f6c41c 100644
--- a/arch/riscv/kernel/cpu_ops_spinwait.c
+++ b/arch/riscv/kernel/cpu_ops_spinwait.c
@@ -6,11 +6,36 @@
#include <linux/errno.h>
#include <linux/of.h>
#include <linux/string.h>
+#include <linux/sched/task_stack.h>
#include <asm/cpu_ops.h>
#include <asm/sbi.h>
#include <asm/smp.h>
const struct cpu_operations cpu_ops_spinwait;
+void *__cpu_spinwait_stack_pointer[NR_CPUS] __section(".data");
+void *__cpu_spinwait_task_pointer[NR_CPUS] __section(".data");
+
+static void cpu_update_secondary_bootdata(unsigned int cpuid,
+ struct task_struct *tidle)
+{
+ int hartid = cpuid_to_hartid_map(cpuid);
+
+ /*
+ * The hartid must be less than NR_CPUS to avoid out-of-bound access
+ * errors for __cpu_spinwait_stack/task_pointer. That is not always possible
+ * for platforms with discontiguous hartid numbering scheme. That's why
+ * spinwait booting is not the recommended approach for any platforms
+ * booting Linux in S-mode and can be disabled in the future.
+ */
+ if (hartid == INVALID_HARTID || hartid >= NR_CPUS)
+ return;
+
+ /* Make sure tidle is updated */
+ smp_mb();
+ WRITE_ONCE(__cpu_spinwait_stack_pointer[hartid],
+ task_stack_page(tidle) + THREAD_SIZE);
+ WRITE_ONCE(__cpu_spinwait_task_pointer[hartid], tidle);
+}
static int spinwait_cpu_prepare(unsigned int cpuid)
{
@@ -28,7 +53,7 @@ static int spinwait_cpu_start(unsigned int cpuid, struct task_struct *tidle)
* selects the first cpu to boot the kernel and causes the remainder
* of the cpus to spin in a loop waiting for their stack pointer to be
* setup by that main cpu. Writing to bootdata
- * (i.e __cpu_up_stack_pointer) signals to the spinning cpus that they
+ * (i.e __cpu_spinwait_stack_pointer) signals to the spinning cpus that they
* can continue the boot process.
*/
cpu_update_secondary_bootdata(cpuid, tidle);
diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S
index f52f01ecbeea..2363b43312fc 100644
--- a/arch/riscv/kernel/head.S
+++ b/arch/riscv/kernel/head.S
@@ -11,6 +11,7 @@
#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/csr.h>
+#include <asm/cpu_ops_sbi.h>
#include <asm/hwcap.h>
#include <asm/image.h>
#include "efi-header.S"
@@ -105,7 +106,8 @@ relocate:
/* Compute satp for kernel page tables, but don't load it yet */
srl a2, a0, PAGE_SHIFT
- li a1, SATP_MODE
+ la a1, satp_mode
+ REG_L a1, 0(a1)
or a2, a2, a1
/*
@@ -135,7 +137,7 @@ relocate:
/*
* Switch to kernel page tables. A full fence is necessary in order to
* avoid using the trampoline translations, which are only correct for
- * the first superpage. Fetching the fence is guarnteed to work
+ * the first superpage. Fetching the fence is guaranteed to work
* because that first superpage is translated the same way.
*/
csrw CSR_SATP, a2
@@ -167,18 +169,17 @@ secondary_start_sbi:
la a3, .Lsecondary_park
csrw CSR_TVEC, a3
- slli a3, a0, LGREG
- la a4, __cpu_up_stack_pointer
- XIP_FIXUP_OFFSET a4
- la a5, __cpu_up_task_pointer
- XIP_FIXUP_OFFSET a5
- add a4, a3, a4
- add a5, a3, a5
- REG_L sp, (a4)
- REG_L tp, (a5)
-
- .global secondary_start_common
-secondary_start_common:
+ /* a0 contains the hartid & a1 contains boot data */
+ li a2, SBI_HART_BOOT_TASK_PTR_OFFSET
+ XIP_FIXUP_OFFSET a2
+ add a2, a2, a1
+ REG_L tp, (a2)
+ li a3, SBI_HART_BOOT_STACK_PTR_OFFSET
+ XIP_FIXUP_OFFSET a3
+ add a3, a3, a1
+ REG_L sp, (a3)
+
+.Lsecondary_start_common:
#ifdef CONFIG_MMU
/* Enable virtual memory and relocate to virtual address */
@@ -258,13 +259,13 @@ pmp_done:
li t0, SR_FS
csrc CSR_STATUS, t0
-#ifdef CONFIG_SMP
+#ifdef CONFIG_RISCV_BOOT_SPINWAIT
li t0, CONFIG_NR_CPUS
blt a0, t0, .Lgood_cores
tail .Lsecondary_park
.Lgood_cores:
-#endif
+ /* The lottery system is only required for spinwait booting method */
#ifndef CONFIG_XIP_KERNEL
/* Pick one hart to run the main boot sequence */
la a3, hart_lottery
@@ -283,6 +284,10 @@ pmp_done:
/* first time here if hart_lottery in RAM is not set */
beq t0, t1, .Lsecondary_start
+#endif /* CONFIG_XIP */
+#endif /* CONFIG_RISCV_BOOT_SPINWAIT */
+
+#ifdef CONFIG_XIP_KERNEL
la sp, _end + THREAD_SIZE
XIP_FIXUP_OFFSET sp
mv s0, a0
@@ -339,16 +344,16 @@ clear_bss_done:
call soc_early_init
tail start_kernel
+#if CONFIG_RISCV_BOOT_SPINWAIT
.Lsecondary_start:
-#ifdef CONFIG_SMP
/* Set trap vector to spin forever to help debug */
la a3, .Lsecondary_park
csrw CSR_TVEC, a3
slli a3, a0, LGREG
- la a1, __cpu_up_stack_pointer
+ la a1, __cpu_spinwait_stack_pointer
XIP_FIXUP_OFFSET a1
- la a2, __cpu_up_task_pointer
+ la a2, __cpu_spinwait_task_pointer
XIP_FIXUP_OFFSET a2
add a1, a3, a1
add a2, a3, a2
@@ -365,8 +370,8 @@ clear_bss_done:
beqz tp, .Lwait_for_cpu_up
fence
- tail secondary_start_common
-#endif
+ tail .Lsecondary_start_common
+#endif /* CONFIG_RISCV_BOOT_SPINWAIT */
END(_start_kernel)
@@ -448,7 +453,3 @@ ENTRY(reset_regs)
ret
END(reset_regs)
#endif /* CONFIG_RISCV_M_MODE */
-
-__PAGE_ALIGNED_BSS
- /* Empty zero page */
- .balign PAGE_SIZE
diff --git a/arch/riscv/kernel/head.h b/arch/riscv/kernel/head.h
index aabbc3ac3e48..726731ada534 100644
--- a/arch/riscv/kernel/head.h
+++ b/arch/riscv/kernel/head.h
@@ -16,7 +16,9 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa);
asmlinkage void __init __copy_data(void);
#endif
-extern void *__cpu_up_stack_pointer[];
-extern void *__cpu_up_task_pointer[];
+#ifdef CONFIG_RISCV_BOOT_SPINWAIT
+extern void *__cpu_spinwait_stack_pointer[];
+extern void *__cpu_spinwait_task_pointer[];
+#endif
#endif /* __ASM_HEAD_H */
diff --git a/arch/riscv/kernel/kexec_relocate.S b/arch/riscv/kernel/kexec_relocate.S
index a80b52a74f58..059c5e216ae7 100644
--- a/arch/riscv/kernel/kexec_relocate.S
+++ b/arch/riscv/kernel/kexec_relocate.S
@@ -159,25 +159,15 @@ SYM_CODE_START(riscv_kexec_norelocate)
* s0: (const) Phys address to jump to
* s1: (const) Phys address of the FDT image
* s2: (const) The hartid of the current hart
- * s3: (const) kernel_map.va_pa_offset, used when switching MMU off
*/
mv s0, a1
mv s1, a2
mv s2, a3
- mv s3, a4
/* Disable / cleanup interrupts */
csrw CSR_SIE, zero
csrw CSR_SIP, zero
- /* Switch to physical addressing */
- la s4, 1f
- sub s4, s4, s3
- csrw CSR_STVEC, s4
- csrw CSR_SATP, zero
-
-.align 2
-1:
/* Pass the arguments to the next kernel / Cleanup*/
mv a0, s2
mv a1, s1
@@ -214,7 +204,15 @@ SYM_CODE_START(riscv_kexec_norelocate)
csrw CSR_SCAUSE, zero
csrw CSR_SSCRATCH, zero
- jalr zero, a2, 0
+ /*
+ * Switch to physical addressing
+ * This will also trigger a jump to CSR_STVEC
+ * which in this case is the address of the new
+ * kernel.
+ */
+ csrw CSR_STVEC, a2
+ csrw CSR_SATP, zero
+
SYM_CODE_END(riscv_kexec_norelocate)
.section ".rodata"
diff --git a/arch/riscv/kernel/machine_kexec.c b/arch/riscv/kernel/machine_kexec.c
index e6eca271a4d6..cbef0fc73afa 100644
--- a/arch/riscv/kernel/machine_kexec.c
+++ b/arch/riscv/kernel/machine_kexec.c
@@ -169,7 +169,8 @@ machine_kexec(struct kimage *image)
struct kimage_arch *internal = &image->arch;
unsigned long jump_addr = (unsigned long) image->start;
unsigned long first_ind_entry = (unsigned long) &image->head;
- unsigned long this_hart_id = raw_smp_processor_id();
+ unsigned long this_cpu_id = smp_processor_id();
+ unsigned long this_hart_id = cpuid_to_hartid_map(this_cpu_id);
unsigned long fdt_addr = internal->fdt_addr;
void *control_code_buffer = page_address(image->control_code_page);
riscv_kexec_method kexec_method = NULL;
diff --git a/arch/riscv/kernel/perf_callchain.c b/arch/riscv/kernel/perf_callchain.c
index 0bb1854dce83..1fc075b8f764 100644
--- a/arch/riscv/kernel/perf_callchain.c
+++ b/arch/riscv/kernel/perf_callchain.c
@@ -58,10 +58,6 @@ void perf_callchain_user(struct perf_callchain_entry_ctx *entry,
{
unsigned long fp = 0;
- /* RISC-V does not support perf in guest mode. */
- if (perf_guest_cbs && perf_guest_cbs->is_in_guest())
- return;
-
fp = regs->s0;
perf_callchain_store(entry, regs->epc);
@@ -78,11 +74,5 @@ static bool fill_callchain(void *entry, unsigned long pc)
void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry,
struct pt_regs *regs)
{
- /* RISC-V does not support perf in guest mode. */
- if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
- pr_warn("RISC-V does not support perf in guest mode!");
- return;
- }
-
walk_stackframe(NULL, regs, fill_callchain, entry);
}
diff --git a/arch/riscv/kernel/ptrace.c b/arch/riscv/kernel/ptrace.c
index 9c0511119bad..a89243730153 100644
--- a/arch/riscv/kernel/ptrace.c
+++ b/arch/riscv/kernel/ptrace.c
@@ -42,12 +42,10 @@ static int riscv_gpr_set(struct task_struct *target,
unsigned int pos, unsigned int count,
const void *kbuf, const void __user *ubuf)
{
- int ret;
struct pt_regs *regs;
regs = task_pt_regs(target);
- ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, regs, 0, -1);
- return ret;
+ return user_regset_copyin(&pos, &count, &kbuf, &ubuf, regs, 0, -1);
}
#ifdef CONFIG_FPU
diff --git a/arch/riscv/kernel/sbi.c b/arch/riscv/kernel/sbi.c
index 7402a417f38e..f72527fcb347 100644
--- a/arch/riscv/kernel/sbi.c
+++ b/arch/riscv/kernel/sbi.c
@@ -7,6 +7,7 @@
#include <linux/init.h>
#include <linux/pm.h>
+#include <linux/reboot.h>
#include <asm/sbi.h>
#include <asm/smp.h>
@@ -15,8 +16,8 @@ unsigned long sbi_spec_version __ro_after_init = SBI_SPEC_VERSION_DEFAULT;
EXPORT_SYMBOL(sbi_spec_version);
static void (*__sbi_set_timer)(uint64_t stime) __ro_after_init;
-static int (*__sbi_send_ipi)(const unsigned long *hart_mask) __ro_after_init;
-static int (*__sbi_rfence)(int fid, const unsigned long *hart_mask,
+static int (*__sbi_send_ipi)(const struct cpumask *cpu_mask) __ro_after_init;
+static int (*__sbi_rfence)(int fid, const struct cpumask *cpu_mask,
unsigned long start, unsigned long size,
unsigned long arg4, unsigned long arg5) __ro_after_init;
@@ -66,6 +67,30 @@ int sbi_err_map_linux_errno(int err)
EXPORT_SYMBOL(sbi_err_map_linux_errno);
#ifdef CONFIG_RISCV_SBI_V01
+static unsigned long __sbi_v01_cpumask_to_hartmask(const struct cpumask *cpu_mask)
+{
+ unsigned long cpuid, hartid;
+ unsigned long hmask = 0;
+
+ /*
+ * There is no maximum hartid concept in RISC-V and NR_CPUS must not be
+ * associated with hartid. As SBI v0.1 is only kept for backward compatibility
+ * and will be removed in the future, there is no point in supporting hartid
+ * greater than BITS_PER_LONG (32 for RV32 and 64 for RV64). Ideally, SBI v0.2
+ * should be used for platforms with hartid greater than BITS_PER_LONG.
+ */
+ for_each_cpu(cpuid, cpu_mask) {
+ hartid = cpuid_to_hartid_map(cpuid);
+ if (hartid >= BITS_PER_LONG) {
+ pr_warn("Unable to send any request to hartid > BITS_PER_LONG for SBI v0.1\n");
+ break;
+ }
+ hmask |= 1 << hartid;
+ }
+
+ return hmask;
+}
+
/**
* sbi_console_putchar() - Writes given character to the console device.
* @ch: The data to be written to the console.
@@ -131,33 +156,44 @@ static void __sbi_set_timer_v01(uint64_t stime_value)
#endif
}
-static int __sbi_send_ipi_v01(const unsigned long *hart_mask)
+static int __sbi_send_ipi_v01(const struct cpumask *cpu_mask)
{
- sbi_ecall(SBI_EXT_0_1_SEND_IPI, 0, (unsigned long)hart_mask,
+ unsigned long hart_mask;
+
+ if (!cpu_mask)
+ cpu_mask = cpu_online_mask;
+ hart_mask = __sbi_v01_cpumask_to_hartmask(cpu_mask);
+
+ sbi_ecall(SBI_EXT_0_1_SEND_IPI, 0, (unsigned long)(&hart_mask),
0, 0, 0, 0, 0);
return 0;
}
-static int __sbi_rfence_v01(int fid, const unsigned long *hart_mask,
+static int __sbi_rfence_v01(int fid, const struct cpumask *cpu_mask,
unsigned long start, unsigned long size,
unsigned long arg4, unsigned long arg5)
{
int result = 0;
+ unsigned long hart_mask;
+
+ if (!cpu_mask)
+ cpu_mask = cpu_online_mask;
+ hart_mask = __sbi_v01_cpumask_to_hartmask(cpu_mask);
/* v0.2 function IDs are equivalent to v0.1 extension IDs */
switch (fid) {
case SBI_EXT_RFENCE_REMOTE_FENCE_I:
sbi_ecall(SBI_EXT_0_1_REMOTE_FENCE_I, 0,
- (unsigned long)hart_mask, 0, 0, 0, 0, 0);
+ (unsigned long)&hart_mask, 0, 0, 0, 0, 0);
break;
case SBI_EXT_RFENCE_REMOTE_SFENCE_VMA:
sbi_ecall(SBI_EXT_0_1_REMOTE_SFENCE_VMA, 0,
- (unsigned long)hart_mask, start, size,
+ (unsigned long)&hart_mask, start, size,
0, 0, 0);
break;
case SBI_EXT_RFENCE_REMOTE_SFENCE_VMA_ASID:
sbi_ecall(SBI_EXT_0_1_REMOTE_SFENCE_VMA_ASID, 0,
- (unsigned long)hart_mask, start, size,
+ (unsigned long)&hart_mask, start, size,
arg4, 0, 0);
break;
default:
@@ -179,7 +215,7 @@ static void __sbi_set_timer_v01(uint64_t stime_value)
sbi_major_version(), sbi_minor_version());
}
-static int __sbi_send_ipi_v01(const unsigned long *hart_mask)
+static int __sbi_send_ipi_v01(const struct cpumask *cpu_mask)
{
pr_warn("IPI extension is not available in SBI v%lu.%lu\n",
sbi_major_version(), sbi_minor_version());
@@ -187,7 +223,7 @@ static int __sbi_send_ipi_v01(const unsigned long *hart_mask)
return 0;
}
-static int __sbi_rfence_v01(int fid, const unsigned long *hart_mask,
+static int __sbi_rfence_v01(int fid, const struct cpumask *cpu_mask,
unsigned long start, unsigned long size,
unsigned long arg4, unsigned long arg5)
{
@@ -211,37 +247,33 @@ static void __sbi_set_timer_v02(uint64_t stime_value)
#endif
}
-static int __sbi_send_ipi_v02(const unsigned long *hart_mask)
+static int __sbi_send_ipi_v02(const struct cpumask *cpu_mask)
{
- unsigned long hartid, hmask_val, hbase;
- struct cpumask tmask;
+ unsigned long hartid, cpuid, hmask = 0, hbase = 0;
struct sbiret ret = {0};
int result;
- if (!hart_mask || !(*hart_mask)) {
- riscv_cpuid_to_hartid_mask(cpu_online_mask, &tmask);
- hart_mask = cpumask_bits(&tmask);
- }
+ if (!cpu_mask)
+ cpu_mask = cpu_online_mask;
- hmask_val = 0;
- hbase = 0;
- for_each_set_bit(hartid, hart_mask, NR_CPUS) {
- if (hmask_val && ((hbase + BITS_PER_LONG) <= hartid)) {
+ for_each_cpu(cpuid, cpu_mask) {
+ hartid = cpuid_to_hartid_map(cpuid);
+ if (hmask && ((hbase + BITS_PER_LONG) <= hartid)) {
ret = sbi_ecall(SBI_EXT_IPI, SBI_EXT_IPI_SEND_IPI,
- hmask_val, hbase, 0, 0, 0, 0);
+ hmask, hbase, 0, 0, 0, 0);
if (ret.error)
goto ecall_failed;
- hmask_val = 0;
+ hmask = 0;
hbase = 0;
}
- if (!hmask_val)
+ if (!hmask)
hbase = hartid;
- hmask_val |= 1UL << (hartid - hbase);
+ hmask |= 1UL << (hartid - hbase);
}
- if (hmask_val) {
+ if (hmask) {
ret = sbi_ecall(SBI_EXT_IPI, SBI_EXT_IPI_SEND_IPI,
- hmask_val, hbase, 0, 0, 0, 0);
+ hmask, hbase, 0, 0, 0, 0);
if (ret.error)
goto ecall_failed;
}
@@ -251,11 +283,11 @@ static int __sbi_send_ipi_v02(const unsigned long *hart_mask)
ecall_failed:
result = sbi_err_map_linux_errno(ret.error);
pr_err("%s: hbase = [%lu] hmask = [0x%lx] failed (error [%d])\n",
- __func__, hbase, hmask_val, result);
+ __func__, hbase, hmask, result);
return result;
}
-static int __sbi_rfence_v02_call(unsigned long fid, unsigned long hmask_val,
+static int __sbi_rfence_v02_call(unsigned long fid, unsigned long hmask,
unsigned long hbase, unsigned long start,
unsigned long size, unsigned long arg4,
unsigned long arg5)
@@ -266,31 +298,31 @@ static int __sbi_rfence_v02_call(unsigned long fid, unsigned long hmask_val,
switch (fid) {
case SBI_EXT_RFENCE_REMOTE_FENCE_I:
- ret = sbi_ecall(ext, fid, hmask_val, hbase, 0, 0, 0, 0);
+ ret = sbi_ecall(ext, fid, hmask, hbase, 0, 0, 0, 0);
break;
case SBI_EXT_RFENCE_REMOTE_SFENCE_VMA:
- ret = sbi_ecall(ext, fid, hmask_val, hbase, start,
+ ret = sbi_ecall(ext, fid, hmask, hbase, start,
size, 0, 0);
break;
case SBI_EXT_RFENCE_REMOTE_SFENCE_VMA_ASID:
- ret = sbi_ecall(ext, fid, hmask_val, hbase, start,
+ ret = sbi_ecall(ext, fid, hmask, hbase, start,
size, arg4, 0);
break;
case SBI_EXT_RFENCE_REMOTE_HFENCE_GVMA:
- ret = sbi_ecall(ext, fid, hmask_val, hbase, start,
+ ret = sbi_ecall(ext, fid, hmask, hbase, start,
size, 0, 0);
break;
case SBI_EXT_RFENCE_REMOTE_HFENCE_GVMA_VMID:
- ret = sbi_ecall(ext, fid, hmask_val, hbase, start,
+ ret = sbi_ecall(ext, fid, hmask, hbase, start,
size, arg4, 0);
break;
case SBI_EXT_RFENCE_REMOTE_HFENCE_VVMA:
- ret = sbi_ecall(ext, fid, hmask_val, hbase, start,
+ ret = sbi_ecall(ext, fid, hmask, hbase, start,
size, 0, 0);
break;
case SBI_EXT_RFENCE_REMOTE_HFENCE_VVMA_ASID:
- ret = sbi_ecall(ext, fid, hmask_val, hbase, start,
+ ret = sbi_ecall(ext, fid, hmask, hbase, start,
size, arg4, 0);
break;
default:
@@ -302,43 +334,39 @@ static int __sbi_rfence_v02_call(unsigned long fid, unsigned long hmask_val,
if (ret.error) {
result = sbi_err_map_linux_errno(ret.error);
pr_err("%s: hbase = [%lu] hmask = [0x%lx] failed (error [%d])\n",
- __func__, hbase, hmask_val, result);
+ __func__, hbase, hmask, result);
}
return result;
}
-static int __sbi_rfence_v02(int fid, const unsigned long *hart_mask,
+static int __sbi_rfence_v02(int fid, const struct cpumask *cpu_mask,
unsigned long start, unsigned long size,
unsigned long arg4, unsigned long arg5)
{
- unsigned long hmask_val, hartid, hbase;
- struct cpumask tmask;
+ unsigned long hartid, cpuid, hmask = 0, hbase = 0;
int result;
- if (!hart_mask || !(*hart_mask)) {
- riscv_cpuid_to_hartid_mask(cpu_online_mask, &tmask);
- hart_mask = cpumask_bits(&tmask);
- }
+ if (!cpu_mask)
+ cpu_mask = cpu_online_mask;
- hmask_val = 0;
- hbase = 0;
- for_each_set_bit(hartid, hart_mask, NR_CPUS) {
- if (hmask_val && ((hbase + BITS_PER_LONG) <= hartid)) {
- result = __sbi_rfence_v02_call(fid, hmask_val, hbase,
+ for_each_cpu(cpuid, cpu_mask) {
+ hartid = cpuid_to_hartid_map(cpuid);
+ if (hmask && ((hbase + BITS_PER_LONG) <= hartid)) {
+ result = __sbi_rfence_v02_call(fid, hmask, hbase,
start, size, arg4, arg5);
if (result)
return result;
- hmask_val = 0;
+ hmask = 0;
hbase = 0;
}
- if (!hmask_val)
+ if (!hmask)
hbase = hartid;
- hmask_val |= 1UL << (hartid - hbase);
+ hmask |= 1UL << (hartid - hbase);
}
- if (hmask_val) {
- result = __sbi_rfence_v02_call(fid, hmask_val, hbase,
+ if (hmask) {
+ result = __sbi_rfence_v02_call(fid, hmask, hbase,
start, size, arg4, arg5);
if (result)
return result;
@@ -360,44 +388,44 @@ void sbi_set_timer(uint64_t stime_value)
/**
* sbi_send_ipi() - Send an IPI to any hart.
- * @hart_mask: A cpu mask containing all the target harts.
+ * @cpu_mask: A cpu mask containing all the target harts.
*
* Return: 0 on success, appropriate linux error code otherwise.
*/
-int sbi_send_ipi(const unsigned long *hart_mask)
+int sbi_send_ipi(const struct cpumask *cpu_mask)
{
- return __sbi_send_ipi(hart_mask);
+ return __sbi_send_ipi(cpu_mask);
}
EXPORT_SYMBOL(sbi_send_ipi);
/**
* sbi_remote_fence_i() - Execute FENCE.I instruction on given remote harts.
- * @hart_mask: A cpu mask containing all the target harts.
+ * @cpu_mask: A cpu mask containing all the target harts.
*
* Return: 0 on success, appropriate linux error code otherwise.
*/
-int sbi_remote_fence_i(const unsigned long *hart_mask)
+int sbi_remote_fence_i(const struct cpumask *cpu_mask)
{
return __sbi_rfence(SBI_EXT_RFENCE_REMOTE_FENCE_I,
- hart_mask, 0, 0, 0, 0);
+ cpu_mask, 0, 0, 0, 0);
}
EXPORT_SYMBOL(sbi_remote_fence_i);
/**
* sbi_remote_sfence_vma() - Execute SFENCE.VMA instructions on given remote
* harts for the specified virtual address range.
- * @hart_mask: A cpu mask containing all the target harts.
+ * @cpu_mask: A cpu mask containing all the target harts.
* @start: Start of the virtual address
* @size: Total size of the virtual address range.
*
* Return: 0 on success, appropriate linux error code otherwise.
*/
-int sbi_remote_sfence_vma(const unsigned long *hart_mask,
+int sbi_remote_sfence_vma(const struct cpumask *cpu_mask,
unsigned long start,
unsigned long size)
{
return __sbi_rfence(SBI_EXT_RFENCE_REMOTE_SFENCE_VMA,
- hart_mask, start, size, 0, 0);
+ cpu_mask, start, size, 0, 0);
}
EXPORT_SYMBOL(sbi_remote_sfence_vma);
@@ -405,38 +433,38 @@ EXPORT_SYMBOL(sbi_remote_sfence_vma);
* sbi_remote_sfence_vma_asid() - Execute SFENCE.VMA instructions on given
* remote harts for a virtual address range belonging to a specific ASID.
*
- * @hart_mask: A cpu mask containing all the target harts.
+ * @cpu_mask: A cpu mask containing all the target harts.
* @start: Start of the virtual address
* @size: Total size of the virtual address range.
* @asid: The value of address space identifier (ASID).
*
* Return: 0 on success, appropriate linux error code otherwise.
*/
-int sbi_remote_sfence_vma_asid(const unsigned long *hart_mask,
+int sbi_remote_sfence_vma_asid(const struct cpumask *cpu_mask,
unsigned long start,
unsigned long size,
unsigned long asid)
{
return __sbi_rfence(SBI_EXT_RFENCE_REMOTE_SFENCE_VMA_ASID,
- hart_mask, start, size, asid, 0);
+ cpu_mask, start, size, asid, 0);
}
EXPORT_SYMBOL(sbi_remote_sfence_vma_asid);
/**
* sbi_remote_hfence_gvma() - Execute HFENCE.GVMA instructions on given remote
* harts for the specified guest physical address range.
- * @hart_mask: A cpu mask containing all the target harts.
+ * @cpu_mask: A cpu mask containing all the target harts.
* @start: Start of the guest physical address
* @size: Total size of the guest physical address range.
*
* Return: None
*/
-int sbi_remote_hfence_gvma(const unsigned long *hart_mask,
+int sbi_remote_hfence_gvma(const struct cpumask *cpu_mask,
unsigned long start,
unsigned long size)
{
return __sbi_rfence(SBI_EXT_RFENCE_REMOTE_HFENCE_GVMA,
- hart_mask, start, size, 0, 0);
+ cpu_mask, start, size, 0, 0);
}
EXPORT_SYMBOL_GPL(sbi_remote_hfence_gvma);
@@ -444,38 +472,38 @@ EXPORT_SYMBOL_GPL(sbi_remote_hfence_gvma);
* sbi_remote_hfence_gvma_vmid() - Execute HFENCE.GVMA instructions on given
* remote harts for a guest physical address range belonging to a specific VMID.
*
- * @hart_mask: A cpu mask containing all the target harts.
+ * @cpu_mask: A cpu mask containing all the target harts.
* @start: Start of the guest physical address
* @size: Total size of the guest physical address range.
* @vmid: The value of guest ID (VMID).
*
* Return: 0 if success, Error otherwise.
*/
-int sbi_remote_hfence_gvma_vmid(const unsigned long *hart_mask,
+int sbi_remote_hfence_gvma_vmid(const struct cpumask *cpu_mask,
unsigned long start,
unsigned long size,
unsigned long vmid)
{
return __sbi_rfence(SBI_EXT_RFENCE_REMOTE_HFENCE_GVMA_VMID,
- hart_mask, start, size, vmid, 0);
+ cpu_mask, start, size, vmid, 0);
}
EXPORT_SYMBOL(sbi_remote_hfence_gvma_vmid);
/**
* sbi_remote_hfence_vvma() - Execute HFENCE.VVMA instructions on given remote
* harts for the current guest virtual address range.
- * @hart_mask: A cpu mask containing all the target harts.
+ * @cpu_mask: A cpu mask containing all the target harts.
* @start: Start of the current guest virtual address
* @size: Total size of the current guest virtual address range.
*
* Return: None
*/
-int sbi_remote_hfence_vvma(const unsigned long *hart_mask,
+int sbi_remote_hfence_vvma(const struct cpumask *cpu_mask,
unsigned long start,
unsigned long size)
{
return __sbi_rfence(SBI_EXT_RFENCE_REMOTE_HFENCE_VVMA,
- hart_mask, start, size, 0, 0);
+ cpu_mask, start, size, 0, 0);
}
EXPORT_SYMBOL(sbi_remote_hfence_vvma);
@@ -484,23 +512,49 @@ EXPORT_SYMBOL(sbi_remote_hfence_vvma);
* remote harts for current guest virtual address range belonging to a specific
* ASID.
*
- * @hart_mask: A cpu mask containing all the target harts.
+ * @cpu_mask: A cpu mask containing all the target harts.
* @start: Start of the current guest virtual address
* @size: Total size of the current guest virtual address range.
* @asid: The value of address space identifier (ASID).
*
* Return: None
*/
-int sbi_remote_hfence_vvma_asid(const unsigned long *hart_mask,
+int sbi_remote_hfence_vvma_asid(const struct cpumask *cpu_mask,
unsigned long start,
unsigned long size,
unsigned long asid)
{
return __sbi_rfence(SBI_EXT_RFENCE_REMOTE_HFENCE_VVMA_ASID,
- hart_mask, start, size, asid, 0);
+ cpu_mask, start, size, asid, 0);
}
EXPORT_SYMBOL(sbi_remote_hfence_vvma_asid);
+static void sbi_srst_reset(unsigned long type, unsigned long reason)
+{
+ sbi_ecall(SBI_EXT_SRST, SBI_EXT_SRST_RESET, type, reason,
+ 0, 0, 0, 0);
+ pr_warn("%s: type=0x%lx reason=0x%lx failed\n",
+ __func__, type, reason);
+}
+
+static int sbi_srst_reboot(struct notifier_block *this,
+ unsigned long mode, void *cmd)
+{
+ sbi_srst_reset((mode == REBOOT_WARM || mode == REBOOT_SOFT) ?
+ SBI_SRST_RESET_TYPE_WARM_REBOOT :
+ SBI_SRST_RESET_TYPE_COLD_REBOOT,
+ SBI_SRST_RESET_REASON_NONE);
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block sbi_srst_reboot_nb;
+
+static void sbi_srst_power_off(void)
+{
+ sbi_srst_reset(SBI_SRST_RESET_TYPE_SHUTDOWN,
+ SBI_SRST_RESET_REASON_NONE);
+}
+
/**
* sbi_probe_extension() - Check if an SBI extension ID is supported or not.
* @extid: The extension ID to be probed.
@@ -564,11 +618,7 @@ long sbi_get_mimpid(void)
static void sbi_send_cpumask_ipi(const struct cpumask *target)
{
- struct cpumask hartid_mask;
-
- riscv_cpuid_to_hartid_mask(target, &hartid_mask);
-
- sbi_send_ipi(cpumask_bits(&hartid_mask));
+ sbi_send_ipi(target);
}
static const struct riscv_ipi_ops sbi_ipi_ops = {
@@ -608,6 +658,14 @@ void __init sbi_init(void)
} else {
__sbi_rfence = __sbi_rfence_v01;
}
+ if ((sbi_spec_version >= sbi_mk_version(0, 3)) &&
+ (sbi_probe_extension(SBI_EXT_SRST) > 0)) {
+ pr_info("SBI SRST extension detected\n");
+ pm_power_off = sbi_srst_power_off;
+ sbi_srst_reboot_nb.notifier_call = sbi_srst_reboot;
+ sbi_srst_reboot_nb.priority = 192;
+ register_restart_handler(&sbi_srst_reboot_nb);
+ }
} else {
__sbi_set_timer = __sbi_set_timer_v01;
__sbi_send_ipi = __sbi_send_ipi_v01;
diff --git a/arch/riscv/kernel/smp.c b/arch/riscv/kernel/smp.c
index 2f6da845c9ae..b5d30ea92292 100644
--- a/arch/riscv/kernel/smp.c
+++ b/arch/riscv/kernel/smp.c
@@ -59,16 +59,6 @@ int riscv_hartid_to_cpuid(int hartid)
return -ENOENT;
}
-void riscv_cpuid_to_hartid_mask(const struct cpumask *in, struct cpumask *out)
-{
- int cpu;
-
- cpumask_clear(out);
- for_each_cpu(cpu, in)
- cpumask_set_cpu(cpuid_to_hartid_map(cpu), out);
-}
-EXPORT_SYMBOL_GPL(riscv_cpuid_to_hartid_mask);
-
bool arch_match_cpu_phys_id(int cpu, u64 phys_id)
{
return phys_id == cpuid_to_hartid_map(cpu);
diff --git a/arch/riscv/kernel/smpboot.c b/arch/riscv/kernel/smpboot.c
index bd82375db51a..622f226454d5 100644
--- a/arch/riscv/kernel/smpboot.c
+++ b/arch/riscv/kernel/smpboot.c
@@ -96,7 +96,7 @@ void __init setup_smp(void)
if (cpuid >= NR_CPUS) {
pr_warn("Invalid cpuid [%d] for hartid [%d]\n",
cpuid, hart);
- break;
+ continue;
}
cpuid_to_hartid_map(cpuid) = hart;
diff --git a/arch/riscv/kernel/stacktrace.c b/arch/riscv/kernel/stacktrace.c
index 0fcdc0233fac..201ee206fb57 100644
--- a/arch/riscv/kernel/stacktrace.c
+++ b/arch/riscv/kernel/stacktrace.c
@@ -139,12 +139,8 @@ unsigned long __get_wchan(struct task_struct *task)
return pc;
}
-#ifdef CONFIG_STACKTRACE
-
noinline void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie,
struct task_struct *task, struct pt_regs *regs)
{
walk_stackframe(task, regs, consume_entry, cookie);
}
-
-#endif /* CONFIG_STACKTRACE */
diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c
index 0daaa3e4630d..fe92e119e6a3 100644
--- a/arch/riscv/kernel/traps.c
+++ b/arch/riscv/kernel/traps.c
@@ -54,7 +54,7 @@ void die(struct pt_regs *regs, const char *str)
if (panic_on_oops)
panic("Fatal exception");
if (ret != NOTIFY_STOP)
- do_exit(SIGSEGV);
+ make_task_dead(SIGSEGV);
}
void do_trap(struct pt_regs *regs, int signo, int code, unsigned long addr)
diff --git a/arch/riscv/kernel/vmlinux-xip.lds.S b/arch/riscv/kernel/vmlinux-xip.lds.S
index f5ed08262139..75e0fa8a700a 100644
--- a/arch/riscv/kernel/vmlinux-xip.lds.S
+++ b/arch/riscv/kernel/vmlinux-xip.lds.S
@@ -45,7 +45,6 @@ SECTIONS
ENTRY_TEXT
IRQENTRY_TEXT
SOFTIRQENTRY_TEXT
- *(.fixup)
_etext = .;
}
RO_DATA(L1_CACHE_BYTES)
diff --git a/arch/riscv/kernel/vmlinux.lds.S b/arch/riscv/kernel/vmlinux.lds.S
index 5104f3a871e3..4e6c88aa4d87 100644
--- a/arch/riscv/kernel/vmlinux.lds.S
+++ b/arch/riscv/kernel/vmlinux.lds.S
@@ -4,7 +4,7 @@
* Copyright (C) 2017 SiFive
*/
-#define RO_EXCEPTION_TABLE_ALIGN 16
+#define RO_EXCEPTION_TABLE_ALIGN 4
#ifdef CONFIG_XIP_KERNEL
#include "vmlinux-xip.lds.S"
@@ -48,7 +48,6 @@ SECTIONS
ENTRY_TEXT
IRQENTRY_TEXT
SOFTIRQENTRY_TEXT
- *(.fixup)
_etext = .;
}
diff --git a/arch/riscv/kvm/Makefile b/arch/riscv/kvm/Makefile
index 30cdd1df0098..e5c56182f48f 100644
--- a/arch/riscv/kvm/Makefile
+++ b/arch/riscv/kvm/Makefile
@@ -5,14 +5,10 @@
ccflags-y += -I $(srctree)/$(src)
-KVM := ../../../virt/kvm
+include $(srctree)/virt/kvm/Makefile.kvm
obj-$(CONFIG_KVM) += kvm.o
-kvm-y += $(KVM)/kvm_main.o
-kvm-y += $(KVM)/coalesced_mmio.o
-kvm-y += $(KVM)/binary_stats.o
-kvm-y += $(KVM)/eventfd.o
kvm-y += main.o
kvm-y += vm.o
kvm-y += vmid.o
@@ -23,4 +19,8 @@ kvm-y += vcpu_exit.o
kvm-y += vcpu_fp.o
kvm-y += vcpu_switch.o
kvm-y += vcpu_sbi.o
+kvm-$(CONFIG_RISCV_SBI_V01) += vcpu_sbi_v01.o
+kvm-y += vcpu_sbi_base.o
+kvm-y += vcpu_sbi_replace.o
+kvm-y += vcpu_sbi_hsm.o
kvm-y += vcpu_timer.o
diff --git a/arch/riscv/kvm/main.c b/arch/riscv/kvm/main.c
index 421ecf4e6360..2e5ca43c8c49 100644
--- a/arch/riscv/kvm/main.c
+++ b/arch/riscv/kvm/main.c
@@ -58,6 +58,14 @@ int kvm_arch_hardware_enable(void)
void kvm_arch_hardware_disable(void)
{
+ /*
+ * After clearing the hideleg CSR, the host kernel will receive
+ * spurious interrupts if hvip CSR has pending interrupts and the
+ * corresponding enable bits in vsie CSR are asserted. To avoid it,
+ * hvip CSR and vsie CSR must be cleared before clearing hideleg CSR.
+ */
+ csr_write(CSR_VSIE, 0);
+ csr_write(CSR_HVIP, 0);
csr_write(CSR_HEDELEG, 0);
csr_write(CSR_HIDELEG, 0);
}
diff --git a/arch/riscv/kvm/mmu.c b/arch/riscv/kvm/mmu.c
index d81bae8eb55e..f80a34fbf102 100644
--- a/arch/riscv/kvm/mmu.c
+++ b/arch/riscv/kvm/mmu.c
@@ -83,43 +83,6 @@ static int stage2_level_to_page_size(u32 level, unsigned long *out_pgsize)
return 0;
}
-static int stage2_cache_topup(struct kvm_mmu_page_cache *pcache,
- int min, int max)
-{
- void *page;
-
- BUG_ON(max > KVM_MMU_PAGE_CACHE_NR_OBJS);
- if (pcache->nobjs >= min)
- return 0;
- while (pcache->nobjs < max) {
- page = (void *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
- if (!page)
- return -ENOMEM;
- pcache->objects[pcache->nobjs++] = page;
- }
-
- return 0;
-}
-
-static void stage2_cache_flush(struct kvm_mmu_page_cache *pcache)
-{
- while (pcache && pcache->nobjs)
- free_page((unsigned long)pcache->objects[--pcache->nobjs]);
-}
-
-static void *stage2_cache_alloc(struct kvm_mmu_page_cache *pcache)
-{
- void *p;
-
- if (!pcache)
- return NULL;
-
- BUG_ON(!pcache->nobjs);
- p = pcache->objects[--pcache->nobjs];
-
- return p;
-}
-
static bool stage2_get_leaf_entry(struct kvm *kvm, gpa_t addr,
pte_t **ptepp, u32 *ptep_level)
{
@@ -151,7 +114,6 @@ static bool stage2_get_leaf_entry(struct kvm *kvm, gpa_t addr,
static void stage2_remote_tlb_flush(struct kvm *kvm, u32 level, gpa_t addr)
{
- struct cpumask hmask;
unsigned long size = PAGE_SIZE;
struct kvm_vmid *vmid = &kvm->arch.vmid;
@@ -164,14 +126,13 @@ static void stage2_remote_tlb_flush(struct kvm *kvm, u32 level, gpa_t addr)
* where the Guest/VM is running.
*/
preempt_disable();
- riscv_cpuid_to_hartid_mask(cpu_online_mask, &hmask);
- sbi_remote_hfence_gvma_vmid(cpumask_bits(&hmask), addr, size,
+ sbi_remote_hfence_gvma_vmid(cpu_online_mask, addr, size,
READ_ONCE(vmid->vmid));
preempt_enable();
}
static int stage2_set_pte(struct kvm *kvm, u32 level,
- struct kvm_mmu_page_cache *pcache,
+ struct kvm_mmu_memory_cache *pcache,
gpa_t addr, const pte_t *new_pte)
{
u32 current_level = stage2_pgd_levels - 1;
@@ -186,7 +147,9 @@ static int stage2_set_pte(struct kvm *kvm, u32 level,
return -EEXIST;
if (!pte_val(*ptep)) {
- next_ptep = stage2_cache_alloc(pcache);
+ if (!pcache)
+ return -ENOMEM;
+ next_ptep = kvm_mmu_memory_cache_alloc(pcache);
if (!next_ptep)
return -ENOMEM;
*ptep = pfn_pte(PFN_DOWN(__pa(next_ptep)),
@@ -209,7 +172,7 @@ static int stage2_set_pte(struct kvm *kvm, u32 level,
}
static int stage2_map_page(struct kvm *kvm,
- struct kvm_mmu_page_cache *pcache,
+ struct kvm_mmu_memory_cache *pcache,
gpa_t gpa, phys_addr_t hpa,
unsigned long page_size,
bool page_rdonly, bool page_exec)
@@ -384,7 +347,10 @@ static int stage2_ioremap(struct kvm *kvm, gpa_t gpa, phys_addr_t hpa,
int ret = 0;
unsigned long pfn;
phys_addr_t addr, end;
- struct kvm_mmu_page_cache pcache = { 0, };
+ struct kvm_mmu_memory_cache pcache;
+
+ memset(&pcache, 0, sizeof(pcache));
+ pcache.gfp_zero = __GFP_ZERO;
end = (gpa + size + PAGE_SIZE - 1) & PAGE_MASK;
pfn = __phys_to_pfn(hpa);
@@ -395,9 +361,7 @@ static int stage2_ioremap(struct kvm *kvm, gpa_t gpa, phys_addr_t hpa,
if (!writable)
pte = pte_wrprotect(pte);
- ret = stage2_cache_topup(&pcache,
- stage2_pgd_levels,
- KVM_MMU_PAGE_CACHE_NR_OBJS);
+ ret = kvm_mmu_topup_memory_cache(&pcache, stage2_pgd_levels);
if (ret)
goto out;
@@ -411,7 +375,7 @@ static int stage2_ioremap(struct kvm *kvm, gpa_t gpa, phys_addr_t hpa,
}
out:
- stage2_cache_flush(&pcache);
+ kvm_mmu_free_memory_cache(&pcache);
return ret;
}
@@ -453,10 +417,15 @@ void kvm_arch_flush_shadow_all(struct kvm *kvm)
void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
struct kvm_memory_slot *slot)
{
+ gpa_t gpa = slot->base_gfn << PAGE_SHIFT;
+ phys_addr_t size = slot->npages << PAGE_SHIFT;
+
+ spin_lock(&kvm->mmu_lock);
+ stage2_unmap_range(kvm, gpa, size, false);
+ spin_unlock(&kvm->mmu_lock);
}
void kvm_arch_commit_memory_region(struct kvm *kvm,
- const struct kvm_userspace_memory_region *mem,
struct kvm_memory_slot *old,
const struct kvm_memory_slot *new,
enum kvm_mr_change change)
@@ -466,18 +435,18 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
* allocated dirty_bitmap[], dirty pages will be tracked while
* the memory slot is write protected.
*/
- if (change != KVM_MR_DELETE && mem->flags & KVM_MEM_LOG_DIRTY_PAGES)
- stage2_wp_memory_region(kvm, mem->slot);
+ if (change != KVM_MR_DELETE && new->flags & KVM_MEM_LOG_DIRTY_PAGES)
+ stage2_wp_memory_region(kvm, new->id);
}
int kvm_arch_prepare_memory_region(struct kvm *kvm,
- struct kvm_memory_slot *memslot,
- const struct kvm_userspace_memory_region *mem,
+ const struct kvm_memory_slot *old,
+ struct kvm_memory_slot *new,
enum kvm_mr_change change)
{
- hva_t hva = mem->userspace_addr;
- hva_t reg_end = hva + mem->memory_size;
- bool writable = !(mem->flags & KVM_MEM_READONLY);
+ hva_t hva, reg_end, size;
+ gpa_t base_gpa;
+ bool writable;
int ret = 0;
if (change != KVM_MR_CREATE && change != KVM_MR_MOVE &&
@@ -488,10 +457,16 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
* Prevent userspace from creating a memory region outside of the GPA
* space addressable by the KVM guest GPA space.
*/
- if ((memslot->base_gfn + memslot->npages) >=
+ if ((new->base_gfn + new->npages) >=
(stage2_gpa_size >> PAGE_SHIFT))
return -EFAULT;
+ hva = new->userspace_addr;
+ size = new->npages << PAGE_SHIFT;
+ reg_end = hva + size;
+ base_gpa = new->base_gfn << PAGE_SHIFT;
+ writable = !(new->flags & KVM_MEM_READONLY);
+
mmap_read_lock(current->mm);
/*
@@ -527,15 +502,14 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
vm_end = min(reg_end, vma->vm_end);
if (vma->vm_flags & VM_PFNMAP) {
- gpa_t gpa = mem->guest_phys_addr +
- (vm_start - mem->userspace_addr);
+ gpa_t gpa = base_gpa + (vm_start - hva);
phys_addr_t pa;
pa = (phys_addr_t)vma->vm_pgoff << PAGE_SHIFT;
pa += vm_start - vma->vm_start;
/* IO region dirty page logging not allowed */
- if (memslot->flags & KVM_MEM_LOG_DIRTY_PAGES) {
+ if (new->flags & KVM_MEM_LOG_DIRTY_PAGES) {
ret = -EINVAL;
goto out;
}
@@ -553,8 +527,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
spin_lock(&kvm->mmu_lock);
if (ret)
- stage2_unmap_range(kvm, mem->guest_phys_addr,
- mem->memory_size, false);
+ stage2_unmap_range(kvm, base_gpa, size, false);
spin_unlock(&kvm->mmu_lock);
out:
@@ -640,7 +613,7 @@ int kvm_riscv_stage2_map(struct kvm_vcpu *vcpu,
gfn_t gfn = gpa >> PAGE_SHIFT;
struct vm_area_struct *vma;
struct kvm *kvm = vcpu->kvm;
- struct kvm_mmu_page_cache *pcache = &vcpu->arch.mmu_page_cache;
+ struct kvm_mmu_memory_cache *pcache = &vcpu->arch.mmu_page_cache;
bool logging = (memslot->dirty_bitmap &&
!(memslot->flags & KVM_MEM_READONLY)) ? true : false;
unsigned long vma_pagesize, mmu_seq;
@@ -675,8 +648,7 @@ int kvm_riscv_stage2_map(struct kvm_vcpu *vcpu,
}
/* We need minimum second+third level pages */
- ret = stage2_cache_topup(pcache, stage2_pgd_levels,
- KVM_MMU_PAGE_CACHE_NR_OBJS);
+ ret = kvm_mmu_topup_memory_cache(pcache, stage2_pgd_levels);
if (ret) {
kvm_err("Failed to topup stage2 cache\n");
return ret;
@@ -725,11 +697,6 @@ out_unlock:
return ret;
}
-void kvm_riscv_stage2_flush_cache(struct kvm_vcpu *vcpu)
-{
- stage2_cache_flush(&vcpu->arch.mmu_page_cache);
-}
-
int kvm_riscv_stage2_alloc_pgd(struct kvm *kvm)
{
struct page *pgd_page;
@@ -800,3 +767,8 @@ unsigned long kvm_riscv_stage2_mode(void)
{
return stage2_mode >> HGATP_MODE_SHIFT;
}
+
+int kvm_riscv_stage2_gpa_bits(void)
+{
+ return stage2_gpa_bits;
+}
diff --git a/arch/riscv/kvm/vcpu.c b/arch/riscv/kvm/vcpu.c
index e3d3aed46184..0c5239e05721 100644
--- a/arch/riscv/kvm/vcpu.c
+++ b/arch/riscv/kvm/vcpu.c
@@ -53,6 +53,17 @@ static void kvm_riscv_reset_vcpu(struct kvm_vcpu *vcpu)
struct kvm_vcpu_csr *reset_csr = &vcpu->arch.guest_reset_csr;
struct kvm_cpu_context *cntx = &vcpu->arch.guest_context;
struct kvm_cpu_context *reset_cntx = &vcpu->arch.guest_reset_context;
+ bool loaded;
+
+ /**
+ * The preemption should be disabled here because it races with
+ * kvm_sched_out/kvm_sched_in(called from preempt notifiers) which
+ * also calls vcpu_load/put.
+ */
+ get_cpu();
+ loaded = (vcpu->cpu != -1);
+ if (loaded)
+ kvm_arch_vcpu_put(vcpu);
memcpy(csr, reset_csr, sizeof(*csr));
@@ -64,6 +75,11 @@ static void kvm_riscv_reset_vcpu(struct kvm_vcpu *vcpu)
WRITE_ONCE(vcpu->arch.irqs_pending, 0);
WRITE_ONCE(vcpu->arch.irqs_pending_mask, 0);
+
+ /* Reset the guest CSRs for hotplug usecase */
+ if (loaded)
+ kvm_arch_vcpu_load(vcpu, smp_processor_id());
+ put_cpu();
}
int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
@@ -77,6 +93,7 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
/* Mark this VCPU never ran */
vcpu->arch.ran_atleast_once = false;
+ vcpu->arch.mmu_page_cache.gfp_zero = __GFP_ZERO;
/* Setup ISA features available to VCPU */
vcpu->arch.isa = riscv_isa_extension_base(NULL) & KVM_RISCV_ISA_ALLOWED;
@@ -100,6 +117,13 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
{
+ /**
+ * vcpu with id 0 is the designated boot cpu.
+ * Keep all vcpus with non-zero id in power-off state so that
+ * they can be brought up using SBI HSM extension.
+ */
+ if (vcpu->vcpu_idx != 0)
+ kvm_riscv_vcpu_power_off(vcpu);
}
void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
@@ -107,8 +131,8 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
/* Cleanup VCPU timer */
kvm_riscv_vcpu_timer_deinit(vcpu);
- /* Flush the pages pre-allocated for Stage2 page table mappings */
- kvm_riscv_stage2_flush_cache(vcpu);
+ /* Free unused pages pre-allocated for Stage2 page table mappings */
+ kvm_mmu_free_memory_cache(&vcpu->arch.mmu_page_cache);
}
int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
@@ -740,7 +764,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
* Ensure we set mode to IN_GUEST_MODE after we disable
* interrupts and before the final VCPU requests check.
* See the comment in kvm_vcpu_exiting_guest_mode() and
- * Documentation/virtual/kvm/vcpu-requests.rst
+ * Documentation/virt/kvm/vcpu-requests.rst
*/
vcpu->mode = IN_GUEST_MODE;
diff --git a/arch/riscv/kvm/vcpu_exit.c b/arch/riscv/kvm/vcpu_exit.c
index 7f2d742ae4c6..571f319e995a 100644
--- a/arch/riscv/kvm/vcpu_exit.c
+++ b/arch/riscv/kvm/vcpu_exit.c
@@ -146,7 +146,7 @@ static int system_opcode_insn(struct kvm_vcpu *vcpu,
vcpu->stat.wfi_exit_stat++;
if (!kvm_arch_vcpu_runnable(vcpu)) {
srcu_read_unlock(&vcpu->kvm->srcu, vcpu->arch.srcu_idx);
- kvm_vcpu_block(vcpu);
+ kvm_vcpu_halt(vcpu);
vcpu->arch.srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
kvm_clear_request(KVM_REQ_UNHALT, vcpu);
}
diff --git a/arch/riscv/kvm/vcpu_fp.c b/arch/riscv/kvm/vcpu_fp.c
index 1b070152578f..4449a976e5a6 100644
--- a/arch/riscv/kvm/vcpu_fp.c
+++ b/arch/riscv/kvm/vcpu_fp.c
@@ -26,7 +26,7 @@ void kvm_riscv_vcpu_fp_reset(struct kvm_vcpu *vcpu)
cntx->sstatus |= SR_FS_OFF;
}
-void kvm_riscv_vcpu_fp_clean(struct kvm_cpu_context *cntx)
+static void kvm_riscv_vcpu_fp_clean(struct kvm_cpu_context *cntx)
{
cntx->sstatus &= ~SR_FS;
cntx->sstatus |= SR_FS_CLEAN;
diff --git a/arch/riscv/kvm/vcpu_sbi.c b/arch/riscv/kvm/vcpu_sbi.c
index eb3c045edf11..78aa3db76225 100644
--- a/arch/riscv/kvm/vcpu_sbi.c
+++ b/arch/riscv/kvm/vcpu_sbi.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/**
+/*
* Copyright (c) 2019 Western Digital Corporation or its affiliates.
*
* Authors:
@@ -9,15 +9,58 @@
#include <linux/errno.h>
#include <linux/err.h>
#include <linux/kvm_host.h>
-#include <asm/csr.h>
#include <asm/sbi.h>
-#include <asm/kvm_vcpu_timer.h>
+#include <asm/kvm_vcpu_sbi.h>
-#define SBI_VERSION_MAJOR 0
-#define SBI_VERSION_MINOR 1
+static int kvm_linux_err_map_sbi(int err)
+{
+ switch (err) {
+ case 0:
+ return SBI_SUCCESS;
+ case -EPERM:
+ return SBI_ERR_DENIED;
+ case -EINVAL:
+ return SBI_ERR_INVALID_PARAM;
+ case -EFAULT:
+ return SBI_ERR_INVALID_ADDRESS;
+ case -EOPNOTSUPP:
+ return SBI_ERR_NOT_SUPPORTED;
+ case -EALREADY:
+ return SBI_ERR_ALREADY_AVAILABLE;
+ default:
+ return SBI_ERR_FAILURE;
+ };
+}
-static void kvm_riscv_vcpu_sbi_forward(struct kvm_vcpu *vcpu,
- struct kvm_run *run)
+#ifdef CONFIG_RISCV_SBI_V01
+extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_v01;
+#else
+static const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_v01 = {
+ .extid_start = -1UL,
+ .extid_end = -1UL,
+ .handler = NULL,
+};
+#endif
+extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_base;
+extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_time;
+extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_ipi;
+extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_rfence;
+extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_hsm;
+extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_experimental;
+extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_vendor;
+
+static const struct kvm_vcpu_sbi_extension *sbi_ext[] = {
+ &vcpu_sbi_ext_v01,
+ &vcpu_sbi_ext_base,
+ &vcpu_sbi_ext_time,
+ &vcpu_sbi_ext_ipi,
+ &vcpu_sbi_ext_rfence,
+ &vcpu_sbi_ext_hsm,
+ &vcpu_sbi_ext_experimental,
+ &vcpu_sbi_ext_vendor,
+};
+
+void kvm_riscv_vcpu_sbi_forward(struct kvm_vcpu *vcpu, struct kvm_run *run)
{
struct kvm_cpu_context *cp = &vcpu->arch.guest_context;
@@ -55,131 +98,73 @@ int kvm_riscv_vcpu_sbi_return(struct kvm_vcpu *vcpu, struct kvm_run *run)
return 0;
}
-#ifdef CONFIG_RISCV_SBI_V01
-
-static void kvm_sbi_system_shutdown(struct kvm_vcpu *vcpu,
- struct kvm_run *run, u32 type)
+const struct kvm_vcpu_sbi_extension *kvm_vcpu_sbi_find_ext(unsigned long extid)
{
- int i;
- struct kvm_vcpu *tmp;
+ int i = 0;
- kvm_for_each_vcpu(i, tmp, vcpu->kvm)
- tmp->arch.power_off = true;
- kvm_make_all_cpus_request(vcpu->kvm, KVM_REQ_SLEEP);
+ for (i = 0; i < ARRAY_SIZE(sbi_ext); i++) {
+ if (sbi_ext[i]->extid_start <= extid &&
+ sbi_ext[i]->extid_end >= extid)
+ return sbi_ext[i];
+ }
- memset(&run->system_event, 0, sizeof(run->system_event));
- run->system_event.type = type;
- run->exit_reason = KVM_EXIT_SYSTEM_EVENT;
+ return NULL;
}
int kvm_riscv_vcpu_sbi_ecall(struct kvm_vcpu *vcpu, struct kvm_run *run)
{
- ulong hmask;
- int i, ret = 1;
- u64 next_cycle;
- struct kvm_vcpu *rvcpu;
+ int ret = 1;
bool next_sepc = true;
- struct cpumask cm, hm;
- struct kvm *kvm = vcpu->kvm;
- struct kvm_cpu_trap utrap = { 0 };
+ bool userspace_exit = false;
struct kvm_cpu_context *cp = &vcpu->arch.guest_context;
+ const struct kvm_vcpu_sbi_extension *sbi_ext;
+ struct kvm_cpu_trap utrap = { 0 };
+ unsigned long out_val = 0;
+ bool ext_is_v01 = false;
- if (!cp)
- return -EINVAL;
-
- switch (cp->a7) {
- case SBI_EXT_0_1_CONSOLE_GETCHAR:
- case SBI_EXT_0_1_CONSOLE_PUTCHAR:
- /*
- * The CONSOLE_GETCHAR/CONSOLE_PUTCHAR SBI calls cannot be
- * handled in kernel so we forward these to user-space
- */
- kvm_riscv_vcpu_sbi_forward(vcpu, run);
- next_sepc = false;
- ret = 0;
- break;
- case SBI_EXT_0_1_SET_TIMER:
-#if __riscv_xlen == 32
- next_cycle = ((u64)cp->a1 << 32) | (u64)cp->a0;
-#else
- next_cycle = (u64)cp->a0;
+ sbi_ext = kvm_vcpu_sbi_find_ext(cp->a7);
+ if (sbi_ext && sbi_ext->handler) {
+#ifdef CONFIG_RISCV_SBI_V01
+ if (cp->a7 >= SBI_EXT_0_1_SET_TIMER &&
+ cp->a7 <= SBI_EXT_0_1_SHUTDOWN)
+ ext_is_v01 = true;
#endif
- kvm_riscv_vcpu_timer_next_event(vcpu, next_cycle);
- break;
- case SBI_EXT_0_1_CLEAR_IPI:
- kvm_riscv_vcpu_unset_interrupt(vcpu, IRQ_VS_SOFT);
- break;
- case SBI_EXT_0_1_SEND_IPI:
- if (cp->a0)
- hmask = kvm_riscv_vcpu_unpriv_read(vcpu, false, cp->a0,
- &utrap);
- else
- hmask = (1UL << atomic_read(&kvm->online_vcpus)) - 1;
- if (utrap.scause) {
- utrap.sepc = cp->sepc;
- kvm_riscv_vcpu_trap_redirect(vcpu, &utrap);
- next_sepc = false;
- break;
- }
- for_each_set_bit(i, &hmask, BITS_PER_LONG) {
- rvcpu = kvm_get_vcpu_by_id(vcpu->kvm, i);
- kvm_riscv_vcpu_set_interrupt(rvcpu, IRQ_VS_SOFT);
- }
- break;
- case SBI_EXT_0_1_SHUTDOWN:
- kvm_sbi_system_shutdown(vcpu, run, KVM_SYSTEM_EVENT_SHUTDOWN);
- next_sepc = false;
- ret = 0;
- break;
- case SBI_EXT_0_1_REMOTE_FENCE_I:
- case SBI_EXT_0_1_REMOTE_SFENCE_VMA:
- case SBI_EXT_0_1_REMOTE_SFENCE_VMA_ASID:
- if (cp->a0)
- hmask = kvm_riscv_vcpu_unpriv_read(vcpu, false, cp->a0,
- &utrap);
- else
- hmask = (1UL << atomic_read(&kvm->online_vcpus)) - 1;
- if (utrap.scause) {
- utrap.sepc = cp->sepc;
- kvm_riscv_vcpu_trap_redirect(vcpu, &utrap);
- next_sepc = false;
- break;
- }
- cpumask_clear(&cm);
- for_each_set_bit(i, &hmask, BITS_PER_LONG) {
- rvcpu = kvm_get_vcpu_by_id(vcpu->kvm, i);
- if (rvcpu->cpu < 0)
- continue;
- cpumask_set_cpu(rvcpu->cpu, &cm);
- }
- riscv_cpuid_to_hartid_mask(&cm, &hm);
- if (cp->a7 == SBI_EXT_0_1_REMOTE_FENCE_I)
- sbi_remote_fence_i(cpumask_bits(&hm));
- else if (cp->a7 == SBI_EXT_0_1_REMOTE_SFENCE_VMA)
- sbi_remote_hfence_vvma(cpumask_bits(&hm),
- cp->a1, cp->a2);
- else
- sbi_remote_hfence_vvma_asid(cpumask_bits(&hm),
- cp->a1, cp->a2, cp->a3);
- break;
- default:
+ ret = sbi_ext->handler(vcpu, run, &out_val, &utrap, &userspace_exit);
+ } else {
/* Return error for unsupported SBI calls */
cp->a0 = SBI_ERR_NOT_SUPPORTED;
- break;
+ goto ecall_done;
+ }
+
+ /* Handle special error cases i.e trap, exit or userspace forward */
+ if (utrap.scause) {
+ /* No need to increment sepc or exit ioctl loop */
+ ret = 1;
+ utrap.sepc = cp->sepc;
+ kvm_riscv_vcpu_trap_redirect(vcpu, &utrap);
+ next_sepc = false;
+ goto ecall_done;
}
+ /* Exit ioctl loop or Propagate the error code the guest */
+ if (userspace_exit) {
+ next_sepc = false;
+ ret = 0;
+ } else {
+ /**
+ * SBI extension handler always returns an Linux error code. Convert
+ * it to the SBI specific error code that can be propagated the SBI
+ * caller.
+ */
+ ret = kvm_linux_err_map_sbi(ret);
+ cp->a0 = ret;
+ ret = 1;
+ }
+ecall_done:
if (next_sepc)
cp->sepc += 4;
+ if (!ext_is_v01)
+ cp->a1 = out_val;
return ret;
}
-
-#else
-
-int kvm_riscv_vcpu_sbi_ecall(struct kvm_vcpu *vcpu, struct kvm_run *run)
-{
- kvm_riscv_vcpu_sbi_forward(vcpu, run);
- return 0;
-}
-
-#endif
diff --git a/arch/riscv/kvm/vcpu_sbi_base.c b/arch/riscv/kvm/vcpu_sbi_base.c
new file mode 100644
index 000000000000..4ecf377f483b
--- /dev/null
+++ b/arch/riscv/kvm/vcpu_sbi_base.c
@@ -0,0 +1,99 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2021 Western Digital Corporation or its affiliates.
+ *
+ * Authors:
+ * Atish Patra <atish.patra@wdc.com>
+ */
+
+#include <linux/errno.h>
+#include <linux/err.h>
+#include <linux/kvm_host.h>
+#include <asm/csr.h>
+#include <asm/sbi.h>
+#include <asm/kvm_vcpu_timer.h>
+#include <asm/kvm_vcpu_sbi.h>
+
+static int kvm_sbi_ext_base_handler(struct kvm_vcpu *vcpu, struct kvm_run *run,
+ unsigned long *out_val,
+ struct kvm_cpu_trap *trap, bool *exit)
+{
+ int ret = 0;
+ struct kvm_cpu_context *cp = &vcpu->arch.guest_context;
+ struct sbiret ecall_ret;
+
+ switch (cp->a6) {
+ case SBI_EXT_BASE_GET_SPEC_VERSION:
+ *out_val = (KVM_SBI_VERSION_MAJOR <<
+ SBI_SPEC_VERSION_MAJOR_SHIFT) |
+ KVM_SBI_VERSION_MINOR;
+ break;
+ case SBI_EXT_BASE_GET_IMP_ID:
+ *out_val = KVM_SBI_IMPID;
+ break;
+ case SBI_EXT_BASE_GET_IMP_VERSION:
+ *out_val = 0;
+ break;
+ case SBI_EXT_BASE_PROBE_EXT:
+ if ((cp->a0 >= SBI_EXT_EXPERIMENTAL_START &&
+ cp->a0 <= SBI_EXT_EXPERIMENTAL_END) ||
+ (cp->a0 >= SBI_EXT_VENDOR_START &&
+ cp->a0 <= SBI_EXT_VENDOR_END)) {
+ /*
+ * For experimental/vendor extensions
+ * forward it to the userspace
+ */
+ kvm_riscv_vcpu_sbi_forward(vcpu, run);
+ *exit = true;
+ } else
+ *out_val = kvm_vcpu_sbi_find_ext(cp->a0) ? 1 : 0;
+ break;
+ case SBI_EXT_BASE_GET_MVENDORID:
+ case SBI_EXT_BASE_GET_MARCHID:
+ case SBI_EXT_BASE_GET_MIMPID:
+ ecall_ret = sbi_ecall(SBI_EXT_BASE, cp->a6, 0, 0, 0, 0, 0, 0);
+ if (!ecall_ret.error)
+ *out_val = ecall_ret.value;
+ /*TODO: We are unnecessarily converting the error twice */
+ ret = sbi_err_map_linux_errno(ecall_ret.error);
+ break;
+ default:
+ ret = -EOPNOTSUPP;
+ break;
+ }
+
+ return ret;
+}
+
+const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_base = {
+ .extid_start = SBI_EXT_BASE,
+ .extid_end = SBI_EXT_BASE,
+ .handler = kvm_sbi_ext_base_handler,
+};
+
+static int kvm_sbi_ext_forward_handler(struct kvm_vcpu *vcpu,
+ struct kvm_run *run,
+ unsigned long *out_val,
+ struct kvm_cpu_trap *utrap,
+ bool *exit)
+{
+ /*
+ * Both SBI experimental and vendor extensions are
+ * unconditionally forwarded to userspace.
+ */
+ kvm_riscv_vcpu_sbi_forward(vcpu, run);
+ *exit = true;
+ return 0;
+}
+
+const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_experimental = {
+ .extid_start = SBI_EXT_EXPERIMENTAL_START,
+ .extid_end = SBI_EXT_EXPERIMENTAL_END,
+ .handler = kvm_sbi_ext_forward_handler,
+};
+
+const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_vendor = {
+ .extid_start = SBI_EXT_VENDOR_START,
+ .extid_end = SBI_EXT_VENDOR_END,
+ .handler = kvm_sbi_ext_forward_handler,
+};
diff --git a/arch/riscv/kvm/vcpu_sbi_hsm.c b/arch/riscv/kvm/vcpu_sbi_hsm.c
new file mode 100644
index 000000000000..2e383687fa48
--- /dev/null
+++ b/arch/riscv/kvm/vcpu_sbi_hsm.c
@@ -0,0 +1,105 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2021 Western Digital Corporation or its affiliates.
+ *
+ * Authors:
+ * Atish Patra <atish.patra@wdc.com>
+ */
+
+#include <linux/errno.h>
+#include <linux/err.h>
+#include <linux/kvm_host.h>
+#include <asm/csr.h>
+#include <asm/sbi.h>
+#include <asm/kvm_vcpu_sbi.h>
+
+static int kvm_sbi_hsm_vcpu_start(struct kvm_vcpu *vcpu)
+{
+ struct kvm_cpu_context *reset_cntx;
+ struct kvm_cpu_context *cp = &vcpu->arch.guest_context;
+ struct kvm_vcpu *target_vcpu;
+ unsigned long target_vcpuid = cp->a0;
+
+ target_vcpu = kvm_get_vcpu_by_id(vcpu->kvm, target_vcpuid);
+ if (!target_vcpu)
+ return -EINVAL;
+ if (!target_vcpu->arch.power_off)
+ return -EALREADY;
+
+ reset_cntx = &target_vcpu->arch.guest_reset_context;
+ /* start address */
+ reset_cntx->sepc = cp->a1;
+ /* target vcpu id to start */
+ reset_cntx->a0 = target_vcpuid;
+ /* private data passed from kernel */
+ reset_cntx->a1 = cp->a2;
+ kvm_make_request(KVM_REQ_VCPU_RESET, target_vcpu);
+
+ kvm_riscv_vcpu_power_on(target_vcpu);
+
+ return 0;
+}
+
+static int kvm_sbi_hsm_vcpu_stop(struct kvm_vcpu *vcpu)
+{
+ if (vcpu->arch.power_off)
+ return -EINVAL;
+
+ kvm_riscv_vcpu_power_off(vcpu);
+
+ return 0;
+}
+
+static int kvm_sbi_hsm_vcpu_get_status(struct kvm_vcpu *vcpu)
+{
+ struct kvm_cpu_context *cp = &vcpu->arch.guest_context;
+ unsigned long target_vcpuid = cp->a0;
+ struct kvm_vcpu *target_vcpu;
+
+ target_vcpu = kvm_get_vcpu_by_id(vcpu->kvm, target_vcpuid);
+ if (!target_vcpu)
+ return -EINVAL;
+ if (!target_vcpu->arch.power_off)
+ return SBI_HSM_HART_STATUS_STARTED;
+ else
+ return SBI_HSM_HART_STATUS_STOPPED;
+}
+
+static int kvm_sbi_ext_hsm_handler(struct kvm_vcpu *vcpu, struct kvm_run *run,
+ unsigned long *out_val,
+ struct kvm_cpu_trap *utrap,
+ bool *exit)
+{
+ int ret = 0;
+ struct kvm_cpu_context *cp = &vcpu->arch.guest_context;
+ struct kvm *kvm = vcpu->kvm;
+ unsigned long funcid = cp->a6;
+
+ switch (funcid) {
+ case SBI_EXT_HSM_HART_START:
+ mutex_lock(&kvm->lock);
+ ret = kvm_sbi_hsm_vcpu_start(vcpu);
+ mutex_unlock(&kvm->lock);
+ break;
+ case SBI_EXT_HSM_HART_STOP:
+ ret = kvm_sbi_hsm_vcpu_stop(vcpu);
+ break;
+ case SBI_EXT_HSM_HART_STATUS:
+ ret = kvm_sbi_hsm_vcpu_get_status(vcpu);
+ if (ret >= 0) {
+ *out_val = ret;
+ ret = 0;
+ }
+ break;
+ default:
+ ret = -EOPNOTSUPP;
+ }
+
+ return ret;
+}
+
+const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_hsm = {
+ .extid_start = SBI_EXT_HSM,
+ .extid_end = SBI_EXT_HSM,
+ .handler = kvm_sbi_ext_hsm_handler,
+};
diff --git a/arch/riscv/kvm/vcpu_sbi_replace.c b/arch/riscv/kvm/vcpu_sbi_replace.c
new file mode 100644
index 000000000000..1bc0608a5bfd
--- /dev/null
+++ b/arch/riscv/kvm/vcpu_sbi_replace.c
@@ -0,0 +1,132 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2021 Western Digital Corporation or its affiliates.
+ *
+ * Authors:
+ * Atish Patra <atish.patra@wdc.com>
+ */
+
+#include <linux/errno.h>
+#include <linux/err.h>
+#include <linux/kvm_host.h>
+#include <asm/csr.h>
+#include <asm/sbi.h>
+#include <asm/kvm_vcpu_timer.h>
+#include <asm/kvm_vcpu_sbi.h>
+
+static int kvm_sbi_ext_time_handler(struct kvm_vcpu *vcpu, struct kvm_run *run,
+ unsigned long *out_val,
+ struct kvm_cpu_trap *utrap, bool *exit)
+{
+ int ret = 0;
+ struct kvm_cpu_context *cp = &vcpu->arch.guest_context;
+ u64 next_cycle;
+
+ if (cp->a6 != SBI_EXT_TIME_SET_TIMER)
+ return -EINVAL;
+
+#if __riscv_xlen == 32
+ next_cycle = ((u64)cp->a1 << 32) | (u64)cp->a0;
+#else
+ next_cycle = (u64)cp->a0;
+#endif
+ kvm_riscv_vcpu_timer_next_event(vcpu, next_cycle);
+
+ return ret;
+}
+
+const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_time = {
+ .extid_start = SBI_EXT_TIME,
+ .extid_end = SBI_EXT_TIME,
+ .handler = kvm_sbi_ext_time_handler,
+};
+
+static int kvm_sbi_ext_ipi_handler(struct kvm_vcpu *vcpu, struct kvm_run *run,
+ unsigned long *out_val,
+ struct kvm_cpu_trap *utrap, bool *exit)
+{
+ int ret = 0;
+ unsigned long i;
+ struct kvm_vcpu *tmp;
+ struct kvm_cpu_context *cp = &vcpu->arch.guest_context;
+ unsigned long hmask = cp->a0;
+ unsigned long hbase = cp->a1;
+
+ if (cp->a6 != SBI_EXT_IPI_SEND_IPI)
+ return -EINVAL;
+
+ kvm_for_each_vcpu(i, tmp, vcpu->kvm) {
+ if (hbase != -1UL) {
+ if (tmp->vcpu_id < hbase)
+ continue;
+ if (!(hmask & (1UL << (tmp->vcpu_id - hbase))))
+ continue;
+ }
+ ret = kvm_riscv_vcpu_set_interrupt(tmp, IRQ_VS_SOFT);
+ if (ret < 0)
+ break;
+ }
+
+ return ret;
+}
+
+const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_ipi = {
+ .extid_start = SBI_EXT_IPI,
+ .extid_end = SBI_EXT_IPI,
+ .handler = kvm_sbi_ext_ipi_handler,
+};
+
+static int kvm_sbi_ext_rfence_handler(struct kvm_vcpu *vcpu, struct kvm_run *run,
+ unsigned long *out_val,
+ struct kvm_cpu_trap *utrap, bool *exit)
+{
+ int ret = 0;
+ unsigned long i;
+ struct cpumask cm;
+ struct kvm_vcpu *tmp;
+ struct kvm_cpu_context *cp = &vcpu->arch.guest_context;
+ unsigned long hmask = cp->a0;
+ unsigned long hbase = cp->a1;
+ unsigned long funcid = cp->a6;
+
+ cpumask_clear(&cm);
+ kvm_for_each_vcpu(i, tmp, vcpu->kvm) {
+ if (hbase != -1UL) {
+ if (tmp->vcpu_id < hbase)
+ continue;
+ if (!(hmask & (1UL << (tmp->vcpu_id - hbase))))
+ continue;
+ }
+ if (tmp->cpu < 0)
+ continue;
+ cpumask_set_cpu(tmp->cpu, &cm);
+ }
+
+ switch (funcid) {
+ case SBI_EXT_RFENCE_REMOTE_FENCE_I:
+ ret = sbi_remote_fence_i(&cm);
+ break;
+ case SBI_EXT_RFENCE_REMOTE_SFENCE_VMA:
+ ret = sbi_remote_hfence_vvma(&cm, cp->a2, cp->a3);
+ break;
+ case SBI_EXT_RFENCE_REMOTE_SFENCE_VMA_ASID:
+ ret = sbi_remote_hfence_vvma_asid(&cm, cp->a2,
+ cp->a3, cp->a4);
+ break;
+ case SBI_EXT_RFENCE_REMOTE_HFENCE_GVMA:
+ case SBI_EXT_RFENCE_REMOTE_HFENCE_GVMA_VMID:
+ case SBI_EXT_RFENCE_REMOTE_HFENCE_VVMA:
+ case SBI_EXT_RFENCE_REMOTE_HFENCE_VVMA_ASID:
+ /* TODO: implement for nested hypervisor case */
+ default:
+ ret = -EOPNOTSUPP;
+ }
+
+ return ret;
+}
+
+const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_rfence = {
+ .extid_start = SBI_EXT_RFENCE,
+ .extid_end = SBI_EXT_RFENCE,
+ .handler = kvm_sbi_ext_rfence_handler,
+};
diff --git a/arch/riscv/kvm/vcpu_sbi_v01.c b/arch/riscv/kvm/vcpu_sbi_v01.c
new file mode 100644
index 000000000000..07e2de14433a
--- /dev/null
+++ b/arch/riscv/kvm/vcpu_sbi_v01.c
@@ -0,0 +1,123 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2021 Western Digital Corporation or its affiliates.
+ *
+ * Authors:
+ * Atish Patra <atish.patra@wdc.com>
+ */
+
+#include <linux/errno.h>
+#include <linux/err.h>
+#include <linux/kvm_host.h>
+#include <asm/csr.h>
+#include <asm/sbi.h>
+#include <asm/kvm_vcpu_timer.h>
+#include <asm/kvm_vcpu_sbi.h>
+
+static void kvm_sbi_system_shutdown(struct kvm_vcpu *vcpu,
+ struct kvm_run *run, u32 type)
+{
+ unsigned long i;
+ struct kvm_vcpu *tmp;
+
+ kvm_for_each_vcpu(i, tmp, vcpu->kvm)
+ tmp->arch.power_off = true;
+ kvm_make_all_cpus_request(vcpu->kvm, KVM_REQ_SLEEP);
+
+ memset(&run->system_event, 0, sizeof(run->system_event));
+ run->system_event.type = type;
+ run->exit_reason = KVM_EXIT_SYSTEM_EVENT;
+}
+
+static int kvm_sbi_ext_v01_handler(struct kvm_vcpu *vcpu, struct kvm_run *run,
+ unsigned long *out_val,
+ struct kvm_cpu_trap *utrap,
+ bool *exit)
+{
+ ulong hmask;
+ int i, ret = 0;
+ u64 next_cycle;
+ struct kvm_vcpu *rvcpu;
+ struct cpumask cm;
+ struct kvm *kvm = vcpu->kvm;
+ struct kvm_cpu_context *cp = &vcpu->arch.guest_context;
+
+ switch (cp->a7) {
+ case SBI_EXT_0_1_CONSOLE_GETCHAR:
+ case SBI_EXT_0_1_CONSOLE_PUTCHAR:
+ /*
+ * The CONSOLE_GETCHAR/CONSOLE_PUTCHAR SBI calls cannot be
+ * handled in kernel so we forward these to user-space
+ */
+ kvm_riscv_vcpu_sbi_forward(vcpu, run);
+ *exit = true;
+ break;
+ case SBI_EXT_0_1_SET_TIMER:
+#if __riscv_xlen == 32
+ next_cycle = ((u64)cp->a1 << 32) | (u64)cp->a0;
+#else
+ next_cycle = (u64)cp->a0;
+#endif
+ ret = kvm_riscv_vcpu_timer_next_event(vcpu, next_cycle);
+ break;
+ case SBI_EXT_0_1_CLEAR_IPI:
+ ret = kvm_riscv_vcpu_unset_interrupt(vcpu, IRQ_VS_SOFT);
+ break;
+ case SBI_EXT_0_1_SEND_IPI:
+ if (cp->a0)
+ hmask = kvm_riscv_vcpu_unpriv_read(vcpu, false, cp->a0,
+ utrap);
+ else
+ hmask = (1UL << atomic_read(&kvm->online_vcpus)) - 1;
+ if (utrap->scause)
+ break;
+
+ for_each_set_bit(i, &hmask, BITS_PER_LONG) {
+ rvcpu = kvm_get_vcpu_by_id(vcpu->kvm, i);
+ ret = kvm_riscv_vcpu_set_interrupt(rvcpu, IRQ_VS_SOFT);
+ if (ret < 0)
+ break;
+ }
+ break;
+ case SBI_EXT_0_1_SHUTDOWN:
+ kvm_sbi_system_shutdown(vcpu, run, KVM_SYSTEM_EVENT_SHUTDOWN);
+ *exit = true;
+ break;
+ case SBI_EXT_0_1_REMOTE_FENCE_I:
+ case SBI_EXT_0_1_REMOTE_SFENCE_VMA:
+ case SBI_EXT_0_1_REMOTE_SFENCE_VMA_ASID:
+ if (cp->a0)
+ hmask = kvm_riscv_vcpu_unpriv_read(vcpu, false, cp->a0,
+ utrap);
+ else
+ hmask = (1UL << atomic_read(&kvm->online_vcpus)) - 1;
+ if (utrap->scause)
+ break;
+
+ cpumask_clear(&cm);
+ for_each_set_bit(i, &hmask, BITS_PER_LONG) {
+ rvcpu = kvm_get_vcpu_by_id(vcpu->kvm, i);
+ if (rvcpu->cpu < 0)
+ continue;
+ cpumask_set_cpu(rvcpu->cpu, &cm);
+ }
+ if (cp->a7 == SBI_EXT_0_1_REMOTE_FENCE_I)
+ ret = sbi_remote_fence_i(&cm);
+ else if (cp->a7 == SBI_EXT_0_1_REMOTE_SFENCE_VMA)
+ ret = sbi_remote_hfence_vvma(&cm, cp->a1, cp->a2);
+ else
+ ret = sbi_remote_hfence_vvma_asid(&cm, cp->a1, cp->a2, cp->a3);
+ break;
+ default:
+ ret = -EINVAL;
+ break;
+ };
+
+ return ret;
+}
+
+const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_v01 = {
+ .extid_start = SBI_EXT_0_1_SET_TIMER,
+ .extid_end = SBI_EXT_0_1_SHUTDOWN,
+ .handler = kvm_sbi_ext_v01_handler,
+};
diff --git a/arch/riscv/kvm/vm.c b/arch/riscv/kvm/vm.c
index 26399df15b63..c768f75279ef 100644
--- a/arch/riscv/kvm/vm.c
+++ b/arch/riscv/kvm/vm.c
@@ -46,15 +46,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
void kvm_arch_destroy_vm(struct kvm *kvm)
{
- int i;
-
- for (i = 0; i < KVM_MAX_VCPUS; ++i) {
- if (kvm->vcpus[i]) {
- kvm_vcpu_destroy(kvm->vcpus[i]);
- kvm->vcpus[i] = NULL;
- }
- }
- atomic_set(&kvm->online_vcpus, 0);
+ kvm_destroy_vcpus(kvm);
}
int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
@@ -74,7 +66,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
r = 1;
break;
case KVM_CAP_NR_VCPUS:
- r = num_online_cpus();
+ r = min_t(unsigned int, num_online_cpus(), KVM_MAX_VCPUS);
break;
case KVM_CAP_MAX_VCPUS:
r = KVM_MAX_VCPUS;
@@ -82,6 +74,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_NR_MEMSLOTS:
r = KVM_USER_MEM_SLOTS;
break;
+ case KVM_CAP_VM_GPA_BITS:
+ r = kvm_riscv_stage2_gpa_bits();
+ break;
default:
r = 0;
break;
diff --git a/arch/riscv/kvm/vmid.c b/arch/riscv/kvm/vmid.c
index 2c6253b293bc..2fa4f7b1813d 100644
--- a/arch/riscv/kvm/vmid.c
+++ b/arch/riscv/kvm/vmid.c
@@ -65,9 +65,8 @@ bool kvm_riscv_stage2_vmid_ver_changed(struct kvm_vmid *vmid)
void kvm_riscv_stage2_vmid_update(struct kvm_vcpu *vcpu)
{
- int i;
+ unsigned long i;
struct kvm_vcpu *v;
- struct cpumask hmask;
struct kvm_vmid *vmid = &vcpu->kvm->arch.vmid;
if (!kvm_riscv_stage2_vmid_ver_changed(vmid))
@@ -102,8 +101,7 @@ void kvm_riscv_stage2_vmid_update(struct kvm_vcpu *vcpu)
* running, we force VM exits on all host CPUs using IPI and
* flush all Guest TLBs.
*/
- riscv_cpuid_to_hartid_mask(cpu_online_mask, &hmask);
- sbi_remote_hfence_gvma(cpumask_bits(&hmask), 0, 0);
+ sbi_remote_hfence_gvma(cpu_online_mask, 0, 0);
}
vmid->vmid = vmid_next;
diff --git a/arch/riscv/lib/uaccess.S b/arch/riscv/lib/uaccess.S
index 63bc691cff91..8c475f4da308 100644
--- a/arch/riscv/lib/uaccess.S
+++ b/arch/riscv/lib/uaccess.S
@@ -1,15 +1,13 @@
#include <linux/linkage.h>
#include <asm-generic/export.h>
#include <asm/asm.h>
+#include <asm/asm-extable.h>
#include <asm/csr.h>
.macro fixup op reg addr lbl
100:
\op \reg, \addr
- .section __ex_table,"a"
- .balign RISCV_SZPTR
- RISCV_PTR 100b, \lbl
- .previous
+ _asm_extable 100b, \lbl
.endm
ENTRY(__asm_copy_to_user)
@@ -173,6 +171,13 @@ ENTRY(__asm_copy_from_user)
csrc CSR_STATUS, t6
li a0, 0
ret
+
+ /* Exception fixup code */
+10:
+ /* Disable access to user memory */
+ csrs CSR_STATUS, t6
+ mv a0, t5
+ ret
ENDPROC(__asm_copy_to_user)
ENDPROC(__asm_copy_from_user)
EXPORT_SYMBOL(__asm_copy_to_user)
@@ -218,19 +223,12 @@ ENTRY(__clear_user)
addi a0, a0, 1
bltu a0, a3, 5b
j 3b
-ENDPROC(__clear_user)
-EXPORT_SYMBOL(__clear_user)
- .section .fixup,"ax"
- .balign 4
- /* Fixup code for __copy_user(10) and __clear_user(11) */
-10:
- /* Disable access to user memory */
- csrs CSR_STATUS, t6
- mv a0, t5
- ret
+ /* Exception fixup code */
11:
+ /* Disable access to user memory */
csrs CSR_STATUS, t6
mv a0, a1
ret
- .previous
+ENDPROC(__clear_user)
+EXPORT_SYMBOL(__clear_user)
diff --git a/arch/riscv/mm/cacheflush.c b/arch/riscv/mm/cacheflush.c
index 89f81067e09e..6cb7d96ad9c7 100644
--- a/arch/riscv/mm/cacheflush.c
+++ b/arch/riscv/mm/cacheflush.c
@@ -67,10 +67,7 @@ void flush_icache_mm(struct mm_struct *mm, bool local)
*/
smp_mb();
} else if (IS_ENABLED(CONFIG_RISCV_SBI)) {
- cpumask_t hartid_mask;
-
- riscv_cpuid_to_hartid_mask(&others, &hartid_mask);
- sbi_remote_fence_i(cpumask_bits(&hartid_mask));
+ sbi_remote_fence_i(&others);
} else {
on_each_cpu_mask(&others, ipi_remote_fence_i, NULL, 1);
}
diff --git a/arch/riscv/mm/context.c b/arch/riscv/mm/context.c
index ea54cc0c9106..7acbfbd14557 100644
--- a/arch/riscv/mm/context.c
+++ b/arch/riscv/mm/context.c
@@ -192,7 +192,7 @@ static void set_mm_asid(struct mm_struct *mm, unsigned int cpu)
switch_mm_fast:
csr_write(CSR_SATP, virt_to_pfn(mm->pgd) |
((cntx & asid_mask) << SATP_ASID_SHIFT) |
- SATP_MODE);
+ satp_mode);
if (need_flush_tlb)
local_flush_tlb_all();
@@ -201,7 +201,7 @@ switch_mm_fast:
static void set_mm_noasid(struct mm_struct *mm)
{
/* Switch the page table and blindly nuke entire local TLB */
- csr_write(CSR_SATP, virt_to_pfn(mm->pgd) | SATP_MODE);
+ csr_write(CSR_SATP, virt_to_pfn(mm->pgd) | satp_mode);
local_flush_tlb_all();
}
diff --git a/arch/riscv/mm/extable.c b/arch/riscv/mm/extable.c
index ddb7d3b99e89..05978f78579f 100644
--- a/arch/riscv/mm/extable.c
+++ b/arch/riscv/mm/extable.c
@@ -7,27 +7,65 @@
*/
+#include <linux/bitfield.h>
#include <linux/extable.h>
#include <linux/module.h>
#include <linux/uaccess.h>
+#include <asm/asm-extable.h>
+#include <asm/ptrace.h>
-#if defined(CONFIG_BPF_JIT) && defined(CONFIG_ARCH_RV64I)
-int rv_bpf_fixup_exception(const struct exception_table_entry *ex, struct pt_regs *regs);
-#endif
+static inline unsigned long
+get_ex_fixup(const struct exception_table_entry *ex)
+{
+ return ((unsigned long)&ex->fixup + ex->fixup);
+}
+
+static bool ex_handler_fixup(const struct exception_table_entry *ex,
+ struct pt_regs *regs)
+{
+ regs->epc = get_ex_fixup(ex);
+ return true;
+}
+
+static inline void regs_set_gpr(struct pt_regs *regs, unsigned int offset,
+ unsigned long val)
+{
+ if (unlikely(offset > MAX_REG_OFFSET))
+ return;
+
+ if (!offset)
+ *(unsigned long *)((unsigned long)regs + offset) = val;
+}
+
+static bool ex_handler_uaccess_err_zero(const struct exception_table_entry *ex,
+ struct pt_regs *regs)
+{
+ int reg_err = FIELD_GET(EX_DATA_REG_ERR, ex->data);
+ int reg_zero = FIELD_GET(EX_DATA_REG_ZERO, ex->data);
+
+ regs_set_gpr(regs, reg_err, -EFAULT);
+ regs_set_gpr(regs, reg_zero, 0);
+
+ regs->epc = get_ex_fixup(ex);
+ return true;
+}
-int fixup_exception(struct pt_regs *regs)
+bool fixup_exception(struct pt_regs *regs)
{
- const struct exception_table_entry *fixup;
+ const struct exception_table_entry *ex;
- fixup = search_exception_tables(regs->epc);
- if (!fixup)
- return 0;
+ ex = search_exception_tables(regs->epc);
+ if (!ex)
+ return false;
-#if defined(CONFIG_BPF_JIT) && defined(CONFIG_ARCH_RV64I)
- if (regs->epc >= BPF_JIT_REGION_START && regs->epc < BPF_JIT_REGION_END)
- return rv_bpf_fixup_exception(fixup, regs);
-#endif
+ switch (ex->type) {
+ case EX_TYPE_FIXUP:
+ return ex_handler_fixup(ex, regs);
+ case EX_TYPE_BPF:
+ return ex_handler_bpf(ex, regs);
+ case EX_TYPE_UACCESS_ERR_ZERO:
+ return ex_handler_uaccess_err_zero(ex, regs);
+ }
- regs->epc = fixup->fixup;
- return 1;
+ BUG();
}
diff --git a/arch/riscv/mm/fault.c b/arch/riscv/mm/fault.c
index aa08dd2f8fae..4e9efbe46d5f 100644
--- a/arch/riscv/mm/fault.c
+++ b/arch/riscv/mm/fault.c
@@ -31,7 +31,7 @@ static void die_kernel_fault(const char *msg, unsigned long addr,
bust_spinlocks(0);
die(regs, "Oops");
- do_exit(SIGKILL);
+ make_task_dead(SIGKILL);
}
static inline void no_context(struct pt_regs *regs, unsigned long addr)
@@ -235,7 +235,7 @@ asmlinkage void do_page_fault(struct pt_regs *regs)
* only copy the information from the master page table,
* nothing more.
*/
- if (unlikely((addr >= VMALLOC_START) && (addr <= VMALLOC_END))) {
+ if (unlikely((addr >= VMALLOC_START) && (addr < VMALLOC_END))) {
vmalloc_fault(regs, code, addr);
return;
}
@@ -330,7 +330,7 @@ good_area:
if (fault_signal_pending(fault, regs))
return;
- if (unlikely((fault & VM_FAULT_RETRY) && (flags & FAULT_FLAG_ALLOW_RETRY))) {
+ if (unlikely(fault & VM_FAULT_RETRY)) {
flags |= FAULT_FLAG_TRIED;
/*
diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
index 24b2b8044602..cf4d018b7d66 100644
--- a/arch/riscv/mm/init.c
+++ b/arch/riscv/mm/init.c
@@ -37,13 +37,19 @@ EXPORT_SYMBOL(kernel_map);
#define kernel_map (*(struct kernel_mapping *)XIP_FIXUP(&kernel_map))
#endif
+#ifdef CONFIG_64BIT
+u64 satp_mode = !IS_ENABLED(CONFIG_XIP_KERNEL) ? SATP_MODE_48 : SATP_MODE_39;
+#else
+u64 satp_mode = SATP_MODE_32;
+#endif
+EXPORT_SYMBOL(satp_mode);
+
+bool pgtable_l4_enabled = IS_ENABLED(CONFIG_64BIT) && !IS_ENABLED(CONFIG_XIP_KERNEL);
+EXPORT_SYMBOL(pgtable_l4_enabled);
+
phys_addr_t phys_ram_base __ro_after_init;
EXPORT_SYMBOL(phys_ram_base);
-#ifdef CONFIG_XIP_KERNEL
-extern char _xiprom[], _exiprom[], __data_loc;
-#endif
-
unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]
__page_aligned_bss;
EXPORT_SYMBOL(empty_zero_page);
@@ -53,15 +59,6 @@ extern char _start[];
void *_dtb_early_va __initdata;
uintptr_t _dtb_early_pa __initdata;
-struct pt_alloc_ops {
- pte_t *(*get_pte_virt)(phys_addr_t pa);
- phys_addr_t (*alloc_pte)(uintptr_t va);
-#ifndef __PAGETABLE_PMD_FOLDED
- pmd_t *(*get_pmd_virt)(phys_addr_t pa);
- phys_addr_t (*alloc_pmd)(uintptr_t va);
-#endif
-};
-
static phys_addr_t dma32_phys_limit __initdata;
static void __init zone_sizes_init(void)
@@ -102,10 +99,14 @@ static void __init print_vm_layout(void)
(unsigned long)VMALLOC_END);
print_mlm("lowmem", (unsigned long)PAGE_OFFSET,
(unsigned long)high_memory);
-#ifdef CONFIG_64BIT
- print_mlm("kernel", (unsigned long)KERNEL_LINK_ADDR,
- (unsigned long)ADDRESS_SPACE_END);
+ if (IS_ENABLED(CONFIG_64BIT)) {
+#ifdef CONFIG_KASAN
+ print_mlm("kasan", KASAN_SHADOW_START, KASAN_SHADOW_END);
#endif
+
+ print_mlm("kernel", (unsigned long)KERNEL_LINK_ADDR,
+ (unsigned long)ADDRESS_SPACE_END);
+ }
}
#else
static void print_vm_layout(void) { }
@@ -130,18 +131,8 @@ void __init mem_init(void)
print_vm_layout();
}
-/*
- * The default maximal physical memory size is -PAGE_OFFSET for 32-bit kernel,
- * whereas for 64-bit kernel, the end of the virtual address space is occupied
- * by the modules/BPF/kernel mappings which reduces the available size of the
- * linear mapping.
- * Limit the memory size via mem.
- */
-#ifdef CONFIG_64BIT
-static phys_addr_t memory_limit = -PAGE_OFFSET - SZ_4G;
-#else
-static phys_addr_t memory_limit = -PAGE_OFFSET;
-#endif
+/* Limit the memory size via mem. */
+static phys_addr_t memory_limit;
static int __init early_mem(char *p)
{
@@ -162,35 +153,31 @@ early_param("mem", early_mem);
static void __init setup_bootmem(void)
{
phys_addr_t vmlinux_end = __pa_symbol(&_end);
- phys_addr_t vmlinux_start = __pa_symbol(&_start);
- phys_addr_t __maybe_unused max_mapped_addr;
- phys_addr_t phys_ram_end;
+ phys_addr_t max_mapped_addr;
+ phys_addr_t phys_ram_end, vmlinux_start;
-#ifdef CONFIG_XIP_KERNEL
- vmlinux_start = __pa_symbol(&_sdata);
-#endif
+ if (IS_ENABLED(CONFIG_XIP_KERNEL))
+ vmlinux_start = __pa_symbol(&_sdata);
+ else
+ vmlinux_start = __pa_symbol(&_start);
memblock_enforce_memory_limit(memory_limit);
/*
- * Reserve from the start of the kernel to the end of the kernel
- */
-#if defined(CONFIG_64BIT) && defined(CONFIG_STRICT_KERNEL_RWX)
- /*
* Make sure we align the reservation on PMD_SIZE since we will
* map the kernel in the linear mapping as read-only: we do not want
* any allocation to happen between _end and the next pmd aligned page.
*/
- vmlinux_end = (vmlinux_end + PMD_SIZE - 1) & PMD_MASK;
-#endif
+ if (IS_ENABLED(CONFIG_64BIT) && IS_ENABLED(CONFIG_STRICT_KERNEL_RWX))
+ vmlinux_end = (vmlinux_end + PMD_SIZE - 1) & PMD_MASK;
+ /*
+ * Reserve from the start of the kernel to the end of the kernel
+ */
memblock_reserve(vmlinux_start, vmlinux_end - vmlinux_start);
-
phys_ram_end = memblock_end_of_DRAM();
-#ifndef CONFIG_64BIT
-#ifndef CONFIG_XIP_KERNEL
- phys_ram_base = memblock_start_of_DRAM();
-#endif
+ if (!IS_ENABLED(CONFIG_XIP_KERNEL))
+ phys_ram_base = memblock_start_of_DRAM();
/*
* memblock allocator is not aware of the fact that last 4K bytes of
* the addressable memory can not be mapped because of IS_ERR_VALUE
@@ -200,10 +187,11 @@ static void __init setup_bootmem(void)
* address space is occupied by the kernel mapping then this check must
* be done as soon as the kernel mapping base address is determined.
*/
- max_mapped_addr = __pa(~(ulong)0);
- if (max_mapped_addr == (phys_ram_end - 1))
- memblock_set_current_limit(max_mapped_addr - 4096);
-#endif
+ if (!IS_ENABLED(CONFIG_64BIT)) {
+ max_mapped_addr = __pa(~(ulong)0);
+ if (max_mapped_addr == (phys_ram_end - 1))
+ memblock_set_current_limit(max_mapped_addr - 4096);
+ }
min_low_pfn = PFN_UP(phys_ram_base);
max_low_pfn = max_pfn = PFN_DOWN(phys_ram_end);
@@ -229,13 +217,7 @@ static void __init setup_bootmem(void)
}
#ifdef CONFIG_MMU
-static struct pt_alloc_ops _pt_ops __initdata;
-
-#ifdef CONFIG_XIP_KERNEL
-#define pt_ops (*(struct pt_alloc_ops *)XIP_FIXUP(&_pt_ops))
-#else
-#define pt_ops _pt_ops
-#endif
+struct pt_alloc_ops pt_ops __initdata;
unsigned long riscv_pfn_base __ro_after_init;
EXPORT_SYMBOL(riscv_pfn_base);
@@ -245,9 +227,11 @@ pgd_t trampoline_pg_dir[PTRS_PER_PGD] __page_aligned_bss;
static pte_t fixmap_pte[PTRS_PER_PTE] __page_aligned_bss;
pgd_t early_pg_dir[PTRS_PER_PGD] __initdata __aligned(PAGE_SIZE);
+static pud_t __maybe_unused early_dtb_pud[PTRS_PER_PUD] __initdata __aligned(PAGE_SIZE);
static pmd_t __maybe_unused early_dtb_pmd[PTRS_PER_PMD] __initdata __aligned(PAGE_SIZE);
#ifdef CONFIG_XIP_KERNEL
+#define pt_ops (*(struct pt_alloc_ops *)XIP_FIXUP(&pt_ops))
#define trampoline_pg_dir ((pgd_t *)XIP_FIXUP(trampoline_pg_dir))
#define fixmap_pte ((pte_t *)XIP_FIXUP(fixmap_pte))
#define early_pg_dir ((pgd_t *)XIP_FIXUP(early_pg_dir))
@@ -333,6 +317,16 @@ static pmd_t early_pmd[PTRS_PER_PMD] __initdata __aligned(PAGE_SIZE);
#define early_pmd ((pmd_t *)XIP_FIXUP(early_pmd))
#endif /* CONFIG_XIP_KERNEL */
+static pud_t trampoline_pud[PTRS_PER_PUD] __page_aligned_bss;
+static pud_t fixmap_pud[PTRS_PER_PUD] __page_aligned_bss;
+static pud_t early_pud[PTRS_PER_PUD] __initdata __aligned(PAGE_SIZE);
+
+#ifdef CONFIG_XIP_KERNEL
+#define trampoline_pud ((pud_t *)XIP_FIXUP(trampoline_pud))
+#define fixmap_pud ((pud_t *)XIP_FIXUP(fixmap_pud))
+#define early_pud ((pud_t *)XIP_FIXUP(early_pud))
+#endif /* CONFIG_XIP_KERNEL */
+
static pmd_t *__init get_pmd_virt_early(phys_addr_t pa)
{
/* Before MMU is enabled */
@@ -352,7 +346,7 @@ static pmd_t *__init get_pmd_virt_late(phys_addr_t pa)
static phys_addr_t __init alloc_pmd_early(uintptr_t va)
{
- BUG_ON((va - kernel_map.virt_addr) >> PGDIR_SHIFT);
+ BUG_ON((va - kernel_map.virt_addr) >> PUD_SHIFT);
return (uintptr_t)early_pmd;
}
@@ -367,7 +361,8 @@ static phys_addr_t __init alloc_pmd_late(uintptr_t va)
unsigned long vaddr;
vaddr = __get_free_page(GFP_KERNEL);
- BUG_ON(!vaddr);
+ BUG_ON(!vaddr || !pgtable_pmd_page_ctor(virt_to_page(vaddr)));
+
return __pa(vaddr);
}
@@ -398,21 +393,97 @@ static void __init create_pmd_mapping(pmd_t *pmdp,
create_pte_mapping(ptep, va, pa, sz, prot);
}
-#define pgd_next_t pmd_t
-#define alloc_pgd_next(__va) pt_ops.alloc_pmd(__va)
-#define get_pgd_next_virt(__pa) pt_ops.get_pmd_virt(__pa)
+static pud_t *__init get_pud_virt_early(phys_addr_t pa)
+{
+ return (pud_t *)((uintptr_t)pa);
+}
+
+static pud_t *__init get_pud_virt_fixmap(phys_addr_t pa)
+{
+ clear_fixmap(FIX_PUD);
+ return (pud_t *)set_fixmap_offset(FIX_PUD, pa);
+}
+
+static pud_t *__init get_pud_virt_late(phys_addr_t pa)
+{
+ return (pud_t *)__va(pa);
+}
+
+static phys_addr_t __init alloc_pud_early(uintptr_t va)
+{
+ /* Only one PUD is available for early mapping */
+ BUG_ON((va - kernel_map.virt_addr) >> PGDIR_SHIFT);
+
+ return (uintptr_t)early_pud;
+}
+
+static phys_addr_t __init alloc_pud_fixmap(uintptr_t va)
+{
+ return memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE);
+}
+
+static phys_addr_t alloc_pud_late(uintptr_t va)
+{
+ unsigned long vaddr;
+
+ vaddr = __get_free_page(GFP_KERNEL);
+ BUG_ON(!vaddr);
+ return __pa(vaddr);
+}
+
+static void __init create_pud_mapping(pud_t *pudp,
+ uintptr_t va, phys_addr_t pa,
+ phys_addr_t sz, pgprot_t prot)
+{
+ pmd_t *nextp;
+ phys_addr_t next_phys;
+ uintptr_t pud_index = pud_index(va);
+
+ if (sz == PUD_SIZE) {
+ if (pud_val(pudp[pud_index]) == 0)
+ pudp[pud_index] = pfn_pud(PFN_DOWN(pa), prot);
+ return;
+ }
+
+ if (pud_val(pudp[pud_index]) == 0) {
+ next_phys = pt_ops.alloc_pmd(va);
+ pudp[pud_index] = pfn_pud(PFN_DOWN(next_phys), PAGE_TABLE);
+ nextp = pt_ops.get_pmd_virt(next_phys);
+ memset(nextp, 0, PAGE_SIZE);
+ } else {
+ next_phys = PFN_PHYS(_pud_pfn(pudp[pud_index]));
+ nextp = pt_ops.get_pmd_virt(next_phys);
+ }
+
+ create_pmd_mapping(nextp, va, pa, sz, prot);
+}
+
+#define pgd_next_t pud_t
+#define alloc_pgd_next(__va) (pgtable_l4_enabled ? \
+ pt_ops.alloc_pud(__va) : pt_ops.alloc_pmd(__va))
+#define get_pgd_next_virt(__pa) (pgtable_l4_enabled ? \
+ pt_ops.get_pud_virt(__pa) : (pgd_next_t *)pt_ops.get_pmd_virt(__pa))
#define create_pgd_next_mapping(__nextp, __va, __pa, __sz, __prot) \
- create_pmd_mapping(__nextp, __va, __pa, __sz, __prot)
-#define fixmap_pgd_next fixmap_pmd
+ (pgtable_l4_enabled ? \
+ create_pud_mapping(__nextp, __va, __pa, __sz, __prot) : \
+ create_pmd_mapping((pmd_t *)__nextp, __va, __pa, __sz, __prot))
+#define fixmap_pgd_next (pgtable_l4_enabled ? \
+ (uintptr_t)fixmap_pud : (uintptr_t)fixmap_pmd)
+#define trampoline_pgd_next (pgtable_l4_enabled ? \
+ (uintptr_t)trampoline_pud : (uintptr_t)trampoline_pmd)
+#define early_dtb_pgd_next (pgtable_l4_enabled ? \
+ (uintptr_t)early_dtb_pud : (uintptr_t)early_dtb_pmd)
#else
#define pgd_next_t pte_t
#define alloc_pgd_next(__va) pt_ops.alloc_pte(__va)
#define get_pgd_next_virt(__pa) pt_ops.get_pte_virt(__pa)
#define create_pgd_next_mapping(__nextp, __va, __pa, __sz, __prot) \
create_pte_mapping(__nextp, __va, __pa, __sz, __prot)
-#define fixmap_pgd_next fixmap_pte
+#define fixmap_pgd_next ((uintptr_t)fixmap_pte)
+#define early_dtb_pgd_next ((uintptr_t)early_dtb_pmd)
+#define create_pud_mapping(__pmdp, __va, __pa, __sz, __prot)
#define create_pmd_mapping(__pmdp, __va, __pa, __sz, __prot)
-#endif
+#endif /* __PAGETABLE_PMD_FOLDED */
void __init create_pgd_mapping(pgd_t *pgdp,
uintptr_t va, phys_addr_t pa,
@@ -451,6 +522,8 @@ static uintptr_t __init best_map_size(phys_addr_t base, phys_addr_t size)
}
#ifdef CONFIG_XIP_KERNEL
+extern char _xiprom[], _exiprom[], __data_loc;
+
/* called from head.S with MMU off */
asmlinkage void __init __copy_data(void)
{
@@ -499,6 +572,57 @@ static __init pgprot_t pgprot_from_va(uintptr_t va)
}
#endif /* CONFIG_STRICT_KERNEL_RWX */
+#ifdef CONFIG_64BIT
+static void __init disable_pgtable_l4(void)
+{
+ pgtable_l4_enabled = false;
+ kernel_map.page_offset = PAGE_OFFSET_L3;
+ satp_mode = SATP_MODE_39;
+}
+
+/*
+ * There is a simple way to determine if 4-level is supported by the
+ * underlying hardware: establish 1:1 mapping in 4-level page table mode
+ * then read SATP to see if the configuration was taken into account
+ * meaning sv48 is supported.
+ */
+static __init void set_satp_mode(void)
+{
+ u64 identity_satp, hw_satp;
+ uintptr_t set_satp_mode_pmd;
+
+ set_satp_mode_pmd = ((unsigned long)set_satp_mode) & PMD_MASK;
+ create_pgd_mapping(early_pg_dir,
+ set_satp_mode_pmd, (uintptr_t)early_pud,
+ PGDIR_SIZE, PAGE_TABLE);
+ create_pud_mapping(early_pud,
+ set_satp_mode_pmd, (uintptr_t)early_pmd,
+ PUD_SIZE, PAGE_TABLE);
+ /* Handle the case where set_satp_mode straddles 2 PMDs */
+ create_pmd_mapping(early_pmd,
+ set_satp_mode_pmd, set_satp_mode_pmd,
+ PMD_SIZE, PAGE_KERNEL_EXEC);
+ create_pmd_mapping(early_pmd,
+ set_satp_mode_pmd + PMD_SIZE,
+ set_satp_mode_pmd + PMD_SIZE,
+ PMD_SIZE, PAGE_KERNEL_EXEC);
+
+ identity_satp = PFN_DOWN((uintptr_t)&early_pg_dir) | satp_mode;
+
+ local_flush_tlb_all();
+ csr_write(CSR_SATP, identity_satp);
+ hw_satp = csr_swap(CSR_SATP, 0ULL);
+ local_flush_tlb_all();
+
+ if (hw_satp != identity_satp)
+ disable_pgtable_l4();
+
+ memset(early_pg_dir, 0, PAGE_SIZE);
+ memset(early_pud, 0, PAGE_SIZE);
+ memset(early_pmd, 0, PAGE_SIZE);
+}
+#endif
+
/*
* setup_vm() is called from head.S with MMU-off.
*
@@ -563,10 +687,15 @@ static void __init create_fdt_early_page_table(pgd_t *pgdir, uintptr_t dtb_pa)
uintptr_t pa = dtb_pa & ~(PMD_SIZE - 1);
create_pgd_mapping(early_pg_dir, DTB_EARLY_BASE_VA,
- IS_ENABLED(CONFIG_64BIT) ? (uintptr_t)early_dtb_pmd : pa,
+ IS_ENABLED(CONFIG_64BIT) ? early_dtb_pgd_next : pa,
PGDIR_SIZE,
IS_ENABLED(CONFIG_64BIT) ? PAGE_TABLE : PAGE_KERNEL);
+ if (pgtable_l4_enabled) {
+ create_pud_mapping(early_dtb_pud, DTB_EARLY_BASE_VA,
+ (uintptr_t)early_dtb_pmd, PUD_SIZE, PAGE_TABLE);
+ }
+
if (IS_ENABLED(CONFIG_64BIT)) {
create_pmd_mapping(early_dtb_pmd, DTB_EARLY_BASE_VA,
pa, PMD_SIZE, PAGE_KERNEL);
@@ -588,11 +717,64 @@ static void __init create_fdt_early_page_table(pgd_t *pgdir, uintptr_t dtb_pa)
dtb_early_pa = dtb_pa;
}
+/*
+ * MMU is not enabled, the page tables are allocated directly using
+ * early_pmd/pud/p4d and the address returned is the physical one.
+ */
+void __init pt_ops_set_early(void)
+{
+ pt_ops.alloc_pte = alloc_pte_early;
+ pt_ops.get_pte_virt = get_pte_virt_early;
+#ifndef __PAGETABLE_PMD_FOLDED
+ pt_ops.alloc_pmd = alloc_pmd_early;
+ pt_ops.get_pmd_virt = get_pmd_virt_early;
+ pt_ops.alloc_pud = alloc_pud_early;
+ pt_ops.get_pud_virt = get_pud_virt_early;
+#endif
+}
+
+/*
+ * MMU is enabled but page table setup is not complete yet.
+ * fixmap page table alloc functions must be used as a means to temporarily
+ * map the allocated physical pages since the linear mapping does not exist yet.
+ *
+ * Note that this is called with MMU disabled, hence kernel_mapping_pa_to_va,
+ * but it will be used as described above.
+ */
+void __init pt_ops_set_fixmap(void)
+{
+ pt_ops.alloc_pte = kernel_mapping_pa_to_va((uintptr_t)alloc_pte_fixmap);
+ pt_ops.get_pte_virt = kernel_mapping_pa_to_va((uintptr_t)get_pte_virt_fixmap);
+#ifndef __PAGETABLE_PMD_FOLDED
+ pt_ops.alloc_pmd = kernel_mapping_pa_to_va((uintptr_t)alloc_pmd_fixmap);
+ pt_ops.get_pmd_virt = kernel_mapping_pa_to_va((uintptr_t)get_pmd_virt_fixmap);
+ pt_ops.alloc_pud = kernel_mapping_pa_to_va((uintptr_t)alloc_pud_fixmap);
+ pt_ops.get_pud_virt = kernel_mapping_pa_to_va((uintptr_t)get_pud_virt_fixmap);
+#endif
+}
+
+/*
+ * MMU is enabled and page table setup is complete, so from now, we can use
+ * generic page allocation functions to setup page table.
+ */
+void __init pt_ops_set_late(void)
+{
+ pt_ops.alloc_pte = alloc_pte_late;
+ pt_ops.get_pte_virt = get_pte_virt_late;
+#ifndef __PAGETABLE_PMD_FOLDED
+ pt_ops.alloc_pmd = alloc_pmd_late;
+ pt_ops.get_pmd_virt = get_pmd_virt_late;
+ pt_ops.alloc_pud = alloc_pud_late;
+ pt_ops.get_pud_virt = get_pud_virt_late;
+#endif
+}
+
asmlinkage void __init setup_vm(uintptr_t dtb_pa)
{
pmd_t __maybe_unused fix_bmap_spmd, fix_bmap_epmd;
kernel_map.virt_addr = KERNEL_LINK_ADDR;
+ kernel_map.page_offset = _AC(CONFIG_PAGE_OFFSET, UL);
#ifdef CONFIG_XIP_KERNEL
kernel_map.xiprom = (uintptr_t)CONFIG_XIP_PHYS_ADDR;
@@ -607,11 +789,24 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa)
kernel_map.phys_addr = (uintptr_t)(&_start);
kernel_map.size = (uintptr_t)(&_end) - kernel_map.phys_addr;
#endif
+
+#if defined(CONFIG_64BIT) && !defined(CONFIG_XIP_KERNEL)
+ set_satp_mode();
+#endif
+
kernel_map.va_pa_offset = PAGE_OFFSET - kernel_map.phys_addr;
kernel_map.va_kernel_pa_offset = kernel_map.virt_addr - kernel_map.phys_addr;
riscv_pfn_base = PFN_DOWN(kernel_map.phys_addr);
+ /*
+ * The default maximal physical memory size is KERN_VIRT_SIZE for 32-bit
+ * kernel, whereas for 64-bit kernel, the end of the virtual address
+ * space is occupied by the modules/BPF/kernel mappings which reduces
+ * the available size of the linear mapping.
+ */
+ memory_limit = KERN_VIRT_SIZE - (IS_ENABLED(CONFIG_64BIT) ? SZ_4G : 0);
+
/* Sanity check alignment and size */
BUG_ON((PAGE_OFFSET % PGDIR_SIZE) != 0);
BUG_ON((kernel_map.phys_addr % PMD_SIZE) != 0);
@@ -624,23 +819,25 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa)
BUG_ON((kernel_map.virt_addr + kernel_map.size) > ADDRESS_SPACE_END - SZ_4K);
#endif
- pt_ops.alloc_pte = alloc_pte_early;
- pt_ops.get_pte_virt = get_pte_virt_early;
-#ifndef __PAGETABLE_PMD_FOLDED
- pt_ops.alloc_pmd = alloc_pmd_early;
- pt_ops.get_pmd_virt = get_pmd_virt_early;
-#endif
+ pt_ops_set_early();
+
/* Setup early PGD for fixmap */
create_pgd_mapping(early_pg_dir, FIXADDR_START,
- (uintptr_t)fixmap_pgd_next, PGDIR_SIZE, PAGE_TABLE);
+ fixmap_pgd_next, PGDIR_SIZE, PAGE_TABLE);
#ifndef __PAGETABLE_PMD_FOLDED
- /* Setup fixmap PMD */
+ /* Setup fixmap PUD and PMD */
+ if (pgtable_l4_enabled)
+ create_pud_mapping(fixmap_pud, FIXADDR_START,
+ (uintptr_t)fixmap_pmd, PUD_SIZE, PAGE_TABLE);
create_pmd_mapping(fixmap_pmd, FIXADDR_START,
(uintptr_t)fixmap_pte, PMD_SIZE, PAGE_TABLE);
/* Setup trampoline PGD and PMD */
create_pgd_mapping(trampoline_pg_dir, kernel_map.virt_addr,
- (uintptr_t)trampoline_pmd, PGDIR_SIZE, PAGE_TABLE);
+ trampoline_pgd_next, PGDIR_SIZE, PAGE_TABLE);
+ if (pgtable_l4_enabled)
+ create_pud_mapping(trampoline_pud, kernel_map.virt_addr,
+ (uintptr_t)trampoline_pmd, PUD_SIZE, PAGE_TABLE);
#ifdef CONFIG_XIP_KERNEL
create_pmd_mapping(trampoline_pmd, kernel_map.virt_addr,
kernel_map.xiprom, PMD_SIZE, PAGE_KERNEL_EXEC);
@@ -668,7 +865,7 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa)
* Bootime fixmap only can handle PMD_SIZE mapping. Thus, boot-ioremap
* range can not span multiple pmds.
*/
- BUILD_BUG_ON((__fix_to_virt(FIX_BTMAP_BEGIN) >> PMD_SHIFT)
+ BUG_ON((__fix_to_virt(FIX_BTMAP_BEGIN) >> PMD_SHIFT)
!= (__fix_to_virt(FIX_BTMAP_END) >> PMD_SHIFT));
#ifndef __PAGETABLE_PMD_FOLDED
@@ -693,6 +890,8 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa)
pr_warn("FIX_BTMAP_BEGIN: %d\n", FIX_BTMAP_BEGIN);
}
#endif
+
+ pt_ops_set_fixmap();
}
static void __init setup_vm_final(void)
@@ -701,16 +900,6 @@ static void __init setup_vm_final(void)
phys_addr_t pa, start, end;
u64 i;
- /**
- * MMU is enabled at this point. But page table setup is not complete yet.
- * fixmap page table alloc functions should be used at this point
- */
- pt_ops.alloc_pte = alloc_pte_fixmap;
- pt_ops.get_pte_virt = get_pte_virt_fixmap;
-#ifndef __PAGETABLE_PMD_FOLDED
- pt_ops.alloc_pmd = alloc_pmd_fixmap;
- pt_ops.get_pmd_virt = get_pmd_virt_fixmap;
-#endif
/* Setup swapper PGD for fixmap */
create_pgd_mapping(swapper_pg_dir, FIXADDR_START,
__pa_symbol(fixmap_pgd_next),
@@ -735,26 +924,24 @@ static void __init setup_vm_final(void)
}
}
-#ifdef CONFIG_64BIT
/* Map the kernel */
- create_kernel_page_table(swapper_pg_dir, false);
+ if (IS_ENABLED(CONFIG_64BIT))
+ create_kernel_page_table(swapper_pg_dir, false);
+
+#ifdef CONFIG_KASAN
+ kasan_swapper_init();
#endif
/* Clear fixmap PTE and PMD mappings */
clear_fixmap(FIX_PTE);
clear_fixmap(FIX_PMD);
+ clear_fixmap(FIX_PUD);
/* Move to swapper page table */
- csr_write(CSR_SATP, PFN_DOWN(__pa_symbol(swapper_pg_dir)) | SATP_MODE);
+ csr_write(CSR_SATP, PFN_DOWN(__pa_symbol(swapper_pg_dir)) | satp_mode);
local_flush_tlb_all();
- /* generic page allocation functions must be used to setup page table */
- pt_ops.alloc_pte = alloc_pte_late;
- pt_ops.get_pte_virt = get_pte_virt_late;
-#ifndef __PAGETABLE_PMD_FOLDED
- pt_ops.alloc_pmd = alloc_pmd_late;
- pt_ops.get_pmd_virt = get_pmd_virt_late;
-#endif
+ pt_ops_set_late();
}
#else
asmlinkage void __init setup_vm(uintptr_t dtb_pa)
@@ -790,12 +977,10 @@ static void __init reserve_crashkernel(void)
* since it doesn't make much sense and we have limited memory
* resources.
*/
-#ifdef CONFIG_CRASH_DUMP
if (is_kdump_kernel()) {
pr_info("crashkernel: ignoring reservation request\n");
return;
}
-#endif
ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(),
&crash_size, &crash_base);
@@ -812,13 +997,22 @@ static void __init reserve_crashkernel(void)
/*
* Current riscv boot protocol requires 2MB alignment for
* RV64 and 4MB alignment for RV32 (hugepage size)
+ *
+ * Try to alloc from 32bit addressible physical memory so that
+ * swiotlb can work on the crash kernel.
*/
crash_base = memblock_phys_alloc_range(crash_size, PMD_SIZE,
- search_start, search_end);
+ search_start,
+ min(search_end, (unsigned long) SZ_4G));
if (crash_base == 0) {
- pr_warn("crashkernel: couldn't allocate %lldKB\n",
- crash_size >> 10);
- return;
+ /* Try again without restricting region to 32bit addressible memory */
+ crash_base = memblock_phys_alloc_range(crash_size, PMD_SIZE,
+ search_start, search_end);
+ if (crash_base == 0) {
+ pr_warn("crashkernel: couldn't allocate %lldKB\n",
+ crash_size >> 10);
+ return;
+ }
}
pr_info("crashkernel: reserved 0x%016llx - 0x%016llx (%lld MB)\n",
diff --git a/arch/riscv/mm/kasan_init.c b/arch/riscv/mm/kasan_init.c
index 54294f83513d..f61f7ca6fe0f 100644
--- a/arch/riscv/mm/kasan_init.c
+++ b/arch/riscv/mm/kasan_init.c
@@ -11,45 +11,27 @@
#include <asm/fixmap.h>
#include <asm/pgalloc.h>
-extern pgd_t early_pg_dir[PTRS_PER_PGD];
-asmlinkage void __init kasan_early_init(void)
-{
- uintptr_t i;
- pgd_t *pgd = early_pg_dir + pgd_index(KASAN_SHADOW_START);
-
- BUILD_BUG_ON(KASAN_SHADOW_OFFSET !=
- KASAN_SHADOW_END - (1UL << (64 - KASAN_SHADOW_SCALE_SHIFT)));
-
- for (i = 0; i < PTRS_PER_PTE; ++i)
- set_pte(kasan_early_shadow_pte + i,
- mk_pte(virt_to_page(kasan_early_shadow_page),
- PAGE_KERNEL));
-
- for (i = 0; i < PTRS_PER_PMD; ++i)
- set_pmd(kasan_early_shadow_pmd + i,
- pfn_pmd(PFN_DOWN
- (__pa((uintptr_t) kasan_early_shadow_pte)),
- __pgprot(_PAGE_TABLE)));
-
- for (i = KASAN_SHADOW_START; i < KASAN_SHADOW_END;
- i += PGDIR_SIZE, ++pgd)
- set_pgd(pgd,
- pfn_pgd(PFN_DOWN
- (__pa(((uintptr_t) kasan_early_shadow_pmd))),
- __pgprot(_PAGE_TABLE)));
-
- /* init for swapper_pg_dir */
- pgd = pgd_offset_k(KASAN_SHADOW_START);
-
- for (i = KASAN_SHADOW_START; i < KASAN_SHADOW_END;
- i += PGDIR_SIZE, ++pgd)
- set_pgd(pgd,
- pfn_pgd(PFN_DOWN
- (__pa(((uintptr_t) kasan_early_shadow_pmd))),
- __pgprot(_PAGE_TABLE)));
+/*
+ * Kasan shadow region must lie at a fixed address across sv39, sv48 and sv57
+ * which is right before the kernel.
+ *
+ * For sv39, the region is aligned on PGDIR_SIZE so we only need to populate
+ * the page global directory with kasan_early_shadow_pmd.
+ *
+ * For sv48 and sv57, the region is not aligned on PGDIR_SIZE so the mapping
+ * must be divided as follows:
+ * - the first PGD entry, although incomplete, is populated with
+ * kasan_early_shadow_pud/p4d
+ * - the PGD entries in the middle are populated with kasan_early_shadow_pud/p4d
+ * - the last PGD entry is shared with the kernel mapping so populated at the
+ * lower levels pud/p4d
+ *
+ * In addition, when shallow populating a kasan region (for example vmalloc),
+ * this region may also not be aligned on PGDIR size, so we must go down to the
+ * pud level too.
+ */
- local_flush_tlb_all();
-}
+extern pgd_t early_pg_dir[PTRS_PER_PGD];
static void __init kasan_populate_pte(pmd_t *pmd, unsigned long vaddr, unsigned long end)
{
@@ -73,15 +55,19 @@ static void __init kasan_populate_pte(pmd_t *pmd, unsigned long vaddr, unsigned
set_pmd(pmd, pfn_pmd(PFN_DOWN(__pa(base_pte)), PAGE_TABLE));
}
-static void __init kasan_populate_pmd(pgd_t *pgd, unsigned long vaddr, unsigned long end)
+static void __init kasan_populate_pmd(pud_t *pud, unsigned long vaddr, unsigned long end)
{
phys_addr_t phys_addr;
pmd_t *pmdp, *base_pmd;
unsigned long next;
- base_pmd = (pmd_t *)pgd_page_vaddr(*pgd);
- if (base_pmd == lm_alias(kasan_early_shadow_pmd))
+ if (pud_none(*pud)) {
base_pmd = memblock_alloc(PTRS_PER_PMD * sizeof(pmd_t), PAGE_SIZE);
+ } else {
+ base_pmd = (pmd_t *)pud_pgtable(*pud);
+ if (base_pmd == lm_alias(kasan_early_shadow_pmd))
+ base_pmd = memblock_alloc(PTRS_PER_PMD * sizeof(pmd_t), PAGE_SIZE);
+ }
pmdp = base_pmd + pmd_index(vaddr);
@@ -105,59 +91,207 @@ static void __init kasan_populate_pmd(pgd_t *pgd, unsigned long vaddr, unsigned
* it entirely, memblock could allocate a page at a physical address
* where KASAN is not populated yet and then we'd get a page fault.
*/
- set_pgd(pgd, pfn_pgd(PFN_DOWN(__pa(base_pmd)), PAGE_TABLE));
+ set_pud(pud, pfn_pud(PFN_DOWN(__pa(base_pmd)), PAGE_TABLE));
+}
+
+static void __init kasan_populate_pud(pgd_t *pgd,
+ unsigned long vaddr, unsigned long end,
+ bool early)
+{
+ phys_addr_t phys_addr;
+ pud_t *pudp, *base_pud;
+ unsigned long next;
+
+ if (early) {
+ /*
+ * We can't use pgd_page_vaddr here as it would return a linear
+ * mapping address but it is not mapped yet, but when populating
+ * early_pg_dir, we need the physical address and when populating
+ * swapper_pg_dir, we need the kernel virtual address so use
+ * pt_ops facility.
+ */
+ base_pud = pt_ops.get_pud_virt(pfn_to_phys(_pgd_pfn(*pgd)));
+ } else {
+ base_pud = (pud_t *)pgd_page_vaddr(*pgd);
+ if (base_pud == lm_alias(kasan_early_shadow_pud))
+ base_pud = memblock_alloc(PTRS_PER_PUD * sizeof(pud_t), PAGE_SIZE);
+ }
+
+ pudp = base_pud + pud_index(vaddr);
+
+ do {
+ next = pud_addr_end(vaddr, end);
+
+ if (pud_none(*pudp) && IS_ALIGNED(vaddr, PUD_SIZE) && (next - vaddr) >= PUD_SIZE) {
+ if (early) {
+ phys_addr = __pa(((uintptr_t)kasan_early_shadow_pmd));
+ set_pud(pudp, pfn_pud(PFN_DOWN(phys_addr), PAGE_TABLE));
+ continue;
+ } else {
+ phys_addr = memblock_phys_alloc(PUD_SIZE, PUD_SIZE);
+ if (phys_addr) {
+ set_pud(pudp, pfn_pud(PFN_DOWN(phys_addr), PAGE_KERNEL));
+ continue;
+ }
+ }
+ }
+
+ kasan_populate_pmd(pudp, vaddr, next);
+ } while (pudp++, vaddr = next, vaddr != end);
+
+ /*
+ * Wait for the whole PGD to be populated before setting the PGD in
+ * the page table, otherwise, if we did set the PGD before populating
+ * it entirely, memblock could allocate a page at a physical address
+ * where KASAN is not populated yet and then we'd get a page fault.
+ */
+ if (!early)
+ set_pgd(pgd, pfn_pgd(PFN_DOWN(__pa(base_pud)), PAGE_TABLE));
}
-static void __init kasan_populate_pgd(unsigned long vaddr, unsigned long end)
+#define kasan_early_shadow_pgd_next (pgtable_l4_enabled ? \
+ (uintptr_t)kasan_early_shadow_pud : \
+ (uintptr_t)kasan_early_shadow_pmd)
+#define kasan_populate_pgd_next(pgdp, vaddr, next, early) \
+ (pgtable_l4_enabled ? \
+ kasan_populate_pud(pgdp, vaddr, next, early) : \
+ kasan_populate_pmd((pud_t *)pgdp, vaddr, next))
+
+static void __init kasan_populate_pgd(pgd_t *pgdp,
+ unsigned long vaddr, unsigned long end,
+ bool early)
{
phys_addr_t phys_addr;
- pgd_t *pgdp = pgd_offset_k(vaddr);
unsigned long next;
do {
next = pgd_addr_end(vaddr, end);
- /*
- * pgdp can't be none since kasan_early_init initialized all KASAN
- * shadow region with kasan_early_shadow_pmd: if this is stillthe case,
- * that means we can try to allocate a hugepage as a replacement.
- */
- if (pgd_page_vaddr(*pgdp) == (unsigned long)lm_alias(kasan_early_shadow_pmd) &&
- IS_ALIGNED(vaddr, PGDIR_SIZE) && (next - vaddr) >= PGDIR_SIZE) {
- phys_addr = memblock_phys_alloc(PGDIR_SIZE, PGDIR_SIZE);
- if (phys_addr) {
- set_pgd(pgdp, pfn_pgd(PFN_DOWN(phys_addr), PAGE_KERNEL));
+ if (IS_ALIGNED(vaddr, PGDIR_SIZE) && (next - vaddr) >= PGDIR_SIZE) {
+ if (early) {
+ phys_addr = __pa((uintptr_t)kasan_early_shadow_pgd_next);
+ set_pgd(pgdp, pfn_pgd(PFN_DOWN(phys_addr), PAGE_TABLE));
continue;
+ } else if (pgd_page_vaddr(*pgdp) ==
+ (unsigned long)lm_alias(kasan_early_shadow_pgd_next)) {
+ /*
+ * pgdp can't be none since kasan_early_init
+ * initialized all KASAN shadow region with
+ * kasan_early_shadow_pud: if this is still the
+ * case, that means we can try to allocate a
+ * hugepage as a replacement.
+ */
+ phys_addr = memblock_phys_alloc(PGDIR_SIZE, PGDIR_SIZE);
+ if (phys_addr) {
+ set_pgd(pgdp, pfn_pgd(PFN_DOWN(phys_addr), PAGE_KERNEL));
+ continue;
+ }
}
}
- kasan_populate_pmd(pgdp, vaddr, next);
+ kasan_populate_pgd_next(pgdp, vaddr, next, early);
} while (pgdp++, vaddr = next, vaddr != end);
}
+asmlinkage void __init kasan_early_init(void)
+{
+ uintptr_t i;
+
+ BUILD_BUG_ON(KASAN_SHADOW_OFFSET !=
+ KASAN_SHADOW_END - (1UL << (64 - KASAN_SHADOW_SCALE_SHIFT)));
+
+ for (i = 0; i < PTRS_PER_PTE; ++i)
+ set_pte(kasan_early_shadow_pte + i,
+ mk_pte(virt_to_page(kasan_early_shadow_page),
+ PAGE_KERNEL));
+
+ for (i = 0; i < PTRS_PER_PMD; ++i)
+ set_pmd(kasan_early_shadow_pmd + i,
+ pfn_pmd(PFN_DOWN
+ (__pa((uintptr_t)kasan_early_shadow_pte)),
+ PAGE_TABLE));
+
+ if (pgtable_l4_enabled) {
+ for (i = 0; i < PTRS_PER_PUD; ++i)
+ set_pud(kasan_early_shadow_pud + i,
+ pfn_pud(PFN_DOWN
+ (__pa(((uintptr_t)kasan_early_shadow_pmd))),
+ PAGE_TABLE));
+ }
+
+ kasan_populate_pgd(early_pg_dir + pgd_index(KASAN_SHADOW_START),
+ KASAN_SHADOW_START, KASAN_SHADOW_END, true);
+
+ local_flush_tlb_all();
+}
+
+void __init kasan_swapper_init(void)
+{
+ kasan_populate_pgd(pgd_offset_k(KASAN_SHADOW_START),
+ KASAN_SHADOW_START, KASAN_SHADOW_END, true);
+
+ local_flush_tlb_all();
+}
+
static void __init kasan_populate(void *start, void *end)
{
unsigned long vaddr = (unsigned long)start & PAGE_MASK;
unsigned long vend = PAGE_ALIGN((unsigned long)end);
- kasan_populate_pgd(vaddr, vend);
+ kasan_populate_pgd(pgd_offset_k(vaddr), vaddr, vend, false);
local_flush_tlb_all();
memset(start, KASAN_SHADOW_INIT, end - start);
}
+static void __init kasan_shallow_populate_pud(pgd_t *pgdp,
+ unsigned long vaddr, unsigned long end,
+ bool kasan_populate)
+{
+ unsigned long next;
+ pud_t *pudp, *base_pud;
+ pmd_t *base_pmd;
+ bool is_kasan_pmd;
+
+ base_pud = (pud_t *)pgd_page_vaddr(*pgdp);
+ pudp = base_pud + pud_index(vaddr);
+
+ if (kasan_populate)
+ memcpy(base_pud, (void *)kasan_early_shadow_pgd_next,
+ sizeof(pud_t) * PTRS_PER_PUD);
+
+ do {
+ next = pud_addr_end(vaddr, end);
+ is_kasan_pmd = (pud_pgtable(*pudp) == lm_alias(kasan_early_shadow_pmd));
+
+ if (is_kasan_pmd) {
+ base_pmd = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
+ set_pud(pudp, pfn_pud(PFN_DOWN(__pa(base_pmd)), PAGE_TABLE));
+ }
+ } while (pudp++, vaddr = next, vaddr != end);
+}
+
static void __init kasan_shallow_populate_pgd(unsigned long vaddr, unsigned long end)
{
unsigned long next;
void *p;
pgd_t *pgd_k = pgd_offset_k(vaddr);
+ bool is_kasan_pgd_next;
do {
next = pgd_addr_end(vaddr, end);
- if (pgd_page_vaddr(*pgd_k) == (unsigned long)lm_alias(kasan_early_shadow_pmd)) {
+ is_kasan_pgd_next = (pgd_page_vaddr(*pgd_k) ==
+ (unsigned long)lm_alias(kasan_early_shadow_pgd_next));
+
+ if (is_kasan_pgd_next) {
p = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
set_pgd(pgd_k, pfn_pgd(PFN_DOWN(__pa(p)), PAGE_TABLE));
}
+
+ if (IS_ALIGNED(vaddr, PGDIR_SIZE) && (next - vaddr) >= PGDIR_SIZE)
+ continue;
+
+ kasan_shallow_populate_pud(pgd_k, vaddr, next, is_kasan_pgd_next);
} while (pgd_k++, vaddr = next, vaddr != end);
}
diff --git a/arch/riscv/mm/tlbflush.c b/arch/riscv/mm/tlbflush.c
index 64f8201237c2..37ed760d007c 100644
--- a/arch/riscv/mm/tlbflush.c
+++ b/arch/riscv/mm/tlbflush.c
@@ -32,7 +32,6 @@ static void __sbi_tlb_flush_range(struct mm_struct *mm, unsigned long start,
unsigned long size, unsigned long stride)
{
struct cpumask *cmask = mm_cpumask(mm);
- struct cpumask hmask;
unsigned int cpuid;
bool broadcast;
@@ -46,9 +45,7 @@ static void __sbi_tlb_flush_range(struct mm_struct *mm, unsigned long start,
unsigned long asid = atomic_long_read(&mm->context.id);
if (broadcast) {
- riscv_cpuid_to_hartid_mask(cmask, &hmask);
- sbi_remote_sfence_vma_asid(cpumask_bits(&hmask),
- start, size, asid);
+ sbi_remote_sfence_vma_asid(cmask, start, size, asid);
} else if (size <= stride) {
local_flush_tlb_page_asid(start, asid);
} else {
@@ -56,9 +53,7 @@ static void __sbi_tlb_flush_range(struct mm_struct *mm, unsigned long start,
}
} else {
if (broadcast) {
- riscv_cpuid_to_hartid_mask(cmask, &hmask);
- sbi_remote_sfence_vma(cpumask_bits(&hmask),
- start, size);
+ sbi_remote_sfence_vma(cmask, start, size);
} else if (size <= stride) {
local_flush_tlb_page(start);
} else {
diff --git a/arch/riscv/net/bpf_jit_comp32.c b/arch/riscv/net/bpf_jit_comp32.c
index e6497424cbf6..529a83b85c1c 100644
--- a/arch/riscv/net/bpf_jit_comp32.c
+++ b/arch/riscv/net/bpf_jit_comp32.c
@@ -799,11 +799,10 @@ static int emit_bpf_tail_call(int insn, struct rv_jit_context *ctx)
emit_bcc(BPF_JGE, lo(idx_reg), RV_REG_T1, off, ctx);
/*
- * temp_tcc = tcc - 1;
- * if (tcc < 0)
+ * if (--tcc < 0)
* goto out;
*/
- emit(rv_addi(RV_REG_T1, RV_REG_TCC, -1), ctx);
+ emit(rv_addi(RV_REG_TCC, RV_REG_TCC, -1), ctx);
off = ninsns_rvoff(tc_ninsn - (ctx->ninsns - start_insn));
emit_bcc(BPF_JSLT, RV_REG_TCC, RV_REG_ZERO, off, ctx);
@@ -829,7 +828,6 @@ static int emit_bpf_tail_call(int insn, struct rv_jit_context *ctx)
if (is_12b_check(off, insn))
return -1;
emit(rv_lw(RV_REG_T0, off, RV_REG_T0), ctx);
- emit(rv_addi(RV_REG_TCC, RV_REG_T1, 0), ctx);
/* Epilogue jumps to *(t0 + 4). */
__build_epilogue(true, ctx);
return 0;
diff --git a/arch/riscv/net/bpf_jit_comp64.c b/arch/riscv/net/bpf_jit_comp64.c
index f2a779c7e225..0bcda99d1d68 100644
--- a/arch/riscv/net/bpf_jit_comp64.c
+++ b/arch/riscv/net/bpf_jit_comp64.c
@@ -327,12 +327,12 @@ static int emit_bpf_tail_call(int insn, struct rv_jit_context *ctx)
off = ninsns_rvoff(tc_ninsn - (ctx->ninsns - start_insn));
emit_branch(BPF_JGE, RV_REG_A2, RV_REG_T1, off, ctx);
- /* if (TCC-- < 0)
+ /* if (--TCC < 0)
* goto out;
*/
- emit_addi(RV_REG_T1, tcc, -1, ctx);
+ emit_addi(RV_REG_TCC, tcc, -1, ctx);
off = ninsns_rvoff(tc_ninsn - (ctx->ninsns - start_insn));
- emit_branch(BPF_JSLT, tcc, RV_REG_ZERO, off, ctx);
+ emit_branch(BPF_JSLT, RV_REG_TCC, RV_REG_ZERO, off, ctx);
/* prog = array->ptrs[index];
* if (!prog)
@@ -352,7 +352,6 @@ static int emit_bpf_tail_call(int insn, struct rv_jit_context *ctx)
if (is_12b_check(off, insn))
return -1;
emit_ld(RV_REG_T3, off, RV_REG_T2, ctx);
- emit_mv(RV_REG_TCC, RV_REG_T1, ctx);
__build_epilogue(true, ctx);
return 0;
}
@@ -459,10 +458,8 @@ static int emit_call(bool fixed, u64 addr, struct rv_jit_context *ctx)
#define BPF_FIXUP_OFFSET_MASK GENMASK(26, 0)
#define BPF_FIXUP_REG_MASK GENMASK(31, 27)
-int rv_bpf_fixup_exception(const struct exception_table_entry *ex,
- struct pt_regs *regs);
-int rv_bpf_fixup_exception(const struct exception_table_entry *ex,
- struct pt_regs *regs)
+bool ex_handler_bpf(const struct exception_table_entry *ex,
+ struct pt_regs *regs)
{
off_t offset = FIELD_GET(BPF_FIXUP_OFFSET_MASK, ex->fixup);
int regs_offset = FIELD_GET(BPF_FIXUP_REG_MASK, ex->fixup);
@@ -470,7 +467,7 @@ int rv_bpf_fixup_exception(const struct exception_table_entry *ex,
*(unsigned long *)((void *)regs + pt_regmap[regs_offset]) = 0;
regs->epc = (unsigned long)&ex->fixup - offset;
- return 1;
+ return true;
}
/* For accesses to BTF pointers, add an entry to the exception table */
@@ -500,7 +497,7 @@ static int add_exception_handler(const struct bpf_insn *insn,
offset = pc - (long)&ex->insn;
if (WARN_ON_ONCE(offset >= 0 || offset < INT_MIN))
return -ERANGE;
- ex->insn = pc;
+ ex->insn = offset;
/*
* Since the extable follows the program, the fixup offset is always
@@ -516,6 +513,7 @@ static int add_exception_handler(const struct bpf_insn *insn,
ex->fixup = FIELD_PREP(BPF_FIXUP_OFFSET_MASK, offset) |
FIELD_PREP(BPF_FIXUP_REG_MASK, dst_reg);
+ ex->type = EX_TYPE_BPF;
ctx->nexentries++;
return 0;