diff options
370 files changed, 5618 insertions, 3431 deletions
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 4955d2faf4cc..f5a27f067db9 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -3393,7 +3393,7 @@ Disable SMAP (Supervisor Mode Access Prevention) even if it is supported by processor. - nosmep [X86,PPC] + nosmep [X86,PPC64s] Disable SMEP (Supervisor Mode Execution Prevention) even if it is supported by processor. @@ -4166,6 +4166,14 @@ Override pmtimer IOPort with a hex value. e.g. pmtmr=0x508 + pmu_override= [PPC] Override the PMU. + This option takes over the PMU facility, so it is no + longer usable by perf. Setting this option starts the + PMU counters by setting MMCR0 to 0 (the FC bit is + cleared). If a number is given, then MMCR1 is set to + that number, otherwise (e.g., 'pmu_override=on'), MMCR1 + remains 0. + pm_debug_messages [SUSPEND,KNL] Enable suspend/resume debug messages during boot up. @@ -6494,6 +6502,12 @@ controller on both pseries and powernv platforms. Only useful on POWER9 and above. + xive.store-eoi=off [PPC] + By default on POWER10 and above, the kernel will use + stores for EOI handling when the XIVE interrupt mode + is active. This option allows the XIVE driver to use + loads instead, as on POWER9. + xhci-hcd.quirks [USB,KNL] A hex value specifying bitmask with supplemental xhci host controller quirks. Meaning of each bit can be diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index dea74d7717c0..0631c9241af3 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -129,7 +129,7 @@ config PPC select ARCH_HAS_KCOV select ARCH_HAS_MEMBARRIER_CALLBACKS select ARCH_HAS_MEMBARRIER_SYNC_CORE - select ARCH_HAS_MEMREMAP_COMPAT_ALIGN + select ARCH_HAS_MEMREMAP_COMPAT_ALIGN if PPC_64S_HASH_MMU select ARCH_HAS_MMIOWB if PPC64 select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE select ARCH_HAS_PHYS_TO_DMA @@ -165,6 +165,7 @@ config PPC select BINFMT_ELF select BUILDTIME_TABLE_SORT select CLONE_BACKWARDS + select CPUMASK_OFFSTACK if NR_CPUS >= 8192 select DCACHE_WORD_ACCESS if PPC64 && CPU_LITTLE_ENDIAN select DMA_OPS_BYPASS if PPC64 select DMA_OPS if PPC64 @@ -205,7 +206,7 @@ config PPC select HAVE_DEBUG_KMEMLEAK select HAVE_DEBUG_STACKOVERFLOW select HAVE_DYNAMIC_FTRACE - select HAVE_DYNAMIC_FTRACE_WITH_REGS if MPROFILE_KERNEL + select HAVE_DYNAMIC_FTRACE_WITH_REGS if MPROFILE_KERNEL || PPC32 select HAVE_EBPF_JIT select HAVE_EFFICIENT_UNALIGNED_ACCESS if !(CPU_LITTLE_ENDIAN && POWER7_CPU) select HAVE_FAST_GUP @@ -229,7 +230,7 @@ config PPC select HAVE_KPROBES_ON_FTRACE select HAVE_KRETPROBES select HAVE_LD_DEAD_CODE_DATA_ELIMINATION - select HAVE_LIVEPATCH if HAVE_DYNAMIC_FTRACE_WITH_REGS + select HAVE_LIVEPATCH if HAVE_DYNAMIC_FTRACE_WITH_REGS && PPC64 select HAVE_MOD_ARCH_SPECIFIC select HAVE_NMI if PERF_EVENTS || (PPC64 && PPC_BOOK3S) select HAVE_OPTPROBES @@ -845,7 +846,7 @@ config FORCE_MAX_ZONEORDER config PPC_SUBPAGE_PROT bool "Support setting protections for 4k subpages (subpage_prot syscall)" default n - depends on PPC_BOOK3S_64 && PPC_64K_PAGES + depends on PPC_64S_HASH_MMU && PPC_64K_PAGES help This option adds support for system call to allow user programs to set access permissions (read/write, readonly, or no access) @@ -943,6 +944,7 @@ config PPC_MEM_KEYS prompt "PowerPC Memory Protection Keys" def_bool y depends on PPC_BOOK3S_64 + depends on PPC_64S_HASH_MMU select ARCH_USES_HIGH_VMA_FLAGS select ARCH_HAS_PKEYS help diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index e02568f17334..5f16ac1583c5 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -245,7 +245,9 @@ cpu-as-$(CONFIG_E500) += -Wa,-me500 # When using '-many -mpower4' gas will first try and find a matching power4 # mnemonic and failing that it will allow any valid mnemonic that GAS knows # about. GCC will pass -many to GAS when assembling, clang does not. -cpu-as-$(CONFIG_PPC_BOOK3S_64) += -Wa,-mpower4 -Wa,-many +# LLVM IAS doesn't understand either flag: https://github.com/ClangBuiltLinux/linux/issues/675 +# but LLVM IAS only supports ISA >= 2.06 for Book3S 64 anyway... +cpu-as-$(CONFIG_PPC_BOOK3S_64) += $(call as-option,-Wa$(comma)-mpower4) $(call as-option,-Wa$(comma)-many) cpu-as-$(CONFIG_PPC_E500MC) += $(call as-option,-Wa$(comma)-me500mc) KBUILD_AFLAGS += $(cpu-as-y) @@ -445,10 +447,11 @@ PHONY += checkbin # Check toolchain versions: # - gcc-4.6 is the minimum kernel-wide version so nothing required. checkbin: - @if test "x${CONFIG_CPU_LITTLE_ENDIAN}" = "xy" \ - && $(LD) --version | head -1 | grep ' 2\.24$$' >/dev/null ; then \ + @if test "x${CONFIG_LD_IS_LLD}" != "xy" -a \ + "x$(call ld-ifversion, -le, 22400, y)" = "xy" ; then \ echo -n '*** binutils 2.24 miscompiles weak symbols ' ; \ echo 'in some circumstances.' ; \ + echo '*** binutils 2.23 do not define the TOC symbol ' ; \ echo -n '*** Please use a different binutils version.' ; \ false ; \ fi diff --git a/arch/powerpc/boot/crt0.S b/arch/powerpc/boot/crt0.S index 1d83966f5ef6..feadee18e271 100644 --- a/arch/powerpc/boot/crt0.S +++ b/arch/powerpc/boot/crt0.S @@ -28,7 +28,7 @@ p_etext: .8byte _etext p_bss_start: .8byte __bss_start p_end: .8byte _end -p_toc: .8byte __toc_start + 0x8000 - p_base +p_toc: .8byte .TOC. - p_base p_dyn: .8byte __dynamic_start - p_base p_rela: .8byte __rela_dyn_start - p_base p_prom: .8byte 0 @@ -226,16 +226,19 @@ p_base: mflr r10 /* r10 now points to runtime addr of p_base */ #ifdef __powerpc64__ #define PROM_FRAME_SIZE 512 -#define SAVE_GPR(n, base) std n,8*(n)(base) -#define REST_GPR(n, base) ld n,8*(n)(base) -#define SAVE_2GPRS(n, base) SAVE_GPR(n, base); SAVE_GPR(n+1, base) -#define SAVE_4GPRS(n, base) SAVE_2GPRS(n, base); SAVE_2GPRS(n+2, base) -#define SAVE_8GPRS(n, base) SAVE_4GPRS(n, base); SAVE_4GPRS(n+4, base) -#define SAVE_10GPRS(n, base) SAVE_8GPRS(n, base); SAVE_2GPRS(n+8, base) -#define REST_2GPRS(n, base) REST_GPR(n, base); REST_GPR(n+1, base) -#define REST_4GPRS(n, base) REST_2GPRS(n, base); REST_2GPRS(n+2, base) -#define REST_8GPRS(n, base) REST_4GPRS(n, base); REST_4GPRS(n+4, base) -#define REST_10GPRS(n, base) REST_8GPRS(n, base); REST_2GPRS(n+8, base) + +.macro OP_REGS op, width, start, end, base, offset + .Lreg=\start + .rept (\end - \start + 1) + \op .Lreg,\offset+\width*.Lreg(\base) + .Lreg=.Lreg+1 + .endr +.endm + +#define SAVE_GPRS(start, end, base) OP_REGS std, 8, start, end, base, 0 +#define REST_GPRS(start, end, base) OP_REGS ld, 8, start, end, base, 0 +#define SAVE_GPR(n, base) SAVE_GPRS(n, n, base) +#define REST_GPR(n, base) REST_GPRS(n, n, base) /* prom handles the jump into and return from firmware. The prom args pointer is loaded in r3. */ @@ -246,9 +249,7 @@ prom: stdu r1,-PROM_FRAME_SIZE(r1) /* Save SP and create stack space */ SAVE_GPR(2, r1) - SAVE_GPR(13, r1) - SAVE_8GPRS(14, r1) - SAVE_10GPRS(22, r1) + SAVE_GPRS(13, 31, r1) mfcr r10 std r10,8*32(r1) mfmsr r10 @@ -283,9 +284,7 @@ prom: /* Restore other registers */ REST_GPR(2, r1) - REST_GPR(13, r1) - REST_8GPRS(14, r1) - REST_10GPRS(22, r1) + REST_GPRS(13, 31, r1) ld r10,8*32(r1) mtcr r10 diff --git a/arch/powerpc/boot/dts/digsy_mtc.dts b/arch/powerpc/boot/dts/digsy_mtc.dts index 57024a4c1e7d..dfaf974c0ce6 100644 --- a/arch/powerpc/boot/dts/digsy_mtc.dts +++ b/arch/powerpc/boot/dts/digsy_mtc.dts @@ -25,14 +25,6 @@ status = "disabled"; }; - spi@f00 { - msp430@0 { - compatible = "spidev"; - spi-max-frequency = <32000>; - reg = <0>; - }; - }; - psc@2000 { // PSC1 status = "disabled"; }; diff --git a/arch/powerpc/boot/dts/o2d.dtsi b/arch/powerpc/boot/dts/o2d.dtsi index b55a9e5bd828..7e52509fa506 100644 --- a/arch/powerpc/boot/dts/o2d.dtsi +++ b/arch/powerpc/boot/dts/o2d.dtsi @@ -34,12 +34,6 @@ #address-cells = <1>; #size-cells = <0>; cell-index = <0>; - - spidev@0 { - compatible = "spidev"; - spi-max-frequency = <250000>; - reg = <0>; - }; }; psc@2200 { // PSC2 diff --git a/arch/powerpc/boot/zImage.lds.S b/arch/powerpc/boot/zImage.lds.S index d6f072865627..d65cd55a6f38 100644 --- a/arch/powerpc/boot/zImage.lds.S +++ b/arch/powerpc/boot/zImage.lds.S @@ -36,12 +36,9 @@ SECTIONS } #ifdef CONFIG_PPC64_BOOT_WRAPPER - . = ALIGN(256); - .got : + .got : ALIGN(256) { - __toc_start = .; - *(.got) - *(.toc) + *(.got .toc) } #endif diff --git a/arch/powerpc/configs/microwatt_defconfig b/arch/powerpc/configs/microwatt_defconfig index 07d87a4044b2..eff933ebbb9e 100644 --- a/arch/powerpc/configs/microwatt_defconfig +++ b/arch/powerpc/configs/microwatt_defconfig @@ -15,6 +15,8 @@ CONFIG_EMBEDDED=y # CONFIG_COMPAT_BRK is not set # CONFIG_SLAB_MERGE_DEFAULT is not set CONFIG_PPC64=y +CONFIG_POWER9_CPU=y +# CONFIG_PPC_64S_HASH_MMU is not set # CONFIG_PPC_KUEP is not set # CONFIG_PPC_KUAP is not set CONFIG_CPU_LITTLE_ENDIAN=y @@ -27,7 +29,6 @@ CONFIG_PPC_MICROWATT=y CONFIG_CPU_FREQ=y CONFIG_HZ_100=y CONFIG_PPC_4K_PAGES=y -# CONFIG_PPC_MEM_KEYS is not set # CONFIG_SECCOMP is not set # CONFIG_MQ_IOSCHED_KYBER is not set # CONFIG_COREDUMP is not set diff --git a/arch/powerpc/configs/ppc64_defconfig b/arch/powerpc/configs/ppc64_defconfig index 203d0b7f0bb8..c8b0e80d613b 100644 --- a/arch/powerpc/configs/ppc64_defconfig +++ b/arch/powerpc/configs/ppc64_defconfig @@ -26,7 +26,6 @@ CONFIG_PPC64=y CONFIG_NR_CPUS=2048 CONFIG_PPC_SPLPAR=y CONFIG_DTL=y -CONFIG_SCANLOG=m CONFIG_PPC_SMLPAR=y CONFIG_IBMEBUS=y CONFIG_PPC_SVM=y diff --git a/arch/powerpc/configs/pseries_defconfig b/arch/powerpc/configs/pseries_defconfig index e64f2242abe1..b571d084c148 100644 --- a/arch/powerpc/configs/pseries_defconfig +++ b/arch/powerpc/configs/pseries_defconfig @@ -38,7 +38,6 @@ CONFIG_MODULE_SRCVERSION_ALL=y CONFIG_PARTITION_ADVANCED=y CONFIG_PPC_SPLPAR=y CONFIG_DTL=y -CONFIG_SCANLOG=m CONFIG_PPC_SMLPAR=y CONFIG_IBMEBUS=y CONFIG_PAPR_SCM=m diff --git a/arch/powerpc/crypto/md5-asm.S b/arch/powerpc/crypto/md5-asm.S index 948d100a2934..fa6bc440cf4a 100644 --- a/arch/powerpc/crypto/md5-asm.S +++ b/arch/powerpc/crypto/md5-asm.S @@ -38,15 +38,11 @@ #define INITIALIZE \ PPC_STLU r1,-INT_FRAME_SIZE(r1); \ - SAVE_8GPRS(14, r1); /* push registers onto stack */ \ - SAVE_4GPRS(22, r1); \ - SAVE_GPR(26, r1) + SAVE_GPRS(14, 26, r1) /* push registers onto stack */ #define FINALIZE \ - REST_8GPRS(14, r1); /* pop registers from stack */ \ - REST_4GPRS(22, r1); \ - REST_GPR(26, r1); \ - addi r1,r1,INT_FRAME_SIZE; + REST_GPRS(14, 26, r1); /* pop registers from stack */ \ + addi r1,r1,INT_FRAME_SIZE #ifdef __BIG_ENDIAN__ #define LOAD_DATA(reg, off) \ diff --git a/arch/powerpc/crypto/sha1-powerpc-asm.S b/arch/powerpc/crypto/sha1-powerpc-asm.S index 23e248beff71..f0d5ed557ab1 100644 --- a/arch/powerpc/crypto/sha1-powerpc-asm.S +++ b/arch/powerpc/crypto/sha1-powerpc-asm.S @@ -125,8 +125,7 @@ _GLOBAL(powerpc_sha_transform) PPC_STLU r1,-INT_FRAME_SIZE(r1) - SAVE_8GPRS(14, r1) - SAVE_10GPRS(22, r1) + SAVE_GPRS(14, 31, r1) /* Load up A - E */ lwz RA(0),0(r3) /* A */ @@ -184,7 +183,6 @@ _GLOBAL(powerpc_sha_transform) stw RD(0),12(r3) stw RE(0),16(r3) - REST_8GPRS(14, r1) - REST_10GPRS(22, r1) + REST_GPRS(14, 31, r1) addi r1,r1,INT_FRAME_SIZE blr diff --git a/arch/powerpc/include/asm/asm-prototypes.h b/arch/powerpc/include/asm/asm-prototypes.h index 222823861a67..41b8a1e1144a 100644 --- a/arch/powerpc/include/asm/asm-prototypes.h +++ b/arch/powerpc/include/asm/asm-prototypes.h @@ -141,11 +141,6 @@ static inline void kvmppc_restore_tm_hv(struct kvm_vcpu *vcpu, u64 msr, bool preserve_nv) { } #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ -void kvmhv_save_host_pmu(void); -void kvmhv_load_host_pmu(void); -void kvmhv_save_guest_pmu(struct kvm_vcpu *vcpu, bool pmu_in_use); -void kvmhv_load_guest_pmu(struct kvm_vcpu *vcpu); - void kvmppc_p9_enter_guest(struct kvm_vcpu *vcpu); long kvmppc_h_set_dabr(struct kvm_vcpu *vcpu, unsigned long dabr); diff --git a/arch/powerpc/include/asm/atomic.h b/arch/powerpc/include/asm/atomic.h index fd594fdbd84d..853dc86864f4 100644 --- a/arch/powerpc/include/asm/atomic.h +++ b/arch/powerpc/include/asm/atomic.h @@ -37,62 +37,62 @@ static __inline__ void arch_atomic_set(atomic_t *v, int i) __asm__ __volatile__("stw%U0%X0 %1,%0" : "=m<>"(v->counter) : "r"(i)); } -#define ATOMIC_OP(op, asm_op) \ +#define ATOMIC_OP(op, asm_op, suffix, sign, ...) \ static __inline__ void arch_atomic_##op(int a, atomic_t *v) \ { \ int t; \ \ __asm__ __volatile__( \ "1: lwarx %0,0,%3 # atomic_" #op "\n" \ - #asm_op " %0,%2,%0\n" \ + #asm_op "%I2" suffix " %0,%0,%2\n" \ " stwcx. %0,0,%3 \n" \ " bne- 1b\n" \ : "=&r" (t), "+m" (v->counter) \ - : "r" (a), "r" (&v->counter) \ - : "cc"); \ + : "r"#sign (a), "r" (&v->counter) \ + : "cc", ##__VA_ARGS__); \ } \ -#define ATOMIC_OP_RETURN_RELAXED(op, asm_op) \ +#define ATOMIC_OP_RETURN_RELAXED(op, asm_op, suffix, sign, ...) \ static inline int arch_atomic_##op##_return_relaxed(int a, atomic_t *v) \ { \ int t; \ \ __asm__ __volatile__( \ "1: lwarx %0,0,%3 # atomic_" #op "_return_relaxed\n" \ - #asm_op " %0,%2,%0\n" \ + #asm_op "%I2" suffix " %0,%0,%2\n" \ " stwcx. %0,0,%3\n" \ " bne- 1b\n" \ : "=&r" (t), "+m" (v->counter) \ - : "r" (a), "r" (&v->counter) \ - : "cc"); \ + : "r"#sign (a), "r" (&v->counter) \ + : "cc", ##__VA_ARGS__); \ \ return t; \ } -#define ATOMIC_FETCH_OP_RELAXED(op, asm_op) \ +#define ATOMIC_FETCH_OP_RELAXED(op, asm_op, suffix, sign, ...) \ static inline int arch_atomic_fetch_##op##_relaxed(int a, atomic_t *v) \ { \ int res, t; \ \ __asm__ __volatile__( \ "1: lwarx %0,0,%4 # atomic_fetch_" #op "_relaxed\n" \ - #asm_op " %1,%3,%0\n" \ + #asm_op "%I3" suffix " %1,%0,%3\n" \ " stwcx. %1,0,%4\n" \ " bne- 1b\n" \ : "=&r" (res), "=&r" (t), "+m" (v->counter) \ - : "r" (a), "r" (&v->counter) \ - : "cc"); \ + : "r"#sign (a), "r" (&v->counter) \ + : "cc", ##__VA_ARGS__); \ \ return res; \ } -#define ATOMIC_OPS(op, asm_op) \ - ATOMIC_OP(op, asm_op) \ - ATOMIC_OP_RETURN_RELAXED(op, asm_op) \ - ATOMIC_FETCH_OP_RELAXED(op, asm_op) +#define ATOMIC_OPS(op, asm_op, suffix, sign, ...) \ + ATOMIC_OP(op, asm_op, suffix, sign, ##__VA_ARGS__) \ + ATOMIC_OP_RETURN_RELAXED(op, asm_op, suffix, sign, ##__VA_ARGS__)\ + ATOMIC_FETCH_OP_RELAXED(op, asm_op, suffix, sign, ##__VA_ARGS__) -ATOMIC_OPS(add, add) -ATOMIC_OPS(sub, subf) +ATOMIC_OPS(add, add, "c", I, "xer") +ATOMIC_OPS(sub, sub, "c", I, "xer") #define arch_atomic_add_return_relaxed arch_atomic_add_return_relaxed #define arch_atomic_sub_return_relaxed arch_atomic_sub_return_relaxed @@ -101,13 +101,13 @@ ATOMIC_OPS(sub, subf) #define arch_atomic_fetch_sub_relaxed arch_atomic_fetch_sub_relaxed #undef ATOMIC_OPS -#define ATOMIC_OPS(op, asm_op) \ - ATOMIC_OP(op, asm_op) \ - ATOMIC_FETCH_OP_RELAXED(op, asm_op) +#define ATOMIC_OPS(op, asm_op, suffix, sign) \ + ATOMIC_OP(op, asm_op, suffix, sign) \ + ATOMIC_FETCH_OP_RELAXED(op, asm_op, suffix, sign) -ATOMIC_OPS(and, and) -ATOMIC_OPS(or, or) -ATOMIC_OPS(xor, xor) +ATOMIC_OPS(and, and, ".", K) +ATOMIC_OPS(or, or, "", K) +ATOMIC_OPS(xor, xor, "", K) #define arch_atomic_fetch_and_relaxed arch_atomic_fetch_and_relaxed #define arch_atomic_fetch_or_relaxed arch_atomic_fetch_or_relaxed @@ -118,71 +118,6 @@ ATOMIC_OPS(xor, xor) #undef ATOMIC_OP_RETURN_RELAXED #undef ATOMIC_OP -static __inline__ void arch_atomic_inc(atomic_t *v) -{ - int t; - - __asm__ __volatile__( -"1: lwarx %0,0,%2 # atomic_inc\n\ - addic %0,%0,1\n" -" stwcx. %0,0,%2 \n\ - bne- 1b" - : "=&r" (t), "+m" (v->counter) - : "r" (&v->counter) - : "cc", "xer"); -} -#define arch_atomic_inc arch_atomic_inc - -static __inline__ int arch_atomic_inc_return_relaxed(atomic_t *v) -{ - int t; - - __asm__ __volatile__( -"1: lwarx %0,0,%2 # atomic_inc_return_relaxed\n" -" addic %0,%0,1\n" -" stwcx. %0,0,%2\n" -" bne- 1b" - : "=&r" (t), "+m" (v->counter) - : "r" (&v->counter) - : "cc", "xer"); - - return t; -} - -static __inline__ void arch_atomic_dec(atomic_t *v) -{ - int t; - - __asm__ __volatile__( -"1: lwarx %0,0,%2 # atomic_dec\n\ - addic %0,%0,-1\n" -" stwcx. %0,0,%2\n\ - bne- 1b" - : "=&r" (t), "+m" (v->counter) - : "r" (&v->counter) - : "cc", "xer"); -} -#define arch_atomic_dec arch_atomic_dec - -static __inline__ int arch_atomic_dec_return_relaxed(atomic_t *v) -{ - int t; - - __asm__ __volatile__( -"1: lwarx %0,0,%2 # atomic_dec_return_relaxed\n" -" addic %0,%0,-1\n" -" stwcx. %0,0,%2\n" -" bne- 1b" - : "=&r" (t), "+m" (v->counter) - : "r" (&v->counter) - : "cc", "xer"); - - return t; -} - -#define arch_atomic_inc_return_relaxed arch_atomic_inc_return_relaxed -#define arch_atomic_dec_return_relaxed arch_atomic_dec_return_relaxed - #define arch_atomic_cmpxchg(v, o, n) \ (arch_cmpxchg(&((v)->counter), (o), (n))) #define arch_atomic_cmpxchg_relaxed(v, o, n) \ @@ -241,50 +176,20 @@ static __inline__ int arch_atomic_fetch_add_unless(atomic_t *v, int a, int u) "1: lwarx %0,0,%1 # atomic_fetch_add_unless\n\ cmpw 0,%0,%3 \n\ beq 2f \n\ - add %0,%2,%0 \n" + add%I2c %0,%0,%2 \n" " stwcx. %0,0,%1 \n\ bne- 1b \n" PPC_ATOMIC_EXIT_BARRIER -" subf %0,%2,%0 \n\ +" sub%I2c %0,%0,%2 \n\ 2:" : "=&r" (t) - : "r" (&v->counter), "r" (a), "r" (u) - : "cc", "memory"); + : "r" (&v->counter), "rI" (a), "r" (u) + : "cc", "memory", "xer"); return t; } #define arch_atomic_fetch_add_unless arch_atomic_fetch_add_unless -/** - * atomic_inc_not_zero - increment unless the number is zero - * @v: pointer of type atomic_t - * - * Atomically increments @v by 1, so long as @v is non-zero. - * Returns non-zero if @v was non-zero, and zero otherwise. - */ -static __inline__ int arch_atomic_inc_not_zero(atomic_t *v) -{ - int t1, t2; - - __asm__ __volatile__ ( - PPC_ATOMIC_ENTRY_BARRIER -"1: lwarx %0,0,%2 # atomic_inc_not_zero\n\ - cmpwi 0,%0,0\n\ - beq- 2f\n\ - addic %1,%0,1\n" -" stwcx. %1,0,%2\n\ - bne- 1b\n" - PPC_ATOMIC_EXIT_BARRIER - "\n\ -2:" - : "=&r" (t1), "=&r" (t2) - : "r" (&v->counter) - : "cc", "xer", "memory"); - - return t1; -} -#define arch_atomic_inc_not_zero(v) arch_atomic_inc_not_zero((v)) - /* * Atomically test *v and decrement if it is greater than 0. * The function returns the old value of *v minus 1, even if diff --git a/arch/powerpc/include/asm/bitops.h b/arch/powerpc/include/asm/bitops.h index 11847b6a244e..a05d8c62cbea 100644 --- a/arch/powerpc/include/asm/bitops.h +++ b/arch/powerpc/include/asm/bitops.h @@ -71,19 +71,61 @@ static inline void fn(unsigned long mask, \ __asm__ __volatile__ ( \ prefix \ "1:" PPC_LLARX "%0,0,%3,0\n" \ - stringify_in_c(op) "%0,%0,%2\n" \ + #op "%I2 %0,%0,%2\n" \ PPC_STLCX "%0,0,%3\n" \ "bne- 1b\n" \ : "=&r" (old), "+m" (*p) \ - : "r" (mask), "r" (p) \ + : "rK" (mask), "r" (p) \ : "cc", "memory"); \ } DEFINE_BITOP(set_bits, or, "") -DEFINE_BITOP(clear_bits, andc, "") -DEFINE_BITOP(clear_bits_unlock, andc, PPC_RELEASE_BARRIER) DEFINE_BITOP(change_bits, xor, "") +static __always_inline bool is_rlwinm_mask_valid(unsigned long x) +{ + if (!x) + return false; + if (x & 1) + x = ~x; // make the mask non-wrapping + x += x & -x; // adding the low set bit results in at most one bit set + + return !(x & (x - 1)); +} + +#define DEFINE_CLROP(fn, prefix) \ +static inline void fn(unsigned long mask, volatile unsigned long *_p) \ +{ \ + unsigned long old; \ + unsigned long *p = (unsigned long *)_p; \ + \ + if (IS_ENABLED(CONFIG_PPC32) && \ + __builtin_constant_p(mask) && is_rlwinm_mask_valid(~mask)) {\ + asm volatile ( \ + prefix \ + "1:" "lwarx %0,0,%3\n" \ + "rlwinm %0,%0,0,%2\n" \ + "stwcx. %0,0,%3\n" \ + "bne- 1b\n" \ + : "=&r" (old), "+m" (*p) \ + : "n" (~mask), "r" (p) \ + : "cc", "memory"); \ + } else { \ + asm volatile ( \ + prefix \ + "1:" PPC_LLARX "%0,0,%3,0\n" \ + "andc %0,%0,%2\n" \ + PPC_STLCX "%0,0,%3\n" \ + "bne- 1b\n" \ + : "=&r" (old), "+m" (*p) \ + : "r" (mask), "r" (p) \ + : "cc", "memory"); \ + } \ +} + +DEFINE_CLROP(clear_bits, "") +DEFINE_CLROP(clear_bits_unlock, PPC_RELEASE_BARRIER) + static inline void arch_set_bit(int nr, volatile unsigned long *addr) { set_bits(BIT_MASK(nr), addr + BIT_WORD(nr)); @@ -116,12 +158,12 @@ static inline unsigned long fn( \ __asm__ __volatile__ ( \ prefix \ "1:" PPC_LLARX "%0,0,%3,%4\n" \ - stringify_in_c(op) "%1,%0,%2\n" \ + #op "%I2 %1,%0,%2\n" \ PPC_STLCX "%1,0,%3\n" \ "bne- 1b\n" \ postfix \ : "=&r" (old), "=&r" (t) \ - : "r" (mask), "r" (p), "i" (IS_ENABLED(CONFIG_PPC64) ? eh : 0) \ + : "rK" (mask), "r" (p), "i" (IS_ENABLED(CONFIG_PPC64) ? eh : 0) \ : "cc", "memory"); \ return (old & mask); \ } @@ -130,11 +172,42 @@ DEFINE_TESTOP(test_and_set_bits, or, PPC_ATOMIC_ENTRY_BARRIER, PPC_ATOMIC_EXIT_BARRIER, 0) DEFINE_TESTOP(test_and_set_bits_lock, or, "", PPC_ACQUIRE_BARRIER, 1) -DEFINE_TESTOP(test_and_clear_bits, andc, PPC_ATOMIC_ENTRY_BARRIER, - PPC_ATOMIC_EXIT_BARRIER, 0) DEFINE_TESTOP(test_and_change_bits, xor, PPC_ATOMIC_ENTRY_BARRIER, PPC_ATOMIC_EXIT_BARRIER, 0) +static inline unsigned long test_and_clear_bits(unsigned long mask, volatile unsigned long *_p) +{ + unsigned long old, t; + unsigned long *p = (unsigned long *)_p; + + if (IS_ENABLED(CONFIG_PPC32) && + __builtin_constant_p(mask) && is_rlwinm_mask_valid(~mask)) { + asm volatile ( + PPC_ATOMIC_ENTRY_BARRIER + "1:" "lwarx %0,0,%3\n" + "rlwinm %1,%0,0,%2\n" + "stwcx. %1,0,%3\n" + "bne- 1b\n" + PPC_ATOMIC_EXIT_BARRIER + : "=&r" (old), "=&r" (t) + : "n" (~mask), "r" (p) + : "cc", "memory"); + } else { + asm volatile ( + PPC_ATOMIC_ENTRY_BARRIER + "1:" PPC_LLARX "%0,0,%3,0\n" + "andc %1,%0,%2\n" + PPC_STLCX "%1,0,%3\n" + "bne- 1b\n" + PPC_ATOMIC_EXIT_BARRIER + : "=&r" (old), "=&r" (t) + : "r" (mask), "r" (p) + : "cc", "memory"); + } + + return (old & mask); +} + static inline int arch_test_and_set_bit(unsigned long nr, volatile unsigned long *addr) { diff --git a/arch/powerpc/include/asm/book3s/32/kup.h b/arch/powerpc/include/asm/book3s/32/kup.h index 9f38040f0641..678f9c9d89b6 100644 --- a/arch/powerpc/include/asm/book3s/32/kup.h +++ b/arch/powerpc/include/asm/book3s/32/kup.h @@ -12,50 +12,10 @@ #include <linux/jump_label.h> extern struct static_key_false disable_kuap_key; -extern struct static_key_false disable_kuep_key; - -static __always_inline bool kuap_is_disabled(void) -{ - return !IS_ENABLED(CONFIG_PPC_KUAP) || static_branch_unlikely(&disable_kuap_key); -} static __always_inline bool kuep_is_disabled(void) { - return !IS_ENABLED(CONFIG_PPC_KUEP) || static_branch_unlikely(&disable_kuep_key); -} - -static inline void kuep_lock(void) -{ - if (kuep_is_disabled()) - return; - - update_user_segments(mfsr(0) | SR_NX); - /* - * This isync() shouldn't be necessary as the kernel is not excepted to - * run any instruction in userspace soon after the update of segments, - * but hash based cores (at least G3) seem to exhibit a random - * behaviour when the 'isync' is not there. 603 cores don't have this - * behaviour so don't do the 'isync' as it saves several CPU cycles. - */ - if (mmu_has_feature(MMU_FTR_HPTE_TABLE)) - isync(); /* Context sync required after mtsr() */ -} - -static inline void kuep_unlock(void) -{ - if (kuep_is_disabled()) - return; - - update_user_segments(mfsr(0) & ~SR_NX); - /* - * This isync() shouldn't be necessary as a 'rfi' will soon be executed - * to return to userspace, but hash based cores (at least G3) seem to - * exhibit a random behaviour when the 'isync' is not there. 603 cores - * don't have this behaviour so don't do the 'isync' as it saves several - * CPU cycles. - */ - if (mmu_has_feature(MMU_FTR_HPTE_TABLE)) - isync(); /* Context sync required after mtsr() */ + return !IS_ENABLED(CONFIG_PPC_KUEP); } #ifdef CONFIG_PPC_KUAP @@ -65,6 +25,11 @@ static inline void kuep_unlock(void) #define KUAP_NONE (~0UL) #define KUAP_ALL (~1UL) +static __always_inline bool kuap_is_disabled(void) +{ + return static_branch_unlikely(&disable_kuap_key); +} + static inline void kuap_lock_one(unsigned long addr) { mtsr(mfsr(addr) | SR_KS, addr); @@ -92,7 +57,7 @@ static inline void kuap_unlock_all(void) void kuap_lock_all_ool(void); void kuap_unlock_all_ool(void); -static inline void kuap_lock(unsigned long addr, bool ool) +static inline void kuap_lock_addr(unsigned long addr, bool ool) { if (likely(addr != KUAP_ALL)) kuap_lock_one(addr); @@ -112,33 +77,31 @@ static inline void kuap_unlock(unsigned long addr, bool ool) kuap_unlock_all_ool(); } -static inline void kuap_save_and_lock(struct pt_regs *regs) +static inline void __kuap_lock(void) { - unsigned long kuap = current->thread.kuap; +} - if (kuap_is_disabled()) - return; +static inline void __kuap_save_and_lock(struct pt_regs *regs) +{ + unsigned long kuap = current->thread.kuap; regs->kuap = kuap; if (unlikely(kuap == KUAP_NONE)) return; current->thread.kuap = KUAP_NONE; - kuap_lock(kuap, false); + kuap_lock_addr(kuap, false); } static inline void kuap_user_restore(struct pt_regs *regs) { } -static inline void kuap_kernel_restore(struct pt_regs *regs, unsigned long kuap) +static inline void __kuap_kernel_restore(struct pt_regs *regs, unsigned long kuap) { - if (kuap_is_disabled()) - return; - if (unlikely(kuap != KUAP_NONE)) { current->thread.kuap = KUAP_NONE; - kuap_lock(kuap, false); + kuap_lock_addr(kuap, false); } if (likely(regs->kuap == KUAP_NONE)) @@ -149,29 +112,18 @@ static inline void kuap_kernel_restore(struct pt_regs *regs, unsigned long kuap) kuap_unlock(regs->kuap, false); } -static inline unsigned long kuap_get_and_assert_locked(void) +static inline unsigned long __kuap_get_and_assert_locked(void) { unsigned long kuap = current->thread.kuap; - if (kuap_is_disabled()) - return KUAP_NONE; - WARN_ON_ONCE(IS_ENABLED(CONFIG_PPC_KUAP_DEBUG) && kuap != KUAP_NONE); return kuap; } -static inline void kuap_assert_locked(void) -{ - kuap_get_and_assert_locked(); -} - -static __always_inline void allow_user_access(void __user *to, const void __user *from, - u32 size, unsigned long dir) +static __always_inline void __allow_user_access(void __user *to, const void __user *from, + u32 size, unsigned long dir) { - if (kuap_is_disabled()) - return; - BUILD_BUG_ON(!__builtin_constant_p(dir)); if (!(dir & KUAP_WRITE)) @@ -181,42 +133,33 @@ static __always_inline void allow_user_access(void __user *to, const void __user kuap_unlock_one((__force u32)to); } -static __always_inline void prevent_user_access(unsigned long dir) +static __always_inline void __prevent_user_access(unsigned long dir) { u32 kuap = current->thread.kuap; - if (kuap_is_disabled()) - return; - BUILD_BUG_ON(!__builtin_constant_p(dir)); if (!(dir & KUAP_WRITE)) return; current->thread.kuap = KUAP_NONE; - kuap_lock(kuap, true); + kuap_lock_addr(kuap, true); } -static inline unsigned long prevent_user_access_return(void) +static inline unsigned long __prevent_user_access_return(void) { unsigned long flags = current->thread.kuap; - if (kuap_is_disabled()) - return KUAP_NONE; - if (flags != KUAP_NONE) { current->thread.kuap = KUAP_NONE; - kuap_lock(flags, true); + kuap_lock_addr(flags, true); } return flags; } -static inline void restore_user_access(unsigned long flags) +static inline void __restore_user_access(unsigned long flags) { - if (kuap_is_disabled()) - return; - if (flags != KUAP_NONE) { current->thread.kuap = flags; kuap_unlock(flags, true); @@ -224,13 +167,10 @@ static inline void restore_user_access(unsigned long flags) } static inline bool -bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write) +__bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write) { unsigned long kuap = regs->kuap; - if (kuap_is_disabled()) - return false; - if (!is_write || kuap == KUAP_ALL) return false; if (kuap == KUAP_NONE) diff --git a/arch/powerpc/include/asm/book3s/32/mmu-hash.h b/arch/powerpc/include/asm/book3s/32/mmu-hash.h index f5be185cbdf8..7be27862329f 100644 --- a/arch/powerpc/include/asm/book3s/32/mmu-hash.h +++ b/arch/powerpc/include/asm/book3s/32/mmu-hash.h @@ -64,7 +64,82 @@ struct ppc_bat { #define SR_KP 0x20000000 /* User key */ #define SR_KS 0x40000000 /* Supervisor key */ -#ifndef __ASSEMBLY__ +#ifdef __ASSEMBLY__ + +#include <asm/asm-offsets.h> + +.macro uus_addi sr reg1 reg2 imm + .if NUM_USER_SEGMENTS > \sr + addi \reg1,\reg2,\imm + .endif +.endm + +.macro uus_mtsr sr reg1 + .if NUM_USER_SEGMENTS > \sr + mtsr \sr, \reg1 + .endif +.endm + +/* + * This isync() shouldn't be necessary as the kernel is not excepted to run + * any instruction in userspace soon after the update of segments and 'rfi' + * instruction is used to return to userspace, but hash based cores + * (at least G3) seem to exhibit a random behaviour when the 'isync' is not + * there. 603 cores don't have this behaviour so don't do the 'isync' as it + * saves several CPU cycles. + */ +.macro uus_isync +#ifdef CONFIG_PPC_BOOK3S_604 +BEGIN_MMU_FTR_SECTION + isync +END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE) +#endif +.endm + +.macro update_user_segments_by_4 tmp1 tmp2 tmp3 tmp4 + uus_addi 1, \tmp2, \tmp1, 0x111 + uus_addi 2, \tmp3, \tmp1, 0x222 + uus_addi 3, \tmp4, \tmp1, 0x333 + + uus_mtsr 0, \tmp1 + uus_mtsr 1, \tmp2 + uus_mtsr 2, \tmp3 + uus_mtsr 3, \tmp4 + + uus_addi 4, \tmp1, \tmp1, 0x444 + uus_addi 5, \tmp2, \tmp2, 0x444 + uus_addi 6, \tmp3, \tmp3, 0x444 + uus_addi 7, \tmp4, \tmp4, 0x444 + + uus_mtsr 4, \tmp1 + uus_mtsr 5, \tmp2 + uus_mtsr 6, \tmp3 + uus_mtsr 7, \tmp4 + + uus_addi 8, \tmp1, \tmp1, 0x444 + uus_addi 9, \tmp2, \tmp2, 0x444 + uus_addi 10, \tmp3, \tmp3, 0x444 + uus_addi 11, \tmp4, \tmp4, 0x444 + + uus_mtsr 8, \tmp1 + uus_mtsr 9, \tmp2 + uus_mtsr 10, \tmp3 + uus_mtsr 11, \tmp4 + + uus_addi 12, \tmp1, \tmp1, 0x444 + uus_addi 13, \tmp2, \tmp2, 0x444 + uus_addi 14, \tmp3, \tmp3, 0x444 + uus_addi 15, \tmp4, \tmp4, 0x444 + + uus_mtsr 12, \tmp1 + uus_mtsr 13, \tmp2 + uus_mtsr 14, \tmp3 + uus_mtsr 15, \tmp4 + + uus_isync +.endm + +#else /* * This macro defines the mapping from contexts to VSIDs (virtual @@ -100,9 +175,14 @@ struct hash_pte { typedef struct { unsigned long id; + unsigned long sr0; void __user *vdso; } mm_context_t; +#ifdef CONFIG_PPC_KUEP +#define INIT_MM_CONTEXT(mm) .context.sr0 = SR_NX +#endif + void update_bats(void); static inline void cleanup_cpu_mmu_context(void) { } diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h index 674fe0e890dc..a7a0572f3846 100644 --- a/arch/powerpc/include/asm/book3s/64/hash.h +++ b/arch/powerpc/include/asm/book3s/64/hash.h @@ -99,10 +99,6 @@ * Defines the address of the vmemap area, in its own region on * hash table CPUs. */ -#ifdef CONFIG_PPC_MM_SLICES -#define HAVE_ARCH_UNMAPPED_AREA -#define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN -#endif /* CONFIG_PPC_MM_SLICES */ /* PTEIDX nibble */ #define _PTEIDX_SECONDARY 0x8 diff --git a/arch/powerpc/include/asm/book3s/64/kup.h b/arch/powerpc/include/asm/book3s/64/kup.h index 170339969b7c..69fcf63eec94 100644 --- a/arch/powerpc/include/asm/book3s/64/kup.h +++ b/arch/powerpc/include/asm/book3s/64/kup.h @@ -229,6 +229,11 @@ static inline u64 current_thread_iamr(void) #ifdef CONFIG_PPC_KUAP +static __always_inline bool kuap_is_disabled(void) +{ + return !mmu_has_feature(MMU_FTR_BOOK3S_KUAP); +} + static inline void kuap_user_restore(struct pt_regs *regs) { bool restore_amr = false, restore_iamr = false; @@ -268,40 +273,38 @@ static inline void kuap_user_restore(struct pt_regs *regs) */ } -static inline void kuap_kernel_restore(struct pt_regs *regs, - unsigned long amr) +static inline void __kuap_kernel_restore(struct pt_regs *regs, unsigned long amr) { - if (mmu_has_feature(MMU_FTR_BOOK3S_KUAP)) { - if (unlikely(regs->amr != amr)) { - isync(); - mtspr(SPRN_AMR, regs->amr); - /* - * No isync required here because we are about to rfi - * back to previous context before any user accesses - * would be made, which is a CSI. - */ - } - } + if (likely(regs->amr == amr)) + return; + + isync(); + mtspr(SPRN_AMR, regs->amr); /* + * No isync required here because we are about to rfi + * back to previous context before any user accesses + * would be made, which is a CSI. + * * No need to restore IAMR when returning to kernel space. */ } -static inline unsigned long kuap_get_and_assert_locked(void) +static inline unsigned long __kuap_get_and_assert_locked(void) { - if (mmu_has_feature(MMU_FTR_BOOK3S_KUAP)) { - unsigned long amr = mfspr(SPRN_AMR); - if (IS_ENABLED(CONFIG_PPC_KUAP_DEBUG)) /* kuap_check_amr() */ - WARN_ON_ONCE(amr != AMR_KUAP_BLOCKED); - return amr; - } - return 0; + unsigned long amr = mfspr(SPRN_AMR); + + if (IS_ENABLED(CONFIG_PPC_KUAP_DEBUG)) /* kuap_check_amr() */ + WARN_ON_ONCE(amr != AMR_KUAP_BLOCKED); + return amr; } -static inline void kuap_assert_locked(void) +/* Do nothing, book3s/64 does that in ASM */ +static inline void __kuap_lock(void) +{ +} + +static inline void __kuap_save_and_lock(struct pt_regs *regs) { - if (IS_ENABLED(CONFIG_PPC_KUAP_DEBUG) && mmu_has_feature(MMU_FTR_BOOK3S_KUAP)) - WARN_ON_ONCE(mfspr(SPRN_AMR) != AMR_KUAP_BLOCKED); } /* @@ -339,11 +342,8 @@ static inline void set_kuap(unsigned long value) isync(); } -static inline bool bad_kuap_fault(struct pt_regs *regs, unsigned long address, - bool is_write) +static inline bool __bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write) { - if (!mmu_has_feature(MMU_FTR_BOOK3S_KUAP)) - return false; /* * For radix this will be a storage protection fault (DSISR_PROTFAULT). * For hash this will be a key fault (DSISR_KEYFAULT) diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h index 3004f3323144..21f780942911 100644 --- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h +++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h @@ -523,8 +523,14 @@ void slb_save_contents(struct slb_entry *slb_ptr); void slb_dump_contents(struct slb_entry *slb_ptr); extern void slb_vmalloc_update(void); -extern void slb_set_size(u16 size); void preload_new_slb_context(unsigned long start, unsigned long sp); + +#ifdef CONFIG_PPC_64S_HASH_MMU +void slb_set_size(u16 size); +#else +static inline void slb_set_size(u16 size) { } +#endif + #endif /* __ASSEMBLY__ */ /* diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h index c02f42d1031e..ba5b1becf518 100644 --- a/arch/powerpc/include/asm/book3s/64/mmu.h +++ b/arch/powerpc/include/asm/book3s/64/mmu.h @@ -4,6 +4,12 @@ #include <asm/page.h> +#ifdef CONFIG_HUGETLB_PAGE +#define HAVE_ARCH_HUGETLB_UNMAPPED_AREA +#endif +#define HAVE_ARCH_UNMAPPED_AREA +#define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN + #ifndef __ASSEMBLY__ /* * Page size definition @@ -62,6 +68,9 @@ extern struct patb_entry *partition_tb; #define PRTS_MASK 0x1f /* process table size field */ #define PRTB_MASK 0x0ffffffffffff000UL +/* Number of supported LPID bits */ +extern unsigned int mmu_lpid_bits; + /* Number of supported PID bits */ extern unsigned int mmu_pid_bits; @@ -76,10 +85,8 @@ extern unsigned long __ro_after_init radix_mem_block_size; #define PRTB_SIZE_SHIFT (mmu_pid_bits + 4) #define PRTB_ENTRIES (1ul << mmu_pid_bits) -/* - * Power9 currently only support 64K partition table size. - */ -#define PATB_SIZE_SHIFT 16 +#define PATB_SIZE_SHIFT (mmu_lpid_bits + 4) +#define PATB_ENTRIES (1ul << mmu_lpid_bits) typedef unsigned long mm_context_id_t; struct spinlock; @@ -98,7 +105,9 @@ typedef struct { * from EA and new context ids to build the new VAs. */ mm_context_id_t id; +#ifdef CONFIG_PPC_64S_HASH_MMU mm_context_id_t extended_id[TASK_SIZE_USER64/TASK_CONTEXT_SIZE]; +#endif }; /* Number of bits in the mm_cpumask */ @@ -110,7 +119,9 @@ typedef struct { /* Number of user space windows opened in process mm_context */ atomic_t vas_windows; +#ifdef CONFIG_PPC_64S_HASH_MMU struct hash_mm_context *hash_context; +#endif void __user *vdso; /* @@ -133,6 +144,7 @@ typedef struct { #endif } mm_context_t; +#ifdef CONFIG_PPC_64S_HASH_MMU static inline u16 mm_ctx_user_psize(mm_context_t *ctx) { return ctx->hash_context->user_psize; @@ -193,8 +205,15 @@ static inline struct subpage_prot_table *mm_ctx_subpage_prot(mm_context_t *ctx) extern int mmu_linear_psize; extern int mmu_virtual_psize; extern int mmu_vmalloc_psize; -extern int mmu_vmemmap_psize; extern int mmu_io_psize; +#else /* CONFIG_PPC_64S_HASH_MMU */ +#ifdef CONFIG_PPC_64K_PAGES +#define mmu_virtual_psize MMU_PAGE_64K +#else +#define mmu_virtual_psize MMU_PAGE_4K +#endif +#endif +extern int mmu_vmemmap_psize; /* MMU initialization */ void mmu_early_init_devtree(void); @@ -233,12 +252,13 @@ static inline void setup_initial_memory_limit(phys_addr_t first_memblock_base, * know which translations we will pick. Hence go with hash * restrictions. */ - return hash__setup_initial_memory_limit(first_memblock_base, - first_memblock_size); + if (!early_radix_enabled()) + hash__setup_initial_memory_limit(first_memblock_base, + first_memblock_size); } #ifdef CONFIG_PPC_PSERIES -extern void radix_init_pseries(void); +void __init radix_init_pseries(void); #else static inline void radix_init_pseries(void) { } #endif @@ -255,6 +275,7 @@ static inline void radix_init_pseries(void) { } void cleanup_cpu_mmu_context(void); #endif +#ifdef CONFIG_PPC_64S_HASH_MMU static inline int get_user_context(mm_context_t *ctx, unsigned long ea) { int index = ea >> MAX_EA_BITS_PER_CONTEXT; @@ -274,6 +295,7 @@ static inline unsigned long get_user_vsid(mm_context_t *ctx, return get_vsid(context, ea, ssize); } +#endif #endif /* __ASSEMBLY__ */ #endif /* _ASM_POWERPC_BOOK3S_64_MMU_H_ */ diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h b/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h index 3b95769739c7..8b762f282190 100644 --- a/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h +++ b/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h @@ -112,8 +112,14 @@ static inline void hash__flush_tlb_kernel_range(unsigned long start, struct mmu_gather; extern void hash__tlb_flush(struct mmu_gather *tlb); +void flush_tlb_pmd_range(struct mm_struct *mm, pmd_t *pmd, unsigned long addr); + +#ifdef CONFIG_PPC_64S_HASH_MMU /* Private function for use by PCI IO mapping code */ extern void __flush_hash_table_range(unsigned long start, unsigned long end); extern void flush_tlb_pmd_range(struct mm_struct *mm, pmd_t *pmd, unsigned long addr); +#else +static inline void __flush_hash_table_range(unsigned long start, unsigned long end) { } +#endif #endif /* _ASM_POWERPC_BOOK3S_64_TLBFLUSH_HASH_H */ diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush.h b/arch/powerpc/include/asm/book3s/64/tlbflush.h index 215973b4cb26..d2e80f178b6d 100644 --- a/arch/powerpc/include/asm/book3s/64/tlbflush.h +++ b/arch/powerpc/include/asm/book3s/64/tlbflush.h @@ -14,7 +14,6 @@ enum { TLB_INVAL_SCOPE_LPID = 1, /* invalidate TLBs for current LPID */ }; -#ifdef CONFIG_PPC_NATIVE static inline void tlbiel_all(void) { /* @@ -30,9 +29,6 @@ static inline void tlbiel_all(void) else hash__tlbiel_all(TLB_INVAL_SCOPE_GLOBAL); } -#else -static inline void tlbiel_all(void) { BUG(); } -#endif static inline void tlbiel_all_lpid(bool radix) { diff --git a/arch/powerpc/include/asm/book3s/pgtable.h b/arch/powerpc/include/asm/book3s/pgtable.h index ad130e15a126..e8269434ecbe 100644 --- a/arch/powerpc/include/asm/book3s/pgtable.h +++ b/arch/powerpc/include/asm/book3s/pgtable.h @@ -25,6 +25,7 @@ extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, unsigned long size, pgprot_t vma_prot); #define __HAVE_PHYS_MEM_ACCESS_PROT +#if defined(CONFIG_PPC32) || defined(CONFIG_PPC_64S_HASH_MMU) /* * This gets called at the end of handling a page fault, when * the kernel has put a new PTE into the page table for the process. @@ -35,6 +36,9 @@ extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, * waiting for the inevitable extra hash-table miss exception. */ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *ptep); +#else +static inline void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *ptep) {} +#endif #endif /* __ASSEMBLY__ */ #endif diff --git a/arch/powerpc/include/asm/btext.h b/arch/powerpc/include/asm/btext.h index 461b0f193864..860f8868f11e 100644 --- a/arch/powerpc/include/asm/btext.h +++ b/arch/powerpc/include/asm/btext.h @@ -23,12 +23,12 @@ extern void btext_unmap(void); extern void btext_drawchar(char c); extern void btext_drawstring(const char *str); -extern void btext_drawhex(unsigned long v); -extern void btext_drawtext(const char *c, unsigned int len); +void __init btext_drawhex(unsigned long v); +void __init btext_drawtext(const char *c, unsigned int len); -extern void btext_clearscreen(void); -extern void btext_flushscreen(void); -extern void btext_flushline(void); +void __init btext_clearscreen(void); +void __init btext_flushscreen(void); +void __init btext_flushline(void); #endif /* __KERNEL__ */ #endif /* __PPC_BTEXT_H */ diff --git a/arch/powerpc/include/asm/code-patching.h b/arch/powerpc/include/asm/code-patching.h index 4ba834599c4d..e26080539c31 100644 --- a/arch/powerpc/include/asm/code-patching.h +++ b/arch/powerpc/include/asm/code-patching.h @@ -24,20 +24,20 @@ bool is_offset_in_branch_range(long offset); bool is_offset_in_cond_branch_range(long offset); -int create_branch(struct ppc_inst *instr, const u32 *addr, +int create_branch(ppc_inst_t *instr, const u32 *addr, unsigned long target, int flags); -int create_cond_branch(struct ppc_inst *instr, const u32 *addr, +int create_cond_branch(ppc_inst_t *instr, const u32 *addr, unsigned long target, int flags); int patch_branch(u32 *addr, unsigned long target, int flags); -int patch_instruction(u32 *addr, struct ppc_inst instr); -int raw_patch_instruction(u32 *addr, struct ppc_inst instr); +int patch_instruction(u32 *addr, ppc_inst_t instr); +int raw_patch_instruction(u32 *addr, ppc_inst_t instr); static inline unsigned long patch_site_addr(s32 *site) { return (unsigned long)site + *site; } -static inline int patch_instruction_site(s32 *site, struct ppc_inst instr) +static inline int patch_instruction_site(s32 *site, ppc_inst_t instr) { return patch_instruction((u32 *)patch_site_addr(site), instr); } @@ -58,18 +58,26 @@ static inline int modify_instruction_site(s32 *site, unsigned int clr, unsigned return modify_instruction((unsigned int *)patch_site_addr(site), clr, set); } -int instr_is_relative_branch(struct ppc_inst instr); -int instr_is_relative_link_branch(struct ppc_inst instr); +static inline unsigned int branch_opcode(ppc_inst_t instr) +{ + return ppc_inst_primary_opcode(instr) & 0x3F; +} + +static inline int instr_is_branch_iform(ppc_inst_t instr) +{ + return branch_opcode(instr) == 18; +} + +static inline int instr_is_branch_bform(ppc_inst_t instr) +{ + return branch_opcode(instr) == 16; +} + +int instr_is_relative_branch(ppc_inst_t instr); +int instr_is_relative_link_branch(ppc_inst_t instr); unsigned long branch_target(const u32 *instr); -int translate_branch(struct ppc_inst *instr, const u32 *dest, const u32 *src); -extern bool is_conditional_branch(struct ppc_inst instr); -#ifdef CONFIG_PPC_BOOK3E_64 -void __patch_exception(int exc, unsigned long addr); -#define patch_exception(exc, name) do { \ - extern unsigned int name; \ - __patch_exception((exc), (unsigned long)&name); \ -} while (0) -#endif +int translate_branch(ppc_inst_t *instr, const u32 *dest, const u32 *src); +bool is_conditional_branch(ppc_inst_t instr); #define OP_RT_RA_MASK 0xffff0000UL #define LIS_R2 (PPC_RAW_LIS(_R2, 0)) diff --git a/arch/powerpc/include/asm/cpm2.h b/arch/powerpc/include/asm/cpm2.h index bda45788cfcc..9ee192a6c5d7 100644 --- a/arch/powerpc/include/asm/cpm2.h +++ b/arch/powerpc/include/asm/cpm2.h @@ -1133,8 +1133,8 @@ enum cpm_clk { CPM_CLK_DUMMY }; -extern int cpm2_clk_setup(enum cpm_clk_target target, int clock, int mode); -extern int cpm2_smc_clk_setup(enum cpm_clk_target target, int clock); +int __init cpm2_clk_setup(enum cpm_clk_target target, int clock, int mode); +int __init cpm2_smc_clk_setup(enum cpm_clk_target target, int clock); #define CPM_PIN_INPUT 0 #define CPM_PIN_OUTPUT 1 @@ -1143,7 +1143,7 @@ extern int cpm2_smc_clk_setup(enum cpm_clk_target target, int clock); #define CPM_PIN_GPIO 4 #define CPM_PIN_OPENDRAIN 8 -void cpm2_set_pin(int port, int pin, int flags); +void __init cpm2_set_pin(int port, int pin, int flags); #endif /* __CPM2__ */ #endif /* __KERNEL__ */ diff --git a/arch/powerpc/include/asm/cpuidle.h b/arch/powerpc/include/asm/cpuidle.h index 9844b3ded187..0cce5dc7fb1c 100644 --- a/arch/powerpc/include/asm/cpuidle.h +++ b/arch/powerpc/include/asm/cpuidle.h @@ -85,7 +85,7 @@ extern struct pnv_idle_states_t *pnv_idle_states; extern int nr_pnv_idle_states; unsigned long pnv_cpu_offline(unsigned int cpu); -int validate_psscr_val_mask(u64 *psscr_val, u64 *psscr_mask, u32 flags); +int __init validate_psscr_val_mask(u64 *psscr_val, u64 *psscr_mask, u32 flags); static inline void report_invalid_psscr_val(u64 psscr_val, int err) { switch (err) { diff --git a/arch/powerpc/include/asm/cputhreads.h b/arch/powerpc/include/asm/cputhreads.h index b167186aaee4..f26c430f3982 100644 --- a/arch/powerpc/include/asm/cputhreads.h +++ b/arch/powerpc/include/asm/cputhreads.h @@ -32,44 +32,11 @@ extern cpumask_t threads_core_mask; #define threads_core_mask (*get_cpu_mask(0)) #endif -/* cpu_thread_mask_to_cores - Return a cpumask of one per cores - * hit by the argument - * - * @threads: a cpumask of online threads - * - * This function returns a cpumask which will have one online cpu's - * bit set for each core that has at least one thread set in the argument. - * - * This can typically be used for things like IPI for tlb invalidations - * since those need to be done only once per core/TLB - */ -static inline cpumask_t cpu_thread_mask_to_cores(const struct cpumask *threads) -{ - cpumask_t tmp, res; - int i, cpu; - - cpumask_clear(&res); - for (i = 0; i < NR_CPUS; i += threads_per_core) { - cpumask_shift_left(&tmp, &threads_core_mask, i); - if (cpumask_intersects(threads, &tmp)) { - cpu = cpumask_next_and(-1, &tmp, cpu_online_mask); - if (cpu < nr_cpu_ids) - cpumask_set_cpu(cpu, &res); - } - } - return res; -} - static inline int cpu_nr_cores(void) { return nr_cpu_ids >> threads_shift; } -static inline cpumask_t cpu_online_cores_map(void) -{ - return cpu_thread_mask_to_cores(cpu_online_mask); -} - #ifdef CONFIG_SMP int cpu_core_index_of_thread(int cpu); int cpu_first_thread_of_core(int core); diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h index b1a5bba2e0b9..bd513fd49be9 100644 --- a/arch/powerpc/include/asm/eeh.h +++ b/arch/powerpc/include/asm/eeh.h @@ -460,7 +460,7 @@ static inline void eeh_readsl(const volatile void __iomem *addr, void * buf, } -void eeh_cache_debugfs_init(void); +void __init eeh_cache_debugfs_init(void); #endif /* CONFIG_PPC64 */ #endif /* __KERNEL__ */ diff --git a/arch/powerpc/include/asm/exception-64e.h b/arch/powerpc/include/asm/exception-64e.h index 40cdcb2fb057..b1ef1e92c34a 100644 --- a/arch/powerpc/include/asm/exception-64e.h +++ b/arch/powerpc/include/asm/exception-64e.h @@ -149,6 +149,10 @@ exc_##label##_book3e: addi r11,r13,PACA_EXTLB; \ TLB_MISS_RESTORE(r11) +#ifndef __ASSEMBLY__ +extern unsigned int interrupt_base_book3e; +#endif + #define SET_IVOR(vector_number, vector_offset) \ LOAD_REG_ADDR(r3,interrupt_base_book3e);\ ori r3,r3,vector_offset@l; \ diff --git a/arch/powerpc/include/asm/fadump-internal.h b/arch/powerpc/include/asm/fadump-internal.h index 8d61c8f3fec4..52189928ec08 100644 --- a/arch/powerpc/include/asm/fadump-internal.h +++ b/arch/powerpc/include/asm/fadump-internal.h @@ -137,10 +137,10 @@ struct fadump_ops { }; /* Helper functions */ -s32 fadump_setup_cpu_notes_buf(u32 num_cpus); +s32 __init fadump_setup_cpu_notes_buf(u32 num_cpus); void fadump_free_cpu_notes_buf(void); -u32 *fadump_regs_to_elf_notes(u32 *buf, struct pt_regs *regs); -void fadump_update_elfcore_header(char *bufp); +u32 *__init fadump_regs_to_elf_notes(u32 *buf, struct pt_regs *regs); +void __init fadump_update_elfcore_header(char *bufp); bool is_fadump_boot_mem_contiguous(void); bool is_fadump_reserved_mem_contiguous(void); diff --git a/arch/powerpc/include/asm/firmware.h b/arch/powerpc/include/asm/firmware.h index 97a3bd9ffeb9..9b702d2b80fb 100644 --- a/arch/powerpc/include/asm/firmware.h +++ b/arch/powerpc/include/asm/firmware.h @@ -80,8 +80,6 @@ enum { FW_FEATURE_POWERNV_ALWAYS = 0, FW_FEATURE_PS3_POSSIBLE = FW_FEATURE_LPAR | FW_FEATURE_PS3_LV1, FW_FEATURE_PS3_ALWAYS = FW_FEATURE_LPAR | FW_FEATURE_PS3_LV1, - FW_FEATURE_NATIVE_POSSIBLE = 0, - FW_FEATURE_NATIVE_ALWAYS = 0, FW_FEATURE_POSSIBLE = #ifdef CONFIG_PPC_PSERIES FW_FEATURE_PSERIES_POSSIBLE | @@ -92,9 +90,6 @@ enum { #ifdef CONFIG_PPC_PS3 FW_FEATURE_PS3_POSSIBLE | #endif -#ifdef CONFIG_PPC_NATIVE - FW_FEATURE_NATIVE_ALWAYS | -#endif 0, FW_FEATURE_ALWAYS = #ifdef CONFIG_PPC_PSERIES @@ -106,9 +101,6 @@ enum { #ifdef CONFIG_PPC_PS3 FW_FEATURE_PS3_ALWAYS & #endif -#ifdef CONFIG_PPC_NATIVE - FW_FEATURE_NATIVE_ALWAYS & -#endif FW_FEATURE_POSSIBLE, #else /* CONFIG_PPC64 */ diff --git a/arch/powerpc/include/asm/floppy.h b/arch/powerpc/include/asm/floppy.h index 7af9a68fd949..f8ce178b43b7 100644 --- a/arch/powerpc/include/asm/floppy.h +++ b/arch/powerpc/include/asm/floppy.h @@ -134,17 +134,19 @@ static int hard_dma_setup(char *addr, unsigned long size, int mode, int io) int dir; doing_vdma = 0; - dir = (mode == DMA_MODE_READ) ? PCI_DMA_FROMDEVICE : PCI_DMA_TODEVICE; + dir = (mode == DMA_MODE_READ) ? DMA_FROM_DEVICE : DMA_TO_DEVICE; if (bus_addr && (addr != prev_addr || size != prev_size || dir != prev_dir)) { /* different from last time -- unmap prev */ - pci_unmap_single(isa_bridge_pcidev, bus_addr, prev_size, prev_dir); + dma_unmap_single(&isa_bridge_pcidev->dev, bus_addr, prev_size, + prev_dir); bus_addr = 0; } if (!bus_addr) /* need to map it */ - bus_addr = pci_map_single(isa_bridge_pcidev, addr, size, dir); + bus_addr = dma_map_single(&isa_bridge_pcidev->dev, addr, size, + dir); /* remember this one as prev */ prev_addr = addr; diff --git a/arch/powerpc/include/asm/head-64.h b/arch/powerpc/include/asm/head-64.h index 242204e12993..d73153b0275d 100644 --- a/arch/powerpc/include/asm/head-64.h +++ b/arch/powerpc/include/asm/head-64.h @@ -98,13 +98,9 @@ start_text: . = sname##_len; #define USE_FIXED_SECTION(sname) \ - fs_label = start_##sname; \ - fs_start = sname##_start; \ use_ftsec sname; #define USE_TEXT_SECTION() \ - fs_label = start_text; \ - fs_start = text_start; \ .text #define CLOSE_FIXED_SECTION(sname) \ @@ -161,13 +157,15 @@ name: * - ABS_ADDR is used to find the absolute address of any symbol, from within * a fixed section. */ -#define DEFINE_FIXED_SYMBOL(label) \ - label##_absolute = (label - fs_label + fs_start) +// define label as being _in_ sname +#define DEFINE_FIXED_SYMBOL(label, sname) \ + label##_absolute = (label - start_ ## sname + sname ## _start) #define FIXED_SYMBOL_ABS_ADDR(label) \ (label##_absolute) -#define ABS_ADDR(label) (label - fs_label + fs_start) +// find label from _within_ sname +#define ABS_ADDR(label, sname) (label - start_ ## sname + sname ## _start) #endif /* __ASSEMBLY__ */ diff --git a/arch/powerpc/include/asm/hugetlb.h b/arch/powerpc/include/asm/hugetlb.h index f18c543bc01d..962708fa1017 100644 --- a/arch/powerpc/include/asm/hugetlb.h +++ b/arch/powerpc/include/asm/hugetlb.h @@ -15,7 +15,7 @@ extern bool hugetlb_disabled; -void hugetlbpage_init_default(void); +void __init hugetlbpage_init_default(void); int slice_is_hugepage_only_range(struct mm_struct *mm, unsigned long addr, unsigned long len); diff --git a/arch/powerpc/include/asm/hw_breakpoint.h b/arch/powerpc/include/asm/hw_breakpoint.h index abebfbee5b1c..84d39fd42f71 100644 --- a/arch/powerpc/include/asm/hw_breakpoint.h +++ b/arch/powerpc/include/asm/hw_breakpoint.h @@ -10,7 +10,6 @@ #define _PPC_BOOK3S_64_HW_BREAKPOINT_H #include <asm/cpu_has_feature.h> -#include <asm/inst.h> #ifdef __KERNEL__ struct arch_hw_breakpoint { @@ -56,11 +55,11 @@ static inline int nr_wp_slots(void) return cpu_has_feature(CPU_FTR_DAWR1) ? 2 : 1; } -bool wp_check_constraints(struct pt_regs *regs, struct ppc_inst instr, +bool wp_check_constraints(struct pt_regs *regs, ppc_inst_t instr, unsigned long ea, int type, int size, struct arch_hw_breakpoint *info); -void wp_get_instr_detail(struct pt_regs *regs, struct ppc_inst *instr, +void wp_get_instr_detail(struct pt_regs *regs, ppc_inst_t *instr, int *type, int *size, unsigned long *ea); #ifdef CONFIG_HAVE_HW_BREAKPOINT diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h index 21cc571ea9c2..a58fb4aa6c81 100644 --- a/arch/powerpc/include/asm/hw_irq.h +++ b/arch/powerpc/include/asm/hw_irq.h @@ -61,7 +61,7 @@ static inline void __hard_irq_enable(void) { - if (IS_ENABLED(CONFIG_BOOKE) || IS_ENABLED(CONFIG_40x)) + if (IS_ENABLED(CONFIG_BOOKE_OR_40x)) wrtee(MSR_EE); else if (IS_ENABLED(CONFIG_PPC_8xx)) wrtspr(SPRN_EIE); @@ -73,7 +73,7 @@ static inline void __hard_irq_enable(void) static inline void __hard_irq_disable(void) { - if (IS_ENABLED(CONFIG_BOOKE) || IS_ENABLED(CONFIG_40x)) + if (IS_ENABLED(CONFIG_BOOKE_OR_40x)) wrtee(0); else if (IS_ENABLED(CONFIG_PPC_8xx)) wrtspr(SPRN_EID); @@ -85,7 +85,7 @@ static inline void __hard_irq_disable(void) static inline void __hard_EE_RI_disable(void) { - if (IS_ENABLED(CONFIG_BOOKE) || IS_ENABLED(CONFIG_40x)) + if (IS_ENABLED(CONFIG_BOOKE_OR_40x)) wrtee(0); else if (IS_ENABLED(CONFIG_PPC_8xx)) wrtspr(SPRN_NRI); @@ -97,7 +97,7 @@ static inline void __hard_EE_RI_disable(void) static inline void __hard_RI_enable(void) { - if (IS_ENABLED(CONFIG_BOOKE) || IS_ENABLED(CONFIG_40x)) + if (IS_ENABLED(CONFIG_BOOKE_OR_40x)) return; if (IS_ENABLED(CONFIG_PPC_8xx)) @@ -224,6 +224,42 @@ static inline bool arch_irqs_disabled(void) return arch_irqs_disabled_flags(arch_local_save_flags()); } +static inline void set_pmi_irq_pending(void) +{ + /* + * Invoked from PMU callback functions to set PMI bit in the paca. + * This has to be called with irq's disabled (via hard_irq_disable()). + */ + if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) + WARN_ON_ONCE(mfmsr() & MSR_EE); + + get_paca()->irq_happened |= PACA_IRQ_PMI; +} + +static inline void clear_pmi_irq_pending(void) +{ + /* + * Invoked from PMU callback functions to clear the pending PMI bit + * in the paca. + */ + if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) + WARN_ON_ONCE(mfmsr() & MSR_EE); + + get_paca()->irq_happened &= ~PACA_IRQ_PMI; +} + +static inline bool pmi_irq_pending(void) +{ + /* + * Invoked from PMU callback functions to check if there is a pending + * PMI bit in the paca. + */ + if (get_paca()->irq_happened & PACA_IRQ_PMI) + return true; + + return false; +} + #ifdef CONFIG_PPC_BOOK3S /* * To support disabling and enabling of irq with PMI, set of @@ -306,18 +342,57 @@ static inline bool lazy_irq_pending_nocheck(void) return __lazy_irq_pending(local_paca->irq_happened); } +bool power_pmu_wants_prompt_pmi(void); + +/* + * This is called by asynchronous interrupts to check whether to + * conditionally re-enable hard interrupts after having cleared + * the source of the interrupt. They are kept disabled if there + * is a different soft-masked interrupt pending that requires hard + * masking. + */ +static inline bool should_hard_irq_enable(void) +{ +#ifdef CONFIG_PPC_IRQ_SOFT_MASK_DEBUG + WARN_ON(irq_soft_mask_return() == IRQS_ENABLED); + WARN_ON(mfmsr() & MSR_EE); +#endif +#ifdef CONFIG_PERF_EVENTS + /* + * If the PMU is not running, there is not much reason to enable + * MSR[EE] in irq handlers because any interrupts would just be + * soft-masked. + * + * TODO: Add test for 64e + */ + if (IS_ENABLED(CONFIG_PPC_BOOK3S_64) && !power_pmu_wants_prompt_pmi()) + return false; + + if (get_paca()->irq_happened & PACA_IRQ_MUST_HARD_MASK) + return false; + + return true; +#else + return false; +#endif +} + /* - * This is called by asynchronous interrupts to conditionally - * re-enable hard interrupts after having cleared the source - * of the interrupt. They are kept disabled if there is a different - * soft-masked interrupt pending that requires hard masking. + * Do the hard enabling, only call this if should_hard_irq_enable is true. */ -static inline void may_hard_irq_enable(void) +static inline void do_hard_irq_enable(void) { - if (!(get_paca()->irq_happened & PACA_IRQ_MUST_HARD_MASK)) { - get_paca()->irq_happened &= ~PACA_IRQ_HARD_DIS; - __hard_irq_enable(); - } +#ifdef CONFIG_PPC_IRQ_SOFT_MASK_DEBUG + WARN_ON(irq_soft_mask_return() == IRQS_ENABLED); + WARN_ON(get_paca()->irq_happened & PACA_IRQ_MUST_HARD_MASK); + WARN_ON(mfmsr() & MSR_EE); +#endif + /* + * This allows PMI interrupts (and watchdog soft-NMIs) through. + * There is no other reason to enable this way. + */ + get_paca()->irq_happened &= ~PACA_IRQ_HARD_DIS; + __hard_irq_enable(); } static inline bool arch_irq_disabled_regs(struct pt_regs *regs) @@ -398,7 +473,7 @@ static inline bool arch_irq_disabled_regs(struct pt_regs *regs) return !(regs->msr & MSR_EE); } -static inline bool may_hard_irq_enable(void) +static inline bool should_hard_irq_enable(void) { return false; } @@ -408,6 +483,10 @@ static inline void do_hard_irq_enable(void) BUILD_BUG(); } +static inline void clear_pmi_irq_pending(void) { } +static inline void set_pmi_irq_pending(void) { } +static inline bool pmi_irq_pending(void) { return false; } + static inline void irq_soft_mask_regs_set_state(struct pt_regs *regs, unsigned long val) { } diff --git a/arch/powerpc/include/asm/i8259.h b/arch/powerpc/include/asm/i8259.h index d7f08ae49e12..75481d363cd8 100644 --- a/arch/powerpc/include/asm/i8259.h +++ b/arch/powerpc/include/asm/i8259.h @@ -7,7 +7,7 @@ extern void i8259_init(struct device_node *node, unsigned long intack_addr); extern unsigned int i8259_irq(void); -extern struct irq_domain *i8259_get_host(void); +struct irq_domain *__init i8259_get_host(void); #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_I8259_H */ diff --git a/arch/powerpc/include/asm/inst.h b/arch/powerpc/include/asm/inst.h index b11c0e2f9639..80b6d74146c6 100644 --- a/arch/powerpc/include/asm/inst.h +++ b/arch/powerpc/include/asm/inst.h @@ -3,20 +3,21 @@ #define _ASM_POWERPC_INST_H #include <asm/ppc-opcode.h> - -#ifdef CONFIG_PPC64 +#include <asm/reg.h> +#include <asm/disassemble.h> +#include <asm/uaccess.h> #define ___get_user_instr(gu_op, dest, ptr) \ ({ \ long __gui_ret; \ u32 __user *__gui_ptr = (u32 __user *)ptr; \ - struct ppc_inst __gui_inst; \ + ppc_inst_t __gui_inst; \ unsigned int __prefix, __suffix; \ \ __chk_user_ptr(ptr); \ __gui_ret = gu_op(__prefix, __gui_ptr); \ if (__gui_ret == 0) { \ - if ((__prefix >> 26) == OP_PREFIX) { \ + if (IS_ENABLED(CONFIG_PPC64) && (__prefix >> 26) == OP_PREFIX) { \ __gui_ret = gu_op(__suffix, __gui_ptr + 1); \ __gui_inst = ppc_inst_prefix(__prefix, __suffix); \ } else { \ @@ -27,13 +28,6 @@ } \ __gui_ret; \ }) -#else /* !CONFIG_PPC64 */ -#define ___get_user_instr(gu_op, dest, ptr) \ -({ \ - __chk_user_ptr(ptr); \ - gu_op((dest).val, (u32 __user *)(ptr)); \ -}) -#endif /* CONFIG_PPC64 */ #define get_user_instr(x, ptr) ___get_user_instr(get_user, x, ptr) @@ -43,44 +37,46 @@ * Instruction data type for POWER */ -struct ppc_inst { - u32 val; -#ifdef CONFIG_PPC64 - u32 suffix; -#endif -} __packed; - -static inline u32 ppc_inst_val(struct ppc_inst x) +#if defined(CONFIG_PPC64) || defined(__CHECKER__) +static inline u32 ppc_inst_val(ppc_inst_t x) { return x.val; } -static inline int ppc_inst_primary_opcode(struct ppc_inst x) +#define ppc_inst(x) ((ppc_inst_t){ .val = (x) }) + +#else +static inline u32 ppc_inst_val(ppc_inst_t x) { - return ppc_inst_val(x) >> 26; + return x; } +#define ppc_inst(x) (x) +#endif -#define ppc_inst(x) ((struct ppc_inst){ .val = (x) }) +static inline int ppc_inst_primary_opcode(ppc_inst_t x) +{ + return ppc_inst_val(x) >> 26; +} #ifdef CONFIG_PPC64 -#define ppc_inst_prefix(x, y) ((struct ppc_inst){ .val = (x), .suffix = (y) }) +#define ppc_inst_prefix(x, y) ((ppc_inst_t){ .val = (x), .suffix = (y) }) -static inline u32 ppc_inst_suffix(struct ppc_inst x) +static inline u32 ppc_inst_suffix(ppc_inst_t x) { return x.suffix; } #else -#define ppc_inst_prefix(x, y) ppc_inst(x) +#define ppc_inst_prefix(x, y) ((void)y, ppc_inst(x)) -static inline u32 ppc_inst_suffix(struct ppc_inst x) +static inline u32 ppc_inst_suffix(ppc_inst_t x) { return 0; } #endif /* CONFIG_PPC64 */ -static inline struct ppc_inst ppc_inst_read(const u32 *ptr) +static inline ppc_inst_t ppc_inst_read(const u32 *ptr) { if (IS_ENABLED(CONFIG_PPC64) && (*ptr >> 26) == OP_PREFIX) return ppc_inst_prefix(*ptr, *(ptr + 1)); @@ -88,17 +84,17 @@ static inline struct ppc_inst ppc_inst_read(const u32 *ptr) return ppc_inst(*ptr); } -static inline bool ppc_inst_prefixed(struct ppc_inst x) +static inline bool ppc_inst_prefixed(ppc_inst_t x) { return IS_ENABLED(CONFIG_PPC64) && ppc_inst_primary_opcode(x) == OP_PREFIX; } -static inline struct ppc_inst ppc_inst_swab(struct ppc_inst x) +static inline ppc_inst_t ppc_inst_swab(ppc_inst_t x) { return ppc_inst_prefix(swab32(ppc_inst_val(x)), swab32(ppc_inst_suffix(x))); } -static inline bool ppc_inst_equal(struct ppc_inst x, struct ppc_inst y) +static inline bool ppc_inst_equal(ppc_inst_t x, ppc_inst_t y) { if (ppc_inst_val(x) != ppc_inst_val(y)) return false; @@ -107,7 +103,7 @@ static inline bool ppc_inst_equal(struct ppc_inst x, struct ppc_inst y) return ppc_inst_suffix(x) == ppc_inst_suffix(y); } -static inline int ppc_inst_len(struct ppc_inst x) +static inline int ppc_inst_len(ppc_inst_t x) { return ppc_inst_prefixed(x) ? 8 : 4; } @@ -118,14 +114,14 @@ static inline int ppc_inst_len(struct ppc_inst x) */ static inline u32 *ppc_inst_next(u32 *location, u32 *value) { - struct ppc_inst tmp; + ppc_inst_t tmp; tmp = ppc_inst_read(value); return (void *)location + ppc_inst_len(tmp); } -static inline unsigned long ppc_inst_as_ulong(struct ppc_inst x) +static inline unsigned long ppc_inst_as_ulong(ppc_inst_t x) { if (IS_ENABLED(CONFIG_PPC32)) return ppc_inst_val(x); @@ -135,9 +131,17 @@ static inline unsigned long ppc_inst_as_ulong(struct ppc_inst x) return (u64)ppc_inst_val(x) << 32 | ppc_inst_suffix(x); } +static inline void ppc_inst_write(u32 *ptr, ppc_inst_t x) +{ + if (!ppc_inst_prefixed(x)) + *ptr = ppc_inst_val(x); + else + *(u64 *)ptr = ppc_inst_as_ulong(x); +} + #define PPC_INST_STR_LEN sizeof("00000000 00000000") -static inline char *__ppc_inst_as_str(char str[PPC_INST_STR_LEN], struct ppc_inst x) +static inline char *__ppc_inst_as_str(char str[PPC_INST_STR_LEN], ppc_inst_t x) { if (ppc_inst_prefixed(x)) sprintf(str, "%08x %08x", ppc_inst_val(x), ppc_inst_suffix(x)); @@ -154,6 +158,27 @@ static inline char *__ppc_inst_as_str(char str[PPC_INST_STR_LEN], struct ppc_ins __str; \ }) -int copy_inst_from_kernel_nofault(struct ppc_inst *inst, u32 *src); +static inline int copy_inst_from_kernel_nofault(ppc_inst_t *inst, u32 *src) +{ + unsigned int val, suffix; + + if (unlikely(!is_kernel_addr((unsigned long)src))) + return -ERANGE; + +/* See https://github.com/ClangBuiltLinux/linux/issues/1521 */ +#if defined(CONFIG_CC_IS_CLANG) && CONFIG_CLANG_VERSION < 140000 + val = suffix = 0; +#endif + __get_kernel_nofault(&val, src, u32, Efault); + if (IS_ENABLED(CONFIG_PPC64) && get_op(val) == OP_PREFIX) { + __get_kernel_nofault(&suffix, src + 1, u32, Efault); + *inst = ppc_inst_prefix(val, suffix); + } else { + *inst = ppc_inst(val); + } + return 0; +Efault: + return -EFAULT; +} #endif /* _ASM_POWERPC_INST_H */ diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h index a1d238255f07..fc28f46d2f9d 100644 --- a/arch/powerpc/include/asm/interrupt.h +++ b/arch/powerpc/include/asm/interrupt.h @@ -97,6 +97,11 @@ static inline void srr_regs_clobbered(void) local_paca->hsrr_valid = 0; } #else +static inline unsigned long search_kernel_restart_table(unsigned long addr) +{ + return 0; +} + static inline bool is_implicit_soft_masked(struct pt_regs *regs) { return false; @@ -139,39 +144,68 @@ static inline void interrupt_enter_prepare(struct pt_regs *regs, struct interrup if (!arch_irq_disabled_regs(regs)) trace_hardirqs_off(); - if (user_mode(regs)) { - kuep_lock(); - account_cpu_user_entry(); - } else { + if (user_mode(regs)) + kuap_lock(); + else kuap_save_and_lock(regs); - } + + if (user_mode(regs)) + account_cpu_user_entry(); #endif #ifdef CONFIG_PPC64 - if (irq_soft_mask_set_return(IRQS_ALL_DISABLED) == IRQS_ENABLED) + bool trace_enable = false; + + if (IS_ENABLED(CONFIG_TRACE_IRQFLAGS)) { + if (irq_soft_mask_set_return(IRQS_ALL_DISABLED) == IRQS_ENABLED) + trace_enable = true; + } else { + irq_soft_mask_set(IRQS_ALL_DISABLED); + } + + /* + * If the interrupt was taken with HARD_DIS clear, then enable MSR[EE]. + * Asynchronous interrupts get here with HARD_DIS set (see below), so + * this enables MSR[EE] for synchronous interrupts. IRQs remain + * soft-masked. The interrupt handler may later call + * interrupt_cond_local_irq_enable() to achieve a regular process + * context. + */ + if (!(local_paca->irq_happened & PACA_IRQ_HARD_DIS)) { + if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) + BUG_ON(!(regs->msr & MSR_EE)); + __hard_irq_enable(); + } else { + __hard_RI_enable(); + } + + /* Do this when RI=1 because it can cause SLB faults */ + if (trace_enable) trace_hardirqs_off(); - local_paca->irq_happened |= PACA_IRQ_HARD_DIS; if (user_mode(regs)) { + kuap_lock(); CT_WARN_ON(ct_state() != CONTEXT_USER); user_exit_irqoff(); account_cpu_user_entry(); account_stolen_time(); } else { + kuap_save_and_lock(regs); /* * CT_WARN_ON comes here via program_check_exception, * so avoid recursion. */ if (TRAP(regs) != INTERRUPT_PROGRAM) { CT_WARN_ON(ct_state() != CONTEXT_KERNEL); - BUG_ON(is_implicit_soft_masked(regs)); + if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) + BUG_ON(is_implicit_soft_masked(regs)); } -#ifdef CONFIG_PPC_BOOK3S + /* Move this under a debugging check */ - if (arch_irq_disabled_regs(regs)) + if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG) && + arch_irq_disabled_regs(regs)) BUG_ON(search_kernel_restart_table(regs->nip)); -#endif } if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) BUG_ON(!arch_irq_disabled_regs(regs) && !(regs->msr & MSR_EE)); @@ -200,13 +234,20 @@ static inline void interrupt_exit_prepare(struct pt_regs *regs, struct interrupt static inline void interrupt_async_enter_prepare(struct pt_regs *regs, struct interrupt_state *state) { +#ifdef CONFIG_PPC64 + /* Ensure interrupt_enter_prepare does not enable MSR[EE] */ + local_paca->irq_happened |= PACA_IRQ_HARD_DIS; +#endif + interrupt_enter_prepare(regs, state); #ifdef CONFIG_PPC_BOOK3S_64 + /* + * RI=1 is set by interrupt_enter_prepare, so this thread flags access + * has to come afterward (it can cause SLB faults). + */ if (cpu_has_feature(CPU_FTR_CTRL) && !test_thread_local_flags(_TLF_RUNLATCH)) __ppc64_runlatch_on(); #endif - - interrupt_enter_prepare(regs, state); irq_enter(); } @@ -276,6 +317,8 @@ static inline void interrupt_nmi_enter_prepare(struct pt_regs *regs, struct inte regs->softe = IRQS_ALL_DISABLED; } + __hard_RI_enable(); + /* Don't do any per-CPU operations until interrupt state is fixed */ if (nmi_disables_ftrace(regs)) { @@ -373,6 +416,8 @@ interrupt_handler long func(struct pt_regs *regs) \ { \ long ret; \ \ + __hard_RI_enable(); \ + \ ret = ____##func (regs); \ \ return ret; \ @@ -564,7 +609,7 @@ DECLARE_INTERRUPT_HANDLER(kernel_bad_stack); /* slb.c */ DECLARE_INTERRUPT_HANDLER_RAW(do_slb_fault); -DECLARE_INTERRUPT_HANDLER(do_bad_slb_fault); +DECLARE_INTERRUPT_HANDLER(do_bad_segment_interrupt); /* hash_utils.c */ DECLARE_INTERRUPT_HANDLER_RAW(do_hash_fault); diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h index c361212ac160..d7912b66c874 100644 --- a/arch/powerpc/include/asm/iommu.h +++ b/arch/powerpc/include/asm/iommu.h @@ -275,7 +275,7 @@ extern void iommu_unmap_page(struct iommu_table *tbl, dma_addr_t dma_handle, size_t size, enum dma_data_direction direction, unsigned long attrs); -extern void iommu_init_early_pSeries(void); +void __init iommu_init_early_pSeries(void); extern void iommu_init_early_dart(struct pci_controller_ops *controller_ops); extern void iommu_init_early_pasemi(void); diff --git a/arch/powerpc/include/asm/ipic.h b/arch/powerpc/include/asm/ipic.h index 0524df31a7e6..b47ca7dc7199 100644 --- a/arch/powerpc/include/asm/ipic.h +++ b/arch/powerpc/include/asm/ipic.h @@ -65,7 +65,7 @@ enum ipic_mcp_irq { IPIC_MCP_MU = 7, }; -extern void ipic_set_default_priority(void); +void __init ipic_set_default_priority(void); extern u32 ipic_get_mcp_status(void); extern void ipic_clear_mcp_status(u32 mask); diff --git a/arch/powerpc/include/asm/irq.h b/arch/powerpc/include/asm/irq.h index 2b3278534bc1..13f0409dd617 100644 --- a/arch/powerpc/include/asm/irq.h +++ b/arch/powerpc/include/asm/irq.h @@ -36,7 +36,7 @@ extern int distribute_irqs; struct pt_regs; -#if defined(CONFIG_BOOKE) || defined(CONFIG_40x) +#ifdef CONFIG_BOOKE_OR_40x /* * Per-cpu stacks for handling critical, debug and machine check * level interrupts. diff --git a/arch/powerpc/include/asm/kexec.h b/arch/powerpc/include/asm/kexec.h index c6f250eca3fb..8ebdd23d987c 100644 --- a/arch/powerpc/include/asm/kexec.h +++ b/arch/powerpc/include/asm/kexec.h @@ -84,7 +84,7 @@ extern int crash_shutdown_register(crash_shutdown_t handler); extern int crash_shutdown_unregister(crash_shutdown_t handler); extern void crash_kexec_secondary(struct pt_regs *regs); -extern int overlaps_crashkernel(unsigned long start, unsigned long size); +int __init overlaps_crashkernel(unsigned long start, unsigned long size); extern void reserve_crashkernel(void); extern void machine_kexec_mask_interrupts(void); diff --git a/arch/powerpc/include/asm/kup.h b/arch/powerpc/include/asm/kup.h index 1df763002726..fb2237809d63 100644 --- a/arch/powerpc/include/asm/kup.h +++ b/arch/powerpc/include/asm/kup.h @@ -14,6 +14,10 @@ #include <asm/nohash/32/kup-8xx.h> #endif +#ifdef CONFIG_BOOKE_OR_40x +#include <asm/nohash/kup-booke.h> +#endif + #ifdef CONFIG_PPC_BOOK3S_32 #include <asm/book3s/32/kup.h> #endif @@ -32,34 +36,29 @@ extern bool disable_kuap; #include <linux/pgtable.h> -#ifdef CONFIG_PPC_KUEP +void setup_kup(void); void setup_kuep(bool disabled); -#else -static inline void setup_kuep(bool disabled) { } -#endif /* CONFIG_PPC_KUEP */ - -#ifndef CONFIG_PPC_BOOK3S_32 -static inline void kuep_lock(void) { } -static inline void kuep_unlock(void) { } -#endif #ifdef CONFIG_PPC_KUAP void setup_kuap(bool disabled); #else static inline void setup_kuap(bool disabled) { } +static __always_inline bool kuap_is_disabled(void) { return true; } + static inline bool -bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write) +__bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write) { return false; } -static inline void kuap_assert_locked(void) { } -static inline void kuap_save_and_lock(struct pt_regs *regs) { } +static inline void __kuap_assert_locked(void) { } +static inline void __kuap_lock(void) { } +static inline void __kuap_save_and_lock(struct pt_regs *regs) { } static inline void kuap_user_restore(struct pt_regs *regs) { } -static inline void kuap_kernel_restore(struct pt_regs *regs, unsigned long amr) { } +static inline void __kuap_kernel_restore(struct pt_regs *regs, unsigned long amr) { } -static inline unsigned long kuap_get_and_assert_locked(void) +static inline unsigned long __kuap_get_and_assert_locked(void) { return 0; } @@ -70,20 +69,99 @@ static inline unsigned long kuap_get_and_assert_locked(void) * platforms. */ #ifndef CONFIG_PPC_BOOK3S_64 -static inline void allow_user_access(void __user *to, const void __user *from, - unsigned long size, unsigned long dir) { } -static inline void prevent_user_access(unsigned long dir) { } -static inline unsigned long prevent_user_access_return(void) { return 0UL; } -static inline void restore_user_access(unsigned long flags) { } +static inline void __allow_user_access(void __user *to, const void __user *from, + unsigned long size, unsigned long dir) { } +static inline void __prevent_user_access(unsigned long dir) { } +static inline unsigned long __prevent_user_access_return(void) { return 0UL; } +static inline void __restore_user_access(unsigned long flags) { } #endif /* CONFIG_PPC_BOOK3S_64 */ #endif /* CONFIG_PPC_KUAP */ -static __always_inline void setup_kup(void) +static __always_inline bool +bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write) +{ + if (kuap_is_disabled()) + return false; + + return __bad_kuap_fault(regs, address, is_write); +} + +static __always_inline void kuap_assert_locked(void) { - setup_kuep(disable_kuep); - setup_kuap(disable_kuap); + if (kuap_is_disabled()) + return; + + if (IS_ENABLED(CONFIG_PPC_KUAP_DEBUG)) + __kuap_get_and_assert_locked(); +} + +static __always_inline void kuap_lock(void) +{ + if (kuap_is_disabled()) + return; + + __kuap_lock(); +} + +static __always_inline void kuap_save_and_lock(struct pt_regs *regs) +{ + if (kuap_is_disabled()) + return; + + __kuap_save_and_lock(regs); } +static __always_inline void kuap_kernel_restore(struct pt_regs *regs, unsigned long amr) +{ + if (kuap_is_disabled()) + return; + + __kuap_kernel_restore(regs, amr); +} + +static __always_inline unsigned long kuap_get_and_assert_locked(void) +{ + if (kuap_is_disabled()) + return 0; + + return __kuap_get_and_assert_locked(); +} + +#ifndef CONFIG_PPC_BOOK3S_64 +static __always_inline void allow_user_access(void __user *to, const void __user *from, + unsigned long size, unsigned long dir) +{ + if (kuap_is_disabled()) + return; + + __allow_user_access(to, from, size, dir); +} + +static __always_inline void prevent_user_access(unsigned long dir) +{ + if (kuap_is_disabled()) + return; + + __prevent_user_access(dir); +} + +static __always_inline unsigned long prevent_user_access_return(void) +{ + if (kuap_is_disabled()) + return 0; + + return __prevent_user_access_return(); +} + +static __always_inline void restore_user_access(unsigned long flags) +{ + if (kuap_is_disabled()) + return; + + __restore_user_access(flags); +} +#endif /* CONFIG_PPC_BOOK3S_64 */ + static __always_inline void allow_read_from_user(const void __user *from, unsigned long size) { barrier_nospec(); diff --git a/arch/powerpc/include/asm/kvm_asm.h b/arch/powerpc/include/asm/kvm_asm.h index fbbf3cec92e9..d68d71987d5c 100644 --- a/arch/powerpc/include/asm/kvm_asm.h +++ b/arch/powerpc/include/asm/kvm_asm.h @@ -79,6 +79,7 @@ #define BOOK3S_INTERRUPT_FP_UNAVAIL 0x800 #define BOOK3S_INTERRUPT_DECREMENTER 0x900 #define BOOK3S_INTERRUPT_HV_DECREMENTER 0x980 +#define BOOK3S_INTERRUPT_NESTED_HV_DECREMENTER 0x1980 #define BOOK3S_INTERRUPT_DOORBELL 0xa00 #define BOOK3S_INTERRUPT_SYSCALL 0xc00 #define BOOK3S_INTERRUPT_TRACE 0xd00 diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index 3d31f2c59e43..91c9f937edcd 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -406,6 +406,12 @@ static inline ulong kvmppc_get_fault_dar(struct kvm_vcpu *vcpu) return vcpu->arch.fault_dar; } +/* Expiry time of vcpu DEC relative to host TB */ +static inline u64 kvmppc_dec_expires_host_tb(struct kvm_vcpu *vcpu) +{ + return vcpu->arch.dec_expires - vcpu->arch.vcore->tb_offset; +} + static inline bool is_kvmppc_resume_guest(int r) { return (r == RESUME_GUEST || r == RESUME_GUEST_NV); diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h index fff391b9b97b..fe07558173ef 100644 --- a/arch/powerpc/include/asm/kvm_book3s_64.h +++ b/arch/powerpc/include/asm/kvm_book3s_64.h @@ -44,7 +44,6 @@ struct kvm_nested_guest { struct mutex tlb_lock; /* serialize page faults and tlbies */ struct kvm_nested_guest *next; cpumask_t need_tlb_flush; - cpumask_t cpu_in_guest; short prev_cpu[NR_CPUS]; u8 radix; /* is this nested guest radix */ }; @@ -154,7 +153,9 @@ static inline bool kvmhv_vcpu_is_radix(struct kvm_vcpu *vcpu) return radix; } -int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpcr); +unsigned long kvmppc_msr_hard_disable_set_facilities(struct kvm_vcpu *vcpu, unsigned long msr); + +int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpcr, u64 *tb); #define KVM_DEFAULT_HPT_ORDER 24 /* 16MB HPT by default */ #endif diff --git a/arch/powerpc/include/asm/kvm_guest.h b/arch/powerpc/include/asm/kvm_guest.h index c63105d2c9e7..68e499abdb24 100644 --- a/arch/powerpc/include/asm/kvm_guest.h +++ b/arch/powerpc/include/asm/kvm_guest.h @@ -16,7 +16,7 @@ static inline bool is_kvm_guest(void) return static_branch_unlikely(&kvm_guest); } -int check_kvm_guest(void); +int __init check_kvm_guest(void); #else static inline bool is_kvm_guest(void) { return false; } static inline int check_kvm_guest(void) { return 0; } diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index e4d23193eba7..17263276189e 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -287,7 +287,6 @@ struct kvm_arch { u32 online_vcores; atomic_t hpte_mod_interest; cpumask_t need_tlb_flush; - cpumask_t cpu_in_guest; u8 radix; u8 fwnmi_enabled; u8 secure_guest; @@ -579,6 +578,10 @@ struct kvm_vcpu_arch { ulong cfar; ulong ppr; u32 pspb; + u8 load_ebb; +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + u8 load_tm; +#endif ulong fscr; ulong shadow_fscr; ulong ebbhr; @@ -741,7 +744,7 @@ struct kvm_vcpu_arch { struct hrtimer dec_timer; u64 dec_jiffies; - u64 dec_expires; + u64 dec_expires; /* Relative to guest timebase. */ unsigned long pending_exceptions; u8 ceded; u8 prodded; diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 671fbd1a765e..33db83b82fbd 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -552,8 +552,7 @@ extern void kvm_hv_vm_activated(void); extern void kvm_hv_vm_deactivated(void); extern bool kvm_hv_mode_active(void); -extern void kvmppc_check_need_tlb_flush(struct kvm *kvm, int pcpu, - struct kvm_nested_guest *nested); +extern void kvmppc_check_need_tlb_flush(struct kvm *kvm, int pcpu); #else static inline void __init kvm_cma_reserve(void) @@ -760,6 +759,7 @@ void kvmppc_realmode_machine_check(struct kvm_vcpu *vcpu); void kvmppc_subcore_enter_guest(void); void kvmppc_subcore_exit_guest(void); long kvmppc_realmode_hmi_handler(void); +long kvmppc_p9_realmode_hmi_handler(struct kvm_vcpu *vcpu); long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags, long pte_index, unsigned long pteh, unsigned long ptel); long kvmppc_h_remove(struct kvm_vcpu *vcpu, unsigned long flags, diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h index 9c3c9f04129f..e821037f74f0 100644 --- a/arch/powerpc/include/asm/machdep.h +++ b/arch/powerpc/include/asm/machdep.h @@ -235,8 +235,6 @@ extern struct machdep_calls *machine_id; machine_id == &mach_##name; \ }) -extern void probe_machine(void); - #ifdef CONFIG_PPC_PMAC /* * Power macintoshes have either a CUDA, PMU or SMU controlling diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h index 8abe8e42e045..5f41565a1e5d 100644 --- a/arch/powerpc/include/asm/mmu.h +++ b/arch/powerpc/include/asm/mmu.h @@ -157,7 +157,7 @@ DECLARE_PER_CPU(int, next_tlbcam_idx); enum { MMU_FTRS_POSSIBLE = -#if defined(CONFIG_PPC_BOOK3S_64) || defined(CONFIG_PPC_BOOK3S_604) +#if defined(CONFIG_PPC_BOOK3S_604) MMU_FTR_HPTE_TABLE | #endif #ifdef CONFIG_PPC_8xx @@ -184,15 +184,18 @@ enum { MMU_FTR_USE_TLBRSRV | MMU_FTR_USE_PAIRED_MAS | #endif #ifdef CONFIG_PPC_BOOK3S_64 + MMU_FTR_KERNEL_RO | +#ifdef CONFIG_PPC_64S_HASH_MMU MMU_FTR_NO_SLBIE_B | MMU_FTR_16M_PAGE | MMU_FTR_TLBIEL | MMU_FTR_LOCKLESS_TLBIE | MMU_FTR_CI_LARGE_PAGE | MMU_FTR_1T_SEGMENT | MMU_FTR_TLBIE_CROP_VA | - MMU_FTR_KERNEL_RO | MMU_FTR_68_BIT_VA | + MMU_FTR_68_BIT_VA | MMU_FTR_HPTE_TABLE | #endif #ifdef CONFIG_PPC_RADIX_MMU MMU_FTR_TYPE_RADIX | MMU_FTR_GTSE | #endif /* CONFIG_PPC_RADIX_MMU */ +#endif #ifdef CONFIG_PPC_KUAP MMU_FTR_BOOK3S_KUAP | #endif /* CONFIG_PPC_KUAP */ @@ -224,6 +227,13 @@ enum { #define MMU_FTRS_ALWAYS MMU_FTR_TYPE_FSL_E #endif +/* BOOK3S_64 options */ +#if defined(CONFIG_PPC_RADIX_MMU) && !defined(CONFIG_PPC_64S_HASH_MMU) +#define MMU_FTRS_ALWAYS MMU_FTR_TYPE_RADIX +#elif !defined(CONFIG_PPC_RADIX_MMU) && defined(CONFIG_PPC_64S_HASH_MMU) +#define MMU_FTRS_ALWAYS MMU_FTR_HPTE_TABLE +#endif + #ifndef MMU_FTRS_ALWAYS #define MMU_FTRS_ALWAYS 0 #endif @@ -329,7 +339,7 @@ static __always_inline bool radix_enabled(void) return mmu_has_feature(MMU_FTR_TYPE_RADIX); } -static inline bool early_radix_enabled(void) +static __always_inline bool early_radix_enabled(void) { return early_mmu_has_feature(MMU_FTR_TYPE_RADIX); } diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h index 9ba6b585337f..fd277b15635c 100644 --- a/arch/powerpc/include/asm/mmu_context.h +++ b/arch/powerpc/include/asm/mmu_context.h @@ -71,10 +71,11 @@ static inline void switch_mmu_context(struct mm_struct *prev, } extern int hash__alloc_context_id(void); -extern void hash__reserve_context_id(int id); +void __init hash__reserve_context_id(int id); extern void __destroy_context(int context_id); static inline void mmu_context_init(void) { } +#ifdef CONFIG_PPC_64S_HASH_MMU static inline int alloc_extended_context(struct mm_struct *mm, unsigned long ea) { @@ -100,6 +101,7 @@ static inline bool need_extra_context(struct mm_struct *mm, unsigned long ea) return true; return false; } +#endif #else extern void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next, diff --git a/arch/powerpc/include/asm/mpic.h b/arch/powerpc/include/asm/mpic.h index 0abf2e7fd222..58353c5bd3fb 100644 --- a/arch/powerpc/include/asm/mpic.h +++ b/arch/powerpc/include/asm/mpic.h @@ -472,7 +472,7 @@ extern int mpic_cpu_get_priority(void); extern void mpic_cpu_set_priority(int prio); /* Request IPIs on primary mpic */ -extern void mpic_request_ipis(void); +void __init mpic_request_ipis(void); /* Send a message (IPI) to a given target (cpu number or MSG_*) */ void smp_mpic_message_pass(int target, int msg); diff --git a/arch/powerpc/include/asm/nohash/32/kup-8xx.h b/arch/powerpc/include/asm/nohash/32/kup-8xx.h index 882a0bc7887a..c44d97751723 100644 --- a/arch/powerpc/include/asm/nohash/32/kup-8xx.h +++ b/arch/powerpc/include/asm/nohash/32/kup-8xx.h @@ -20,11 +20,12 @@ static __always_inline bool kuap_is_disabled(void) return static_branch_unlikely(&disable_kuap_key); } -static inline void kuap_save_and_lock(struct pt_regs *regs) +static inline void __kuap_lock(void) { - if (kuap_is_disabled()) - return; +} +static inline void __kuap_save_and_lock(struct pt_regs *regs) +{ regs->kuap = mfspr(SPRN_MD_AP); mtspr(SPRN_MD_AP, MD_APG_KUAP); } @@ -33,21 +34,15 @@ static inline void kuap_user_restore(struct pt_regs *regs) { } -static inline void kuap_kernel_restore(struct pt_regs *regs, unsigned long kuap) +static inline void __kuap_kernel_restore(struct pt_regs *regs, unsigned long kuap) { - if (kuap_is_disabled()) - return; - mtspr(SPRN_MD_AP, regs->kuap); } -static inline unsigned long kuap_get_and_assert_locked(void) +static inline unsigned long __kuap_get_and_assert_locked(void) { unsigned long kuap; - if (kuap_is_disabled()) - return MD_APG_INIT; - kuap = mfspr(SPRN_MD_AP); if (IS_ENABLED(CONFIG_PPC_KUAP_DEBUG)) @@ -56,36 +51,21 @@ static inline unsigned long kuap_get_and_assert_locked(void) return kuap; } -static inline void kuap_assert_locked(void) -{ - if (IS_ENABLED(CONFIG_PPC_KUAP_DEBUG) && !kuap_is_disabled()) - kuap_get_and_assert_locked(); -} - -static inline void allow_user_access(void __user *to, const void __user *from, - unsigned long size, unsigned long dir) +static inline void __allow_user_access(void __user *to, const void __user *from, + unsigned long size, unsigned long dir) { - if (kuap_is_disabled()) - return; - mtspr(SPRN_MD_AP, MD_APG_INIT); } -static inline void prevent_user_access(unsigned long dir) +static inline void __prevent_user_access(unsigned long dir) { - if (kuap_is_disabled()) - return; - mtspr(SPRN_MD_AP, MD_APG_KUAP); } -static inline unsigned long prevent_user_access_return(void) +static inline unsigned long __prevent_user_access_return(void) { unsigned long flags; - if (kuap_is_disabled()) - return MD_APG_INIT; - flags = mfspr(SPRN_MD_AP); mtspr(SPRN_MD_AP, MD_APG_KUAP); @@ -93,20 +73,14 @@ static inline unsigned long prevent_user_access_return(void) return flags; } -static inline void restore_user_access(unsigned long flags) +static inline void __restore_user_access(unsigned long flags) { - if (kuap_is_disabled()) - return; - mtspr(SPRN_MD_AP, flags); } static inline bool -bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write) +__bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write) { - if (kuap_is_disabled()) - return false; - return !((regs->kuap ^ MD_APG_KUAP) & 0xff000000); } diff --git a/arch/powerpc/include/asm/nohash/32/mmu-44x.h b/arch/powerpc/include/asm/nohash/32/mmu-44x.h index 43ceca128531..2d92a39d8f2e 100644 --- a/arch/powerpc/include/asm/nohash/32/mmu-44x.h +++ b/arch/powerpc/include/asm/nohash/32/mmu-44x.h @@ -113,7 +113,6 @@ typedef struct { /* patch sites */ extern s32 patch__tlb_44x_hwater_D, patch__tlb_44x_hwater_I; -extern s32 patch__tlb_44x_kuep, patch__tlb_47x_kuep; #endif /* !__ASSEMBLY__ */ diff --git a/arch/powerpc/include/asm/nohash/32/mmu-8xx.h b/arch/powerpc/include/asm/nohash/32/mmu-8xx.h index 997cec973406..0e93a4728c9e 100644 --- a/arch/powerpc/include/asm/nohash/32/mmu-8xx.h +++ b/arch/powerpc/include/asm/nohash/32/mmu-8xx.h @@ -39,12 +39,10 @@ * 0 => Kernel => 11 (all accesses performed according as user iaw page definition) * 1 => Kernel+Accessed => 01 (all accesses performed according to page definition) * 2 => User => 11 (all accesses performed according as user iaw page definition) - * 3 => User+Accessed => 00 (all accesses performed as supervisor iaw page definition) for INIT - * => 10 (all accesses performed according to swaped page definition) for KUEP + * 3 => User+Accessed => 10 (all accesses performed according to swaped page definition) for KUEP * 4-15 => Not Used */ -#define MI_APG_INIT 0xdc000000 -#define MI_APG_KUEP 0xde000000 +#define MI_APG_INIT 0xde000000 /* The effective page number register. When read, contains the information * about the last instruction TLB miss. When MI_RPN is written, bits in diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h b/arch/powerpc/include/asm/nohash/64/pgtable.h index 9d2905a47410..a3313e853e5e 100644 --- a/arch/powerpc/include/asm/nohash/64/pgtable.h +++ b/arch/powerpc/include/asm/nohash/64/pgtable.h @@ -313,6 +313,12 @@ extern int __meminit vmemmap_create_mapping(unsigned long start, unsigned long phys); extern void vmemmap_remove_mapping(unsigned long start, unsigned long page_size); +void __patch_exception(int exc, unsigned long addr); +#define patch_exception(exc, name) do { \ + extern unsigned int name; \ + __patch_exception((exc), (unsigned long)&name); \ +} while (0) + #endif /* __ASSEMBLY__ */ #endif /* _ASM_POWERPC_NOHASH_64_PGTABLE_H */ diff --git a/arch/powerpc/include/asm/nohash/kup-booke.h b/arch/powerpc/include/asm/nohash/kup-booke.h new file mode 100644 index 000000000000..49bb41ed0816 --- /dev/null +++ b/arch/powerpc/include/asm/nohash/kup-booke.h @@ -0,0 +1,110 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_POWERPC_KUP_BOOKE_H_ +#define _ASM_POWERPC_KUP_BOOKE_H_ + +#include <asm/bug.h> + +#ifdef CONFIG_PPC_KUAP + +#ifdef __ASSEMBLY__ + +.macro kuap_check_amr gpr1, gpr2 +.endm + +#else + +#include <linux/jump_label.h> +#include <linux/sched.h> + +#include <asm/reg.h> + +extern struct static_key_false disable_kuap_key; + +static __always_inline bool kuap_is_disabled(void) +{ + return static_branch_unlikely(&disable_kuap_key); +} + +static inline void __kuap_lock(void) +{ + mtspr(SPRN_PID, 0); + isync(); +} + +static inline void __kuap_save_and_lock(struct pt_regs *regs) +{ + regs->kuap = mfspr(SPRN_PID); + mtspr(SPRN_PID, 0); + isync(); +} + +static inline void kuap_user_restore(struct pt_regs *regs) +{ + if (kuap_is_disabled()) + return; + + mtspr(SPRN_PID, current->thread.pid); + + /* Context synchronisation is performed by rfi */ +} + +static inline void __kuap_kernel_restore(struct pt_regs *regs, unsigned long kuap) +{ + if (regs->kuap) + mtspr(SPRN_PID, current->thread.pid); + + /* Context synchronisation is performed by rfi */ +} + +static inline unsigned long __kuap_get_and_assert_locked(void) +{ + unsigned long kuap = mfspr(SPRN_PID); + + if (IS_ENABLED(CONFIG_PPC_KUAP_DEBUG)) + WARN_ON_ONCE(kuap); + + return kuap; +} + +static inline void __allow_user_access(void __user *to, const void __user *from, + unsigned long size, unsigned long dir) +{ + mtspr(SPRN_PID, current->thread.pid); + isync(); +} + +static inline void __prevent_user_access(unsigned long dir) +{ + mtspr(SPRN_PID, 0); + isync(); +} + +static inline unsigned long __prevent_user_access_return(void) +{ + unsigned long flags = mfspr(SPRN_PID); + + mtspr(SPRN_PID, 0); + isync(); + + return flags; +} + +static inline void __restore_user_access(unsigned long flags) +{ + if (flags) { + mtspr(SPRN_PID, current->thread.pid); + isync(); + } +} + +static inline bool +__bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write) +{ + return !regs->kuap; +} + +#endif /* !__ASSEMBLY__ */ + +#endif /* CONFIG_PPC_KUAP */ + +#endif /* _ASM_POWERPC_KUP_BOOKE_H_ */ diff --git a/arch/powerpc/include/asm/opal-api.h b/arch/powerpc/include/asm/opal-api.h index 0b63ba7d5917..a2bc4b95e703 100644 --- a/arch/powerpc/include/asm/opal-api.h +++ b/arch/powerpc/include/asm/opal-api.h @@ -1094,6 +1094,7 @@ enum { OPAL_XIVE_IRQ_SHIFT_BUG = 0x00000008, /* P9 DD1.0 workaround */ OPAL_XIVE_IRQ_MASK_VIA_FW = 0x00000010, /* P9 DD1.0 workaround */ OPAL_XIVE_IRQ_EOI_VIA_FW = 0x00000020, /* P9 DD1.0 workaround */ + OPAL_XIVE_IRQ_STORE_EOI2 = 0x00000040, }; /* Flags for OPAL_XIVE_GET/SET_QUEUE_INFO */ diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h index 6ea9001de9a9..bfd3142cd0ba 100644 --- a/arch/powerpc/include/asm/opal.h +++ b/arch/powerpc/include/asm/opal.h @@ -314,7 +314,7 @@ extern int early_init_dt_scan_opal(unsigned long node, const char *uname, int depth, void *data); extern int early_init_dt_scan_recoverable_ranges(unsigned long node, const char *uname, int depth, void *data); -extern void opal_configure_cores(void); +void __init opal_configure_cores(void); extern int opal_get_chars(uint32_t vtermno, char *buf, int count); extern int opal_put_chars(uint32_t vtermno, const char *buf, int total_len); diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h index dc05a862e72a..295573a82c66 100644 --- a/arch/powerpc/include/asm/paca.h +++ b/arch/powerpc/include/asm/paca.h @@ -97,7 +97,9 @@ struct paca_struct { /* this becomes non-zero. */ u8 kexec_state; /* set when kexec down has irqs off */ #ifdef CONFIG_PPC_BOOK3S_64 +#ifdef CONFIG_PPC_64S_HASH_MMU struct slb_shadow *slb_shadow_ptr; +#endif struct dtl_entry *dispatch_log; struct dtl_entry *dispatch_log_end; #endif @@ -110,6 +112,7 @@ struct paca_struct { /* used for most interrupts/exceptions */ u64 exgen[EX_SIZE] __attribute__((aligned(0x80))); +#ifdef CONFIG_PPC_64S_HASH_MMU /* SLB related definitions */ u16 vmalloc_sllp; u8 slb_cache_ptr; @@ -120,6 +123,7 @@ struct paca_struct { u32 slb_used_bitmap; /* Bitmaps for first 32 SLB entries. */ u32 slb_kern_bitmap; u32 slb_cache[SLB_CACHE_ENTRIES]; +#endif #endif /* CONFIG_PPC_BOOK3S_64 */ #ifdef CONFIG_PPC_BOOK3E @@ -149,6 +153,7 @@ struct paca_struct { #endif /* CONFIG_PPC_BOOK3E */ #ifdef CONFIG_PPC_BOOK3S +#ifdef CONFIG_PPC_64S_HASH_MMU #ifdef CONFIG_PPC_MM_SLICES unsigned char mm_ctx_low_slices_psize[BITS_PER_LONG / BITS_PER_BYTE]; unsigned char mm_ctx_high_slices_psize[SLICE_ARRAY_SIZE]; @@ -157,6 +162,7 @@ struct paca_struct { u16 mm_ctx_sllp; #endif #endif +#endif /* * then miscellaneous read-write fields @@ -268,9 +274,11 @@ struct paca_struct { #endif /* CONFIG_PPC_PSERIES */ #ifdef CONFIG_PPC_BOOK3S_64 +#ifdef CONFIG_PPC_64S_HASH_MMU /* Capture SLB related old contents in MCE handler. */ struct slb_entry *mce_faulty_slbs; u16 slb_save_cache_ptr; +#endif #endif /* CONFIG_PPC_BOOK3S_64 */ #ifdef CONFIG_STACKPROTECTOR unsigned long canary; diff --git a/arch/powerpc/include/asm/pci.h b/arch/powerpc/include/asm/pci.h index d1f53260725c..915d6ee4b40a 100644 --- a/arch/powerpc/include/asm/pci.h +++ b/arch/powerpc/include/asm/pci.h @@ -48,7 +48,7 @@ static inline int pci_get_legacy_ide_irq(struct pci_dev *dev, int channel) } #ifdef CONFIG_PCI -extern void set_pci_dma_ops(const struct dma_map_ops *dma_ops); +void __init set_pci_dma_ops(const struct dma_map_ops *dma_ops); #else /* CONFIG_PCI */ #define set_pci_dma_ops(d) #endif diff --git a/arch/powerpc/include/asm/perf_event_server.h b/arch/powerpc/include/asm/perf_event_server.h index f4c3428e816b..e2221d29fdf9 100644 --- a/arch/powerpc/include/asm/perf_event_server.h +++ b/arch/powerpc/include/asm/perf_event_server.h @@ -98,7 +98,7 @@ struct power_pmu { #define PPMU_LIMITED_PMC_REQD 2 /* have to put this on a limited PMC */ #define PPMU_ONLY_COUNT_RUN 4 /* only counting in run state */ -extern int register_power_pmu(struct power_pmu *); +int __init register_power_pmu(struct power_pmu *pmu); struct pt_regs; extern unsigned long perf_misc_flags(struct pt_regs *regs); diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h index baea657bc868..efad07081cc0 100644 --- a/arch/powerpc/include/asm/ppc-opcode.h +++ b/arch/powerpc/include/asm/ppc-opcode.h @@ -249,6 +249,7 @@ #define PPC_INST_COPY 0x7c20060c #define PPC_INST_DCBA 0x7c0005ec #define PPC_INST_DCBA_MASK 0xfc0007fe +#define PPC_INST_DSSALL 0x7e00066c #define PPC_INST_ISEL 0x7c00001e #define PPC_INST_ISEL_MASK 0xfc00003e #define PPC_INST_LSWI 0x7c0004aa @@ -393,6 +394,7 @@ (0x7c000264 | ___PPC_RB(rb) | ___PPC_RS(rs) | ___PPC_RIC(ric) | ___PPC_PRS(prs) | ___PPC_R(r)) #define PPC_RAW_TLBIEL(rb, rs, ric, prs, r) \ (0x7c000224 | ___PPC_RB(rb) | ___PPC_RS(rs) | ___PPC_RIC(ric) | ___PPC_PRS(prs) | ___PPC_R(r)) +#define PPC_RAW_TLBIEL_v205(rb, l) (0x7c000224 | ___PPC_RB(rb) | (l << 21)) #define PPC_RAW_TLBSRX_DOT(a, b) (0x7c0006a5 | __PPC_RA0(a) | __PPC_RB(b)) #define PPC_RAW_TLBIVAX(a, b) (0x7c000624 | __PPC_RA0(a) | __PPC_RB(b)) #define PPC_RAW_ERATWE(s, a, w) (0x7c0001a6 | __PPC_RS(s) | __PPC_RA(a) | __PPC_WS(w)) @@ -566,6 +568,8 @@ #define PPC_RAW_MTSPR(spr, d) (0x7c0003a6 | ___PPC_RS(d) | __PPC_SPR(spr)) #define PPC_RAW_EIEIO() (0x7c0006ac) +#define PPC_RAW_BRANCH(addr) (PPC_INST_BRANCH | ((addr) & 0x03fffffc)) + /* Deal with instructions that older assemblers aren't aware of */ #define PPC_BCCTR_FLUSH stringify_in_c(.long PPC_INST_BCCTR_FLUSH) #define PPC_CP_ABORT stringify_in_c(.long PPC_RAW_CP_ABORT) @@ -575,6 +579,7 @@ #define PPC_DCBZL(a, b) stringify_in_c(.long PPC_RAW_DCBZL(a, b)) #define PPC_DIVDE(t, a, b) stringify_in_c(.long PPC_RAW_DIVDE(t, a, b)) #define PPC_DIVDEU(t, a, b) stringify_in_c(.long PPC_RAW_DIVDEU(t, a, b)) +#define PPC_DSSALL stringify_in_c(.long PPC_INST_DSSALL) #define PPC_LQARX(t, a, b, eh) stringify_in_c(.long PPC_RAW_LQARX(t, a, b, eh)) #define PPC_STQCX(t, a, b) stringify_in_c(.long PPC_RAW_STQCX(t, a, b)) #define PPC_MADDHD(t, a, b, c) stringify_in_c(.long PPC_RAW_MADDHD(t, a, b, c)) @@ -602,6 +607,7 @@ stringify_in_c(.long PPC_RAW_TLBIE_5(rb, rs, ric, prs, r)) #define PPC_TLBIEL(rb,rs,ric,prs,r) \ stringify_in_c(.long PPC_RAW_TLBIEL(rb, rs, ric, prs, r)) +#define PPC_TLBIEL_v205(rb, l) stringify_in_c(.long PPC_RAW_TLBIEL_v205(rb, l)) #define PPC_TLBSRX_DOT(a, b) stringify_in_c(.long PPC_RAW_TLBSRX_DOT(a, b)) #define PPC_TLBIVAX(a, b) stringify_in_c(.long PPC_RAW_TLBIVAX(a, b)) diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h index 7be24048b8d1..f21e6bde17a1 100644 --- a/arch/powerpc/include/asm/ppc_asm.h +++ b/arch/powerpc/include/asm/ppc_asm.h @@ -17,29 +17,40 @@ #define SZL (BITS_PER_LONG/8) /* + * This expands to a sequence of operations with reg incrementing from + * start to end inclusive, of this form: + * + * op reg, (offset + (width * reg))(base) + * + * Note that offset is not the offset of the first operation unless start + * is zero (or width is zero). + */ +.macro OP_REGS op, width, start, end, base, offset + .Lreg=\start + .rept (\end - \start + 1) + \op .Lreg, \offset + \width * .Lreg(\base) + .Lreg=.Lreg+1 + .endr +.endm + +/* * Macros for storing registers into and loading registers from * exception frames. */ #ifdef __powerpc64__ -#define SAVE_GPR(n, base) std n,GPR0+8*(n)(base) -#define REST_GPR(n, base) ld n,GPR0+8*(n)(base) -#define SAVE_NVGPRS(base) SAVE_8GPRS(14, base); SAVE_10GPRS(22, base) -#define REST_NVGPRS(base) REST_8GPRS(14, base); REST_10GPRS(22, base) +#define SAVE_GPRS(start, end, base) OP_REGS std, 8, start, end, base, GPR0 +#define REST_GPRS(start, end, base) OP_REGS ld, 8, start, end, base, GPR0 +#define SAVE_NVGPRS(base) SAVE_GPRS(14, 31, base) +#define REST_NVGPRS(base) REST_GPRS(14, 31, base) #else -#define SAVE_GPR(n, base) stw n,GPR0+4*(n)(base) -#define REST_GPR(n, base) lwz n,GPR0+4*(n)(base) -#define SAVE_NVGPRS(base) SAVE_GPR(13, base); SAVE_8GPRS(14, base); SAVE_10GPRS(22, base) -#define REST_NVGPRS(base) REST_GPR(13, base); REST_8GPRS(14, base); REST_10GPRS(22, base) +#define SAVE_GPRS(start, end, base) OP_REGS stw, 4, start, end, base, GPR0 +#define REST_GPRS(start, end, base) OP_REGS lwz, 4, start, end, base, GPR0 +#define SAVE_NVGPRS(base) SAVE_GPRS(13, 31, base) +#define REST_NVGPRS(base) REST_GPRS(13, 31, base) #endif -#define SAVE_2GPRS(n, base) SAVE_GPR(n, base); SAVE_GPR(n+1, base) -#define SAVE_4GPRS(n, base) SAVE_2GPRS(n, base); SAVE_2GPRS(n+2, base) -#define SAVE_8GPRS(n, base) SAVE_4GPRS(n, base); SAVE_4GPRS(n+4, base) -#define SAVE_10GPRS(n, base) SAVE_8GPRS(n, base); SAVE_2GPRS(n+8, base) -#define REST_2GPRS(n, base) REST_GPR(n, base); REST_GPR(n+1, base) -#define REST_4GPRS(n, base) REST_2GPRS(n, base); REST_2GPRS(n+2, base) -#define REST_8GPRS(n, base) REST_4GPRS(n, base); REST_4GPRS(n+4, base) -#define REST_10GPRS(n, base) REST_8GPRS(n, base); REST_2GPRS(n+8, base) +#define SAVE_GPR(n, base) SAVE_GPRS(n, n, base) +#define REST_GPR(n, base) REST_GPRS(n, n, base) #define SAVE_FPR(n, base) stfd n,8*TS_FPRWIDTH*(n)(base) #define SAVE_2FPRS(n, base) SAVE_FPR(n, base); SAVE_FPR(n+1, base) diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index e39bd0ff69f3..2c8686d9e964 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -157,8 +157,12 @@ struct thread_struct { #ifdef CONFIG_PPC_BOOK3S_32 unsigned long r0, r3, r4, r5, r6, r8, r9, r11; unsigned long lr, ctr; + unsigned long sr0; #endif #endif /* CONFIG_PPC32 */ +#if defined(CONFIG_BOOKE_OR_40x) && defined(CONFIG_PPC_KUAP) + unsigned long pid; /* value written in PID reg. at interrupt exit */ +#endif /* Debug Registers */ struct debug_reg debug; #ifdef CONFIG_PPC_FPU_REGS @@ -191,8 +195,10 @@ struct thread_struct { int used_vsr; /* set if process has used VSX */ #endif /* CONFIG_VSX */ #ifdef CONFIG_SPE - unsigned long evr[32]; /* upper 32-bits of SPE regs */ - u64 acc; /* Accumulator */ + struct_group(spe, + unsigned long evr[32]; /* upper 32-bits of SPE regs */ + u64 acc; /* Accumulator */ + ); unsigned long spefscr; /* SPE & eFP status */ unsigned long spefscr_last; /* SPEFSCR value on last prctl call or trap return */ @@ -276,6 +282,12 @@ struct thread_struct { #define SPEFSCR_INIT #endif +#ifdef CONFIG_PPC_BOOK3S_32 +#define SR0_INIT .sr0 = IS_ENABLED(CONFIG_PPC_KUEP) ? SR_NX : 0, +#else +#define SR0_INIT +#endif + #if defined(CONFIG_PPC_BOOK3S_32) && defined(CONFIG_PPC_KUAP) #define INIT_THREAD { \ .ksp = INIT_SP, \ @@ -283,6 +295,7 @@ struct thread_struct { .kuap = ~0UL, /* KUAP_NONE */ \ .fpexc_mode = MSR_FE0 | MSR_FE1, \ SPEFSCR_INIT \ + SR0_INIT \ } #elif defined(CONFIG_PPC32) #define INIT_THREAD { \ @@ -290,6 +303,7 @@ struct thread_struct { .pgdir = swapper_pg_dir, \ .fpexc_mode = MSR_FE0 | MSR_FE1, \ SPEFSCR_INIT \ + SR0_INIT \ } #else #define INIT_THREAD { \ diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h index 6e560f035614..42f89e2d8f04 100644 --- a/arch/powerpc/include/asm/ptrace.h +++ b/arch/powerpc/include/asm/ptrace.h @@ -291,7 +291,7 @@ static inline void regs_set_return_value(struct pt_regs *regs, unsigned long rc) static inline bool cpu_has_msr_ri(void) { - return !IS_ENABLED(CONFIG_BOOKE) && !IS_ENABLED(CONFIG_40x); + return !IS_ENABLED(CONFIG_BOOKE_OR_40x); } static inline bool regs_is_unrecoverable(struct pt_regs *regs) diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index e9d27265253b..2835f6363228 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -18,9 +18,9 @@ #include <asm/feature-fixups.h> /* Pickup Book E specific registers. */ -#if defined(CONFIG_BOOKE) || defined(CONFIG_40x) +#ifdef CONFIG_BOOKE_OR_40x #include <asm/reg_booke.h> -#endif /* CONFIG_BOOKE || CONFIG_40x */ +#endif #ifdef CONFIG_FSL_EMB_PERFMON #include <asm/reg_fsl_emb.h> @@ -1366,6 +1366,18 @@ /* Macros for setting and retrieving special purpose registers */ #ifndef __ASSEMBLY__ + +#if defined(CONFIG_PPC64) || defined(__CHECKER__) +typedef struct { + u32 val; +#ifdef CONFIG_PPC64 + u32 suffix; +#endif +} __packed ppc_inst_t; +#else +typedef u32 ppc_inst_t; +#endif + #define mfmsr() ({unsigned long rval; \ asm volatile("mfmsr %0" : "=r" (rval) : \ : "memory"); rval;}) diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h index 9dc97d2f9d27..82e5b055fa2a 100644 --- a/arch/powerpc/include/asm/rtas.h +++ b/arch/powerpc/include/asm/rtas.h @@ -264,7 +264,7 @@ extern void rtas_get_rtc_time(struct rtc_time *rtc_time); extern int rtas_set_rtc_time(struct rtc_time *rtc_time); extern unsigned int rtas_busy_delay_time(int status); -extern unsigned int rtas_busy_delay(int status); +bool rtas_busy_delay(int status); extern int early_init_dt_scan_rtas(unsigned long node, const char *uname, int depth, void *data); diff --git a/arch/powerpc/include/asm/sections.h b/arch/powerpc/include/asm/sections.h index 79cb7a25a5fb..38f79e42bf3c 100644 --- a/arch/powerpc/include/asm/sections.h +++ b/arch/powerpc/include/asm/sections.h @@ -25,16 +25,16 @@ extern char start_virt_trampolines[]; extern char end_virt_trampolines[]; #endif +/* + * This assumes the kernel is never compiled -mcmodel=small or + * the total .toc is always less than 64k. + */ static inline unsigned long kernel_toc_addr(void) { - /* Defined by the linker, see vmlinux.lds.S */ - extern unsigned long __toc_start; - - /* - * The TOC register (r2) points 32kB into the TOC, so that 64kB of - * the TOC can be addressed using a single machine instruction. - */ - return (unsigned long)(&__toc_start) + 0x8000UL; + unsigned long toc_ptr; + + asm volatile("mr %0, 2" : "=r" (toc_ptr)); + return toc_ptr; } static inline int overlaps_interrupt_vector_text(unsigned long start, diff --git a/arch/powerpc/include/asm/setup.h b/arch/powerpc/include/asm/setup.h index 6c1a7d217d1a..d0d3dd531c7f 100644 --- a/arch/powerpc/include/asm/setup.h +++ b/arch/powerpc/include/asm/setup.h @@ -9,7 +9,6 @@ extern void ppc_printk_progress(char *s, unsigned short hex); extern unsigned int rtas_data; extern unsigned long long memory_limit; -extern bool init_mem_is_free; extern void *zalloc_maybe_bootmem(size_t size, gfp_t mask); struct device_node; @@ -32,7 +31,7 @@ void setup_panic(void); extern bool pseries_enable_reloc_on_exc(void); extern void pseries_disable_reloc_on_exc(void); extern void pseries_big_endian_exceptions(void); -extern void pseries_little_endian_exceptions(void); +void __init pseries_little_endian_exceptions(void); #else static inline bool pseries_enable_reloc_on_exc(void) { return false; } static inline void pseries_disable_reloc_on_exc(void) {} @@ -55,7 +54,7 @@ void setup_entry_flush(bool enable); void setup_uaccess_flush(bool enable); void do_rfi_flush_fixups(enum l1d_flush_type types); #ifdef CONFIG_PPC_BARRIER_NOSPEC -void setup_barrier_nospec(void); +void __init setup_barrier_nospec(void); #else static inline void setup_barrier_nospec(void) { } #endif @@ -71,11 +70,11 @@ static inline void do_barrier_nospec_fixups_range(bool enable, void *start, void #endif #ifdef CONFIG_PPC_FSL_BOOK3E -void setup_spectre_v2(void); +void __init setup_spectre_v2(void); #else static inline void setup_spectre_v2(void) {} #endif -void do_btb_flush_fixups(void); +void __init do_btb_flush_fixups(void); #endif /* !__ASSEMBLY__ */ diff --git a/arch/powerpc/include/asm/smu.h b/arch/powerpc/include/asm/smu.h index 4b30a0205c93..2ac6ab903023 100644 --- a/arch/powerpc/include/asm/smu.h +++ b/arch/powerpc/include/asm/smu.h @@ -456,7 +456,7 @@ extern void smu_poll(void); /* * Init routine, presence check.... */ -extern int smu_init(void); +int __init smu_init(void); extern int smu_present(void); struct platform_device; extern struct platform_device *smu_get_ofdev(void); diff --git a/arch/powerpc/include/asm/sstep.h b/arch/powerpc/include/asm/sstep.h index 1df867c2e054..50950deedb87 100644 --- a/arch/powerpc/include/asm/sstep.h +++ b/arch/powerpc/include/asm/sstep.h @@ -145,7 +145,7 @@ union vsx_reg { * otherwise. */ extern int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, - struct ppc_inst instr); + ppc_inst_t instr); /* * Emulate an instruction that can be executed just by updating @@ -162,7 +162,7 @@ void emulate_update_regs(struct pt_regs *reg, struct instruction_op *op); * 0 if it could not be emulated, or -1 for an instruction that * should not be emulated (rfid, mtmsrd clearing MSR_RI, etc.). */ -extern int emulate_step(struct pt_regs *regs, struct ppc_inst instr); +int emulate_step(struct pt_regs *regs, ppc_inst_t instr); /* * Emulate a load or store instruction by reading/writing the diff --git a/arch/powerpc/include/asm/switch_to.h b/arch/powerpc/include/asm/switch_to.h index 9d1fbd8be1c7..1f43ef696033 100644 --- a/arch/powerpc/include/asm/switch_to.h +++ b/arch/powerpc/include/asm/switch_to.h @@ -112,6 +112,9 @@ static inline void clear_task_ebb(struct task_struct *t) #endif } +void kvmppc_save_user_regs(void); +void kvmppc_save_current_sprs(void); + extern int set_thread_tidr(struct task_struct *t); #endif /* _ASM_POWERPC_SWITCH_TO_H */ diff --git a/arch/powerpc/include/asm/task_size_64.h b/arch/powerpc/include/asm/task_size_64.h index c993482237ed..38fdf8041d12 100644 --- a/arch/powerpc/include/asm/task_size_64.h +++ b/arch/powerpc/include/asm/task_size_64.h @@ -44,11 +44,7 @@ */ #define TASK_SIZE_USER32 (0x0000000100000000UL - (1 * PAGE_SIZE)) -#define TASK_SIZE_OF(tsk) \ - (test_tsk_thread_flag(tsk, TIF_32BIT) ? TASK_SIZE_USER32 : \ - TASK_SIZE_USER64) - -#define TASK_SIZE TASK_SIZE_OF(current) +#define TASK_SIZE (is_32bit_task() ? TASK_SIZE_USER32 : TASK_SIZE_USER64) #define TASK_UNMAPPED_BASE_USER32 (PAGE_ALIGN(TASK_SIZE_USER32 / 4)) #define TASK_UNMAPPED_BASE_USER64 (PAGE_ALIGN(DEFAULT_MAP_WINDOW_USER64 / 4)) diff --git a/arch/powerpc/include/asm/time.h b/arch/powerpc/include/asm/time.h index 8c2c3dd4ddba..924b2157882f 100644 --- a/arch/powerpc/include/asm/time.h +++ b/arch/powerpc/include/asm/time.h @@ -18,6 +18,8 @@ #include <asm/vdso/timebase.h> /* time.c */ +extern u64 decrementer_max; + extern unsigned long tb_ticks_per_jiffy; extern unsigned long tb_ticks_per_usec; extern unsigned long tb_ticks_per_sec; @@ -97,19 +99,16 @@ extern void div128_by_32(u64 dividend_high, u64 dividend_low, extern void secondary_cpu_time_init(void); extern void __init time_init(void); -#ifdef CONFIG_PPC64 -static inline unsigned long test_irq_work_pending(void) -{ - unsigned long x; +DECLARE_PER_CPU(u64, decrementers_next_tb); - asm volatile("lbz %0,%1(13)" - : "=r" (x) - : "i" (offsetof(struct paca_struct, irq_work_pending))); - return x; +static inline u64 timer_get_next_tb(void) +{ + return __this_cpu_read(decrementers_next_tb); } -#endif -DECLARE_PER_CPU(u64, decrementers_next_tb); +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE +void timer_rearm_host_dec(u64 now); +#endif /* Convert timebase ticks to nanoseconds */ unsigned long long tb_to_ns(unsigned long long tb_ticks); diff --git a/arch/powerpc/include/asm/udbg.h b/arch/powerpc/include/asm/udbg.h index 0ea9e70ed78b..b4aa0d88ce2c 100644 --- a/arch/powerpc/include/asm/udbg.h +++ b/arch/powerpc/include/asm/udbg.h @@ -23,14 +23,14 @@ extern void udbg_printf(const char *fmt, ...) __attribute__ ((format (printf, 1, 2))); extern void udbg_progress(char *s, unsigned short hex); -extern void udbg_uart_init_mmio(void __iomem *addr, unsigned int stride); -extern void udbg_uart_init_pio(unsigned long port, unsigned int stride); +void __init udbg_uart_init_mmio(void __iomem *addr, unsigned int stride); +void __init udbg_uart_init_pio(unsigned long port, unsigned int stride); -extern void udbg_uart_setup(unsigned int speed, unsigned int clock); -extern unsigned int udbg_probe_uart_speed(unsigned int clock); +void __init udbg_uart_setup(unsigned int speed, unsigned int clock); +unsigned int __init udbg_probe_uart_speed(unsigned int clock); struct device_node; -extern void udbg_scc_init(int force_scc); +void __init udbg_scc_init(int force_scc); extern int udbg_adb_init(int force_btext); extern void udbg_adb_init_early(void); diff --git a/arch/powerpc/include/asm/uprobes.h b/arch/powerpc/include/asm/uprobes.h index fe683371336f..a7ae1860115a 100644 --- a/arch/powerpc/include/asm/uprobes.h +++ b/arch/powerpc/include/asm/uprobes.h @@ -11,7 +11,6 @@ #include <linux/notifier.h> #include <asm/probes.h> -#include <asm/inst.h> typedef ppc_opcode_t uprobe_opcode_t; diff --git a/arch/powerpc/include/asm/xics.h b/arch/powerpc/include/asm/xics.h index 0ac9bfddf704..e2e704eca5f6 100644 --- a/arch/powerpc/include/asm/xics.h +++ b/arch/powerpc/include/asm/xics.h @@ -38,13 +38,13 @@ static inline int icp_native_init(void) { return -ENODEV; } /* PAPR ICP */ #ifdef CONFIG_PPC_ICP_HV -extern int icp_hv_init(void); +int __init icp_hv_init(void); #else static inline int icp_hv_init(void) { return -ENODEV; } #endif #ifdef CONFIG_PPC_POWERNV -extern int icp_opal_init(void); +int __init icp_opal_init(void); extern void icp_opal_flush_interrupt(void); #else static inline int icp_opal_init(void) { return -ENODEV; } diff --git a/arch/powerpc/include/asm/xmon.h b/arch/powerpc/include/asm/xmon.h index 68bfb2361f03..f2d44b44f46c 100644 --- a/arch/powerpc/include/asm/xmon.h +++ b/arch/powerpc/include/asm/xmon.h @@ -12,7 +12,7 @@ #ifdef CONFIG_XMON extern void xmon_setup(void); -extern void xmon_register_spus(struct list_head *list); +void __init xmon_register_spus(struct list_head *list); struct pt_regs; extern int xmon(struct pt_regs *excp); extern irqreturn_t xmon_irq(int, void *); diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index b039877c743d..4d7829399570 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -11,6 +11,7 @@ CFLAGS_prom_init.o += -fPIC CFLAGS_btext.o += -fPIC endif +CFLAGS_early_32.o += $(DISABLE_LATENT_ENTROPY_PLUGIN) CFLAGS_cputable.o += $(DISABLE_LATENT_ENTROPY_PLUGIN) CFLAGS_prom_init.o += $(DISABLE_LATENT_ENTROPY_PLUGIN) CFLAGS_btext.o += $(DISABLE_LATENT_ENTROPY_PLUGIN) diff --git a/arch/powerpc/kernel/align.c b/arch/powerpc/kernel/align.c index bf96b954a4eb..3e37ece06739 100644 --- a/arch/powerpc/kernel/align.c +++ b/arch/powerpc/kernel/align.c @@ -105,7 +105,7 @@ static struct aligninfo spe_aligninfo[32] = { * so we don't need the address swizzling. */ static int emulate_spe(struct pt_regs *regs, unsigned int reg, - struct ppc_inst ppc_instr) + ppc_inst_t ppc_instr) { union { u64 ll; @@ -300,7 +300,7 @@ Efault_write: int fix_alignment(struct pt_regs *regs) { - struct ppc_inst instr; + ppc_inst_t instr; struct instruction_op op; int r, type; diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index cc05522f50bf..7582f3e3a330 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -54,7 +54,7 @@ #endif #ifdef CONFIG_PPC32 -#if defined(CONFIG_BOOKE) || defined(CONFIG_40x) +#ifdef CONFIG_BOOKE_OR_40x #include "head_booke.h" #endif #endif @@ -139,6 +139,7 @@ int main(void) OFFSET(THR11, thread_struct, r11); OFFSET(THLR, thread_struct, lr); OFFSET(THCTR, thread_struct, ctr); + OFFSET(THSR0, thread_struct, sr0); #endif #ifdef CONFIG_SPE OFFSET(THREAD_EVR0, thread_struct, evr[0]); @@ -218,10 +219,12 @@ int main(void) OFFSET(PACA_EXGEN, paca_struct, exgen); OFFSET(PACA_EXMC, paca_struct, exmc); OFFSET(PACA_EXNMI, paca_struct, exnmi); +#ifdef CONFIG_PPC_64S_HASH_MMU OFFSET(PACA_SLBSHADOWPTR, paca_struct, slb_shadow_ptr); OFFSET(SLBSHADOW_STACKVSID, slb_shadow, save_area[SLB_NUM_BOLTED - 1].vsid); OFFSET(SLBSHADOW_STACKESID, slb_shadow, save_area[SLB_NUM_BOLTED - 1].esid); OFFSET(SLBSHADOW_SAVEAREA, slb_shadow, save_area); +#endif OFFSET(LPPACA_PMCINUSE, lppaca, pmcregs_in_use); #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE OFFSET(PACA_PMCINUSE, paca_struct, pmcregs_in_use); diff --git a/arch/powerpc/kernel/btext.c b/arch/powerpc/kernel/btext.c index 803c2a45b22a..9d9d56b574cc 100644 --- a/arch/powerpc/kernel/btext.c +++ b/arch/powerpc/kernel/btext.c @@ -161,7 +161,7 @@ void btext_map(void) boot_text_mapped = 1; } -static int btext_initialize(struct device_node *np) +static int __init btext_initialize(struct device_node *np) { unsigned int width, height, depth, pitch; unsigned long address = 0; @@ -241,8 +241,10 @@ int __init btext_find_display(int allow_nonstdout) rc = btext_initialize(np); printk("result: %d\n", rc); } - if (rc == 0) + if (rc == 0) { + of_node_put(np); break; + } } return rc; } @@ -290,7 +292,7 @@ void btext_update_display(unsigned long phys, int width, int height, } EXPORT_SYMBOL(btext_update_display); -void btext_clearscreen(void) +void __init btext_clearscreen(void) { unsigned int *base = (unsigned int *)calc_base(0, 0); unsigned long width = ((dispDeviceRect[2] - dispDeviceRect[0]) * @@ -308,7 +310,7 @@ void btext_clearscreen(void) rmci_maybe_off(); } -void btext_flushscreen(void) +void __init btext_flushscreen(void) { unsigned int *base = (unsigned int *)calc_base(0, 0); unsigned long width = ((dispDeviceRect[2] - dispDeviceRect[0]) * @@ -327,7 +329,7 @@ void btext_flushscreen(void) __asm__ __volatile__ ("sync" ::: "memory"); } -void btext_flushline(void) +void __init btext_flushline(void) { unsigned int *base = (unsigned int *)calc_base(0, g_loc_Y << 4); unsigned long width = ((dispDeviceRect[2] - dispDeviceRect[0]) * @@ -542,7 +544,7 @@ void btext_drawstring(const char *c) btext_drawchar(*c++); } -void btext_drawtext(const char *c, unsigned int len) +void __init btext_drawtext(const char *c, unsigned int len) { if (!boot_text_mapped) return; @@ -550,7 +552,7 @@ void btext_drawtext(const char *c, unsigned int len) btext_drawchar(*c++); } -void btext_drawhex(unsigned long v) +void __init btext_drawhex(unsigned long v) { if (!boot_text_mapped) return; diff --git a/arch/powerpc/kernel/cacheinfo.c b/arch/powerpc/kernel/cacheinfo.c index cf1be75b7833..00b0992be3e7 100644 --- a/arch/powerpc/kernel/cacheinfo.c +++ b/arch/powerpc/kernel/cacheinfo.c @@ -710,7 +710,7 @@ static struct kobj_attribute cache_shared_cpu_list_attr = __ATTR(shared_cpu_list, 0444, shared_cpu_list_show, NULL); /* Attributes which should always be created -- the kobject/sysfs core - * does this automatically via kobj_type->default_attrs. This is the + * does this automatically via kobj_type->default_groups. This is the * minimum data required to uniquely identify a cache. */ static struct attribute *cache_index_default_attrs[] = { @@ -720,6 +720,7 @@ static struct attribute *cache_index_default_attrs[] = { &cache_shared_cpu_list_attr.attr, NULL, }; +ATTRIBUTE_GROUPS(cache_index_default); /* Attributes which should be created if the cache device node has the * right properties -- see cacheinfo_create_index_opt_attrs @@ -738,7 +739,7 @@ static const struct sysfs_ops cache_index_ops = { static struct kobj_type cache_index_type = { .release = cache_index_release, .sysfs_ops = &cache_index_ops, - .default_attrs = cache_index_default_attrs, + .default_groups = cache_index_default_groups, }; static void cacheinfo_create_index_opt_attrs(struct cache_index_dir *dir) diff --git a/arch/powerpc/kernel/cpu_setup_power.c b/arch/powerpc/kernel/cpu_setup_power.c index 3cca88ee96d7..3dc61e203f37 100644 --- a/arch/powerpc/kernel/cpu_setup_power.c +++ b/arch/powerpc/kernel/cpu_setup_power.c @@ -109,7 +109,7 @@ static void init_PMU_HV_ISA207(void) static void init_PMU(void) { mtspr(SPRN_MMCRA, 0); - mtspr(SPRN_MMCR0, 0); + mtspr(SPRN_MMCR0, MMCR0_FC); mtspr(SPRN_MMCR1, 0); mtspr(SPRN_MMCR2, 0); } @@ -123,7 +123,7 @@ static void init_PMU_ISA31(void) { mtspr(SPRN_MMCR3, 0); mtspr(SPRN_MMCRA, MMCRA_BHRB_DISABLE); - mtspr(SPRN_MMCR0, MMCR0_PMCCEXT); + mtspr(SPRN_MMCR0, MMCR0_FC | MMCR0_PMCCEXT); } /* @@ -137,6 +137,7 @@ void __setup_cpu_power7(unsigned long offset, struct cpu_spec *t) return; mtspr(SPRN_LPID, 0); + mtspr(SPRN_AMOR, ~0); mtspr(SPRN_PCR, PCR_MASK); init_LPCR_ISA206(mfspr(SPRN_LPCR), LPCR_LPES1 >> LPCR_LPES_SH); } @@ -150,6 +151,7 @@ void __restore_cpu_power7(void) return; mtspr(SPRN_LPID, 0); + mtspr(SPRN_AMOR, ~0); mtspr(SPRN_PCR, PCR_MASK); init_LPCR_ISA206(mfspr(SPRN_LPCR), LPCR_LPES1 >> LPCR_LPES_SH); } @@ -164,6 +166,7 @@ void __setup_cpu_power8(unsigned long offset, struct cpu_spec *t) return; mtspr(SPRN_LPID, 0); + mtspr(SPRN_AMOR, ~0); mtspr(SPRN_PCR, PCR_MASK); init_LPCR_ISA206(mfspr(SPRN_LPCR) | LPCR_PECEDH, 0); /* LPES = 0 */ init_HFSCR(); @@ -184,6 +187,7 @@ void __restore_cpu_power8(void) return; mtspr(SPRN_LPID, 0); + mtspr(SPRN_AMOR, ~0); mtspr(SPRN_PCR, PCR_MASK); init_LPCR_ISA206(mfspr(SPRN_LPCR) | LPCR_PECEDH, 0); /* LPES = 0 */ init_HFSCR(); @@ -202,6 +206,7 @@ void __setup_cpu_power9(unsigned long offset, struct cpu_spec *t) mtspr(SPRN_PSSCR, 0); mtspr(SPRN_LPID, 0); mtspr(SPRN_PID, 0); + mtspr(SPRN_AMOR, ~0); mtspr(SPRN_PCR, PCR_MASK); init_LPCR_ISA300((mfspr(SPRN_LPCR) | LPCR_PECEDH | LPCR_PECE_HVEE |\ LPCR_HVICE | LPCR_HEIC) & ~(LPCR_UPRT | LPCR_HR), 0); @@ -223,6 +228,7 @@ void __restore_cpu_power9(void) mtspr(SPRN_PSSCR, 0); mtspr(SPRN_LPID, 0); mtspr(SPRN_PID, 0); + mtspr(SPRN_AMOR, ~0); mtspr(SPRN_PCR, PCR_MASK); init_LPCR_ISA300((mfspr(SPRN_LPCR) | LPCR_PECEDH | LPCR_PECE_HVEE |\ LPCR_HVICE | LPCR_HEIC) & ~(LPCR_UPRT | LPCR_HR), 0); @@ -242,6 +248,7 @@ void __setup_cpu_power10(unsigned long offset, struct cpu_spec *t) mtspr(SPRN_PSSCR, 0); mtspr(SPRN_LPID, 0); mtspr(SPRN_PID, 0); + mtspr(SPRN_AMOR, ~0); mtspr(SPRN_PCR, PCR_MASK); init_LPCR_ISA300((mfspr(SPRN_LPCR) | LPCR_PECEDH | LPCR_PECE_HVEE |\ LPCR_HVICE | LPCR_HEIC) & ~(LPCR_UPRT | LPCR_HR), 0); @@ -264,6 +271,7 @@ void __restore_cpu_power10(void) mtspr(SPRN_PSSCR, 0); mtspr(SPRN_LPID, 0); mtspr(SPRN_PID, 0); + mtspr(SPRN_AMOR, ~0); mtspr(SPRN_PCR, PCR_MASK); init_LPCR_ISA300((mfspr(SPRN_LPCR) | LPCR_PECEDH | LPCR_PECE_HVEE |\ LPCR_HVICE | LPCR_HEIC) & ~(LPCR_UPRT | LPCR_HR), 0); diff --git a/arch/powerpc/kernel/dbell.c b/arch/powerpc/kernel/dbell.c index 5545c9cd17c1..f55c6fb34a3a 100644 --- a/arch/powerpc/kernel/dbell.c +++ b/arch/powerpc/kernel/dbell.c @@ -27,7 +27,8 @@ DEFINE_INTERRUPT_HANDLER_ASYNC(doorbell_exception) ppc_msgsync(); - may_hard_irq_enable(); + if (should_hard_irq_enable()) + do_hard_irq_enable(); kvmppc_clear_host_ipi(smp_processor_id()); __this_cpu_inc(irq_stat.doorbell_irqs); diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c index ba527fb52993..7d1b2c4a4891 100644 --- a/arch/powerpc/kernel/dt_cpu_ftrs.c +++ b/arch/powerpc/kernel/dt_cpu_ftrs.c @@ -80,6 +80,7 @@ static void __restore_cpu_cpufeatures(void) mtspr(SPRN_LPCR, system_registers.lpcr); if (hv_mode) { mtspr(SPRN_LPID, 0); + mtspr(SPRN_AMOR, ~0); mtspr(SPRN_HFSCR, system_registers.hfscr); mtspr(SPRN_PCR, system_registers.pcr); } @@ -216,6 +217,7 @@ static int __init feat_enable_hv(struct dt_cpu_feature *f) } mtspr(SPRN_LPID, 0); + mtspr(SPRN_AMOR, ~0); lpcr = mfspr(SPRN_LPCR); lpcr &= ~LPCR_LPES0; /* HV external interrupts */ @@ -271,6 +273,9 @@ static int __init feat_enable_mmu_hash(struct dt_cpu_feature *f) { u64 lpcr; + if (!IS_ENABLED(CONFIG_PPC_64S_HASH_MMU)) + return 0; + lpcr = mfspr(SPRN_LPCR); lpcr &= ~LPCR_ISL; @@ -290,6 +295,9 @@ static int __init feat_enable_mmu_hash_v3(struct dt_cpu_feature *f) { u64 lpcr; + if (!IS_ENABLED(CONFIG_PPC_64S_HASH_MMU)) + return 0; + lpcr = mfspr(SPRN_LPCR); lpcr &= ~(LPCR_ISL | LPCR_UPRT | LPCR_HR); mtspr(SPRN_LPCR, lpcr); @@ -303,15 +311,15 @@ static int __init feat_enable_mmu_hash_v3(struct dt_cpu_feature *f) static int __init feat_enable_mmu_radix(struct dt_cpu_feature *f) { -#ifdef CONFIG_PPC_RADIX_MMU + if (!IS_ENABLED(CONFIG_PPC_RADIX_MMU)) + return 0; + + cur_cpu_spec->mmu_features |= MMU_FTR_KERNEL_RO; cur_cpu_spec->mmu_features |= MMU_FTR_TYPE_RADIX; - cur_cpu_spec->mmu_features |= MMU_FTRS_HASH_BASE; cur_cpu_spec->mmu_features |= MMU_FTR_GTSE; cur_cpu_spec->cpu_user_features |= PPC_FEATURE_HAS_MMU; return 1; -#endif - return 0; } static int __init feat_enable_dscr(struct dt_cpu_feature *f) @@ -336,7 +344,7 @@ static int __init feat_enable_dscr(struct dt_cpu_feature *f) return 1; } -static void hfscr_pmu_enable(void) +static void __init hfscr_pmu_enable(void) { u64 hfscr = mfspr(SPRN_HFSCR); hfscr |= PPC_BIT(60); @@ -351,7 +359,7 @@ static void init_pmu_power8(void) } mtspr(SPRN_MMCRA, 0); - mtspr(SPRN_MMCR0, 0); + mtspr(SPRN_MMCR0, MMCR0_FC); mtspr(SPRN_MMCR1, 0); mtspr(SPRN_MMCR2, 0); mtspr(SPRN_MMCRS, 0); @@ -390,7 +398,7 @@ static void init_pmu_power9(void) mtspr(SPRN_MMCRC, 0); mtspr(SPRN_MMCRA, 0); - mtspr(SPRN_MMCR0, 0); + mtspr(SPRN_MMCR0, MMCR0_FC); mtspr(SPRN_MMCR1, 0); mtspr(SPRN_MMCR2, 0); } @@ -426,7 +434,7 @@ static void init_pmu_power10(void) mtspr(SPRN_MMCR3, 0); mtspr(SPRN_MMCRA, MMCRA_BHRB_DISABLE); - mtspr(SPRN_MMCR0, MMCR0_PMCCEXT); + mtspr(SPRN_MMCR0, MMCR0_FC | MMCR0_PMCCEXT); } static int __init feat_enable_pmu_power10(struct dt_cpu_feature *f) diff --git a/arch/powerpc/kernel/eeh_cache.c b/arch/powerpc/kernel/eeh_cache.c index 9bdaaf7fddc9..2f9dbf8ad2ee 100644 --- a/arch/powerpc/kernel/eeh_cache.c +++ b/arch/powerpc/kernel/eeh_cache.c @@ -280,7 +280,7 @@ static int eeh_addr_cache_show(struct seq_file *s, void *v) } DEFINE_SHOW_ATTRIBUTE(eeh_addr_cache); -void eeh_cache_debugfs_init(void) +void __init eeh_cache_debugfs_init(void) { debugfs_create_file_unsafe("eeh_address_cache", 0400, arch_debugfs_dir, NULL, diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index 350dab18e137..422f80b5b27b 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -905,18 +905,19 @@ void eeh_handle_normal_event(struct eeh_pe *pe) } #endif /* CONFIG_STACKTRACE */ + eeh_for_each_pe(pe, tmp_pe) + eeh_pe_for_each_dev(tmp_pe, edev, tmp) + edev->mode &= ~EEH_DEV_NO_HANDLER; + eeh_pe_update_time_stamp(pe); pe->freeze_count++; if (pe->freeze_count > eeh_max_freezes) { pr_err("EEH: PHB#%x-PE#%x has failed %d times in the last hour and has been permanently disabled.\n", pe->phb->global_number, pe->addr, pe->freeze_count); - result = PCI_ERS_RESULT_DISCONNECT; - } - eeh_for_each_pe(pe, tmp_pe) - eeh_pe_for_each_dev(tmp_pe, edev, tmp) - edev->mode &= ~EEH_DEV_NO_HANDLER; + goto recover_failed; + } /* Walk the various device drivers attached to this slot through * a reset sequence, giving each an opportunity to do what it needs @@ -928,39 +929,38 @@ void eeh_handle_normal_event(struct eeh_pe *pe) * the error. Override the result if necessary to have partially * hotplug for this case. */ - if (result != PCI_ERS_RESULT_DISCONNECT) { - pr_warn("EEH: This PCI device has failed %d times in the last hour and will be permanently disabled after %d failures.\n", - pe->freeze_count, eeh_max_freezes); - pr_info("EEH: Notify device drivers to shutdown\n"); - eeh_set_channel_state(pe, pci_channel_io_frozen); - eeh_set_irq_state(pe, false); - eeh_pe_report("error_detected(IO frozen)", pe, - eeh_report_error, &result); - if ((pe->type & EEH_PE_PHB) && - result != PCI_ERS_RESULT_NONE && - result != PCI_ERS_RESULT_NEED_RESET) - result = PCI_ERS_RESULT_NEED_RESET; - } + pr_warn("EEH: This PCI device has failed %d times in the last hour and will be permanently disabled after %d failures.\n", + pe->freeze_count, eeh_max_freezes); + pr_info("EEH: Notify device drivers to shutdown\n"); + eeh_set_channel_state(pe, pci_channel_io_frozen); + eeh_set_irq_state(pe, false); + eeh_pe_report("error_detected(IO frozen)", pe, + eeh_report_error, &result); + if (result == PCI_ERS_RESULT_DISCONNECT) + goto recover_failed; + + /* + * Error logged on a PHB are always fences which need a full + * PHB reset to clear so force that to happen. + */ + if ((pe->type & EEH_PE_PHB) && result != PCI_ERS_RESULT_NONE) + result = PCI_ERS_RESULT_NEED_RESET; /* Get the current PCI slot state. This can take a long time, * sometimes over 300 seconds for certain systems. */ - if (result != PCI_ERS_RESULT_DISCONNECT) { - rc = eeh_wait_state(pe, MAX_WAIT_FOR_RECOVERY*1000); - if (rc < 0 || rc == EEH_STATE_NOT_SUPPORT) { - pr_warn("EEH: Permanent failure\n"); - result = PCI_ERS_RESULT_DISCONNECT; - } + rc = eeh_wait_state(pe, MAX_WAIT_FOR_RECOVERY * 1000); + if (rc < 0 || rc == EEH_STATE_NOT_SUPPORT) { + pr_warn("EEH: Permanent failure\n"); + goto recover_failed; } /* Since rtas may enable MMIO when posting the error log, * don't post the error log until after all dev drivers * have been informed. */ - if (result != PCI_ERS_RESULT_DISCONNECT) { - pr_info("EEH: Collect temporary log\n"); - eeh_slot_error_detail(pe, EEH_LOG_TEMP); - } + pr_info("EEH: Collect temporary log\n"); + eeh_slot_error_detail(pe, EEH_LOG_TEMP); /* If all device drivers were EEH-unaware, then shut * down all of the device drivers, and hope they @@ -970,9 +970,8 @@ void eeh_handle_normal_event(struct eeh_pe *pe) pr_info("EEH: Reset with hotplug activity\n"); rc = eeh_reset_device(pe, bus, NULL, false); if (rc) { - pr_warn("%s: Unable to reset, err=%d\n", - __func__, rc); - result = PCI_ERS_RESULT_DISCONNECT; + pr_warn("%s: Unable to reset, err=%d\n", __func__, rc); + goto recover_failed; } } @@ -980,10 +979,10 @@ void eeh_handle_normal_event(struct eeh_pe *pe) if (result == PCI_ERS_RESULT_CAN_RECOVER) { pr_info("EEH: Enable I/O for affected devices\n"); rc = eeh_pci_enable(pe, EEH_OPT_THAW_MMIO); + if (rc < 0) + goto recover_failed; - if (rc < 0) { - result = PCI_ERS_RESULT_DISCONNECT; - } else if (rc) { + if (rc) { result = PCI_ERS_RESULT_NEED_RESET; } else { pr_info("EEH: Notify device drivers to resume I/O\n"); @@ -991,15 +990,13 @@ void eeh_handle_normal_event(struct eeh_pe *pe) eeh_report_mmio_enabled, &result); } } - - /* If all devices reported they can proceed, then re-enable DMA */ if (result == PCI_ERS_RESULT_CAN_RECOVER) { pr_info("EEH: Enabled DMA for affected devices\n"); rc = eeh_pci_enable(pe, EEH_OPT_THAW_DMA); + if (rc < 0) + goto recover_failed; - if (rc < 0) { - result = PCI_ERS_RESULT_DISCONNECT; - } else if (rc) { + if (rc) { result = PCI_ERS_RESULT_NEED_RESET; } else { /* @@ -1017,16 +1014,15 @@ void eeh_handle_normal_event(struct eeh_pe *pe) pr_info("EEH: Reset without hotplug activity\n"); rc = eeh_reset_device(pe, bus, &rmv_data, true); if (rc) { - pr_warn("%s: Cannot reset, err=%d\n", - __func__, rc); - result = PCI_ERS_RESULT_DISCONNECT; - } else { - result = PCI_ERS_RESULT_NONE; - eeh_set_channel_state(pe, pci_channel_io_normal); - eeh_set_irq_state(pe, true); - eeh_pe_report("slot_reset", pe, eeh_report_reset, - &result); + pr_warn("%s: Cannot reset, err=%d\n", __func__, rc); + goto recover_failed; } + + result = PCI_ERS_RESULT_NONE; + eeh_set_channel_state(pe, pci_channel_io_normal); + eeh_set_irq_state(pe, true); + eeh_pe_report("slot_reset", pe, eeh_report_reset, + &result); } if ((result == PCI_ERS_RESULT_RECOVERED) || @@ -1054,45 +1050,47 @@ void eeh_handle_normal_event(struct eeh_pe *pe) } pr_info("EEH: Recovery successful.\n"); - } else { - /* - * About 90% of all real-life EEH failures in the field - * are due to poorly seated PCI cards. Only 10% or so are - * due to actual, failed cards. - */ - pr_err("EEH: Unable to recover from failure from PHB#%x-PE#%x.\n" - "Please try reseating or replacing it\n", - pe->phb->global_number, pe->addr); + goto out; + } - eeh_slot_error_detail(pe, EEH_LOG_PERM); +recover_failed: + /* + * About 90% of all real-life EEH failures in the field + * are due to poorly seated PCI cards. Only 10% or so are + * due to actual, failed cards. + */ + pr_err("EEH: Unable to recover from failure from PHB#%x-PE#%x.\n" + "Please try reseating or replacing it\n", + pe->phb->global_number, pe->addr); - /* Notify all devices that they're about to go down. */ - eeh_set_channel_state(pe, pci_channel_io_perm_failure); - eeh_set_irq_state(pe, false); - eeh_pe_report("error_detected(permanent failure)", pe, - eeh_report_failure, NULL); + eeh_slot_error_detail(pe, EEH_LOG_PERM); - /* Mark the PE to be removed permanently */ - eeh_pe_state_mark(pe, EEH_PE_REMOVED); + /* Notify all devices that they're about to go down. */ + eeh_set_channel_state(pe, pci_channel_io_perm_failure); + eeh_set_irq_state(pe, false); + eeh_pe_report("error_detected(permanent failure)", pe, + eeh_report_failure, NULL); - /* - * Shut down the device drivers for good. We mark - * all removed devices correctly to avoid access - * the their PCI config any more. - */ - if (pe->type & EEH_PE_VF) { - eeh_pe_dev_traverse(pe, eeh_rmv_device, NULL); - eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED); - } else { - eeh_pe_state_clear(pe, EEH_PE_PRI_BUS, true); - eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED); + /* Mark the PE to be removed permanently */ + eeh_pe_state_mark(pe, EEH_PE_REMOVED); - pci_lock_rescan_remove(); - pci_hp_remove_devices(bus); - pci_unlock_rescan_remove(); - /* The passed PE should no longer be used */ - return; - } + /* + * Shut down the device drivers for good. We mark + * all removed devices correctly to avoid access + * the their PCI config any more. + */ + if (pe->type & EEH_PE_VF) { + eeh_pe_dev_traverse(pe, eeh_rmv_device, NULL); + eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED); + } else { + eeh_pe_state_clear(pe, EEH_PE_PRI_BUS, true); + eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED); + + pci_lock_rescan_remove(); + pci_hp_remove_devices(bus); + pci_unlock_rescan_remove(); + /* The passed PE should no longer be used */ + return; } out: diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 61fdd53cdd9a..7748c278d13c 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -73,13 +73,39 @@ prepare_transfer_to_handler: _ASM_NOKPROBE_SYMBOL(prepare_transfer_to_handler) #endif /* CONFIG_PPC_BOOK3S_32 || CONFIG_E500 */ +#if defined(CONFIG_PPC_KUEP) && defined(CONFIG_PPC_BOOK3S_32) + .globl __kuep_lock +__kuep_lock: + lwz r9, THREAD+THSR0(r2) + update_user_segments_by_4 r9, r10, r11, r12 + blr + +__kuep_unlock: + lwz r9, THREAD+THSR0(r2) + rlwinm r9,r9,0,~SR_NX + update_user_segments_by_4 r9, r10, r11, r12 + blr + +.macro kuep_lock + bl __kuep_lock +.endm +.macro kuep_unlock + bl __kuep_unlock +.endm +#else +.macro kuep_lock +.endm +.macro kuep_unlock +.endm +#endif + .globl transfer_to_syscall transfer_to_syscall: stw r11, GPR1(r1) stw r11, 0(r1) mflr r12 stw r12, _LINK(r1) -#if defined(CONFIG_BOOKE) || defined(CONFIG_40x) +#ifdef CONFIG_BOOKE_OR_40x rlwinm r9,r9,0,14,12 /* clear MSR_WE (necessary?) */ #endif lis r12,STACK_FRAME_REGS_MARKER@ha /* exception frame marker */ @@ -90,10 +116,10 @@ transfer_to_syscall: stw r12,8(r1) stw r2,_TRAP(r1) SAVE_GPR(0, r1) - SAVE_4GPRS(3, r1) - SAVE_2GPRS(7, r1) + SAVE_GPRS(3, 8, r1) addi r2,r10,-THREAD SAVE_NVGPRS(r1) + kuep_lock /* Calling convention has r9 = orig r0, r10 = regs */ addi r10,r1,STACK_FRAME_OVERHEAD @@ -110,6 +136,7 @@ ret_from_syscall: cmplwi cr0,r5,0 bne- 2f #endif /* CONFIG_PPC_47x */ + kuep_unlock lwz r4,_LINK(r1) lwz r5,_CCR(r1) mtlr r4 @@ -139,7 +166,7 @@ syscall_exit_finish: mtxer r5 lwz r0,GPR0(r1) lwz r3,GPR3(r1) - REST_8GPRS(4,r1) + REST_GPRS(4, 11, r1) lwz r12,GPR12(r1) b 1b @@ -232,9 +259,9 @@ fast_exception_return: beq 3f /* if not, we've got problems */ #endif -2: REST_4GPRS(3, r11) +2: REST_GPRS(3, 6, r11) lwz r10,_CCR(r11) - REST_2GPRS(1, r11) + REST_GPRS(1, 2, r11) mtcr r10 lwz r10,_LINK(r11) mtlr r10 @@ -273,6 +300,7 @@ interrupt_return: beq .Lkernel_interrupt_return bl interrupt_exit_user_prepare cmpwi r3,0 + kuep_unlock bne- .Lrestore_nvgprs .Lfast_user_interrupt_return: @@ -298,16 +326,14 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) * the reliable stack unwinder later on. Clear it. */ stw r0,8(r1) - REST_4GPRS(7, r1) - REST_2GPRS(11, r1) + REST_GPRS(7, 12, r1) mtcr r3 mtlr r4 mtctr r5 mtspr SPRN_XER,r6 - REST_4GPRS(2, r1) - REST_GPR(6, r1) + REST_GPRS(2, 6, r1) REST_GPR(0, r1) REST_GPR(1, r1) rfi @@ -341,8 +367,7 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) lwz r6,_CCR(r1) li r0,0 - REST_4GPRS(7, r1) - REST_2GPRS(11, r1) + REST_GPRS(7, 12, r1) mtlr r3 mtctr r4 @@ -354,7 +379,7 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) */ stw r0,8(r1) - REST_4GPRS(2, r1) + REST_GPRS(2, 5, r1) bne- cr1,1f /* emulate stack store */ mtcr r6 @@ -430,8 +455,7 @@ _ASM_NOKPROBE_SYMBOL(interrupt_return) bne interrupt_return; \ lwz r0,GPR0(r1); \ lwz r2,GPR2(r1); \ - REST_4GPRS(3, r1); \ - REST_2GPRS(7, r1); \ + REST_GPRS(3, 8, r1); \ lwz r10,_XER(r1); \ lwz r11,_CTR(r1); \ mtspr SPRN_XER,r10; \ diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 70cff7b49e17..9581906b5ee9 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -180,7 +180,7 @@ _GLOBAL(_switch) #endif ld r8,KSP(r4) /* new stack pointer */ -#ifdef CONFIG_PPC_BOOK3S_64 +#ifdef CONFIG_PPC_64S_HASH_MMU BEGIN_MMU_FTR_SECTION b 2f END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX) @@ -232,7 +232,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) slbmte r7,r0 isync 2: -#endif /* CONFIG_PPC_BOOK3S_64 */ +#endif /* CONFIG_PPC_64S_HASH_MMU */ clrrdi r7, r8, THREAD_SHIFT /* base of new stack */ /* Note: this uses SWITCH_FRAME_SIZE rather than INT_FRAME_SIZE diff --git a/arch/powerpc/kernel/epapr_paravirt.c b/arch/powerpc/kernel/epapr_paravirt.c index 93b0f3ec8fb0..d4b8aff20815 100644 --- a/arch/powerpc/kernel/epapr_paravirt.c +++ b/arch/powerpc/kernel/epapr_paravirt.c @@ -37,7 +37,7 @@ static int __init early_init_dt_scan_epapr(unsigned long node, return -1; for (i = 0; i < (len / 4); i++) { - struct ppc_inst inst = ppc_inst(be32_to_cpu(insts[i])); + ppc_inst_t inst = ppc_inst(be32_to_cpu(insts[i])); patch_instruction(epapr_hypercall_start + i, inst); #if !defined(CONFIG_64BIT) || defined(CONFIG_PPC_BOOK3E_64) patch_instruction(epapr_ev_idle_start + i, inst); diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index 711c66b76df1..67dc4e3179a0 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -198,8 +198,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV) stdcx. r0,0,r1 /* to clear the reservation */ - REST_4GPRS(2, r1) - REST_4GPRS(6, r1) + REST_GPRS(2, 9, r1) ld r10,_CTR(r1) ld r11,_XER(r1) @@ -375,9 +374,7 @@ ret_from_mc_except: exc_##n##_common: \ std r0,GPR0(r1); /* save r0 in stackframe */ \ std r2,GPR2(r1); /* save r2 in stackframe */ \ - SAVE_4GPRS(3, r1); /* save r3 - r6 in stackframe */ \ - SAVE_2GPRS(7, r1); /* save r7, r8 in stackframe */ \ - std r9,GPR9(r1); /* save r9 in stackframe */ \ + SAVE_GPRS(3, 9, r1); /* save r3 - r9 in stackframe */ \ std r10,_NIP(r1); /* save SRR0 to stackframe */ \ std r11,_MSR(r1); /* save SRR1 to stackframe */ \ beq 2f; /* if from kernel mode */ \ @@ -1061,9 +1058,7 @@ bad_stack_book3e: std r11,_ESR(r1) std r0,GPR0(r1); /* save r0 in stackframe */ \ std r2,GPR2(r1); /* save r2 in stackframe */ \ - SAVE_4GPRS(3, r1); /* save r3 - r6 in stackframe */ \ - SAVE_2GPRS(7, r1); /* save r7, r8 in stackframe */ \ - std r9,GPR9(r1); /* save r9 in stackframe */ \ + SAVE_GPRS(3, 9, r1); /* save r3 - r9 in stackframe */ \ ld r3,PACA_EXGEN+EX_R10(r13);/* get back r10 */ \ ld r4,PACA_EXGEN+EX_R11(r13);/* get back r11 */ \ mfspr r5,SPRN_SPRG_GEN_SCRATCH;/* get back r13 XXX can be wrong */ \ @@ -1077,8 +1072,7 @@ bad_stack_book3e: std r10,_LINK(r1) std r11,_CTR(r1) std r12,_XER(r1) - SAVE_10GPRS(14,r1) - SAVE_8GPRS(24,r1) + SAVE_GPRS(14, 31, r1) lhz r12,PACA_TRAP_SAVE(r13) std r12,_TRAP(r1) addi r11,r1,INT_FRAME_SIZE diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index eaf1f72131a1..55caeee37c08 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -48,7 +48,7 @@ .balign IFETCH_ALIGN_BYTES; \ .global name; \ _ASM_NOKPROBE_SYMBOL(name); \ - DEFINE_FIXED_SYMBOL(name); \ + DEFINE_FIXED_SYMBOL(name, text); \ name: #define TRAMP_REAL_BEGIN(name) \ @@ -76,31 +76,18 @@ name: ld reg,PACAKBASE(r13); /* get high part of &label */ \ ori reg,reg,FIXED_SYMBOL_ABS_ADDR(label) -#define __LOAD_HANDLER(reg, label) \ +#define __LOAD_HANDLER(reg, label, section) \ ld reg,PACAKBASE(r13); \ - ori reg,reg,(ABS_ADDR(label))@l + ori reg,reg,(ABS_ADDR(label, section))@l /* * Branches from unrelocated code (e.g., interrupts) to labels outside * head-y require >64K offsets. */ -#define __LOAD_FAR_HANDLER(reg, label) \ +#define __LOAD_FAR_HANDLER(reg, label, section) \ ld reg,PACAKBASE(r13); \ - ori reg,reg,(ABS_ADDR(label))@l; \ - addis reg,reg,(ABS_ADDR(label))@h - -/* - * Branch to label using its 0xC000 address. This results in instruction - * address suitable for MSR[IR]=0 or 1, which allows relocation to be turned - * on using mtmsr rather than rfid. - * - * This could set the 0xc bits for !RELOCATABLE as an immediate, rather than - * load KBASE for a slight optimisation. - */ -#define BRANCH_TO_C000(reg, label) \ - __LOAD_FAR_HANDLER(reg, label); \ - mtctr reg; \ - bctr + ori reg,reg,(ABS_ADDR(label, section))@l; \ + addis reg,reg,(ABS_ADDR(label, section))@h /* * Interrupt code generation macros @@ -111,9 +98,10 @@ name: #define IAREA .L_IAREA_\name\() /* PACA save area */ #define IVIRT .L_IVIRT_\name\() /* Has virt mode entry point */ #define IISIDE .L_IISIDE_\name\() /* Uses SRR0/1 not DAR/DSISR */ +#define ICFAR .L_ICFAR_\name\() /* Uses CFAR */ +#define ICFAR_IF_HVMODE .L_ICFAR_IF_HVMODE_\name\() /* Uses CFAR if HV */ #define IDAR .L_IDAR_\name\() /* Uses DAR (or SRR0) */ #define IDSISR .L_IDSISR_\name\() /* Uses DSISR (or SRR1) */ -#define ISET_RI .L_ISET_RI_\name\() /* Run common code w/ MSR[RI]=1 */ #define IBRANCH_TO_COMMON .L_IBRANCH_TO_COMMON_\name\() /* ENTRY branch to common */ #define IREALMODE_COMMON .L_IREALMODE_COMMON_\name\() /* Common runs in realmode */ #define IMASK .L_IMASK_\name\() /* IRQ soft-mask bit */ @@ -151,15 +139,18 @@ do_define_int n .ifndef IISIDE IISIDE=0 .endif + .ifndef ICFAR + ICFAR=1 + .endif + .ifndef ICFAR_IF_HVMODE + ICFAR_IF_HVMODE=0 + .endif .ifndef IDAR IDAR=0 .endif .ifndef IDSISR IDSISR=0 .endif - .ifndef ISET_RI - ISET_RI=1 - .endif .ifndef IBRANCH_TO_COMMON IBRANCH_TO_COMMON=1 .endif @@ -291,9 +282,21 @@ BEGIN_FTR_SECTION END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) HMT_MEDIUM std r10,IAREA+EX_R10(r13) /* save r10 - r12 */ + .if ICFAR BEGIN_FTR_SECTION mfspr r10,SPRN_CFAR END_FTR_SECTION_IFSET(CPU_FTR_CFAR) + .elseif ICFAR_IF_HVMODE +BEGIN_FTR_SECTION + BEGIN_FTR_SECTION_NESTED(69) + mfspr r10,SPRN_CFAR + END_FTR_SECTION_NESTED(CPU_FTR_CFAR, CPU_FTR_CFAR, 69) +FTR_SECTION_ELSE + BEGIN_FTR_SECTION_NESTED(69) + li r10,0 + END_FTR_SECTION_NESTED(CPU_FTR_CFAR, CPU_FTR_CFAR, 69) +ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206) + .endif .if \ool .if !\virt b tramp_real_\name @@ -309,9 +312,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR) BEGIN_FTR_SECTION std r9,IAREA+EX_PPR(r13) END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) + .if ICFAR || ICFAR_IF_HVMODE BEGIN_FTR_SECTION std r10,IAREA+EX_CFAR(r13) END_FTR_SECTION_IFSET(CPU_FTR_CFAR) + .endif INTERRUPT_TO_KERNEL mfctr r10 std r10,IAREA+EX_CTR(r13) @@ -376,7 +381,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR) * This switches to virtual mode and sets MSR[RI]. */ .macro __GEN_COMMON_ENTRY name -DEFINE_FIXED_SYMBOL(\name\()_common_real) +DEFINE_FIXED_SYMBOL(\name\()_common_real, text) \name\()_common_real: .if IKVM_REAL KVMTEST \name kvm_interrupt @@ -399,7 +404,7 @@ DEFINE_FIXED_SYMBOL(\name\()_common_real) .endif .balign IFETCH_ALIGN_BYTES -DEFINE_FIXED_SYMBOL(\name\()_common_virt) +DEFINE_FIXED_SYMBOL(\name\()_common_virt, text) \name\()_common_virt: .if IKVM_VIRT KVMTEST \name kvm_interrupt @@ -413,7 +418,7 @@ DEFINE_FIXED_SYMBOL(\name\()_common_virt) * want to run in real mode. */ .macro __GEN_REALMODE_COMMON_ENTRY name -DEFINE_FIXED_SYMBOL(\name\()_common_real) +DEFINE_FIXED_SYMBOL(\name\()_common_real, text) \name\()_common_real: .if IKVM_REAL KVMTEST \name kvm_interrupt @@ -512,11 +517,6 @@ DEFINE_FIXED_SYMBOL(\name\()_common_real) stb r10,PACASRR_VALID(r13) .endif - .if ISET_RI - li r10,MSR_RI - mtmsrd r10,1 /* Set MSR_RI */ - .endif - .if ISTACK .if IKUAP kuap_save_amr_and_lock r9, r10, cr1, cr0 @@ -568,14 +568,17 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) .endif BEGIN_FTR_SECTION + .if ICFAR || ICFAR_IF_HVMODE ld r10,IAREA+EX_CFAR(r13) + .else + li r10,0 + .endif std r10,ORIG_GPR3(r1) END_FTR_SECTION_IFSET(CPU_FTR_CFAR) ld r10,IAREA+EX_CTR(r13) std r10,_CTR(r1) std r2,GPR2(r1) /* save r2 in stackframe */ - SAVE_4GPRS(3, r1) /* save r3 - r6 in stackframe */ - SAVE_2GPRS(7, r1) /* save r7, r8 in stackframe */ + SAVE_GPRS(3, 8, r1) /* save r3 - r8 in stackframe */ mflr r9 /* Get LR, later save to stack */ ld r2,PACATOC(r13) /* get kernel TOC into r2 */ std r9,_LINK(r1) @@ -693,8 +696,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR) mtlr r9 ld r9,_CCR(r1) mtcr r9 - REST_8GPRS(2, r1) - REST_4GPRS(10, r1) + REST_GPRS(2, 13, r1) REST_GPR(0, r1) /* restore original r1. */ ld r1,GPR1(r1) @@ -850,12 +852,12 @@ SOFT_MASK_TABLE(0xc000000000003000, 0xc000000000004000) #ifdef CONFIG_RELOCATABLE TRAMP_VIRT_BEGIN(system_call_vectored_tramp) - __LOAD_HANDLER(r10, system_call_vectored_common) + __LOAD_HANDLER(r10, system_call_vectored_common, virt_trampolines) mtctr r10 bctr TRAMP_VIRT_BEGIN(system_call_vectored_sigill_tramp) - __LOAD_HANDLER(r10, system_call_vectored_sigill) + __LOAD_HANDLER(r10, system_call_vectored_sigill, virt_trampolines) mtctr r10 bctr #endif @@ -902,11 +904,6 @@ INT_DEFINE_BEGIN(system_reset) IVEC=0x100 IAREA=PACA_EXNMI IVIRT=0 /* no virt entry point */ - /* - * MSR_RI is not enabled, because PACA_EXNMI and nmi stack is - * being used, so a nested NMI exception would corrupt it. - */ - ISET_RI=0 ISTACK=0 IKVM_REAL=1 INT_DEFINE_END(system_reset) @@ -964,7 +961,9 @@ TRAMP_REAL_BEGIN(system_reset_idle_wake) /* We are waking up from idle, so may clobber any volatile register */ cmpwi cr1,r5,2 bltlr cr1 /* no state loss, return to idle caller with r3=SRR1 */ - BRANCH_TO_C000(r12, DOTSYM(idle_return_gpr_loss)) + __LOAD_FAR_HANDLER(r12, DOTSYM(idle_return_gpr_loss), real_trampolines) + mtctr r12 + bctr #endif #ifdef CONFIG_PPC_PSERIES @@ -979,16 +978,14 @@ TRAMP_REAL_BEGIN(system_reset_fwnmi) EXC_COMMON_BEGIN(system_reset_common) __GEN_COMMON_ENTRY system_reset /* - * Increment paca->in_nmi then enable MSR_RI. SLB or MCE will be able - * to recover, but nested NMI will notice in_nmi and not recover - * because of the use of the NMI stack. in_nmi reentrancy is tested in - * system_reset_exception. + * Increment paca->in_nmi. When the interrupt entry wrapper later + * enable MSR_RI, then SLB or MCE will be able to recover, but a nested + * NMI will notice in_nmi and not recover because of the use of the NMI + * stack. in_nmi reentrancy is tested in system_reset_exception. */ lhz r10,PACA_IN_NMI(r13) addi r10,r10,1 sth r10,PACA_IN_NMI(r13) - li r10,MSR_RI - mtmsrd r10,1 mr r10,r1 ld r1,PACA_NMI_EMERG_SP(r13) @@ -1062,12 +1059,6 @@ INT_DEFINE_BEGIN(machine_check_early) IAREA=PACA_EXMC IVIRT=0 /* no virt entry point */ IREALMODE_COMMON=1 - /* - * MSR_RI is not enabled, because PACA_EXMC is being used, so a - * nested machine check corrupts it. machine_check_common enables - * MSR_RI. - */ - ISET_RI=0 ISTACK=0 IDAR=1 IDSISR=1 @@ -1078,7 +1069,6 @@ INT_DEFINE_BEGIN(machine_check) IVEC=0x200 IAREA=PACA_EXMC IVIRT=0 /* no virt entry point */ - ISET_RI=0 IDAR=1 IDSISR=1 IKVM_REAL=1 @@ -1148,9 +1138,6 @@ EXC_COMMON_BEGIN(machine_check_early_common) BEGIN_FTR_SECTION bl enable_machine_check END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) - li r10,MSR_RI - mtmsrd r10,1 - addi r3,r1,STACK_FRAME_OVERHEAD bl machine_check_early std r3,RESULT(r1) /* Save result */ @@ -1238,10 +1225,6 @@ EXC_COMMON_BEGIN(machine_check_common) * save area: PACA_EXMC instead of PACA_EXGEN. */ GEN_COMMON machine_check - - /* Enable MSR_RI when finished with PACA_EXMC */ - li r10,MSR_RI - mtmsrd r10,1 addi r3,r1,STACK_FRAME_OVERHEAD bl machine_check_exception_async b interrupt_return_srr @@ -1369,11 +1352,15 @@ EXC_COMMON_BEGIN(data_access_common) addi r3,r1,STACK_FRAME_OVERHEAD andis. r0,r4,DSISR_DABRMATCH@h bne- 1f +#ifdef CONFIG_PPC_64S_HASH_MMU BEGIN_MMU_FTR_SECTION bl do_hash_fault MMU_FTR_SECTION_ELSE bl do_page_fault ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) +#else + bl do_page_fault +#endif b interrupt_return_srr 1: bl do_break @@ -1416,6 +1403,7 @@ EXC_VIRT_BEGIN(data_access_slb, 0x4380, 0x80) EXC_VIRT_END(data_access_slb, 0x4380, 0x80) EXC_COMMON_BEGIN(data_access_slb_common) GEN_COMMON data_access_slb +#ifdef CONFIG_PPC_64S_HASH_MMU BEGIN_MMU_FTR_SECTION /* HPT case, do SLB fault */ addi r3,r1,STACK_FRAME_OVERHEAD @@ -1428,9 +1416,12 @@ MMU_FTR_SECTION_ELSE /* Radix case, access is outside page table range */ li r3,-EFAULT ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) +#else + li r3,-EFAULT +#endif std r3,RESULT(r1) addi r3,r1,STACK_FRAME_OVERHEAD - bl do_bad_slb_fault + bl do_bad_segment_interrupt b interrupt_return_srr @@ -1462,11 +1453,15 @@ EXC_VIRT_END(instruction_access, 0x4400, 0x80) EXC_COMMON_BEGIN(instruction_access_common) GEN_COMMON instruction_access addi r3,r1,STACK_FRAME_OVERHEAD +#ifdef CONFIG_PPC_64S_HASH_MMU BEGIN_MMU_FTR_SECTION bl do_hash_fault MMU_FTR_SECTION_ELSE bl do_page_fault ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) +#else + bl do_page_fault +#endif b interrupt_return_srr @@ -1496,6 +1491,7 @@ EXC_VIRT_BEGIN(instruction_access_slb, 0x4480, 0x80) EXC_VIRT_END(instruction_access_slb, 0x4480, 0x80) EXC_COMMON_BEGIN(instruction_access_slb_common) GEN_COMMON instruction_access_slb +#ifdef CONFIG_PPC_64S_HASH_MMU BEGIN_MMU_FTR_SECTION /* HPT case, do SLB fault */ addi r3,r1,STACK_FRAME_OVERHEAD @@ -1508,9 +1504,12 @@ MMU_FTR_SECTION_ELSE /* Radix case, access is outside page table range */ li r3,-EFAULT ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) +#else + li r3,-EFAULT +#endif std r3,RESULT(r1) addi r3,r1,STACK_FRAME_OVERHEAD - bl do_bad_slb_fault + bl do_bad_segment_interrupt b interrupt_return_srr @@ -1536,6 +1535,12 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) * * If soft masked, the masked handler will note the pending interrupt for * replay, and clear MSR[EE] in the interrupted context. + * + * CFAR is not required because this is an asynchronous interrupt that in + * general won't have much bearing on the state of the CPU, with the possible + * exception of crash/debug IPIs, but those are generally moving to use SRESET + * IPIs. Unless this is an HV interrupt and KVM HV is possible, in which case + * it may be exiting the guest and need CFAR to be saved. */ INT_DEFINE_BEGIN(hardware_interrupt) IVEC=0x500 @@ -1543,6 +1548,10 @@ INT_DEFINE_BEGIN(hardware_interrupt) IMASK=IRQS_DISABLED IKVM_REAL=1 IKVM_VIRT=1 + ICFAR=0 +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE + ICFAR_IF_HVMODE=1 +#endif INT_DEFINE_END(hardware_interrupt) EXC_REAL_BEGIN(hardware_interrupt, 0x500, 0x100) @@ -1764,6 +1773,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_TM) * If PPC_WATCHDOG is configured, the soft masked handler will actually set * things back up to run soft_nmi_interrupt as a regular interrupt handler * on the emergency stack. + * + * CFAR is not required because this is asynchronous (see hardware_interrupt). + * A watchdog interrupt may like to have CFAR, but usually the interesting + * branch is long gone by that point (e.g., infinite loop). */ INT_DEFINE_BEGIN(decrementer) IVEC=0x900 @@ -1771,6 +1784,7 @@ INT_DEFINE_BEGIN(decrementer) #ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE IKVM_REAL=1 #endif + ICFAR=0 INT_DEFINE_END(decrementer) EXC_REAL_BEGIN(decrementer, 0x900, 0x80) @@ -1846,6 +1860,8 @@ EXC_COMMON_BEGIN(hdecrementer_common) * If soft masked, the masked handler will note the pending interrupt for * replay, leaving MSR[EE] enabled in the interrupted context because the * doorbells are edge triggered. + * + * CFAR is not required, similarly to hardware_interrupt. */ INT_DEFINE_BEGIN(doorbell_super) IVEC=0xa00 @@ -1853,6 +1869,7 @@ INT_DEFINE_BEGIN(doorbell_super) #ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE IKVM_REAL=1 #endif + ICFAR=0 INT_DEFINE_END(doorbell_super) EXC_REAL_BEGIN(doorbell_super, 0xa00, 0x100) @@ -1904,6 +1921,7 @@ INT_DEFINE_BEGIN(system_call) IVEC=0xc00 IKVM_REAL=1 IKVM_VIRT=1 + ICFAR=0 INT_DEFINE_END(system_call) .macro SYSTEM_CALL virt @@ -1942,12 +1960,12 @@ END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE) HMT_MEDIUM .if ! \virt - __LOAD_HANDLER(r10, system_call_common_real) + __LOAD_HANDLER(r10, system_call_common_real, real_vectors) mtctr r10 bctr .else #ifdef CONFIG_RELOCATABLE - __LOAD_HANDLER(r10, system_call_common) + __LOAD_HANDLER(r10, system_call_common, virt_vectors) mtctr r10 bctr #else @@ -2001,7 +2019,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) * Requires __LOAD_FAR_HANDLER beause kvmppc_hcall lives * outside the head section. */ - __LOAD_FAR_HANDLER(r10, kvmppc_hcall) + __LOAD_FAR_HANDLER(r10, kvmppc_hcall, real_trampolines) mtctr r10 bctr #else @@ -2202,6 +2220,11 @@ EXC_COMMON_BEGIN(hmi_exception_common) * Interrupt 0xe80 - Directed Hypervisor Doorbell Interrupt. * This is an asynchronous interrupt in response to a msgsnd doorbell. * Similar to the 0xa00 doorbell but for host rather than guest. + * + * CFAR is not required (similar to doorbell_interrupt), unless KVM HV + * is enabled, in which case it may be a guest exit. Most PowerNV kernels + * include KVM support so it would be nice if this could be dynamically + * patched out if KVM was not currently running any guests. */ INT_DEFINE_BEGIN(h_doorbell) IVEC=0xe80 @@ -2209,6 +2232,9 @@ INT_DEFINE_BEGIN(h_doorbell) IMASK=IRQS_DISABLED IKVM_REAL=1 IKVM_VIRT=1 +#ifndef CONFIG_KVM_BOOK3S_HV_POSSIBLE + ICFAR=0 +#endif INT_DEFINE_END(h_doorbell) EXC_REAL_BEGIN(h_doorbell, 0xe80, 0x20) @@ -2232,6 +2258,9 @@ EXC_COMMON_BEGIN(h_doorbell_common) * Interrupt 0xea0 - Hypervisor Virtualization Interrupt. * This is an asynchronous interrupt in response to an "external exception". * Similar to 0x500 but for host only. + * + * Like h_doorbell, CFAR is only required for KVM HV because this can be + * a guest exit. */ INT_DEFINE_BEGIN(h_virt_irq) IVEC=0xea0 @@ -2239,6 +2268,9 @@ INT_DEFINE_BEGIN(h_virt_irq) IMASK=IRQS_DISABLED IKVM_REAL=1 IKVM_VIRT=1 +#ifndef CONFIG_KVM_BOOK3S_HV_POSSIBLE + ICFAR=0 +#endif INT_DEFINE_END(h_virt_irq) EXC_REAL_BEGIN(h_virt_irq, 0xea0, 0x20) @@ -2275,6 +2307,8 @@ EXC_VIRT_NONE(0x4ee0, 0x20) * * If soft masked, the masked handler will note the pending interrupt for * replay, and clear MSR[EE] in the interrupted context. + * + * CFAR is not used by perf interrupts so not required. */ INT_DEFINE_BEGIN(performance_monitor) IVEC=0xf00 @@ -2282,6 +2316,7 @@ INT_DEFINE_BEGIN(performance_monitor) #ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE IKVM_REAL=1 #endif + ICFAR=0 INT_DEFINE_END(performance_monitor) EXC_REAL_BEGIN(performance_monitor, 0xf00, 0x20) @@ -2706,6 +2741,7 @@ EXC_VIRT_NONE(0x5800, 0x100) INT_DEFINE_BEGIN(soft_nmi) IVEC=0x900 ISTACK=0 + ICFAR=0 INT_DEFINE_END(soft_nmi) /* @@ -3025,7 +3061,7 @@ USE_FIXED_SECTION(virt_trampolines) .align 7 .globl __end_interrupts __end_interrupts: -DEFINE_FIXED_SYMBOL(__end_interrupts) +DEFINE_FIXED_SYMBOL(__end_interrupts, virt_trampolines) CLOSE_FIXED_SECTION(real_vectors); CLOSE_FIXED_SECTION(real_trampolines); diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c index b7ceb041743c..d03e488cfe9c 100644 --- a/arch/powerpc/kernel/fadump.c +++ b/arch/powerpc/kernel/fadump.c @@ -251,7 +251,7 @@ bool is_fadump_reserved_mem_contiguous(void) } /* Print firmware assisted dump configurations for debugging purpose. */ -static void fadump_show_config(void) +static void __init fadump_show_config(void) { int i; @@ -353,7 +353,7 @@ static __init u64 fadump_calculate_reserve_size(void) * Calculate the total memory size required to be reserved for * firmware-assisted dump registration. */ -static unsigned long get_fadump_area_size(void) +static unsigned long __init get_fadump_area_size(void) { unsigned long size = 0; @@ -462,7 +462,7 @@ static int __init fadump_get_boot_mem_regions(void) * with the given memory range. * False, otherwise. */ -static bool overlaps_reserved_ranges(u64 base, u64 end, int *idx) +static bool __init overlaps_reserved_ranges(u64 base, u64 end, int *idx) { bool ret = false; int i; @@ -737,7 +737,7 @@ void crash_fadump(struct pt_regs *regs, const char *str) fw_dump.ops->fadump_trigger(fdh, str); } -u32 *fadump_regs_to_elf_notes(u32 *buf, struct pt_regs *regs) +u32 *__init fadump_regs_to_elf_notes(u32 *buf, struct pt_regs *regs) { struct elf_prstatus prstatus; @@ -752,7 +752,7 @@ u32 *fadump_regs_to_elf_notes(u32 *buf, struct pt_regs *regs) return buf; } -void fadump_update_elfcore_header(char *bufp) +void __init fadump_update_elfcore_header(char *bufp) { struct elf_phdr *phdr; @@ -770,7 +770,7 @@ void fadump_update_elfcore_header(char *bufp) return; } -static void *fadump_alloc_buffer(unsigned long size) +static void *__init fadump_alloc_buffer(unsigned long size) { unsigned long count, i; struct page *page; @@ -792,7 +792,7 @@ static void fadump_free_buffer(unsigned long vaddr, unsigned long size) free_reserved_area((void *)vaddr, (void *)(vaddr + size), -1, NULL); } -s32 fadump_setup_cpu_notes_buf(u32 num_cpus) +s32 __init fadump_setup_cpu_notes_buf(u32 num_cpus) { /* Allocate buffer to hold cpu crash notes. */ fw_dump.cpu_notes_buf_size = num_cpus * sizeof(note_buf_t); @@ -1447,7 +1447,7 @@ static ssize_t release_mem_store(struct kobject *kobj, } /* Release the reserved memory and disable the FADump */ -static void unregister_fadump(void) +static void __init unregister_fadump(void) { fadump_cleanup(); fadump_release_memory(fw_dump.reserve_dump_area_start, @@ -1547,7 +1547,7 @@ ATTRIBUTE_GROUPS(fadump); DEFINE_SHOW_ATTRIBUTE(fadump_region); -static void fadump_init_files(void) +static void __init fadump_init_files(void) { int rc = 0; @@ -1641,6 +1641,14 @@ int __init setup_fadump(void) else if (fw_dump.reserve_dump_area_size) fw_dump.ops->fadump_init_mem_struct(&fw_dump); + /* + * In case of panic, fadump is triggered via ppc_panic_event() + * panic notifier. Setting crash_kexec_post_notifiers to 'true' + * lets panic() function take crash friendly path before panic + * notifiers are invoked. + */ + crash_kexec_post_notifiers = true; + return 1; } subsys_initcall(setup_fadump); diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S index ba4afe3b5a9c..f71f2bbd4de6 100644 --- a/arch/powerpc/kernel/fpu.S +++ b/arch/powerpc/kernel/fpu.S @@ -81,7 +81,12 @@ EXPORT_SYMBOL(store_fp_state) */ _GLOBAL(load_up_fpu) mfmsr r5 +#ifdef CONFIG_PPC_BOOK3S_64 + /* interrupt doesn't set MSR[RI] and HPT can fault on current access */ + ori r5,r5,MSR_FP|MSR_RI +#else ori r5,r5,MSR_FP +#endif #ifdef CONFIG_VSX BEGIN_FTR_SECTION oris r5,r5,MSR_VSX@h diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h index 349c4a820231..c3286260a7d1 100644 --- a/arch/powerpc/kernel/head_32.h +++ b/arch/powerpc/kernel/head_32.h @@ -115,8 +115,7 @@ _ASM_NOKPROBE_SYMBOL(\name\()_virt) stw r10,8(r1) li r10, \trapno stw r10,_TRAP(r1) - SAVE_4GPRS(3, r1) - SAVE_2GPRS(7, r1) + SAVE_GPRS(3, 8, r1) SAVE_NVGPRS(r1) stw r2,GPR2(r1) stw r12,_NIP(r1) @@ -136,6 +135,12 @@ _ASM_NOKPROBE_SYMBOL(\name\()_virt) andi. r12,r9,MSR_PR bne 777f bl prepare_transfer_to_handler +#ifdef CONFIG_PPC_KUEP + b 778f +777: + bl __kuep_lock +778: +#endif 777: #endif .endm diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S index 7d72ee5ab387..b6c6d1de5fd5 100644 --- a/arch/powerpc/kernel/head_40x.S +++ b/arch/powerpc/kernel/head_40x.S @@ -27,6 +27,7 @@ #include <linux/init.h> #include <linux/pgtable.h> +#include <linux/sizes.h> #include <asm/processor.h> #include <asm/page.h> #include <asm/mmu.h> @@ -297,6 +298,10 @@ _ASM_NOKPROBE_SYMBOL(\name\()_virt) 3: mfspr r11,SPRN_SPRG_THREAD lwz r11,PGDIR(r11) +#ifdef CONFIG_PPC_KUAP + rlwinm. r9, r9, 0, 0xff + beq 5f /* Kuap fault */ +#endif 4: tophys(r11, r11) rlwimi r11, r10, 12, 20, 29 /* Create L1 (pgdir/pmd) address */ @@ -377,6 +382,10 @@ _ASM_NOKPROBE_SYMBOL(\name\()_virt) 3: mfspr r11,SPRN_SPRG_THREAD lwz r11,PGDIR(r11) +#ifdef CONFIG_PPC_KUAP + rlwinm. r9, r9, 0, 0xff + beq 5f /* Kuap fault */ +#endif 4: tophys(r11, r11) rlwimi r11, r10, 12, 20, 29 /* Create L1 (pgdir/pmd) address */ @@ -650,7 +659,7 @@ start_here: b . /* prevent prefetch past rfi */ /* Set up the initial MMU state so we can do the first level of - * kernel initialization. This maps the first 16 MBytes of memory 1:1 + * kernel initialization. This maps the first 32 MBytes of memory 1:1 * virtual to physical and more importantly sets the cache mode. */ initial_mmu: @@ -687,6 +696,12 @@ initial_mmu: tlbwe r4,r0,TLB_DATA /* Load the data portion of the entry */ tlbwe r3,r0,TLB_TAG /* Load the tag portion of the entry */ + li r0,62 /* TLB slot 62 */ + addis r4,r4,SZ_16M@h + addis r3,r3,SZ_16M@h + tlbwe r4,r0,TLB_DATA /* Load the data portion of the entry */ + tlbwe r3,r0,TLB_TAG /* Load the tag portion of the entry */ + isync /* Establish the exception vector base diff --git a/arch/powerpc/kernel/head_44x.S b/arch/powerpc/kernel/head_44x.S index 02d2928d1e01..b73a56466903 100644 --- a/arch/powerpc/kernel/head_44x.S +++ b/arch/powerpc/kernel/head_44x.S @@ -334,6 +334,10 @@ interrupt_base: mfspr r12,SPRN_MMUCR mfspr r13,SPRN_PID /* Get PID */ rlwimi r12,r13,0,24,31 /* Set TID */ +#ifdef CONFIG_PPC_KUAP + cmpwi r13,0 + beq 2f /* KUAP Fault */ +#endif 4: mtspr SPRN_MMUCR,r12 @@ -444,6 +448,10 @@ interrupt_base: mfspr r12,SPRN_MMUCR mfspr r13,SPRN_PID /* Get PID */ rlwimi r12,r13,0,24,31 /* Set TID */ +#ifdef CONFIG_PPC_KUAP + cmpwi r13,0 + beq 2f /* KUAP Fault */ +#endif 4: mtspr SPRN_MMUCR,r12 @@ -532,10 +540,7 @@ finish_tlb_load_44x: andi. r10,r12,_PAGE_USER /* User page ? */ beq 1f /* nope, leave U bits empty */ rlwimi r11,r11,3,26,28 /* yes, copy S bits to U */ -#ifdef CONFIG_PPC_KUEP -0: rlwinm r11,r11,0,~PPC44x_TLB_SX /* Clear SX if User page */ - patch_site 0b, patch__tlb_44x_kuep -#endif + rlwinm r11,r11,0,~PPC44x_TLB_SX /* Clear SX if User page */ 1: tlbwe r11,r13,PPC44x_TLB_ATTRIB /* Write ATTRIB */ /* Done...restore registers and get out of here. @@ -575,6 +580,10 @@ finish_tlb_load_44x: 3: mfspr r11,SPRN_SPRG3 lwz r11,PGDIR(r11) mfspr r12,SPRN_PID /* Get PID */ +#ifdef CONFIG_PPC_KUAP + cmpwi r12,0 + beq 2f /* KUAP Fault */ +#endif 4: mtspr SPRN_MMUCR,r12 /* Set MMUCR */ /* Mask of required permission bits. Note that while we @@ -672,6 +681,10 @@ finish_tlb_load_44x: 3: mfspr r11,SPRN_SPRG_THREAD lwz r11,PGDIR(r11) mfspr r12,SPRN_PID /* Get PID */ +#ifdef CONFIG_PPC_KUAP + cmpwi r12,0 + beq 2f /* KUAP Fault */ +#endif 4: mtspr SPRN_MMUCR,r12 /* Set MMUCR */ /* Make up the required permissions */ @@ -747,10 +760,7 @@ finish_tlb_load_47x: andi. r10,r12,_PAGE_USER /* User page ? */ beq 1f /* nope, leave U bits empty */ rlwimi r11,r11,3,26,28 /* yes, copy S bits to U */ -#ifdef CONFIG_PPC_KUEP -0: rlwinm r11,r11,0,~PPC47x_TLB2_SX /* Clear SX if User page */ - patch_site 0b, patch__tlb_47x_kuep -#endif + rlwinm r11,r11,0,~PPC47x_TLB2_SX /* Clear SX if User page */ 1: tlbwe r11,r13,2 /* Done...restore registers and get out of here. diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index f17ae2083733..5c5181e8d5f1 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -126,7 +126,7 @@ __secondary_hold_acknowledge: . = 0x5c .globl __run_at_load __run_at_load: -DEFINE_FIXED_SYMBOL(__run_at_load) +DEFINE_FIXED_SYMBOL(__run_at_load, first_256B) .long RUN_AT_LOAD_DEFAULT #endif @@ -156,7 +156,7 @@ __secondary_hold: /* Tell the master cpu we're here */ /* Relocation is off & we are located at an address less */ /* than 0x100, so only need to grab low order offset. */ - std r24,(ABS_ADDR(__secondary_hold_acknowledge))(0) + std r24,(ABS_ADDR(__secondary_hold_acknowledge, first_256B))(0) sync li r26,0 @@ -164,7 +164,7 @@ __secondary_hold: tovirt(r26,r26) #endif /* All secondary cpus wait here until told to start. */ -100: ld r12,(ABS_ADDR(__secondary_hold_spinloop))(r26) +100: ld r12,(ABS_ADDR(__secondary_hold_spinloop, first_256B))(r26) cmpdi 0,r12,0 beq 100b @@ -649,15 +649,15 @@ __after_prom_start: 3: #endif /* # bytes of memory to copy */ - lis r5,(ABS_ADDR(copy_to_here))@ha - addi r5,r5,(ABS_ADDR(copy_to_here))@l + lis r5,(ABS_ADDR(copy_to_here, text))@ha + addi r5,r5,(ABS_ADDR(copy_to_here, text))@l bl copy_and_flush /* copy the first n bytes */ /* this includes the code being */ /* executed here. */ /* Jump to the copy of this code that we just made */ - addis r8,r3,(ABS_ADDR(4f))@ha - addi r12,r8,(ABS_ADDR(4f))@l + addis r8,r3,(ABS_ADDR(4f, text))@ha + addi r12,r8,(ABS_ADDR(4f, text))@l mtctr r12 bctr @@ -669,8 +669,8 @@ p_end: .8byte _end - copy_to_here * Now copy the rest of the kernel up to _end, add * _end - copy_to_here to the copy limit and run again. */ - addis r8,r26,(ABS_ADDR(p_end))@ha - ld r8,(ABS_ADDR(p_end))@l(r8) + addis r8,r26,(ABS_ADDR(p_end, text))@ha + ld r8,(ABS_ADDR(p_end, text))@l(r8) add r5,r5,r8 5: bl copy_and_flush /* copy the rest */ @@ -904,7 +904,7 @@ _GLOBAL(relative_toc) blr .balign 8 -p_toc: .8byte __toc_start + 0x8000 - 0b +p_toc: .8byte .TOC. - 0b /* * This is where the main kernel code starts. diff --git a/arch/powerpc/kernel/head_book3s_32.S b/arch/powerpc/kernel/head_book3s_32.S index 68e5c0a7e99d..fa84744d6b24 100644 --- a/arch/powerpc/kernel/head_book3s_32.S +++ b/arch/powerpc/kernel/head_book3s_32.S @@ -931,7 +931,11 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS) _GLOBAL(load_segment_registers) li r0, NUM_USER_SEGMENTS /* load up user segment register values */ mtctr r0 /* for context 0 */ +#ifdef CONFIG_PPC_KUEP + lis r3, SR_NX@h /* Kp = 0, Ks = 0, VSID = 0 */ +#else li r3, 0 /* Kp = 0, Ks = 0, VSID = 0 */ +#endif li r4, 0 3: mtsrin r3, r4 addi r3, r3, 0x111 /* increment VSID */ diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h index ef8d1b1c234e..bb6d5d0fc4ac 100644 --- a/arch/powerpc/kernel/head_booke.h +++ b/arch/powerpc/kernel/head_booke.h @@ -87,8 +87,7 @@ END_BTB_FLUSH_SECTION stw r10, 8(r1) li r10, \trapno stw r10,_TRAP(r1) - SAVE_4GPRS(3, r1) - SAVE_2GPRS(7, r1) + SAVE_GPRS(3, 8, r1) SAVE_NVGPRS(r1) stw r2,GPR2(r1) stw r12,_NIP(r1) diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S index 0a9a0f301474..ac2b4dcf5fd3 100644 --- a/arch/powerpc/kernel/head_fsl_booke.S +++ b/arch/powerpc/kernel/head_fsl_booke.S @@ -462,6 +462,12 @@ END_BTB_FLUSH_SECTION mfspr r11,SPRN_SPRG_THREAD lwz r11,PGDIR(r11) +#ifdef CONFIG_PPC_KUAP + mfspr r12, SPRN_MAS1 + rlwinm. r12,r12,0,0x3fff0000 + beq 2f /* KUAP fault */ +#endif + 4: /* Mask of required permission bits. Note that while we * do copy ESR:ST to _PAGE_RW position as trying to write @@ -571,6 +577,12 @@ END_BTB_FLUSH_SECTION mfspr r11,SPRN_SPRG_THREAD lwz r11,PGDIR(r11) +#ifdef CONFIG_PPC_KUAP + mfspr r12, SPRN_MAS1 + rlwinm. r12,r12,0,0x3fff0000 + beq 2f /* KUAP fault */ +#endif + /* Make up the required permissions for user code */ #ifdef CONFIG_PTE_64BIT li r13,_PAGE_PRESENT | _PAGE_BAP_UX @@ -777,6 +789,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_BIG_PHYS) andi. r10, r11, _PAGE_USER /* Test for _PAGE_USER */ slwi r10, r12, 1 or r10, r10, r12 + rlwinm r10, r10, 0, ~_PAGE_EXEC /* Clear SX on user pages */ iseleq r12, r12, r10 rlwimi r13, r12, 0, 20, 31 /* Get RPN from PTE, merge w/ perms */ mtspr SPRN_MAS3, r13 diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c index 91a3be14808b..2669f80b3a49 100644 --- a/arch/powerpc/kernel/hw_breakpoint.c +++ b/arch/powerpc/kernel/hw_breakpoint.c @@ -523,7 +523,7 @@ static void larx_stcx_err(struct perf_event *bp, struct arch_hw_breakpoint *info static bool stepping_handler(struct pt_regs *regs, struct perf_event **bp, struct arch_hw_breakpoint **info, int *hit, - struct ppc_inst instr) + ppc_inst_t instr) { int i; int stepped; @@ -616,7 +616,7 @@ int hw_breakpoint_handler(struct die_args *args) int hit[HBP_NUM_MAX] = {0}; int nr_hit = 0; bool ptrace_bp = false; - struct ppc_inst instr = ppc_inst(0); + ppc_inst_t instr = ppc_inst(0); int type = 0; int size = 0; unsigned long ea; diff --git a/arch/powerpc/kernel/hw_breakpoint_constraints.c b/arch/powerpc/kernel/hw_breakpoint_constraints.c index 42b967e3d85c..a74623025f3a 100644 --- a/arch/powerpc/kernel/hw_breakpoint_constraints.c +++ b/arch/powerpc/kernel/hw_breakpoint_constraints.c @@ -80,7 +80,7 @@ static bool check_dawrx_constraints(struct pt_regs *regs, int type, * Return true if the event is valid wrt dawr configuration, * including extraneous exception. Otherwise return false. */ -bool wp_check_constraints(struct pt_regs *regs, struct ppc_inst instr, +bool wp_check_constraints(struct pt_regs *regs, ppc_inst_t instr, unsigned long ea, int type, int size, struct arch_hw_breakpoint *info) { @@ -127,7 +127,7 @@ bool wp_check_constraints(struct pt_regs *regs, struct ppc_inst instr, return false; } -void wp_get_instr_detail(struct pt_regs *regs, struct ppc_inst *instr, +void wp_get_instr_detail(struct pt_regs *regs, ppc_inst_t *instr, int *type, int *size, unsigned long *ea) { struct instruction_op op; diff --git a/arch/powerpc/kernel/idle.c b/arch/powerpc/kernel/idle.c index 1f835539fda4..4ad79eb638c6 100644 --- a/arch/powerpc/kernel/idle.c +++ b/arch/powerpc/kernel/idle.c @@ -82,7 +82,7 @@ void power4_idle(void) return; if (cpu_has_feature(CPU_FTR_ALTIVEC)) - asm volatile("DSSALL ; sync" ::: "memory"); + asm volatile(PPC_DSSALL " ; sync" ::: "memory"); power4_idle_nap(); diff --git a/arch/powerpc/kernel/idle_6xx.S b/arch/powerpc/kernel/idle_6xx.S index 13cad9297d82..3c097356366b 100644 --- a/arch/powerpc/kernel/idle_6xx.S +++ b/arch/powerpc/kernel/idle_6xx.S @@ -129,7 +129,7 @@ BEGIN_FTR_SECTION END_FTR_SECTION_IFCLR(CPU_FTR_NO_DPM) mtspr SPRN_HID0,r4 BEGIN_FTR_SECTION - DSSALL + PPC_DSSALL sync END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) lwz r8,TI_LOCAL_FLAGS(r2) /* set napping bit */ diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c index 563ebfcd16e2..7cd6ce3ec423 100644 --- a/arch/powerpc/kernel/interrupt.c +++ b/arch/powerpc/kernel/interrupt.c @@ -81,7 +81,7 @@ notrace long system_call_exception(long r3, long r4, long r5, { syscall_fn f; - kuep_lock(); + kuap_lock(); regs->orig_gpr3 = r3; @@ -406,7 +406,6 @@ again: /* Restore user access locks last */ kuap_user_restore(regs); - kuep_unlock(); return ret; } diff --git a/arch/powerpc/kernel/interrupt_64.S b/arch/powerpc/kernel/interrupt_64.S index ec950b08a8dc..92088f848266 100644 --- a/arch/powerpc/kernel/interrupt_64.S +++ b/arch/powerpc/kernel/interrupt_64.S @@ -30,21 +30,23 @@ COMPAT_SYS_CALL_TABLE: .ifc \srr,srr mfspr r11,SPRN_SRR0 ld r12,_NIP(r1) + clrrdi r12,r12,2 100: tdne r11,r12 - EMIT_BUG_ENTRY 100b,__FILE__,__LINE__,(BUGFLAG_WARNING | BUGFLAG_ONCE) + EMIT_WARN_ENTRY 100b,__FILE__,__LINE__,(BUGFLAG_WARNING | BUGFLAG_ONCE) mfspr r11,SPRN_SRR1 ld r12,_MSR(r1) 100: tdne r11,r12 - EMIT_BUG_ENTRY 100b,__FILE__,__LINE__,(BUGFLAG_WARNING | BUGFLAG_ONCE) + EMIT_WARN_ENTRY 100b,__FILE__,__LINE__,(BUGFLAG_WARNING | BUGFLAG_ONCE) .else mfspr r11,SPRN_HSRR0 ld r12,_NIP(r1) + clrrdi r12,r12,2 100: tdne r11,r12 - EMIT_BUG_ENTRY 100b,__FILE__,__LINE__,(BUGFLAG_WARNING | BUGFLAG_ONCE) + EMIT_WARN_ENTRY 100b,__FILE__,__LINE__,(BUGFLAG_WARNING | BUGFLAG_ONCE) mfspr r11,SPRN_HSRR1 ld r12,_MSR(r1) 100: tdne r11,r12 - EMIT_BUG_ENTRY 100b,__FILE__,__LINE__,(BUGFLAG_WARNING | BUGFLAG_ONCE) + EMIT_WARN_ENTRY 100b,__FILE__,__LINE__,(BUGFLAG_WARNING | BUGFLAG_ONCE) .endif #endif .endm @@ -162,10 +164,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) * The value of AMR only matters while we're in the kernel. */ mtcr r2 - ld r2,GPR2(r1) - ld r3,GPR3(r1) - ld r13,GPR13(r1) - ld r1,GPR1(r1) + REST_GPRS(2, 3, r1) + REST_GPR(13, r1) + REST_GPR(1, r1) RFSCV_TO_USER b . /* prevent speculative execution */ @@ -183,9 +184,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) mtctr r3 mtlr r4 mtspr SPRN_XER,r5 - REST_10GPRS(2, r1) - REST_2GPRS(12, r1) - ld r1,GPR1(r1) + REST_GPRS(2, 13, r1) + REST_GPR(1, r1) RFI_TO_USER .Lsyscall_vectored_\name\()_rst_end: @@ -374,10 +374,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) * The value of AMR only matters while we're in the kernel. */ mtcr r2 - ld r2,GPR2(r1) - ld r3,GPR3(r1) - ld r13,GPR13(r1) - ld r1,GPR1(r1) + REST_GPRS(2, 3, r1) + REST_GPR(13, r1) + REST_GPR(1, r1) RFI_TO_USER b . /* prevent speculative execution */ @@ -388,8 +387,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) mtctr r3 mtspr SPRN_XER,r4 ld r0,GPR0(r1) - REST_8GPRS(4, r1) - ld r12,GPR12(r1) + REST_GPRS(4, 12, r1) b .Lsyscall_restore_regs_cont .Lsyscall_rst_end: @@ -518,17 +516,14 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) ld r6,_XER(r1) li r0,0 - REST_4GPRS(7, r1) - REST_2GPRS(11, r1) - REST_GPR(13, r1) + REST_GPRS(7, 13, r1) mtcr r3 mtlr r4 mtctr r5 mtspr SPRN_XER,r6 - REST_4GPRS(2, r1) - REST_GPR(6, r1) + REST_GPRS(2, 6, r1) REST_GPR(0, r1) REST_GPR(1, r1) .ifc \srr,srr @@ -625,8 +620,7 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) ld r6,_CCR(r1) li r0,0 - REST_4GPRS(7, r1) - REST_2GPRS(11, r1) + REST_GPRS(7, 12, r1) mtlr r3 mtctr r4 @@ -638,7 +632,7 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) */ std r0,STACK_FRAME_OVERHEAD-16(r1) - REST_4GPRS(2, r1) + REST_GPRS(2, 5, r1) bne- cr1,1f /* emulate stack store */ mtcr r6 @@ -703,7 +697,7 @@ interrupt_return_macro hsrr .globl __end_soft_masked __end_soft_masked: -DEFINE_FIXED_SYMBOL(__end_soft_masked) +DEFINE_FIXED_SYMBOL(__end_soft_masked, text) #endif /* CONFIG_PPC_BOOK3S */ #ifdef CONFIG_PPC_BOOK3S diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index c4f1d6b7d992..2cf31a97126c 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -745,7 +745,8 @@ void __do_irq(struct pt_regs *regs) irq = ppc_md.get_irq(); /* We can hard enable interrupts now to allow perf interrupts */ - may_hard_irq_enable(); + if (should_hard_irq_enable()) + do_hard_irq_enable(); /* And finally process it */ if (unlikely(!irq)) @@ -811,7 +812,7 @@ void __init init_IRQ(void) ppc_md.init_IRQ(); } -#if defined(CONFIG_BOOKE) || defined(CONFIG_40x) +#ifdef CONFIG_BOOKE_OR_40x void *critirq_ctx[NR_CPUS] __read_mostly; void *dbgirq_ctx[NR_CPUS] __read_mostly; void *mcheckirq_ctx[NR_CPUS] __read_mostly; diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c index bdee7262c080..9f8d0fa7b718 100644 --- a/arch/powerpc/kernel/kgdb.c +++ b/arch/powerpc/kernel/kgdb.c @@ -48,7 +48,7 @@ static struct hard_trap_info { 0x0800, 0x08 /* SIGFPE */ }, /* fp unavailable */ { 0x0900, 0x0e /* SIGALRM */ }, /* decrementer */ { 0x0c00, 0x14 /* SIGCHLD */ }, /* system call */ -#if defined(CONFIG_40x) || defined(CONFIG_BOOKE) +#ifdef CONFIG_BOOKE_OR_40x { 0x2002, 0x05 /* SIGTRAP */ }, /* debug */ #if defined(CONFIG_FSL_BOOKE) { 0x2010, 0x08 /* SIGFPE */ }, /* spe unavailable */ @@ -67,7 +67,7 @@ static struct hard_trap_info { 0x2010, 0x08 /* SIGFPE */ }, /* fp unavailable */ { 0x2020, 0x08 /* SIGFPE */ }, /* ap unavailable */ #endif -#else /* ! (defined(CONFIG_40x) || defined(CONFIG_BOOKE)) */ +#else /* !CONFIG_BOOKE_OR_40x */ { 0x0d00, 0x05 /* SIGTRAP */ }, /* single-step */ #if defined(CONFIG_PPC_8xx) { 0x1000, 0x04 /* SIGILL */ }, /* software emulation */ diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c index 86d77ff056a6..9a492fdec1df 100644 --- a/arch/powerpc/kernel/kprobes.c +++ b/arch/powerpc/kernel/kprobes.c @@ -124,7 +124,7 @@ int arch_prepare_kprobe(struct kprobe *p) { int ret = 0; struct kprobe *prev; - struct ppc_inst insn = ppc_inst_read(p->addr); + ppc_inst_t insn = ppc_inst_read(p->addr); if ((unsigned long)p->addr & 0x03) { printk("Attempt to register kprobe at an unaligned address\n"); @@ -244,7 +244,7 @@ NOKPROBE_SYMBOL(arch_prepare_kretprobe); static int try_to_emulate(struct kprobe *p, struct pt_regs *regs) { int ret; - struct ppc_inst insn = ppc_inst_read(p->ainsn.insn); + ppc_inst_t insn = ppc_inst_read(p->ainsn.insn); /* regs->nip is also adjusted if emulate_step returns 1 */ ret = emulate_step(regs, insn); diff --git a/arch/powerpc/kernel/l2cr_6xx.S b/arch/powerpc/kernel/l2cr_6xx.S index 225511d73bef..f2e03ed423d0 100644 --- a/arch/powerpc/kernel/l2cr_6xx.S +++ b/arch/powerpc/kernel/l2cr_6xx.S @@ -96,7 +96,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_L2CR) /* Stop DST streams */ BEGIN_FTR_SECTION - DSSALL + PPC_DSSALL sync END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) @@ -292,7 +292,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_L3CR) isync /* Stop DST streams */ - DSSALL + PPC_DSSALL sync /* Get the current enable bit of the L3CR into r4 */ @@ -401,7 +401,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_L3CR) _GLOBAL(__flush_disable_L1) /* Stop pending alitvec streams and memory accesses */ BEGIN_FTR_SECTION - DSSALL + PPC_DSSALL END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) sync diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c index fd829f7f25a4..2503dd4713b9 100644 --- a/arch/powerpc/kernel/mce.c +++ b/arch/powerpc/kernel/mce.c @@ -586,7 +586,7 @@ void machine_check_print_event_info(struct machine_check_event *evt, mc_error_class[evt->error_class] : "Unknown"; printk("%sMCE: CPU%d: %s\n", level, evt->cpu, subtype); -#ifdef CONFIG_PPC_BOOK3S_64 +#ifdef CONFIG_PPC_64S_HASH_MMU /* Display faulty slb contents for SLB errors. */ if (evt->error_type == MCE_ERROR_TYPE_SLB && !in_guest) slb_dump_contents(local_paca->mce_faulty_slbs); diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c index c2f55fe7092d..71e8f2a92e36 100644 --- a/arch/powerpc/kernel/mce_power.c +++ b/arch/powerpc/kernel/mce_power.c @@ -77,15 +77,15 @@ static bool mce_in_guest(void) } /* flush SLBs and reload */ -#ifdef CONFIG_PPC_BOOK3S_64 +#ifdef CONFIG_PPC_64S_HASH_MMU void flush_and_reload_slb(void) { - /* Invalidate all SLBs */ - slb_flush_all_realmode(); - if (early_radix_enabled()) return; + /* Invalidate all SLBs */ + slb_flush_all_realmode(); + /* * This probably shouldn't happen, but it may be possible it's * called in early boot before SLB shadows are allocated. @@ -99,7 +99,7 @@ void flush_and_reload_slb(void) void flush_erat(void) { -#ifdef CONFIG_PPC_BOOK3S_64 +#ifdef CONFIG_PPC_64S_HASH_MMU if (!early_cpu_has_feature(CPU_FTR_ARCH_300)) { flush_and_reload_slb(); return; @@ -114,7 +114,7 @@ void flush_erat(void) static int mce_flush(int what) { -#ifdef CONFIG_PPC_BOOK3S_64 +#ifdef CONFIG_PPC_64S_HASH_MMU if (what == MCE_FLUSH_SLB) { flush_and_reload_slb(); return 1; @@ -455,7 +455,7 @@ static int mce_find_instr_ea_and_phys(struct pt_regs *regs, uint64_t *addr, * in real-mode is tricky and can lead to recursive * faults */ - struct ppc_inst instr; + ppc_inst_t instr; unsigned long pfn, instr_addr; struct instruction_op op; struct pt_regs tmp = *regs; @@ -499,8 +499,10 @@ static int mce_handle_ierror(struct pt_regs *regs, unsigned long srr1, /* attempt to correct the error */ switch (table[i].error_type) { case MCE_ERROR_TYPE_SLB: +#ifdef CONFIG_PPC_64S_HASH_MMU if (local_paca->in_mce == 1) slb_save_contents(local_paca->mce_faulty_slbs); +#endif handled = mce_flush(MCE_FLUSH_SLB); break; case MCE_ERROR_TYPE_ERAT: @@ -588,8 +590,10 @@ static int mce_handle_derror(struct pt_regs *regs, /* attempt to correct the error */ switch (table[i].error_type) { case MCE_ERROR_TYPE_SLB: +#ifdef CONFIG_PPC_64S_HASH_MMU if (local_paca->in_mce == 1) slb_save_contents(local_paca->mce_faulty_slbs); +#endif if (mce_flush(MCE_FLUSH_SLB)) handled = 1; break; diff --git a/arch/powerpc/kernel/module.c b/arch/powerpc/kernel/module.c index ed04a3ba66fe..40a583e9d3c7 100644 --- a/arch/powerpc/kernel/module.c +++ b/arch/powerpc/kernel/module.c @@ -90,16 +90,17 @@ int module_finalize(const Elf_Ehdr *hdr, } static __always_inline void * -__module_alloc(unsigned long size, unsigned long start, unsigned long end) +__module_alloc(unsigned long size, unsigned long start, unsigned long end, bool nowarn) { pgprot_t prot = strict_module_rwx_enabled() ? PAGE_KERNEL : PAGE_KERNEL_EXEC; + gfp_t gfp = GFP_KERNEL | (nowarn ? __GFP_NOWARN : 0); /* * Don't do huge page allocations for modules yet until more testing * is done. STRICT_MODULE_RWX may require extra work to support this * too. */ - return __vmalloc_node_range(size, 1, start, end, GFP_KERNEL, prot, + return __vmalloc_node_range(size, 1, start, end, gfp, prot, VM_FLUSH_RESET_PERMS | VM_NO_HUGE_VMAP, NUMA_NO_NODE, __builtin_return_address(0)); } @@ -114,13 +115,13 @@ void *module_alloc(unsigned long size) /* First try within 32M limit from _etext to avoid branch trampolines */ if (MODULES_VADDR < PAGE_OFFSET && MODULES_END > limit) - ptr = __module_alloc(size, limit, MODULES_END); + ptr = __module_alloc(size, limit, MODULES_END, true); if (!ptr) - ptr = __module_alloc(size, MODULES_VADDR, MODULES_END); + ptr = __module_alloc(size, MODULES_VADDR, MODULES_END, false); return ptr; #else - return __module_alloc(size, VMALLOC_START, VMALLOC_END); + return __module_alloc(size, VMALLOC_START, VMALLOC_END, false); #endif } diff --git a/arch/powerpc/kernel/module_32.c b/arch/powerpc/kernel/module_32.c index f417afc08d33..a491ad481d85 100644 --- a/arch/powerpc/kernel/module_32.c +++ b/arch/powerpc/kernel/module_32.c @@ -273,6 +273,31 @@ int apply_relocate_add(Elf32_Shdr *sechdrs, } #ifdef CONFIG_DYNAMIC_FTRACE +int module_trampoline_target(struct module *mod, unsigned long addr, + unsigned long *target) +{ + unsigned int jmp[4]; + + /* Find where the trampoline jumps to */ + if (copy_from_kernel_nofault(jmp, (void *)addr, sizeof(jmp))) + return -EFAULT; + + /* verify that this is what we expect it to be */ + if ((jmp[0] & 0xffff0000) != PPC_RAW_LIS(_R12, 0) || + (jmp[1] & 0xffff0000) != PPC_RAW_ADDI(_R12, _R12, 0) || + jmp[2] != PPC_RAW_MTCTR(_R12) || + jmp[3] != PPC_RAW_BCTR()) + return -EINVAL; + + addr = (jmp[1] & 0xffff) | ((jmp[0] & 0xffff) << 16); + if (addr & 0x8000) + addr -= 0x10000; + + *target = addr; + + return 0; +} + int module_finalize_ftrace(struct module *module, const Elf_Shdr *sechdrs) { module->arch.tramp = do_plt_call(module->core_layout.base, @@ -281,6 +306,14 @@ int module_finalize_ftrace(struct module *module, const Elf_Shdr *sechdrs) if (!module->arch.tramp) return -ENOENT; +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS + module->arch.tramp_regs = do_plt_call(module->core_layout.base, + (unsigned long)ftrace_regs_caller, + sechdrs, module); + if (!module->arch.tramp_regs) + return -ENOENT; +#endif + return 0; } #endif diff --git a/arch/powerpc/kernel/nvram_64.c b/arch/powerpc/kernel/nvram_64.c index 3c8d9bbb51cf..0d9f9cd41e13 100644 --- a/arch/powerpc/kernel/nvram_64.c +++ b/arch/powerpc/kernel/nvram_64.c @@ -540,7 +540,7 @@ static struct pstore_info nvram_pstore_info = { .write = nvram_pstore_write, }; -static int nvram_pstore_init(void) +static int __init nvram_pstore_init(void) { int rc = 0; @@ -562,7 +562,7 @@ static int nvram_pstore_init(void) return rc; } #else -static int nvram_pstore_init(void) +static int __init nvram_pstore_init(void) { return -1; } @@ -755,7 +755,7 @@ static unsigned char __init nvram_checksum(struct nvram_header *p) * Per the criteria passed via nvram_remove_partition(), should this * partition be removed? 1=remove, 0=keep */ -static int nvram_can_remove_partition(struct nvram_partition *part, +static int __init nvram_can_remove_partition(struct nvram_partition *part, const char *name, int sig, const char *exceptions[]) { if (part->header.signature != sig) diff --git a/arch/powerpc/kernel/optprobes.c b/arch/powerpc/kernel/optprobes.c index ce1903064031..3b1c2236cbee 100644 --- a/arch/powerpc/kernel/optprobes.c +++ b/arch/powerpc/kernel/optprobes.c @@ -153,7 +153,7 @@ static void patch_imm_load_insns(unsigned long val, int reg, kprobe_opcode_t *ad int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *p) { - struct ppc_inst branch_op_callback, branch_emulate_step, temp; + ppc_inst_t branch_op_callback, branch_emulate_step, temp; unsigned long op_callback_addr, emulate_step_addr; kprobe_opcode_t *buff; long b_offset; @@ -228,12 +228,8 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *p) /* * 3. load instruction to be emulated into relevant register, and */ - if (IS_ENABLED(CONFIG_PPC64)) { - temp = ppc_inst_read(p->ainsn.insn); - patch_imm_load_insns(ppc_inst_as_ulong(temp), 4, buff + TMPL_INSN_IDX); - } else { - patch_imm_load_insns((unsigned long)p->ainsn.insn, 4, buff + TMPL_INSN_IDX); - } + temp = ppc_inst_read(p->ainsn.insn); + patch_imm_load_insns(ppc_inst_as_ulong(temp), 4, buff + TMPL_INSN_IDX); /* * 4. branch back from trampoline @@ -269,7 +265,7 @@ int arch_check_optimized_kprobe(struct optimized_kprobe *op) void arch_optimize_kprobes(struct list_head *oplist) { - struct ppc_inst instr; + ppc_inst_t instr; struct optimized_kprobe *op; struct optimized_kprobe *tmp; diff --git a/arch/powerpc/kernel/optprobes_head.S b/arch/powerpc/kernel/optprobes_head.S index 19ea3312403c..5c7f0b4b784b 100644 --- a/arch/powerpc/kernel/optprobes_head.S +++ b/arch/powerpc/kernel/optprobes_head.S @@ -10,8 +10,8 @@ #include <asm/asm-offsets.h> #ifdef CONFIG_PPC64 -#define SAVE_30GPRS(base) SAVE_10GPRS(2,base); SAVE_10GPRS(12,base); SAVE_10GPRS(22,base) -#define REST_30GPRS(base) REST_10GPRS(2,base); REST_10GPRS(12,base); REST_10GPRS(22,base) +#define SAVE_30GPRS(base) SAVE_GPRS(2, 31, base) +#define REST_30GPRS(base) REST_GPRS(2, 31, base) #define TEMPLATE_FOR_IMM_LOAD_INSNS nop; nop; nop; nop; nop #else #define SAVE_30GPRS(base) stmw r2, GPR2(base) diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c index 4208b4044d12..39da688a9455 100644 --- a/arch/powerpc/kernel/paca.c +++ b/arch/powerpc/kernel/paca.c @@ -139,8 +139,7 @@ static struct lppaca * __init new_lppaca(int cpu, unsigned long limit) } #endif /* CONFIG_PPC_PSERIES */ -#ifdef CONFIG_PPC_BOOK3S_64 - +#ifdef CONFIG_PPC_64S_HASH_MMU /* * 3 persistent SLBs are allocated here. The buffer will be zero * initially, hence will all be invaild until we actually write them. @@ -169,8 +168,7 @@ static struct slb_shadow * __init new_slb_shadow(int cpu, unsigned long limit) return s; } - -#endif /* CONFIG_PPC_BOOK3S_64 */ +#endif /* CONFIG_PPC_64S_HASH_MMU */ #ifdef CONFIG_PPC_PSERIES /** @@ -226,7 +224,7 @@ void __init initialise_paca(struct paca_struct *new_paca, int cpu) new_paca->kexec_state = KEXEC_STATE_NONE; new_paca->__current = &init_task; new_paca->data_offset = 0xfeeeeeeeeeeeeeeeULL; -#ifdef CONFIG_PPC_BOOK3S_64 +#ifdef CONFIG_PPC_64S_HASH_MMU new_paca->slb_shadow_ptr = NULL; #endif @@ -307,7 +305,7 @@ void __init allocate_paca(int cpu) #ifdef CONFIG_PPC_PSERIES paca->lppaca_ptr = new_lppaca(cpu, limit); #endif -#ifdef CONFIG_PPC_BOOK3S_64 +#ifdef CONFIG_PPC_64S_HASH_MMU paca->slb_shadow_ptr = new_slb_shadow(cpu, limit); #endif #ifdef CONFIG_PPC_PSERIES @@ -328,7 +326,7 @@ void __init free_unused_pacas(void) paca_nr_cpu_ids = nr_cpu_ids; paca_ptrs_size = new_ptrs_size; -#ifdef CONFIG_PPC_BOOK3S_64 +#ifdef CONFIG_PPC_64S_HASH_MMU if (early_radix_enabled()) { /* Ugly fixup, see new_slb_shadow() */ memblock_phys_free(__pa(paca_ptrs[boot_cpuid]->slb_shadow_ptr), @@ -341,9 +339,9 @@ void __init free_unused_pacas(void) paca_ptrs_size + paca_struct_size, nr_cpu_ids); } +#ifdef CONFIG_PPC_64S_HASH_MMU void copy_mm_to_paca(struct mm_struct *mm) { -#ifdef CONFIG_PPC_BOOK3S mm_context_t *context = &mm->context; #ifdef CONFIG_PPC_MM_SLICES @@ -356,7 +354,5 @@ void copy_mm_to_paca(struct mm_struct *mm) get_paca()->mm_ctx_user_psize = context->user_psize; get_paca()->mm_ctx_sllp = context->sllp; #endif -#else /* !CONFIG_PPC_BOOK3S */ - return; -#endif } +#endif /* CONFIG_PPC_64S_HASH_MMU */ diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index 6749905932f4..8bc9cf62cd93 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -62,7 +62,7 @@ EXPORT_SYMBOL(isa_mem_base); static const struct dma_map_ops *pci_dma_ops; -void set_pci_dma_ops(const struct dma_map_ops *dma_ops) +void __init set_pci_dma_ops(const struct dma_map_ops *dma_ops) { pci_dma_ops = dma_ops; } diff --git a/arch/powerpc/kernel/pci_32.c b/arch/powerpc/kernel/pci_32.c index b49e1060a3bf..48537964fba1 100644 --- a/arch/powerpc/kernel/pci_32.c +++ b/arch/powerpc/kernel/pci_32.c @@ -37,7 +37,7 @@ int pcibios_assign_bus_offset = 1; EXPORT_SYMBOL(isa_io_base); EXPORT_SYMBOL(pci_dram_offset); -void pcibios_make_OF_bus_map(void); +void __init pcibios_make_OF_bus_map(void); static void fixup_cpc710_pci64(struct pci_dev* dev); static u8* pci_to_OF_bus_map; @@ -109,7 +109,7 @@ make_one_node_map(struct device_node* node, u8 pci_bus) } } -void +void __init pcibios_make_OF_bus_map(void) { int i; diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 406d7ee9e322..984813a4d5dc 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -628,7 +628,7 @@ static void do_break_handler(struct pt_regs *regs) { struct arch_hw_breakpoint null_brk = {0}; struct arch_hw_breakpoint *info; - struct ppc_inst instr = ppc_inst(0); + ppc_inst_t instr = ppc_inst(0); int type = 0; int size = 0; unsigned long ea; @@ -1156,6 +1156,40 @@ static inline void save_sprs(struct thread_struct *t) #endif } +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE +void kvmppc_save_user_regs(void) +{ + unsigned long usermsr; + + if (!current->thread.regs) + return; + + usermsr = current->thread.regs->msr; + + if (usermsr & MSR_FP) + save_fpu(current); + + if (usermsr & MSR_VEC) + save_altivec(current); + +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + if (usermsr & MSR_TM) { + current->thread.tm_tfhar = mfspr(SPRN_TFHAR); + current->thread.tm_tfiar = mfspr(SPRN_TFIAR); + current->thread.tm_texasr = mfspr(SPRN_TEXASR); + current->thread.regs->msr &= ~MSR_TM; + } +#endif +} +EXPORT_SYMBOL_GPL(kvmppc_save_user_regs); + +void kvmppc_save_current_sprs(void) +{ + save_sprs(¤t->thread); +} +EXPORT_SYMBOL_GPL(kvmppc_save_current_sprs); +#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */ + static inline void restore_sprs(struct thread_struct *old_thread, struct thread_struct *new_thread) { @@ -1206,7 +1240,7 @@ struct task_struct *__switch_to(struct task_struct *prev, { struct thread_struct *new_thread, *old_thread; struct task_struct *last; -#ifdef CONFIG_PPC_BOOK3S_64 +#ifdef CONFIG_PPC_64S_HASH_MMU struct ppc64_tlb_batch *batch; #endif @@ -1215,7 +1249,7 @@ struct task_struct *__switch_to(struct task_struct *prev, WARN_ON(!irqs_disabled()); -#ifdef CONFIG_PPC_BOOK3S_64 +#ifdef CONFIG_PPC_64S_HASH_MMU batch = this_cpu_ptr(&ppc64_tlb_batch); if (batch->active) { current_thread_info()->local_flags |= _TLF_LAZY_MMU; @@ -1281,9 +1315,9 @@ struct task_struct *__switch_to(struct task_struct *prev, set_return_regs_changed(); /* _switch changes stack (and regs) */ -#ifdef CONFIG_PPC32 - kuap_assert_locked(); -#endif + if (!IS_ENABLED(CONFIG_PPC_BOOK3S_64)) + kuap_assert_locked(); + last = _switch(old_thread, new_thread); /* @@ -1294,6 +1328,7 @@ struct task_struct *__switch_to(struct task_struct *prev, */ #ifdef CONFIG_PPC_BOOK3S_64 +#ifdef CONFIG_PPC_64S_HASH_MMU /* * This applies to a process that was context switched while inside * arch_enter_lazy_mmu_mode(), to re-activate the batch that was @@ -1305,6 +1340,7 @@ struct task_struct *__switch_to(struct task_struct *prev, batch = this_cpu_ptr(&ppc64_tlb_batch); batch->active = 1; } +#endif /* * Math facilities are masked out of the child MSR in copy_thread. @@ -1655,7 +1691,7 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) static void setup_ksp_vsid(struct task_struct *p, unsigned long sp) { -#ifdef CONFIG_PPC_BOOK3S_64 +#ifdef CONFIG_PPC_64S_HASH_MMU unsigned long sp_vsid; unsigned long llp = mmu_psize_defs[mmu_linear_psize].sllp; @@ -1767,6 +1803,9 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, #if defined(CONFIG_PPC_BOOK3S_32) && defined(CONFIG_PPC_KUAP) p->thread.kuap = KUAP_NONE; #endif +#if defined(CONFIG_BOOKE_OR_40x) && defined(CONFIG_PPC_KUAP) + p->thread.pid = MMU_NO_CONTEXT; +#endif setup_ksp_vsid(p, sp); @@ -2299,10 +2338,9 @@ unsigned long arch_randomize_brk(struct mm_struct *mm) * the heap, we can put it above 1TB so it is backed by a 1TB * segment. Otherwise the heap will be in the bottom 1TB * which always uses 256MB segments and this may result in a - * performance penalty. We don't need to worry about radix. For - * radix, mmu_highuser_ssize remains unchanged from 256MB. + * performance penalty. */ - if (!is_32bit_task() && (mmu_highuser_ssize == MMU_SEGSIZE_1T)) + if (!radix_enabled() && !is_32bit_task() && (mmu_highuser_ssize == MMU_SEGSIZE_1T)) base = max_t(unsigned long, mm->brk, 1UL << SID_SHIFT_1T); #endif diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index ad1230c4f3fe..3d30d40a0e9c 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -231,7 +231,7 @@ static void __init check_cpu_pa_features(unsigned long node) ibm_pa_features, ARRAY_SIZE(ibm_pa_features)); } -#ifdef CONFIG_PPC_BOOK3S_64 +#ifdef CONFIG_PPC_64S_HASH_MMU static void __init init_mmu_slb_size(unsigned long node) { const __be32 *slb_size_ptr; @@ -447,7 +447,7 @@ static int __init early_init_dt_scan_chosen_ppc(unsigned long node, */ #ifdef CONFIG_SPARSEMEM -static bool validate_mem_limit(u64 base, u64 *size) +static bool __init validate_mem_limit(u64 base, u64 *size) { u64 max_mem = 1UL << (MAX_PHYSMEM_BITS); @@ -458,7 +458,7 @@ static bool validate_mem_limit(u64 base, u64 *size) return true; } #else -static bool validate_mem_limit(u64 base, u64 *size) +static bool __init validate_mem_limit(u64 base, u64 *size) { return true; } diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index 18b04b08b983..0ac5faacc909 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -672,7 +672,7 @@ static inline int __init prom_getproplen(phandle node, const char *pname) return call_prom("getproplen", 2, 1, node, ADDR(pname)); } -static void add_string(char **str, const char *q) +static void __init add_string(char **str, const char *q) { char *p = *str; @@ -682,7 +682,7 @@ static void add_string(char **str, const char *q) *str = p; } -static char *tohex(unsigned int x) +static char *__init tohex(unsigned int x) { static const char digits[] __initconst = "0123456789abcdef"; static char result[9] __prombss; @@ -728,7 +728,7 @@ static int __init prom_setprop(phandle node, const char *nodename, #define prom_islower(c) ('a' <= (c) && (c) <= 'z') #define prom_toupper(c) (prom_islower(c) ? ((c) - 'a' + 'A') : (c)) -static unsigned long prom_strtoul(const char *cp, const char **endp) +static unsigned long __init prom_strtoul(const char *cp, const char **endp) { unsigned long result = 0, base = 10, value; @@ -753,7 +753,7 @@ static unsigned long prom_strtoul(const char *cp, const char **endp) return result; } -static unsigned long prom_memparse(const char *ptr, const char **retptr) +static unsigned long __init prom_memparse(const char *ptr, const char **retptr) { unsigned long ret = prom_strtoul(ptr, retptr); int shift = 0; @@ -1786,7 +1786,7 @@ static void __init prom_close_stdin(void) } #ifdef CONFIG_PPC_SVM -static int prom_rtas_hcall(uint64_t args) +static int __init prom_rtas_hcall(uint64_t args) { register uint64_t arg1 asm("r3") = H_RTAS; register uint64_t arg2 asm("r4") = args; @@ -2991,7 +2991,7 @@ static void __init fixup_device_tree_efika_add_phy(void) /* Check if the phy-handle property exists - bail if it does */ rv = prom_getprop(node, "phy-handle", prop, sizeof(prop)); - if (!rv) + if (rv <= 0) return; /* @@ -3248,7 +3248,7 @@ static void __init prom_check_initrd(unsigned long r3, unsigned long r4) /* * Perform the Enter Secure Mode ultracall. */ -static int enter_secure_mode(unsigned long kbase, unsigned long fdt) +static int __init enter_secure_mode(unsigned long kbase, unsigned long fdt) { register unsigned long r3 asm("r3") = UV_ESM; register unsigned long r4 asm("r4") = kbase; diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index ff80bbad22a5..733e6ef36758 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -492,8 +492,25 @@ int rtas_call(int token, int nargs, int nret, int *outputs, ...) } EXPORT_SYMBOL(rtas_call); -/* For RTAS_BUSY (-2), delay for 1 millisecond. For an extended busy status - * code of 990n, perform the hinted delay of 10^n (last digit) milliseconds. +/** + * rtas_busy_delay_time() - From an RTAS status value, calculate the + * suggested delay time in milliseconds. + * + * @status: a value returned from rtas_call() or similar APIs which return + * the status of a RTAS function call. + * + * Context: Any context. + * + * Return: + * * 100000 - If @status is 9905. + * * 10000 - If @status is 9904. + * * 1000 - If @status is 9903. + * * 100 - If @status is 9902. + * * 10 - If @status is 9901. + * * 1 - If @status is either 9900 or -2. This is "wrong" for -2, but + * some callers depend on this behavior, and the worst outcome + * is that they will delay for longer than necessary. + * * 0 - If @status is not a busy or extended delay value. */ unsigned int rtas_busy_delay_time(int status) { @@ -513,17 +530,77 @@ unsigned int rtas_busy_delay_time(int status) } EXPORT_SYMBOL(rtas_busy_delay_time); -/* For an RTAS busy status code, perform the hinted delay. */ -unsigned int rtas_busy_delay(int status) +/** + * rtas_busy_delay() - helper for RTAS busy and extended delay statuses + * + * @status: a value returned from rtas_call() or similar APIs which return + * the status of a RTAS function call. + * + * Context: Process context. May sleep or schedule. + * + * Return: + * * true - @status is RTAS_BUSY or an extended delay hint. The + * caller may assume that the CPU has been yielded if necessary, + * and that an appropriate delay for @status has elapsed. + * Generally the caller should reattempt the RTAS call which + * yielded @status. + * + * * false - @status is not @RTAS_BUSY nor an extended delay hint. The + * caller is responsible for handling @status. + */ +bool rtas_busy_delay(int status) { unsigned int ms; + bool ret; - might_sleep(); - ms = rtas_busy_delay_time(status); - if (ms && need_resched()) - msleep(ms); + switch (status) { + case RTAS_EXTENDED_DELAY_MIN...RTAS_EXTENDED_DELAY_MAX: + ret = true; + ms = rtas_busy_delay_time(status); + /* + * The extended delay hint can be as high as 100 seconds. + * Surely any function returning such a status is either + * buggy or isn't going to be significantly slowed by us + * polling at 1HZ. Clamp the sleep time to one second. + */ + ms = clamp(ms, 1U, 1000U); + /* + * The delay hint is an order-of-magnitude suggestion, not + * a minimum. It is fine, possibly even advantageous, for + * us to pause for less time than hinted. For small values, + * use usleep_range() to ensure we don't sleep much longer + * than actually needed. + * + * See Documentation/timers/timers-howto.rst for + * explanation of the threshold used here. In effect we use + * usleep_range() for 9900 and 9901, msleep() for + * 9902-9905. + */ + if (ms <= 20) + usleep_range(ms * 100, ms * 1000); + else + msleep(ms); + break; + case RTAS_BUSY: + ret = true; + /* + * We should call again immediately if there's no other + * work to do. + */ + cond_resched(); + break; + default: + ret = false; + /* + * Not a busy or extended delay status; the caller should + * handle @status itself. Ensure we warn on misuses in + * atomic context regardless. + */ + might_sleep(); + break; + } - return ms; + return ret; } EXPORT_SYMBOL(rtas_busy_delay); @@ -809,13 +886,13 @@ void rtas_os_term(char *str) /** * rtas_activate_firmware() - Activate a new version of firmware. * + * Context: This function may sleep. + * * Activate a new version of partition firmware. The OS must call this * after resuming from a partition hibernation or migration in order * to maintain the ability to perform live firmware updates. It's not * catastrophic for this method to be absent or to fail; just log the * condition in that case. - * - * Context: This function may sleep. */ void rtas_activate_firmware(void) { @@ -890,11 +967,12 @@ int rtas_call_reentrant(int token, int nargs, int nret, int *outputs, ...) #endif /* CONFIG_PPC_PSERIES */ /** - * Find a specific pseries error log in an RTAS extended event log. + * get_pseries_errorlog() - Find a specific pseries error log in an RTAS + * extended event log. * @log: RTAS error/event log * @section_id: two character section identifier * - * Returns a pointer to the specified errorlog or NULL if not found. + * Return: A pointer to the specified errorlog or NULL if not found. */ struct pseries_errorlog *get_pseries_errorlog(struct rtas_error_log *log, uint16_t section_id) diff --git a/arch/powerpc/kernel/rtasd.c b/arch/powerpc/kernel/rtasd.c index 32ee17753eb4..cf0f42909ddf 100644 --- a/arch/powerpc/kernel/rtasd.c +++ b/arch/powerpc/kernel/rtasd.c @@ -455,7 +455,7 @@ static void rtas_event_scan(struct work_struct *w) } #ifdef CONFIG_PPC64 -static void retrieve_nvram_error_log(void) +static void __init retrieve_nvram_error_log(void) { unsigned int err_type ; int rc ; @@ -473,12 +473,12 @@ static void retrieve_nvram_error_log(void) } } #else /* CONFIG_PPC64 */ -static void retrieve_nvram_error_log(void) +static void __init retrieve_nvram_error_log(void) { } #endif /* CONFIG_PPC64 */ -static void start_event_scan(void) +static void __init start_event_scan(void) { printk(KERN_DEBUG "RTAS daemon started\n"); pr_debug("rtasd: will sleep for %d milliseconds\n", diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c index 15fb5ea1b9ea..e159d4093d98 100644 --- a/arch/powerpc/kernel/security.c +++ b/arch/powerpc/kernel/security.c @@ -44,7 +44,7 @@ static void enable_barrier_nospec(bool enable) do_barrier_nospec_fixups(enable); } -void setup_barrier_nospec(void) +void __init setup_barrier_nospec(void) { bool enable; @@ -132,7 +132,7 @@ early_param("nospectre_v2", handle_nospectre_v2); #endif /* CONFIG_PPC_FSL_BOOK3E || CONFIG_PPC_BOOK3S_64 */ #ifdef CONFIG_PPC_FSL_BOOK3E -void setup_spectre_v2(void) +void __init setup_spectre_v2(void) { if (no_spectrev2 || cpu_mitigations_off()) do_btb_flush_fixups(); diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 4f1322b65760..f8da937df918 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -582,7 +582,7 @@ static __init int add_pcspkr(void) device_initcall(add_pcspkr); #endif /* CONFIG_PCSPKR_PLATFORM */ -void probe_machine(void) +static __init void probe_machine(void) { extern struct machdep_calls __machine_desc_start; extern struct machdep_calls __machine_desc_end; diff --git a/arch/powerpc/kernel/setup.h b/arch/powerpc/kernel/setup.h index 84058bbc8fe9..93f22da12abe 100644 --- a/arch/powerpc/kernel/setup.h +++ b/arch/powerpc/kernel/setup.h @@ -29,7 +29,7 @@ void setup_tlb_core_data(void); static inline void setup_tlb_core_data(void) { } #endif -#if defined(CONFIG_PPC_BOOK3E) || defined(CONFIG_BOOKE) || defined(CONFIG_40x) +#ifdef CONFIG_BOOKE_OR_40x void exc_lvl_early_init(void); #else static inline void exc_lvl_early_init(void) { } diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index 7ec5c47fce0e..a6e9d36d7c01 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -75,7 +75,7 @@ EXPORT_SYMBOL(DMA_MODE_WRITE); notrace void __init machine_init(u64 dt_ptr) { u32 *addr = (u32 *)patch_site_addr(&patch__memset_nocache); - struct ppc_inst insn; + ppc_inst_t insn; /* Configure static keys first, now that we're relocated. */ setup_feature_keys(); @@ -175,7 +175,7 @@ void __init emergency_stack_init(void) } #endif -#if defined(CONFIG_BOOKE) || defined(CONFIG_40x) +#ifdef CONFIG_BOOKE_OR_40x void __init exc_lvl_early_init(void) { unsigned int i, hw_cpu; diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 6052f5d5ded3..d87f7c1103ce 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -499,7 +499,7 @@ void smp_release_cpus(void) * routines and/or provided to userland */ -static void init_cache_info(struct ppc_cache_info *info, u32 size, u32 lsize, +static void __init init_cache_info(struct ppc_cache_info *info, u32 size, u32 lsize, u32 bsize, u32 sets) { info->size = size; @@ -880,14 +880,23 @@ void __init setup_per_cpu_areas(void) int rc = -EINVAL; /* - * Linear mapping is one of 4K, 1M and 16M. For 4K, no need - * to group units. For larger mappings, use 1M atom which - * should be large enough to contain a number of units. + * BookE and BookS radix are historical values and should be revisited. */ - if (mmu_linear_psize == MMU_PAGE_4K) + if (IS_ENABLED(CONFIG_PPC_BOOK3E)) { + atom_size = SZ_1M; + } else if (radix_enabled()) { atom_size = PAGE_SIZE; - else - atom_size = 1 << 20; + } else if (IS_ENABLED(CONFIG_PPC_64S_HASH_MMU)) { + /* + * Linear mapping is one of 4K, 1M and 16M. For 4K, no need + * to group units. For larger mappings, use 1M atom which + * should be large enough to contain a number of units. + */ + if (mmu_linear_psize == MMU_PAGE_4K) + atom_size = PAGE_SIZE; + else + atom_size = SZ_1M; + } if (pcpu_chosen_fc != PCPU_FC_PAGE) { rc = pcpu_embed_first_chunk(0, dyn_size, atom_size, pcpu_cpu_distance, diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index 3e053e2fd6b6..d84c434b2b78 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -527,16 +527,20 @@ static long restore_user_regs(struct pt_regs *regs, regs_set_return_msr(regs, regs->msr & ~(MSR_FP | MSR_FE0 | MSR_FE1)); #ifdef CONFIG_SPE - /* force the process to reload the spe registers from - current->thread when it next does spe instructions */ + /* + * Force the process to reload the spe registers from + * current->thread when it next does spe instructions. + * Since this is user ABI, we must enforce the sizing. + */ + BUILD_BUG_ON(sizeof(current->thread.spe) != ELF_NEVRREG * sizeof(u32)); regs_set_return_msr(regs, regs->msr & ~MSR_SPE); if (msr & MSR_SPE) { /* restore spe registers from the stack */ - unsafe_copy_from_user(current->thread.evr, &sr->mc_vregs, - ELF_NEVRREG * sizeof(u32), failed); + unsafe_copy_from_user(¤t->thread.spe, &sr->mc_vregs, + sizeof(current->thread.spe), failed); current->thread.used_spe = true; } else if (current->thread.used_spe) - memset(current->thread.evr, 0, ELF_NEVRREG * sizeof(u32)); + memset(¤t->thread.spe, 0, sizeof(current->thread.spe)); /* Always get SPEFSCR back */ unsafe_get_user(current->thread.spefscr, (u32 __user *)&sr->mc_vregs + ELF_NEVRREG, failed); diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index c23ee842c4c3..b7fd6a72aa76 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -61,6 +61,7 @@ #include <asm/cpu_has_feature.h> #include <asm/ftrace.h> #include <asm/kup.h> +#include <asm/fadump.h> #ifdef DEBUG #include <asm/udbg.h> @@ -621,6 +622,45 @@ void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *)) #endif #ifdef CONFIG_NMI_IPI +static void crash_stop_this_cpu(struct pt_regs *regs) +#else +static void crash_stop_this_cpu(void *dummy) +#endif +{ + /* + * Just busy wait here and avoid marking CPU as offline to ensure + * register data is captured appropriately. + */ + while (1) + cpu_relax(); +} + +void crash_smp_send_stop(void) +{ + static bool stopped = false; + + /* + * In case of fadump, register data for all CPUs is captured by f/w + * on ibm,os-term rtas call. Skip IPI callbacks to other CPUs before + * this rtas call to avoid tricky post processing of those CPUs' + * backtraces. + */ + if (should_fadump_crash()) + return; + + if (stopped) + return; + + stopped = true; + +#ifdef CONFIG_NMI_IPI + smp_send_nmi_ipi(NMI_IPI_ALL_OTHERS, crash_stop_this_cpu, 1000000); +#else + smp_call_function(crash_stop_this_cpu, NULL, 0); +#endif /* CONFIG_NMI_IPI */ +} + +#ifdef CONFIG_NMI_IPI static void nmi_stop_this_cpu(struct pt_regs *regs) { /* @@ -896,7 +936,8 @@ out: return tg; } -static int update_mask_from_threadgroup(cpumask_var_t *mask, struct thread_groups *tg, int cpu, int cpu_group_start) +static int __init update_mask_from_threadgroup(cpumask_var_t *mask, struct thread_groups *tg, + int cpu, int cpu_group_start) { int first_thread = cpu_first_thread_sibling(cpu); int i; @@ -1635,12 +1676,14 @@ void start_secondary(void *unused) BUG(); } +#ifdef CONFIG_PROFILING int setup_profiling_timer(unsigned int multiplier) { return 0; } +#endif -static void fixup_topology(void) +static void __init fixup_topology(void) { int i; diff --git a/arch/powerpc/kernel/swsusp_32.S b/arch/powerpc/kernel/swsusp_32.S index f73f4d72fea4..e0cbd63007f2 100644 --- a/arch/powerpc/kernel/swsusp_32.S +++ b/arch/powerpc/kernel/swsusp_32.S @@ -181,7 +181,7 @@ _GLOBAL(swsusp_arch_resume) #ifdef CONFIG_ALTIVEC /* Stop pending alitvec streams and memory accesses */ BEGIN_FTR_SECTION - DSSALL + PPC_DSSALL END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) #endif sync diff --git a/arch/powerpc/kernel/swsusp_asm64.S b/arch/powerpc/kernel/swsusp_asm64.S index 96bb20715aa9..9f1903c7f540 100644 --- a/arch/powerpc/kernel/swsusp_asm64.S +++ b/arch/powerpc/kernel/swsusp_asm64.S @@ -141,7 +141,7 @@ END_FW_FTR_SECTION_IFCLR(FW_FEATURE_LPAR) _GLOBAL(swsusp_arch_resume) /* Stop pending alitvec streams and memory accesses */ BEGIN_FTR_SECTION - DSSALL + PPC_DSSALL END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) sync diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c index 08d8072d6e7a..d45a415d5374 100644 --- a/arch/powerpc/kernel/sysfs.c +++ b/arch/powerpc/kernel/sysfs.c @@ -214,7 +214,7 @@ static ssize_t __used store_dscr_default(struct device *dev, static DEVICE_ATTR(dscr_default, 0600, show_dscr_default, store_dscr_default); -static void sysfs_create_dscr_default(void) +static void __init sysfs_create_dscr_default(void) { if (cpu_has_feature(CPU_FTR_DSCR)) { int cpu; @@ -744,12 +744,12 @@ static ssize_t show_svm(struct device *dev, struct device_attribute *attr, char } static DEVICE_ATTR(svm, 0444, show_svm, NULL); -static void create_svm_file(void) +static void __init create_svm_file(void) { device_create_file(cpu_subsys.dev_root, &dev_attr_svm); } #else -static void create_svm_file(void) +static void __init create_svm_file(void) { } #endif /* CONFIG_PPC_SVM */ @@ -1110,7 +1110,7 @@ EXPORT_SYMBOL_GPL(cpu_remove_dev_attr_group); /* NUMA stuff */ #ifdef CONFIG_NUMA -static void register_nodes(void) +static void __init register_nodes(void) { int i; @@ -1134,7 +1134,7 @@ void sysfs_remove_device_from_node(struct device *dev, int nid) EXPORT_SYMBOL_GPL(sysfs_remove_device_from_node); #else -static void register_nodes(void) +static void __init register_nodes(void) { return; } diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index cae8f03a44fe..62361cc7281c 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -88,6 +88,7 @@ static struct clocksource clocksource_timebase = { #define DECREMENTER_DEFAULT_MAX 0x7FFFFFFF u64 decrementer_max = DECREMENTER_DEFAULT_MAX; +EXPORT_SYMBOL_GPL(decrementer_max); /* for KVM HDEC */ static int decrementer_set_next_event(unsigned long evt, struct clock_event_device *dev); @@ -107,6 +108,7 @@ struct clock_event_device decrementer_clockevent = { EXPORT_SYMBOL(decrementer_clockevent); DEFINE_PER_CPU(u64, decrementers_next_tb); +EXPORT_SYMBOL_GPL(decrementers_next_tb); static DEFINE_PER_CPU(struct clock_event_device, decrementers); #define XSEC_PER_SEC (1024*1024) @@ -496,6 +498,16 @@ EXPORT_SYMBOL(profile_pc); * 64-bit uses a byte in the PACA, 32-bit uses a per-cpu variable... */ #ifdef CONFIG_PPC64 +static inline unsigned long test_irq_work_pending(void) +{ + unsigned long x; + + asm volatile("lbz %0,%1(13)" + : "=r" (x) + : "i" (offsetof(struct paca_struct, irq_work_pending))); + return x; +} + static inline void set_irq_work_pending_flag(void) { asm volatile("stb %0,%1(13)" : : @@ -539,13 +551,44 @@ void arch_irq_work_raise(void) preempt_enable(); } +static void set_dec_or_work(u64 val) +{ + set_dec(val); + /* We may have raced with new irq work */ + if (unlikely(test_irq_work_pending())) + set_dec(1); +} + #else /* CONFIG_IRQ_WORK */ #define test_irq_work_pending() 0 #define clear_irq_work_pending() +static void set_dec_or_work(u64 val) +{ + set_dec(val); +} #endif /* CONFIG_IRQ_WORK */ +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE +void timer_rearm_host_dec(u64 now) +{ + u64 *next_tb = this_cpu_ptr(&decrementers_next_tb); + + WARN_ON_ONCE(!arch_irqs_disabled()); + WARN_ON_ONCE(mfmsr() & MSR_EE); + + if (now >= *next_tb) { + local_paca->irq_happened |= PACA_IRQ_DEC; + } else { + now = *next_tb - now; + if (now <= decrementer_max) + set_dec_or_work(now); + } +} +EXPORT_SYMBOL_GPL(timer_rearm_host_dec); +#endif + /* * timer_interrupt - gets called when the decrementer overflows, * with interrupts disabled. @@ -566,22 +609,23 @@ DEFINE_INTERRUPT_HANDLER_ASYNC(timer_interrupt) return; } - /* Ensure a positive value is written to the decrementer, or else - * some CPUs will continue to take decrementer exceptions. When the - * PPC_WATCHDOG (decrementer based) is configured, keep this at most - * 31 bits, which is about 4 seconds on most systems, which gives - * the watchdog a chance of catching timer interrupt hard lockups. - */ - if (IS_ENABLED(CONFIG_PPC_WATCHDOG)) - set_dec(0x7fffffff); - else - set_dec(decrementer_max); - - /* Conditionally hard-enable interrupts now that the DEC has been - * bumped to its maximum value - */ - may_hard_irq_enable(); + /* Conditionally hard-enable interrupts. */ + if (should_hard_irq_enable()) { + /* + * Ensure a positive value is written to the decrementer, or + * else some CPUs will continue to take decrementer exceptions. + * When the PPC_WATCHDOG (decrementer based) is configured, + * keep this at most 31 bits, which is about 4 seconds on most + * systems, which gives the watchdog a chance of catching timer + * interrupt hard lockups. + */ + if (IS_ENABLED(CONFIG_PPC_WATCHDOG)) + set_dec(0x7fffffff); + else + set_dec(decrementer_max); + do_hard_irq_enable(); + } #if defined(CONFIG_PPC32) && defined(CONFIG_PPC_PMAC) if (atomic_read(&ppc_n_lost_interrupts) != 0) @@ -606,10 +650,7 @@ DEFINE_INTERRUPT_HANDLER_ASYNC(timer_interrupt) } else { now = *next_tb - now; if (now <= decrementer_max) - set_dec(now); - /* We may have raced with new irq work */ - if (test_irq_work_pending()) - set_dec(1); + set_dec_or_work(now); __this_cpu_inc(irq_stat.timer_irqs_others); } @@ -730,7 +771,7 @@ static int __init get_freq(char *name, int cells, unsigned long *val) static void start_cpu_decrementer(void) { -#if defined(CONFIG_BOOKE) || defined(CONFIG_40x) +#ifdef CONFIG_BOOKE_OR_40x unsigned int tcr; /* Clear any pending timer interrupts */ @@ -843,11 +884,7 @@ static int decrementer_set_next_event(unsigned long evt, struct clock_event_device *dev) { __this_cpu_write(decrementers_next_tb, get_tb() + evt); - set_dec(evt); - - /* We may have raced with new irq work */ - if (test_irq_work_pending()) - set_dec(1); + set_dec_or_work(evt); return 0; } diff --git a/arch/powerpc/kernel/tm.S b/arch/powerpc/kernel/tm.S index 2b91f233b05d..3beecc32940b 100644 --- a/arch/powerpc/kernel/tm.S +++ b/arch/powerpc/kernel/tm.S @@ -226,11 +226,8 @@ _GLOBAL(tm_reclaim) /* Sync the userland GPRs 2-12, 14-31 to thread->regs: */ SAVE_GPR(0, r7) /* user r0 */ - SAVE_GPR(2, r7) /* user r2 */ - SAVE_4GPRS(3, r7) /* user r3-r6 */ - SAVE_GPR(8, r7) /* user r8 */ - SAVE_GPR(9, r7) /* user r9 */ - SAVE_GPR(10, r7) /* user r10 */ + SAVE_GPRS(2, 6, r7) /* user r2-r6 */ + SAVE_GPRS(8, 10, r7) /* user r8-r10 */ ld r3, GPR1(r1) /* user r1 */ ld r4, GPR7(r1) /* user r7 */ ld r5, GPR11(r1) /* user r11 */ @@ -445,12 +442,8 @@ restore_gprs: ld r6, THREAD_TM_PPR(r3) REST_GPR(0, r7) /* GPR0 */ - REST_2GPRS(2, r7) /* GPR2-3 */ - REST_GPR(4, r7) /* GPR4 */ - REST_4GPRS(8, r7) /* GPR8-11 */ - REST_2GPRS(12, r7) /* GPR12-13 */ - - REST_NVGPRS(r7) /* GPR14-31 */ + REST_GPRS(2, 4, r7) /* GPR2-4 */ + REST_GPRS(8, 31, r7) /* GPR8-31 */ /* Load up PPR and DSCR here so we don't run with user values for long */ mtspr SPRN_DSCR, r5 diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c index d89c5df4f206..80b6285769f2 100644 --- a/arch/powerpc/kernel/trace/ftrace.c +++ b/arch/powerpc/kernel/trace/ftrace.c @@ -41,10 +41,10 @@ #define NUM_FTRACE_TRAMPS 8 static unsigned long ftrace_tramps[NUM_FTRACE_TRAMPS]; -static struct ppc_inst +static ppc_inst_t ftrace_call_replace(unsigned long ip, unsigned long addr, int link) { - struct ppc_inst op; + ppc_inst_t op; addr = ppc_function_entry((void *)addr); @@ -55,9 +55,9 @@ ftrace_call_replace(unsigned long ip, unsigned long addr, int link) } static int -ftrace_modify_code(unsigned long ip, struct ppc_inst old, struct ppc_inst new) +ftrace_modify_code(unsigned long ip, ppc_inst_t old, ppc_inst_t new) { - struct ppc_inst replaced; + ppc_inst_t replaced; /* * Note: @@ -90,24 +90,24 @@ ftrace_modify_code(unsigned long ip, struct ppc_inst old, struct ppc_inst new) */ static int test_24bit_addr(unsigned long ip, unsigned long addr) { - struct ppc_inst op; + ppc_inst_t op; addr = ppc_function_entry((void *)addr); /* use the create_branch to verify that this offset can be branched */ return create_branch(&op, (u32 *)ip, addr, 0) == 0; } -static int is_bl_op(struct ppc_inst op) +static int is_bl_op(ppc_inst_t op) { return (ppc_inst_val(op) & 0xfc000003) == 0x48000001; } -static int is_b_op(struct ppc_inst op) +static int is_b_op(ppc_inst_t op) { return (ppc_inst_val(op) & 0xfc000003) == 0x48000000; } -static unsigned long find_bl_target(unsigned long ip, struct ppc_inst op) +static unsigned long find_bl_target(unsigned long ip, ppc_inst_t op) { int offset; @@ -127,7 +127,7 @@ __ftrace_make_nop(struct module *mod, { unsigned long entry, ptr, tramp; unsigned long ip = rec->ip; - struct ppc_inst op, pop; + ppc_inst_t op, pop; /* read where this goes */ if (copy_inst_from_kernel_nofault(&op, (void *)ip)) { @@ -221,10 +221,9 @@ static int __ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, unsigned long addr) { - struct ppc_inst op; - unsigned int jmp[4]; + ppc_inst_t op; unsigned long ip = rec->ip; - unsigned long tramp; + unsigned long tramp, ptr; if (copy_from_kernel_nofault(&op, (void *)ip, MCOUNT_INSN_SIZE)) return -EFAULT; @@ -238,41 +237,13 @@ __ftrace_make_nop(struct module *mod, /* lets find where the pointer goes */ tramp = find_bl_target(ip, op); - /* - * On PPC32 the trampoline looks like: - * 0x3d, 0x80, 0x00, 0x00 lis r12,sym@ha - * 0x39, 0x8c, 0x00, 0x00 addi r12,r12,sym@l - * 0x7d, 0x89, 0x03, 0xa6 mtctr r12 - * 0x4e, 0x80, 0x04, 0x20 bctr - */ - - pr_devel("ip:%lx jumps to %lx", ip, tramp); - /* Find where the trampoline jumps to */ - if (copy_from_kernel_nofault(jmp, (void *)tramp, sizeof(jmp))) { - pr_err("Failed to read %lx\n", tramp); + if (module_trampoline_target(mod, tramp, &ptr)) { + pr_err("Failed to get trampoline target\n"); return -EFAULT; } - pr_devel(" %08x %08x ", jmp[0], jmp[1]); - - /* verify that this is what we expect it to be */ - if (((jmp[0] & 0xffff0000) != 0x3d800000) || - ((jmp[1] & 0xffff0000) != 0x398c0000) || - (jmp[2] != 0x7d8903a6) || - (jmp[3] != 0x4e800420)) { - pr_err("Not a trampoline\n"); - return -EINVAL; - } - - tramp = (jmp[1] & 0xffff) | - ((jmp[0] & 0xffff) << 16); - if (tramp & 0x8000) - tramp -= 0x10000; - - pr_devel(" %lx ", tramp); - - if (tramp != addr) { + if (ptr != addr) { pr_err("Trampoline location %08lx does not match addr\n", tramp); return -EINVAL; @@ -291,7 +262,7 @@ __ftrace_make_nop(struct module *mod, static unsigned long find_ftrace_tramp(unsigned long ip) { int i; - struct ppc_inst instr; + ppc_inst_t instr; /* * We have the compiler generated long_branch tramps at the end @@ -329,9 +300,9 @@ static int add_ftrace_tramp(unsigned long tramp) static int setup_mcount_compiler_tramp(unsigned long tramp) { int i; - struct ppc_inst op; + ppc_inst_t op; unsigned long ptr; - struct ppc_inst instr; + ppc_inst_t instr; static unsigned long ftrace_plt_tramps[NUM_FTRACE_TRAMPS]; /* Is this a known long jump tramp? */ @@ -396,7 +367,7 @@ static int setup_mcount_compiler_tramp(unsigned long tramp) static int __ftrace_make_nop_kernel(struct dyn_ftrace *rec, unsigned long addr) { unsigned long tramp, ip = rec->ip; - struct ppc_inst op; + ppc_inst_t op; /* Read where this goes */ if (copy_inst_from_kernel_nofault(&op, (void *)ip)) { @@ -436,7 +407,7 @@ int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, unsigned long addr) { unsigned long ip = rec->ip; - struct ppc_inst old, new; + ppc_inst_t old, new; /* * If the calling address is more that 24 bits away, @@ -489,7 +460,7 @@ int ftrace_make_nop(struct module *mod, */ #ifndef CONFIG_MPROFILE_KERNEL static int -expected_nop_sequence(void *ip, struct ppc_inst op0, struct ppc_inst op1) +expected_nop_sequence(void *ip, ppc_inst_t op0, ppc_inst_t op1) { /* * We expect to see: @@ -507,7 +478,7 @@ expected_nop_sequence(void *ip, struct ppc_inst op0, struct ppc_inst op1) } #else static int -expected_nop_sequence(void *ip, struct ppc_inst op0, struct ppc_inst op1) +expected_nop_sequence(void *ip, ppc_inst_t op0, ppc_inst_t op1) { /* look for patched "NOP" on ppc64 with -mprofile-kernel */ if (!ppc_inst_equal(op0, ppc_inst(PPC_RAW_NOP()))) @@ -519,8 +490,8 @@ expected_nop_sequence(void *ip, struct ppc_inst op0, struct ppc_inst op1) static int __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) { - struct ppc_inst op[2]; - struct ppc_inst instr; + ppc_inst_t op[2]; + ppc_inst_t instr; void *ip = (void *)rec->ip; unsigned long entry, ptr, tramp; struct module *mod = rec->arch.mod; @@ -588,8 +559,10 @@ static int __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) { int err; - struct ppc_inst op; + ppc_inst_t op; u32 *ip = (u32 *)rec->ip; + struct module *mod = rec->arch.mod; + unsigned long tramp; /* read where this goes */ if (copy_inst_from_kernel_nofault(&op, ip)) @@ -602,13 +575,23 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) } /* If we never set up a trampoline to ftrace_caller, then bail */ - if (!rec->arch.mod->arch.tramp) { +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS + if (!mod->arch.tramp || !mod->arch.tramp_regs) { +#else + if (!mod->arch.tramp) { +#endif pr_err("No ftrace trampoline\n"); return -EINVAL; } +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS + if (rec->flags & FTRACE_FL_REGS) + tramp = mod->arch.tramp_regs; + else +#endif + tramp = mod->arch.tramp; /* create the branch to the trampoline */ - err = create_branch(&op, ip, rec->arch.mod->arch.tramp, BRANCH_SET_LINK); + err = create_branch(&op, ip, tramp, BRANCH_SET_LINK); if (err) { pr_err("REL24 out of range!\n"); return -EINVAL; @@ -626,7 +609,7 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) static int __ftrace_make_call_kernel(struct dyn_ftrace *rec, unsigned long addr) { - struct ppc_inst op; + ppc_inst_t op; void *ip = (void *)rec->ip; unsigned long tramp, entry, ptr; @@ -674,7 +657,7 @@ static int __ftrace_make_call_kernel(struct dyn_ftrace *rec, unsigned long addr) int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) { unsigned long ip = rec->ip; - struct ppc_inst old, new; + ppc_inst_t old, new; /* * If the calling address is more that 24 bits away, @@ -713,7 +696,7 @@ static int __ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, unsigned long addr) { - struct ppc_inst op; + ppc_inst_t op; unsigned long ip = rec->ip; unsigned long entry, ptr, tramp; struct module *mod = rec->arch.mod; @@ -807,7 +790,7 @@ int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, unsigned long addr) { unsigned long ip = rec->ip; - struct ppc_inst old, new; + ppc_inst_t old, new; /* * If the calling address is more that 24 bits away, @@ -847,7 +830,7 @@ int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, int ftrace_update_ftrace_func(ftrace_func_t func) { unsigned long ip = (unsigned long)(&ftrace_call); - struct ppc_inst old, new; + ppc_inst_t old, new; int ret; old = ppc_inst_read((u32 *)&ftrace_call); @@ -932,7 +915,7 @@ int ftrace_enable_ftrace_graph_caller(void) unsigned long ip = (unsigned long)(&ftrace_graph_call); unsigned long addr = (unsigned long)(&ftrace_graph_caller); unsigned long stub = (unsigned long)(&ftrace_graph_stub); - struct ppc_inst old, new; + ppc_inst_t old, new; old = ftrace_call_replace(ip, stub, 0); new = ftrace_call_replace(ip, addr, 0); @@ -945,7 +928,7 @@ int ftrace_disable_ftrace_graph_caller(void) unsigned long ip = (unsigned long)(&ftrace_graph_call); unsigned long addr = (unsigned long)(&ftrace_graph_caller); unsigned long stub = (unsigned long)(&ftrace_graph_stub); - struct ppc_inst old, new; + ppc_inst_t old, new; old = ftrace_call_replace(ip, addr, 0); new = ftrace_call_replace(ip, stub, 0); diff --git a/arch/powerpc/kernel/trace/ftrace_32.S b/arch/powerpc/kernel/trace/ftrace_32.S index e023ae59c429..0a02c0cb12d9 100644 --- a/arch/powerpc/kernel/trace/ftrace_32.S +++ b/arch/powerpc/kernel/trace/ftrace_32.S @@ -9,55 +9,135 @@ #include <asm/asm-offsets.h> #include <asm/ftrace.h> #include <asm/export.h> +#include <asm/ptrace.h> _GLOBAL(mcount) _GLOBAL(_mcount) /* * It is required that _mcount on PPC32 must preserve the - * link register. But we have r0 to play with. We use r0 + * link register. But we have r12 to play with. We use r12 * to push the return address back to the caller of mcount * into the ctr register, restore the link register and * then jump back using the ctr register. */ - mflr r0 - mtctr r0 - lwz r0, 4(r1) + mflr r12 + mtctr r12 mtlr r0 bctr +EXPORT_SYMBOL(_mcount) _GLOBAL(ftrace_caller) MCOUNT_SAVE_FRAME /* r3 ends up with link register */ subi r3, r3, MCOUNT_INSN_SIZE + lis r5,function_trace_op@ha + lwz r5,function_trace_op@l(r5) + li r6, 0 .globl ftrace_call ftrace_call: bl ftrace_stub nop + MCOUNT_RESTORE_FRAME +ftrace_caller_common: #ifdef CONFIG_FUNCTION_GRAPH_TRACER .globl ftrace_graph_call ftrace_graph_call: b ftrace_graph_stub _GLOBAL(ftrace_graph_stub) #endif - MCOUNT_RESTORE_FRAME /* old link register ends up in ctr reg */ bctr -EXPORT_SYMBOL(_mcount) _GLOBAL(ftrace_stub) blr +_GLOBAL(ftrace_regs_caller) + /* Save the original return address in A's stack frame */ + stw r0,LRSAVE(r1) + + /* Create our stack frame + pt_regs */ + stwu r1,-INT_FRAME_SIZE(r1) + + /* Save all gprs to pt_regs */ + stw r0, GPR0(r1) + stmw r2, GPR2(r1) + + /* Save previous stack pointer (r1) */ + addi r8, r1, INT_FRAME_SIZE + stw r8, GPR1(r1) + + /* Load special regs for save below */ + mfmsr r8 + mfctr r9 + mfxer r10 + mfcr r11 + + /* Get the _mcount() call site out of LR */ + mflr r7 + /* Save it as pt_regs->nip */ + stw r7, _NIP(r1) + /* Save the read LR in pt_regs->link */ + stw r0, _LINK(r1) + + lis r3,function_trace_op@ha + lwz r5,function_trace_op@l(r3) + + /* Calculate ip from nip-4 into r3 for call below */ + subi r3, r7, MCOUNT_INSN_SIZE + + /* Put the original return address in r4 as parent_ip */ + mr r4, r0 + + /* Save special regs */ + stw r8, _MSR(r1) + stw r9, _CTR(r1) + stw r10, _XER(r1) + stw r11, _CCR(r1) + + /* Load &pt_regs in r6 for call below */ + addi r6, r1, STACK_FRAME_OVERHEAD + + /* ftrace_call(r3, r4, r5, r6) */ +.globl ftrace_regs_call +ftrace_regs_call: + bl ftrace_stub + nop + + /* Load ctr with the possibly modified NIP */ + lwz r3, _NIP(r1) + mtctr r3 + + /* Restore gprs */ + lmw r2, GPR2(r1) + + /* Restore possibly modified LR */ + lwz r0, _LINK(r1) + mtlr r0 + + /* Pop our stack frame */ + addi r1, r1, INT_FRAME_SIZE + + b ftrace_caller_common + #ifdef CONFIG_FUNCTION_GRAPH_TRACER _GLOBAL(ftrace_graph_caller) + stwu r1,-48(r1) + stw r3, 12(r1) + stw r4, 16(r1) + stw r5, 20(r1) + stw r6, 24(r1) + stw r7, 28(r1) + stw r8, 32(r1) + stw r9, 36(r1) + stw r10,40(r1) + addi r5, r1, 48 - /* load r4 with local address */ - lwz r4, 44(r1) + mfctr r4 /* ftrace_caller has moved local addr here */ + stw r4, 44(r1) + mflr r3 /* ftrace_caller has restored LR from stack */ subi r4, r4, MCOUNT_INSN_SIZE - /* Grab the LR out of the caller stack frame */ - lwz r3,52(r1) - bl prepare_ftrace_return nop @@ -66,9 +146,21 @@ _GLOBAL(ftrace_graph_caller) * Change the LR in the callers stack frame to this. */ stw r3,52(r1) + mtlr r3 + lwz r0,44(r1) + mtctr r0 + + lwz r3, 12(r1) + lwz r4, 16(r1) + lwz r5, 20(r1) + lwz r6, 24(r1) + lwz r7, 28(r1) + lwz r8, 32(r1) + lwz r9, 36(r1) + lwz r10,40(r1) + + addi r1, r1, 48 - MCOUNT_RESTORE_FRAME - /* old link register ends up in ctr reg */ bctr _GLOBAL(return_to_handler) diff --git a/arch/powerpc/kernel/trace/ftrace_64_mprofile.S b/arch/powerpc/kernel/trace/ftrace_64_mprofile.S index f9fd5f743eba..d636fc755f60 100644 --- a/arch/powerpc/kernel/trace/ftrace_64_mprofile.S +++ b/arch/powerpc/kernel/trace/ftrace_64_mprofile.S @@ -41,15 +41,14 @@ _GLOBAL(ftrace_regs_caller) /* Save all gprs to pt_regs */ SAVE_GPR(0, r1) - SAVE_10GPRS(2, r1) + SAVE_GPRS(2, 11, r1) /* Ok to continue? */ lbz r3, PACA_FTRACE_ENABLED(r13) cmpdi r3, 0 beq ftrace_no_trace - SAVE_10GPRS(12, r1) - SAVE_10GPRS(22, r1) + SAVE_GPRS(12, 31, r1) /* Save previous stack pointer (r1) */ addi r8, r1, SWITCH_FRAME_SIZE @@ -108,10 +107,8 @@ ftrace_regs_call: #endif /* Restore gprs */ - REST_GPR(0,r1) - REST_10GPRS(2,r1) - REST_10GPRS(12,r1) - REST_10GPRS(22,r1) + REST_GPR(0, r1) + REST_GPRS(2, 31, r1) /* Restore possibly modified LR */ ld r0, _LINK(r1) @@ -157,7 +154,7 @@ _GLOBAL(ftrace_caller) stdu r1, -SWITCH_FRAME_SIZE(r1) /* Save all gprs to pt_regs */ - SAVE_8GPRS(3, r1) + SAVE_GPRS(3, 10, r1) lbz r3, PACA_FTRACE_ENABLED(r13) cmpdi r3, 0 @@ -194,7 +191,7 @@ ftrace_call: mtctr r3 /* Restore gprs */ - REST_8GPRS(3,r1) + REST_GPRS(3, 10, r1) /* Restore callee's TOC */ ld r2, 24(r1) diff --git a/arch/powerpc/kernel/udbg_16550.c b/arch/powerpc/kernel/udbg_16550.c index 8513aa49614e..d3942de254c6 100644 --- a/arch/powerpc/kernel/udbg_16550.c +++ b/arch/powerpc/kernel/udbg_16550.c @@ -84,7 +84,7 @@ static int udbg_uart_getc(void) return udbg_uart_in(UART_RBR); } -static void udbg_use_uart(void) +static void __init udbg_use_uart(void) { udbg_putc = udbg_uart_putc; udbg_flush = udbg_uart_flush; @@ -92,7 +92,7 @@ static void udbg_use_uart(void) udbg_getc_poll = udbg_uart_getc_poll; } -void udbg_uart_setup(unsigned int speed, unsigned int clock) +void __init udbg_uart_setup(unsigned int speed, unsigned int clock) { unsigned int dll, base_bauds; @@ -121,7 +121,7 @@ void udbg_uart_setup(unsigned int speed, unsigned int clock) udbg_uart_out(UART_FCR, 0x7); } -unsigned int udbg_probe_uart_speed(unsigned int clock) +unsigned int __init udbg_probe_uart_speed(unsigned int clock) { unsigned int dll, dlm, divisor, prescaler, speed; u8 old_lcr; @@ -172,7 +172,7 @@ static void udbg_uart_out_pio(unsigned int reg, u8 data) outb(data, udbg_uart.pio_base + (reg * udbg_uart_stride)); } -void udbg_uart_init_pio(unsigned long port, unsigned int stride) +void __init udbg_uart_init_pio(unsigned long port, unsigned int stride) { if (!port) return; @@ -194,7 +194,7 @@ static void udbg_uart_out_mmio(unsigned int reg, u8 data) } -void udbg_uart_init_mmio(void __iomem *addr, unsigned int stride) +void __init udbg_uart_init_mmio(void __iomem *addr, unsigned int stride) { if (!addr) return; diff --git a/arch/powerpc/kernel/vecemu.c b/arch/powerpc/kernel/vecemu.c index ae632569446f..fd9432875ebc 100644 --- a/arch/powerpc/kernel/vecemu.c +++ b/arch/powerpc/kernel/vecemu.c @@ -261,7 +261,7 @@ static unsigned int rfin(unsigned int x) int emulate_altivec(struct pt_regs *regs) { - struct ppc_inst instr; + ppc_inst_t instr; unsigned int i, word; unsigned int va, vb, vc, vd; vector128 *vrs; diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S index ba03eedfdcd8..5cc24d8cce94 100644 --- a/arch/powerpc/kernel/vector.S +++ b/arch/powerpc/kernel/vector.S @@ -47,6 +47,10 @@ EXPORT_SYMBOL(store_vr_state) */ _GLOBAL(load_up_altivec) mfmsr r5 /* grab the current MSR */ +#ifdef CONFIG_PPC_BOOK3S_64 + /* interrupt doesn't set MSR[RI] and HPT can fault on current access */ + ori r5,r5,MSR_RI +#endif oris r5,r5,MSR_VEC@h MTMSRD(r5) /* enable use of AltiVec now */ isync @@ -126,6 +130,12 @@ _GLOBAL(load_up_vsx) andis. r5,r12,MSR_VEC@h beql+ load_up_altivec /* skip if already loaded */ +#ifdef CONFIG_PPC_BOOK3S_64 + /* interrupt doesn't set MSR[RI] and HPT can fault on current access */ + li r5,MSR_RI + mtmsrd r5,1 +#endif + ld r4,PACACURRENT(r13) addi r4,r4,THREAD /* Get THREAD */ li r6,1 diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index 18e42c74abdd..2bcca818136a 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -322,10 +322,6 @@ SECTIONS #ifdef CONFIG_PPC32 .data : AT(ADDR(.data) - LOAD_OFFSET) { DATA_DATA -#ifdef CONFIG_UBSAN - *(.data..Lubsan_data*) - *(.data..Lubsan_type*) -#endif *(.data.rel*) *(SDATA_MAIN) *(.sdata2) @@ -336,24 +332,18 @@ SECTIONS #else .data : AT(ADDR(.data) - LOAD_OFFSET) { DATA_DATA -#ifdef CONFIG_UBSAN - *(.data..Lubsan_data*) - *(.data..Lubsan_type*) -#endif *(.data.rel*) *(.toc1) *(.branch_lt) } - . = ALIGN(256); - .got : AT(ADDR(.got) - LOAD_OFFSET) { - __toc_start = .; + .got : AT(ADDR(.got) - LOAD_OFFSET) ALIGN(256) { + *(.got) #ifndef CONFIG_RELOCATABLE __prom_init_toc_start = .; - arch/powerpc/kernel/prom_init.o*(.toc .got) + arch/powerpc/kernel/prom_init.o*(.toc) __prom_init_toc_end = .; #endif - *(.got) *(.toc) } #endif diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c index 3fa6d240bade..bfc27496fe7e 100644 --- a/arch/powerpc/kernel/watchdog.c +++ b/arch/powerpc/kernel/watchdog.c @@ -85,10 +85,37 @@ static DEFINE_PER_CPU(u64, wd_timer_tb); /* SMP checker bits */ static unsigned long __wd_smp_lock; +static unsigned long __wd_reporting; +static unsigned long __wd_nmi_output; static cpumask_t wd_smp_cpus_pending; static cpumask_t wd_smp_cpus_stuck; static u64 wd_smp_last_reset_tb; +/* + * Try to take the exclusive watchdog action / NMI IPI / printing lock. + * wd_smp_lock must be held. If this fails, we should return and wait + * for the watchdog to kick in again (or another CPU to trigger it). + * + * Importantly, if hardlockup_panic is set, wd_try_report failure should + * not delay the panic, because whichever other CPU is reporting will + * call panic. + */ +static bool wd_try_report(void) +{ + if (__wd_reporting) + return false; + __wd_reporting = 1; + return true; +} + +/* End printing after successful wd_try_report. wd_smp_lock not required. */ +static void wd_end_reporting(void) +{ + smp_mb(); /* End printing "critical section" */ + WARN_ON_ONCE(__wd_reporting == 0); + WRITE_ONCE(__wd_reporting, 0); +} + static inline void wd_smp_lock(unsigned long *flags) { /* @@ -128,109 +155,182 @@ static void wd_lockup_ipi(struct pt_regs *regs) else dump_stack(); + /* + * __wd_nmi_output must be set after we printk from NMI context. + * + * printk from NMI context defers printing to the console to irq_work. + * If that NMI was taken in some code that is hard-locked, then irqs + * are disabled so irq_work will never fire. That can result in the + * hard lockup messages being delayed (indefinitely, until something + * else kicks the console drivers). + * + * Setting __wd_nmi_output will cause another CPU to notice and kick + * the console drivers for us. + * + * xchg is not needed here (it could be a smp_mb and store), but xchg + * gives the memory ordering and atomicity required. + */ + xchg(&__wd_nmi_output, 1); + /* Do not panic from here because that can recurse into NMI IPI layer */ } -static void set_cpumask_stuck(const struct cpumask *cpumask, u64 tb) +static bool set_cpu_stuck(int cpu) { - cpumask_or(&wd_smp_cpus_stuck, &wd_smp_cpus_stuck, cpumask); - cpumask_andnot(&wd_smp_cpus_pending, &wd_smp_cpus_pending, cpumask); + cpumask_set_cpu(cpu, &wd_smp_cpus_stuck); + cpumask_clear_cpu(cpu, &wd_smp_cpus_pending); + /* + * See wd_smp_clear_cpu_pending() + */ + smp_mb(); if (cpumask_empty(&wd_smp_cpus_pending)) { - wd_smp_last_reset_tb = tb; + wd_smp_last_reset_tb = get_tb(); cpumask_andnot(&wd_smp_cpus_pending, &wd_cpus_enabled, &wd_smp_cpus_stuck); + return true; } -} -static void set_cpu_stuck(int cpu, u64 tb) -{ - set_cpumask_stuck(cpumask_of(cpu), tb); + return false; } -static void watchdog_smp_panic(int cpu, u64 tb) +static void watchdog_smp_panic(int cpu) { + static cpumask_t wd_smp_cpus_ipi; // protected by reporting unsigned long flags; + u64 tb, last_reset; int c; wd_smp_lock(&flags); /* Double check some things under lock */ - if ((s64)(tb - wd_smp_last_reset_tb) < (s64)wd_smp_panic_timeout_tb) + tb = get_tb(); + last_reset = wd_smp_last_reset_tb; + if ((s64)(tb - last_reset) < (s64)wd_smp_panic_timeout_tb) goto out; if (cpumask_test_cpu(cpu, &wd_smp_cpus_pending)) goto out; - if (cpumask_weight(&wd_smp_cpus_pending) == 0) + if (!wd_try_report()) + goto out; + for_each_online_cpu(c) { + if (!cpumask_test_cpu(c, &wd_smp_cpus_pending)) + continue; + if (c == cpu) + continue; // should not happen + + __cpumask_set_cpu(c, &wd_smp_cpus_ipi); + if (set_cpu_stuck(c)) + break; + } + if (cpumask_empty(&wd_smp_cpus_ipi)) { + wd_end_reporting(); goto out; + } + wd_smp_unlock(&flags); pr_emerg("CPU %d detected hard LOCKUP on other CPUs %*pbl\n", - cpu, cpumask_pr_args(&wd_smp_cpus_pending)); + cpu, cpumask_pr_args(&wd_smp_cpus_ipi)); pr_emerg("CPU %d TB:%lld, last SMP heartbeat TB:%lld (%lldms ago)\n", - cpu, tb, wd_smp_last_reset_tb, - tb_to_ns(tb - wd_smp_last_reset_tb) / 1000000); + cpu, tb, last_reset, tb_to_ns(tb - last_reset) / 1000000); if (!sysctl_hardlockup_all_cpu_backtrace) { /* * Try to trigger the stuck CPUs, unless we are going to * get a backtrace on all of them anyway. */ - for_each_cpu(c, &wd_smp_cpus_pending) { - if (c == cpu) - continue; + for_each_cpu(c, &wd_smp_cpus_ipi) { smp_send_nmi_ipi(c, wd_lockup_ipi, 1000000); + __cpumask_clear_cpu(c, &wd_smp_cpus_ipi); } - } - - /* Take the stuck CPUs out of the watch group */ - set_cpumask_stuck(&wd_smp_cpus_pending, tb); - - wd_smp_unlock(&flags); - - if (sysctl_hardlockup_all_cpu_backtrace) + } else { trigger_allbutself_cpu_backtrace(); - - /* - * Force flush any remote buffers that might be stuck in IRQ context - * and therefore could not run their irq_work. - */ - printk_trigger_flush(); + cpumask_clear(&wd_smp_cpus_ipi); + } if (hardlockup_panic) nmi_panic(NULL, "Hard LOCKUP"); + wd_end_reporting(); + return; out: wd_smp_unlock(&flags); } -static void wd_smp_clear_cpu_pending(int cpu, u64 tb) +static void wd_smp_clear_cpu_pending(int cpu) { if (!cpumask_test_cpu(cpu, &wd_smp_cpus_pending)) { if (unlikely(cpumask_test_cpu(cpu, &wd_smp_cpus_stuck))) { struct pt_regs *regs = get_irq_regs(); unsigned long flags; - wd_smp_lock(&flags); - pr_emerg("CPU %d became unstuck TB:%lld\n", - cpu, tb); + cpu, get_tb()); print_irqtrace_events(current); if (regs) show_regs(regs); else dump_stack(); + wd_smp_lock(&flags); cpumask_clear_cpu(cpu, &wd_smp_cpus_stuck); wd_smp_unlock(&flags); + } else { + /* + * The last CPU to clear pending should have reset the + * watchdog so we generally should not find it empty + * here if our CPU was clear. However it could happen + * due to a rare race with another CPU taking the + * last CPU out of the mask concurrently. + * + * We can't add a warning for it. But just in case + * there is a problem with the watchdog that is causing + * the mask to not be reset, try to kick it along here. + */ + if (unlikely(cpumask_empty(&wd_smp_cpus_pending))) + goto none_pending; } return; } + + /* + * All other updates to wd_smp_cpus_pending are performed under + * wd_smp_lock. All of them are atomic except the case where the + * mask becomes empty and is reset. This will not happen here because + * cpu was tested to be in the bitmap (above), and a CPU only clears + * its own bit. _Except_ in the case where another CPU has detected a + * hard lockup on our CPU and takes us out of the pending mask. So in + * normal operation there will be no race here, no problem. + * + * In the lockup case, this atomic clear-bit vs a store that refills + * other bits in the accessed word wll not be a problem. The bit clear + * is atomic so it will not cause the store to get lost, and the store + * will never set this bit so it will not overwrite the bit clear. The + * only way for a stuck CPU to return to the pending bitmap is to + * become unstuck itself. + */ cpumask_clear_cpu(cpu, &wd_smp_cpus_pending); + + /* + * Order the store to clear pending with the load(s) to check all + * words in the pending mask to check they are all empty. This orders + * with the same barrier on another CPU. This prevents two CPUs + * clearing the last 2 pending bits, but neither seeing the other's + * store when checking if the mask is empty, and missing an empty + * mask, which ends with a false positive. + */ + smp_mb(); if (cpumask_empty(&wd_smp_cpus_pending)) { unsigned long flags; +none_pending: + /* + * Double check under lock because more than one CPU could see + * a clear mask with the lockless check after clearing their + * pending bits. + */ wd_smp_lock(&flags); if (cpumask_empty(&wd_smp_cpus_pending)) { - wd_smp_last_reset_tb = tb; + wd_smp_last_reset_tb = get_tb(); cpumask_andnot(&wd_smp_cpus_pending, &wd_cpus_enabled, &wd_smp_cpus_stuck); @@ -245,10 +345,21 @@ static void watchdog_timer_interrupt(int cpu) per_cpu(wd_timer_tb, cpu) = tb; - wd_smp_clear_cpu_pending(cpu, tb); + wd_smp_clear_cpu_pending(cpu); if ((s64)(tb - wd_smp_last_reset_tb) >= (s64)wd_smp_panic_timeout_tb) - watchdog_smp_panic(cpu, tb); + watchdog_smp_panic(cpu); + + if (__wd_nmi_output && xchg(&__wd_nmi_output, 0)) { + /* + * Something has called printk from NMI context. It might be + * stuck, so this this triggers a flush that will get that + * printk output to the console. + * + * See wd_lockup_ipi. + */ + printk_trigger_flush(); + } } DEFINE_INTERRUPT_HANDLER_NMI(soft_nmi_interrupt) @@ -267,12 +378,27 @@ DEFINE_INTERRUPT_HANDLER_NMI(soft_nmi_interrupt) tb = get_tb(); if (tb - per_cpu(wd_timer_tb, cpu) >= wd_panic_timeout_tb) { + /* + * Taking wd_smp_lock here means it is a soft-NMI lock, which + * means we can't take any regular or irqsafe spin locks while + * holding this lock. This is why timers can't printk while + * holding the lock. + */ wd_smp_lock(&flags); if (cpumask_test_cpu(cpu, &wd_smp_cpus_stuck)) { wd_smp_unlock(&flags); return 0; } - set_cpu_stuck(cpu, tb); + if (!wd_try_report()) { + wd_smp_unlock(&flags); + /* Couldn't report, try again in 100ms */ + mtspr(SPRN_DEC, 100 * tb_ticks_per_usec * 1000); + return 0; + } + + set_cpu_stuck(cpu); + + wd_smp_unlock(&flags); pr_emerg("CPU %d self-detected hard LOCKUP @ %pS\n", cpu, (void *)regs->nip); @@ -283,14 +409,21 @@ DEFINE_INTERRUPT_HANDLER_NMI(soft_nmi_interrupt) print_irqtrace_events(current); show_regs(regs); - wd_smp_unlock(&flags); + xchg(&__wd_nmi_output, 1); // see wd_lockup_ipi if (sysctl_hardlockup_all_cpu_backtrace) trigger_allbutself_cpu_backtrace(); if (hardlockup_panic) nmi_panic(regs, "Hard LOCKUP"); + + wd_end_reporting(); } + /* + * We are okay to change DEC in soft_nmi_interrupt because the masked + * handler has marked a DEC as pending, so the timer interrupt will be + * replayed as soon as local irqs are enabled again. + */ if (wd_panic_timeout_tb < 0x7fffffff) mtspr(SPRN_DEC, wd_panic_timeout_tb); @@ -318,11 +451,15 @@ void arch_touch_nmi_watchdog(void) { unsigned long ticks = tb_ticks_per_usec * wd_timer_period_ms * 1000; int cpu = smp_processor_id(); - u64 tb = get_tb(); + u64 tb; + if (!cpumask_test_cpu(cpu, &watchdog_cpumask)) + return; + + tb = get_tb(); if (tb - per_cpu(wd_timer_tb, cpu) >= ticks) { per_cpu(wd_timer_tb, cpu) = tb; - wd_smp_clear_cpu_pending(cpu, tb); + wd_smp_clear_cpu_pending(cpu); } } EXPORT_SYMBOL(arch_touch_nmi_watchdog); @@ -380,7 +517,7 @@ static void stop_watchdog(void *arg) cpumask_clear_cpu(cpu, &wd_cpus_enabled); wd_smp_unlock(&flags); - wd_smp_clear_cpu_pending(cpu, get_tb()); + wd_smp_clear_cpu_pending(cpu); } static int stop_watchdog_on_cpu(unsigned int cpu) diff --git a/arch/powerpc/kexec/core.c b/arch/powerpc/kexec/core.c index a2242017e55f..8b68d9f91a03 100644 --- a/arch/powerpc/kexec/core.c +++ b/arch/powerpc/kexec/core.c @@ -185,7 +185,7 @@ void __init reserve_crashkernel(void) } } -int overlaps_crashkernel(unsigned long start, unsigned long size) +int __init overlaps_crashkernel(unsigned long start, unsigned long size) { return (start + size) > crashk_res.start && start <= crashk_res.end; } diff --git a/arch/powerpc/kexec/core_64.c b/arch/powerpc/kexec/core_64.c index 66678518b938..635b5fc30b53 100644 --- a/arch/powerpc/kexec/core_64.c +++ b/arch/powerpc/kexec/core_64.c @@ -378,7 +378,7 @@ void default_machine_kexec(struct kimage *image) /* NOTREACHED */ } -#ifdef CONFIG_PPC_BOOK3S_64 +#ifdef CONFIG_PPC_64S_HASH_MMU /* Values we need to export to the second kernel via the device tree. */ static unsigned long htab_base; static unsigned long htab_size; @@ -420,4 +420,4 @@ static int __init export_htab_values(void) return 0; } late_initcall(export_htab_values); -#endif /* CONFIG_PPC_BOOK3S_64 */ +#endif /* CONFIG_PPC_64S_HASH_MMU */ diff --git a/arch/powerpc/kexec/ranges.c b/arch/powerpc/kexec/ranges.c index 6b81c852feab..563e9989a5bf 100644 --- a/arch/powerpc/kexec/ranges.c +++ b/arch/powerpc/kexec/ranges.c @@ -296,7 +296,7 @@ int add_initrd_mem_range(struct crash_mem **mem_ranges) return ret; } -#ifdef CONFIG_PPC_BOOK3S_64 +#ifdef CONFIG_PPC_64S_HASH_MMU /** * add_htab_mem_range - Adds htab range to the given memory ranges list, * if it exists diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig index ff581d70f20c..f947b77386a9 100644 --- a/arch/powerpc/kvm/Kconfig +++ b/arch/powerpc/kvm/Kconfig @@ -69,6 +69,7 @@ config KVM_BOOK3S_64 select KVM_BOOK3S_64_HANDLER select KVM select KVM_BOOK3S_PR_POSSIBLE if !KVM_BOOK3S_HV_POSSIBLE + select PPC_64S_HASH_MMU select SPAPR_TCE_IOMMU if IOMMU_SUPPORT && (PPC_PSERIES || PPC_POWERNV) help Support running unmodified book3s_64 and book3s_32 guest kernels @@ -130,6 +131,21 @@ config KVM_BOOK3S_HV_EXIT_TIMING If unsure, say N. +config KVM_BOOK3S_HV_NESTED_PMU_WORKAROUND + bool "Nested L0 host workaround for L1 KVM host PMU handling bug" if EXPERT + depends on KVM_BOOK3S_HV_POSSIBLE + default !EXPERT + help + Old nested HV capable Linux guests have a bug where they don't + reflect the PMU in-use status of their L2 guest to the L0 host + while the L2 PMU registers are live. This can result in loss + of L2 PMU register state, causing perf to not work correctly in + L2 guests. + + Selecting this option for the L0 host implements a workaround for + those buggy L1s which saves the L2 state, at the cost of performance + in all nested-capable guest entry/exit. + config KVM_BOOKE_HV bool diff --git a/arch/powerpc/kvm/book3s_64_entry.S b/arch/powerpc/kvm/book3s_64_entry.S index 983b8c18bc31..05e003eb5d90 100644 --- a/arch/powerpc/kvm/book3s_64_entry.S +++ b/arch/powerpc/kvm/book3s_64_entry.S @@ -374,11 +374,16 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX) BEGIN_FTR_SECTION mtspr SPRN_DAWRX1,r10 END_FTR_SECTION_IFSET(CPU_FTR_DAWR1) - mtspr SPRN_PID,r10 /* - * Switch to host MMU mode + * Switch to host MMU mode (don't have the real host PID but we aren't + * going back to userspace). */ + hwsync + isync + + mtspr SPRN_PID,r10 + ld r10, HSTATE_KVM_VCPU(r13) ld r10, VCPU_KVM(r10) lwz r10, KVM_HOST_LPID(r10) @@ -389,6 +394,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_DAWR1) ld r10, KVM_HOST_LPCR(r10) mtspr SPRN_LPCR,r10 + isync + /* * Set GUEST_MODE_NONE so the handler won't branch to KVM, and clear * MSR_RI in r12 ([H]SRR1) so the handler won't try to return. diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c index 16359525a40f..8cebe5542256 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_radix.c +++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c @@ -57,6 +57,8 @@ unsigned long __kvmhv_copy_tofrom_guest_radix(int lpid, int pid, preempt_disable(); + asm volatile("hwsync" ::: "memory"); + isync(); /* switch the lpid first to avoid running host with unallocated pid */ old_lpid = mfspr(SPRN_LPID); if (old_lpid != lpid) @@ -75,6 +77,8 @@ unsigned long __kvmhv_copy_tofrom_guest_radix(int lpid, int pid, ret = __copy_to_user_inatomic((void __user *)to, from, n); pagefault_enable(); + asm volatile("hwsync" ::: "memory"); + isync(); /* switch the pid first to avoid running host with unallocated pid */ if (quadrant == 1 && pid != old_pid) mtspr(SPRN_PID, old_pid); diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 7b74fc0a986b..f64e45d6c0f4 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -80,6 +80,7 @@ #include <asm/plpar_wrappers.h> #include "book3s.h" +#include "book3s_hv.h" #define CREATE_TRACE_POINTS #include "trace_hv.h" @@ -127,11 +128,6 @@ static bool nested = true; module_param(nested, bool, S_IRUGO | S_IWUSR); MODULE_PARM_DESC(nested, "Enable nested virtualization (only on POWER9)"); -static inline bool nesting_enabled(struct kvm *kvm) -{ - return kvm->arch.nested_enable && kvm_is_radix(kvm); -} - static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu); /* @@ -276,22 +272,26 @@ static void kvmppc_fast_vcpu_kick_hv(struct kvm_vcpu *vcpu) * they should never fail.) */ -static void kvmppc_core_start_stolen(struct kvmppc_vcore *vc) +static void kvmppc_core_start_stolen(struct kvmppc_vcore *vc, u64 tb) { unsigned long flags; + WARN_ON_ONCE(cpu_has_feature(CPU_FTR_ARCH_300)); + spin_lock_irqsave(&vc->stoltb_lock, flags); - vc->preempt_tb = mftb(); + vc->preempt_tb = tb; spin_unlock_irqrestore(&vc->stoltb_lock, flags); } -static void kvmppc_core_end_stolen(struct kvmppc_vcore *vc) +static void kvmppc_core_end_stolen(struct kvmppc_vcore *vc, u64 tb) { unsigned long flags; + WARN_ON_ONCE(cpu_has_feature(CPU_FTR_ARCH_300)); + spin_lock_irqsave(&vc->stoltb_lock, flags); if (vc->preempt_tb != TB_NIL) { - vc->stolen_tb += mftb() - vc->preempt_tb; + vc->stolen_tb += tb - vc->preempt_tb; vc->preempt_tb = TB_NIL; } spin_unlock_irqrestore(&vc->stoltb_lock, flags); @@ -301,6 +301,12 @@ static void kvmppc_core_vcpu_load_hv(struct kvm_vcpu *vcpu, int cpu) { struct kvmppc_vcore *vc = vcpu->arch.vcore; unsigned long flags; + u64 now; + + if (cpu_has_feature(CPU_FTR_ARCH_300)) + return; + + now = mftb(); /* * We can test vc->runner without taking the vcore lock, @@ -309,12 +315,12 @@ static void kvmppc_core_vcpu_load_hv(struct kvm_vcpu *vcpu, int cpu) * ever sets it to NULL. */ if (vc->runner == vcpu && vc->vcore_state >= VCORE_SLEEPING) - kvmppc_core_end_stolen(vc); + kvmppc_core_end_stolen(vc, now); spin_lock_irqsave(&vcpu->arch.tbacct_lock, flags); if (vcpu->arch.state == KVMPPC_VCPU_BUSY_IN_HOST && vcpu->arch.busy_preempt != TB_NIL) { - vcpu->arch.busy_stolen += mftb() - vcpu->arch.busy_preempt; + vcpu->arch.busy_stolen += now - vcpu->arch.busy_preempt; vcpu->arch.busy_preempt = TB_NIL; } spin_unlock_irqrestore(&vcpu->arch.tbacct_lock, flags); @@ -324,13 +330,19 @@ static void kvmppc_core_vcpu_put_hv(struct kvm_vcpu *vcpu) { struct kvmppc_vcore *vc = vcpu->arch.vcore; unsigned long flags; + u64 now; + + if (cpu_has_feature(CPU_FTR_ARCH_300)) + return; + + now = mftb(); if (vc->runner == vcpu && vc->vcore_state >= VCORE_SLEEPING) - kvmppc_core_start_stolen(vc); + kvmppc_core_start_stolen(vc, now); spin_lock_irqsave(&vcpu->arch.tbacct_lock, flags); if (vcpu->arch.state == KVMPPC_VCPU_BUSY_IN_HOST) - vcpu->arch.busy_preempt = mftb(); + vcpu->arch.busy_preempt = now; spin_unlock_irqrestore(&vcpu->arch.tbacct_lock, flags); } @@ -675,6 +687,8 @@ static u64 vcore_stolen_time(struct kvmppc_vcore *vc, u64 now) u64 p; unsigned long flags; + WARN_ON_ONCE(cpu_has_feature(CPU_FTR_ARCH_300)); + spin_lock_irqsave(&vc->stoltb_lock, flags); p = vc->stolen_tb; if (vc->vcore_state != VCORE_INACTIVE && @@ -684,35 +698,30 @@ static u64 vcore_stolen_time(struct kvmppc_vcore *vc, u64 now) return p; } -static void kvmppc_create_dtl_entry(struct kvm_vcpu *vcpu, - struct kvmppc_vcore *vc) +static void __kvmppc_create_dtl_entry(struct kvm_vcpu *vcpu, + unsigned int pcpu, u64 now, + unsigned long stolen) { struct dtl_entry *dt; struct lppaca *vpa; - unsigned long stolen; - unsigned long core_stolen; - u64 now; - unsigned long flags; dt = vcpu->arch.dtl_ptr; vpa = vcpu->arch.vpa.pinned_addr; - now = mftb(); - core_stolen = vcore_stolen_time(vc, now); - stolen = core_stolen - vcpu->arch.stolen_logged; - vcpu->arch.stolen_logged = core_stolen; - spin_lock_irqsave(&vcpu->arch.tbacct_lock, flags); - stolen += vcpu->arch.busy_stolen; - vcpu->arch.busy_stolen = 0; - spin_unlock_irqrestore(&vcpu->arch.tbacct_lock, flags); + if (!dt || !vpa) return; - memset(dt, 0, sizeof(struct dtl_entry)); + dt->dispatch_reason = 7; - dt->processor_id = cpu_to_be16(vc->pcpu + vcpu->arch.ptid); - dt->timebase = cpu_to_be64(now + vc->tb_offset); + dt->preempt_reason = 0; + dt->processor_id = cpu_to_be16(pcpu + vcpu->arch.ptid); dt->enqueue_to_dispatch_time = cpu_to_be32(stolen); + dt->ready_to_enqueue_time = 0; + dt->waiting_to_ready_time = 0; + dt->timebase = cpu_to_be64(now); + dt->fault_addr = 0; dt->srr0 = cpu_to_be64(kvmppc_get_pc(vcpu)); dt->srr1 = cpu_to_be64(vcpu->arch.shregs.msr); + ++dt; if (dt == vcpu->arch.dtl.pinned_end) dt = vcpu->arch.dtl.pinned_addr; @@ -723,6 +732,27 @@ static void kvmppc_create_dtl_entry(struct kvm_vcpu *vcpu, vcpu->arch.dtl.dirty = true; } +static void kvmppc_create_dtl_entry(struct kvm_vcpu *vcpu, + struct kvmppc_vcore *vc) +{ + unsigned long stolen; + unsigned long core_stolen; + u64 now; + unsigned long flags; + + now = mftb(); + + core_stolen = vcore_stolen_time(vc, now); + stolen = core_stolen - vcpu->arch.stolen_logged; + vcpu->arch.stolen_logged = core_stolen; + spin_lock_irqsave(&vcpu->arch.tbacct_lock, flags); + stolen += vcpu->arch.busy_stolen; + vcpu->arch.busy_stolen = 0; + spin_unlock_irqrestore(&vcpu->arch.tbacct_lock, flags); + + __kvmppc_create_dtl_entry(vcpu, vc->pcpu, now + vc->tb_offset, stolen); +} + /* See if there is a doorbell interrupt pending for a vcpu */ static bool kvmppc_doorbell_pending(struct kvm_vcpu *vcpu) { @@ -731,6 +761,8 @@ static bool kvmppc_doorbell_pending(struct kvm_vcpu *vcpu) if (vcpu->arch.doorbell_request) return true; + if (cpu_has_feature(CPU_FTR_ARCH_300)) + return false; /* * Ensure that the read of vcore->dpdes comes after the read * of vcpu->doorbell_request. This barrier matches the @@ -900,13 +932,14 @@ static int kvm_arch_vcpu_yield_to(struct kvm_vcpu *target) * mode handler is not called but no other threads are in the * source vcore. */ - - spin_lock(&vcore->lock); - if (target->arch.state == KVMPPC_VCPU_RUNNABLE && - vcore->vcore_state != VCORE_INACTIVE && - vcore->runner) - target = vcore->runner; - spin_unlock(&vcore->lock); + if (!cpu_has_feature(CPU_FTR_ARCH_300)) { + spin_lock(&vcore->lock); + if (target->arch.state == KVMPPC_VCPU_RUNNABLE && + vcore->vcore_state != VCORE_INACTIVE && + vcore->runner) + target = vcore->runner; + spin_unlock(&vcore->lock); + } return kvm_vcpu_yield_to(target); } @@ -1421,6 +1454,43 @@ static int kvmppc_emulate_doorbell_instr(struct kvm_vcpu *vcpu) return RESUME_GUEST; } +/* + * If the lppaca had pmcregs_in_use clear when we exited the guest, then + * HFSCR_PM is cleared for next entry. If the guest then tries to access + * the PMU SPRs, we get this facility unavailable interrupt. Putting HFSCR_PM + * back in the guest HFSCR will cause the next entry to load the PMU SPRs and + * allow the guest access to continue. + */ +static int kvmppc_pmu_unavailable(struct kvm_vcpu *vcpu) +{ + if (!(vcpu->arch.hfscr_permitted & HFSCR_PM)) + return EMULATE_FAIL; + + vcpu->arch.hfscr |= HFSCR_PM; + + return RESUME_GUEST; +} + +static int kvmppc_ebb_unavailable(struct kvm_vcpu *vcpu) +{ + if (!(vcpu->arch.hfscr_permitted & HFSCR_EBB)) + return EMULATE_FAIL; + + vcpu->arch.hfscr |= HFSCR_EBB; + + return RESUME_GUEST; +} + +static int kvmppc_tm_unavailable(struct kvm_vcpu *vcpu) +{ + if (!(vcpu->arch.hfscr_permitted & HFSCR_TM)) + return EMULATE_FAIL; + + vcpu->arch.hfscr |= HFSCR_TM; + + return RESUME_GUEST; +} + static int kvmppc_handle_exit_hv(struct kvm_vcpu *vcpu, struct task_struct *tsk) { @@ -1451,6 +1521,10 @@ static int kvmppc_handle_exit_hv(struct kvm_vcpu *vcpu, run->ready_for_interrupt_injection = 1; switch (vcpu->arch.trap) { /* We're good on these - the host merely wanted to get our attention */ + case BOOK3S_INTERRUPT_NESTED_HV_DECREMENTER: + WARN_ON_ONCE(1); /* Should never happen */ + vcpu->arch.trap = BOOK3S_INTERRUPT_HV_DECREMENTER; + fallthrough; case BOOK3S_INTERRUPT_HV_DECREMENTER: vcpu->stat.dec_exits++; r = RESUME_GUEST; @@ -1575,7 +1649,8 @@ static int kvmppc_handle_exit_hv(struct kvm_vcpu *vcpu, unsigned long vsid; long err; - if (vcpu->arch.fault_dsisr == HDSISR_CANARY) { + if (cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG) && + unlikely(vcpu->arch.fault_dsisr == HDSISR_CANARY)) { r = RESUME_GUEST; /* Just retry if it's the canary */ break; } @@ -1702,16 +1777,26 @@ static int kvmppc_handle_exit_hv(struct kvm_vcpu *vcpu, * to emulate. * Otherwise, we just generate a program interrupt to the guest. */ - case BOOK3S_INTERRUPT_H_FAC_UNAVAIL: + case BOOK3S_INTERRUPT_H_FAC_UNAVAIL: { + u64 cause = vcpu->arch.hfscr >> 56; + r = EMULATE_FAIL; - if (((vcpu->arch.hfscr >> 56) == FSCR_MSGP_LG) && - cpu_has_feature(CPU_FTR_ARCH_300)) - r = kvmppc_emulate_doorbell_instr(vcpu); + if (cpu_has_feature(CPU_FTR_ARCH_300)) { + if (cause == FSCR_MSGP_LG) + r = kvmppc_emulate_doorbell_instr(vcpu); + if (cause == FSCR_PM_LG) + r = kvmppc_pmu_unavailable(vcpu); + if (cause == FSCR_EBB_LG) + r = kvmppc_ebb_unavailable(vcpu); + if (cause == FSCR_TM_LG) + r = kvmppc_tm_unavailable(vcpu); + } if (r == EMULATE_FAIL) { kvmppc_core_queue_program(vcpu, SRR1_PROGILL); r = RESUME_GUEST; } break; + } case BOOK3S_INTERRUPT_HV_RM_HARD: r = RESUME_PASSTHROUGH; @@ -1768,6 +1853,12 @@ static int kvmppc_handle_nested_exit(struct kvm_vcpu *vcpu) vcpu->stat.ext_intr_exits++; r = RESUME_GUEST; break; + /* These need to go to the nested HV */ + case BOOK3S_INTERRUPT_NESTED_HV_DECREMENTER: + vcpu->arch.trap = BOOK3S_INTERRUPT_HV_DECREMENTER; + vcpu->stat.dec_exits++; + r = RESUME_HOST; + break; /* SR/HMI/PMI are HV interrupts that host has handled. Resume guest.*/ case BOOK3S_INTERRUPT_HMI: case BOOK3S_INTERRUPT_PERFMON: @@ -2096,8 +2187,10 @@ static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, * either vcore->dpdes or doorbell_request. * On POWER8, doorbell_request is 0. */ - *val = get_reg_val(id, vcpu->arch.vcore->dpdes | - vcpu->arch.doorbell_request); + if (cpu_has_feature(CPU_FTR_ARCH_300)) + *val = get_reg_val(id, vcpu->arch.doorbell_request); + else + *val = get_reg_val(id, vcpu->arch.vcore->dpdes); break; case KVM_REG_PPC_VTB: *val = get_reg_val(id, vcpu->arch.vcore->vtb); @@ -2238,8 +2331,7 @@ static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, *val = get_reg_val(id, vcpu->arch.vcore->arch_compat); break; case KVM_REG_PPC_DEC_EXPIRY: - *val = get_reg_val(id, vcpu->arch.dec_expires + - vcpu->arch.vcore->tb_offset); + *val = get_reg_val(id, vcpu->arch.dec_expires); break; case KVM_REG_PPC_ONLINE: *val = get_reg_val(id, vcpu->arch.online); @@ -2335,7 +2427,10 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, vcpu->arch.pspb = set_reg_val(id, *val); break; case KVM_REG_PPC_DPDES: - vcpu->arch.vcore->dpdes = set_reg_val(id, *val); + if (cpu_has_feature(CPU_FTR_ARCH_300)) + vcpu->arch.doorbell_request = set_reg_val(id, *val) & 1; + else + vcpu->arch.vcore->dpdes = set_reg_val(id, *val); break; case KVM_REG_PPC_VTB: vcpu->arch.vcore->vtb = set_reg_val(id, *val); @@ -2491,8 +2586,7 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, r = kvmppc_set_arch_compat(vcpu, set_reg_val(id, *val)); break; case KVM_REG_PPC_DEC_EXPIRY: - vcpu->arch.dec_expires = set_reg_val(id, *val) - - vcpu->arch.vcore->tb_offset; + vcpu->arch.dec_expires = set_reg_val(id, *val); break; case KVM_REG_PPC_ONLINE: i = set_reg_val(id, *val); @@ -2715,6 +2809,11 @@ static int kvmppc_core_vcpu_create_hv(struct kvm_vcpu *vcpu) #endif #endif vcpu->arch.mmcr[0] = MMCR0_FC; + if (cpu_has_feature(CPU_FTR_ARCH_31)) { + vcpu->arch.mmcr[0] |= MMCR0_PMCCEXT; + vcpu->arch.mmcra = MMCRA_BHRB_DISABLE; + } + vcpu->arch.ctrl = CTRL_RUNLATCH; /* default to host PVR, since we can't spoof it */ kvmppc_set_pvr_hv(vcpu, mfspr(SPRN_PVR)); @@ -2745,6 +2844,11 @@ static int kvmppc_core_vcpu_create_hv(struct kvm_vcpu *vcpu) vcpu->arch.hfscr_permitted = vcpu->arch.hfscr; + /* + * PM, EBB, TM are demand-faulted so start with it clear. + */ + vcpu->arch.hfscr &= ~(HFSCR_PM | HFSCR_EBB | HFSCR_TM); + kvmppc_mmu_book3s_hv_init(vcpu); vcpu->arch.state = KVMPPC_VCPU_NOTREADY; @@ -2869,13 +2973,13 @@ static void kvmppc_set_timer(struct kvm_vcpu *vcpu) unsigned long dec_nsec, now; now = get_tb(); - if (now > vcpu->arch.dec_expires) { + if (now > kvmppc_dec_expires_host_tb(vcpu)) { /* decrementer has already gone negative */ kvmppc_core_queue_dec(vcpu); kvmppc_core_prepare_to_enter(vcpu); return; } - dec_nsec = tb_to_ns(vcpu->arch.dec_expires - now); + dec_nsec = tb_to_ns(kvmppc_dec_expires_host_tb(vcpu) - now); hrtimer_start(&vcpu->arch.dec_timer, dec_nsec, HRTIMER_MODE_REL); vcpu->arch.timer_running = 1; } @@ -2883,14 +2987,14 @@ static void kvmppc_set_timer(struct kvm_vcpu *vcpu) extern int __kvmppc_vcore_entry(void); static void kvmppc_remove_runnable(struct kvmppc_vcore *vc, - struct kvm_vcpu *vcpu) + struct kvm_vcpu *vcpu, u64 tb) { u64 now; if (vcpu->arch.state != KVMPPC_VCPU_RUNNABLE) return; spin_lock_irq(&vcpu->arch.tbacct_lock); - now = mftb(); + now = tb; vcpu->arch.busy_stolen += vcore_stolen_time(vc, now) - vcpu->arch.stolen_logged; vcpu->arch.busy_preempt = now; @@ -2945,30 +3049,59 @@ static void kvmppc_release_hwthread(int cpu) tpaca->kvm_hstate.kvm_split_mode = NULL; } +static DEFINE_PER_CPU(struct kvm *, cpu_in_guest); + static void radix_flush_cpu(struct kvm *kvm, int cpu, struct kvm_vcpu *vcpu) { struct kvm_nested_guest *nested = vcpu->arch.nested; - cpumask_t *cpu_in_guest; + cpumask_t *need_tlb_flush; int i; + if (nested) + need_tlb_flush = &nested->need_tlb_flush; + else + need_tlb_flush = &kvm->arch.need_tlb_flush; + cpu = cpu_first_tlb_thread_sibling(cpu); - if (nested) { - cpumask_set_cpu(cpu, &nested->need_tlb_flush); - cpu_in_guest = &nested->cpu_in_guest; - } else { - cpumask_set_cpu(cpu, &kvm->arch.need_tlb_flush); - cpu_in_guest = &kvm->arch.cpu_in_guest; - } + for (i = cpu; i <= cpu_last_tlb_thread_sibling(cpu); + i += cpu_tlb_thread_sibling_step()) + cpumask_set_cpu(i, need_tlb_flush); + /* - * Make sure setting of bit in need_tlb_flush precedes - * testing of cpu_in_guest bits. The matching barrier on - * the other side is the first smp_mb() in kvmppc_run_core(). + * Make sure setting of bit in need_tlb_flush precedes testing of + * cpu_in_guest. The matching barrier on the other side is hwsync + * when switching to guest MMU mode, which happens between + * cpu_in_guest being set to the guest kvm, and need_tlb_flush bit + * being tested. */ smp_mb(); + for (i = cpu; i <= cpu_last_tlb_thread_sibling(cpu); - i += cpu_tlb_thread_sibling_step()) - if (cpumask_test_cpu(i, cpu_in_guest)) + i += cpu_tlb_thread_sibling_step()) { + struct kvm *running = *per_cpu_ptr(&cpu_in_guest, i); + + if (running == kvm) smp_call_function_single(i, do_nothing, NULL, 1); + } +} + +static void do_migrate_away_vcpu(void *arg) +{ + struct kvm_vcpu *vcpu = arg; + struct kvm *kvm = vcpu->kvm; + + /* + * If the guest has GTSE, it may execute tlbie, so do a eieio; tlbsync; + * ptesync sequence on the old CPU before migrating to a new one, in + * case we interrupted the guest between a tlbie ; eieio ; + * tlbsync; ptesync sequence. + * + * Otherwise, ptesync is sufficient for ordering tlbiel sequences. + */ + if (kvm->arch.lpcr & LPCR_GTSE) + asm volatile("eieio; tlbsync; ptesync"); + else + asm volatile("ptesync"); } static void kvmppc_prepare_radix_vcpu(struct kvm_vcpu *vcpu, int pcpu) @@ -2994,14 +3127,17 @@ static void kvmppc_prepare_radix_vcpu(struct kvm_vcpu *vcpu, int pcpu) * can move around between pcpus. To cope with this, when * a vcpu moves from one pcpu to another, we need to tell * any vcpus running on the same core as this vcpu previously - * ran to flush the TLB. The TLB is shared between threads, - * so we use a single bit in .need_tlb_flush for all 4 threads. + * ran to flush the TLB. */ if (prev_cpu != pcpu) { - if (prev_cpu >= 0 && - cpu_first_tlb_thread_sibling(prev_cpu) != - cpu_first_tlb_thread_sibling(pcpu)) - radix_flush_cpu(kvm, prev_cpu, vcpu); + if (prev_cpu >= 0) { + if (cpu_first_tlb_thread_sibling(prev_cpu) != + cpu_first_tlb_thread_sibling(pcpu)) + radix_flush_cpu(kvm, prev_cpu, vcpu); + + smp_call_function_single(prev_cpu, + do_migrate_away_vcpu, vcpu, 1); + } if (nested) nested->prev_cpu[vcpu->arch.nested_vcpu_id] = pcpu; else @@ -3013,7 +3149,6 @@ static void kvmppc_start_thread(struct kvm_vcpu *vcpu, struct kvmppc_vcore *vc) { int cpu; struct paca_struct *tpaca; - struct kvm *kvm = vc->kvm; cpu = vc->pcpu; if (vcpu) { @@ -3024,7 +3159,6 @@ static void kvmppc_start_thread(struct kvm_vcpu *vcpu, struct kvmppc_vcore *vc) cpu += vcpu->arch.ptid; vcpu->cpu = vc->pcpu; vcpu->arch.thread_cpu = cpu; - cpumask_set_cpu(cpu, &kvm->arch.cpu_in_guest); } tpaca = paca_ptrs[cpu]; tpaca->kvm_hstate.kvm_vcpu = vcpu; @@ -3125,6 +3259,8 @@ static void kvmppc_vcore_preempt(struct kvmppc_vcore *vc) { struct preempted_vcore_list *lp = this_cpu_ptr(&preempted_vcores); + WARN_ON_ONCE(cpu_has_feature(CPU_FTR_ARCH_300)); + vc->vcore_state = VCORE_PREEMPT; vc->pcpu = smp_processor_id(); if (vc->num_threads < threads_per_vcore(vc->kvm)) { @@ -3134,14 +3270,16 @@ static void kvmppc_vcore_preempt(struct kvmppc_vcore *vc) } /* Start accumulating stolen time */ - kvmppc_core_start_stolen(vc); + kvmppc_core_start_stolen(vc, mftb()); } static void kvmppc_vcore_end_preempt(struct kvmppc_vcore *vc) { struct preempted_vcore_list *lp; - kvmppc_core_end_stolen(vc); + WARN_ON_ONCE(cpu_has_feature(CPU_FTR_ARCH_300)); + + kvmppc_core_end_stolen(vc, mftb()); if (!list_empty(&vc->preempt_list)) { lp = &per_cpu(preempted_vcores, vc->pcpu); spin_lock(&lp->lock); @@ -3268,7 +3406,7 @@ static void prepare_threads(struct kvmppc_vcore *vc) vcpu->arch.ret = RESUME_GUEST; else continue; - kvmppc_remove_runnable(vc, vcpu); + kvmppc_remove_runnable(vc, vcpu, mftb()); wake_up(&vcpu->arch.cpu_run); } } @@ -3287,7 +3425,7 @@ static void collect_piggybacks(struct core_info *cip, int target_threads) list_del_init(&pvc->preempt_list); if (pvc->runner == NULL) { pvc->vcore_state = VCORE_INACTIVE; - kvmppc_core_end_stolen(pvc); + kvmppc_core_end_stolen(pvc, mftb()); } spin_unlock(&pvc->lock); continue; @@ -3296,7 +3434,7 @@ static void collect_piggybacks(struct core_info *cip, int target_threads) spin_unlock(&pvc->lock); continue; } - kvmppc_core_end_stolen(pvc); + kvmppc_core_end_stolen(pvc, mftb()); pvc->vcore_state = VCORE_PIGGYBACK; if (cip->total_threads >= target_threads) break; @@ -3340,7 +3478,7 @@ static void post_guest_process(struct kvmppc_vcore *vc, bool is_master) */ spin_unlock(&vc->lock); /* cancel pending dec exception if dec is positive */ - if (now < vcpu->arch.dec_expires && + if (now < kvmppc_dec_expires_host_tb(vcpu) && kvmppc_core_pending_dec(vcpu)) kvmppc_core_dequeue_dec(vcpu); @@ -3363,7 +3501,7 @@ static void post_guest_process(struct kvmppc_vcore *vc, bool is_master) else ++still_running; } else { - kvmppc_remove_runnable(vc, vcpu); + kvmppc_remove_runnable(vc, vcpu, mftb()); wake_up(&vcpu->arch.cpu_run); } } @@ -3372,7 +3510,7 @@ static void post_guest_process(struct kvmppc_vcore *vc, bool is_master) kvmppc_vcore_preempt(vc); } else if (vc->runner) { vc->vcore_state = VCORE_PREEMPT; - kvmppc_core_start_stolen(vc); + kvmppc_core_start_stolen(vc, mftb()); } else { vc->vcore_state = VCORE_INACTIVE; } @@ -3503,7 +3641,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) ((vc->num_threads > threads_per_subcore) || !on_primary_thread())) { for_each_runnable_thread(i, vcpu, vc) { vcpu->arch.ret = -EBUSY; - kvmppc_remove_runnable(vc, vcpu); + kvmppc_remove_runnable(vc, vcpu, mftb()); wake_up(&vcpu->arch.cpu_run); } goto out; @@ -3748,7 +3886,6 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) kvmppc_release_hwthread(pcpu + i); if (sip && sip->napped[i]) kvmppc_ipi_thread(pcpu + i); - cpumask_clear_cpu(pcpu + i, &vc->kvm->arch.cpu_in_guest); } spin_unlock(&vc->lock); @@ -3770,211 +3907,137 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) trace_kvmppc_run_core(vc, 1); } -static void load_spr_state(struct kvm_vcpu *vcpu) -{ - mtspr(SPRN_DSCR, vcpu->arch.dscr); - mtspr(SPRN_IAMR, vcpu->arch.iamr); - mtspr(SPRN_PSPB, vcpu->arch.pspb); - mtspr(SPRN_FSCR, vcpu->arch.fscr); - mtspr(SPRN_TAR, vcpu->arch.tar); - mtspr(SPRN_EBBHR, vcpu->arch.ebbhr); - mtspr(SPRN_EBBRR, vcpu->arch.ebbrr); - mtspr(SPRN_BESCR, vcpu->arch.bescr); - mtspr(SPRN_TIDR, vcpu->arch.tid); - mtspr(SPRN_AMR, vcpu->arch.amr); - mtspr(SPRN_UAMOR, vcpu->arch.uamor); - - /* - * DAR, DSISR, and for nested HV, SPRGs must be set with MSR[RI] - * clear (or hstate set appropriately to catch those registers - * being clobbered if we take a MCE or SRESET), so those are done - * later. - */ - - if (!(vcpu->arch.ctrl & 1)) - mtspr(SPRN_CTRLT, mfspr(SPRN_CTRLF) & ~1); -} - -static void store_spr_state(struct kvm_vcpu *vcpu) -{ - vcpu->arch.ctrl = mfspr(SPRN_CTRLF); - - vcpu->arch.iamr = mfspr(SPRN_IAMR); - vcpu->arch.pspb = mfspr(SPRN_PSPB); - vcpu->arch.fscr = mfspr(SPRN_FSCR); - vcpu->arch.tar = mfspr(SPRN_TAR); - vcpu->arch.ebbhr = mfspr(SPRN_EBBHR); - vcpu->arch.ebbrr = mfspr(SPRN_EBBRR); - vcpu->arch.bescr = mfspr(SPRN_BESCR); - vcpu->arch.tid = mfspr(SPRN_TIDR); - vcpu->arch.amr = mfspr(SPRN_AMR); - vcpu->arch.uamor = mfspr(SPRN_UAMOR); - vcpu->arch.dscr = mfspr(SPRN_DSCR); -} - -/* - * Privileged (non-hypervisor) host registers to save. - */ -struct p9_host_os_sprs { - unsigned long dscr; - unsigned long tidr; - unsigned long iamr; - unsigned long amr; - unsigned long fscr; -}; - -static void save_p9_host_os_sprs(struct p9_host_os_sprs *host_os_sprs) -{ - host_os_sprs->dscr = mfspr(SPRN_DSCR); - host_os_sprs->tidr = mfspr(SPRN_TIDR); - host_os_sprs->iamr = mfspr(SPRN_IAMR); - host_os_sprs->amr = mfspr(SPRN_AMR); - host_os_sprs->fscr = mfspr(SPRN_FSCR); -} - -/* vcpu guest regs must already be saved */ -static void restore_p9_host_os_sprs(struct kvm_vcpu *vcpu, - struct p9_host_os_sprs *host_os_sprs) -{ - mtspr(SPRN_PSPB, 0); - mtspr(SPRN_UAMOR, 0); - - mtspr(SPRN_DSCR, host_os_sprs->dscr); - mtspr(SPRN_TIDR, host_os_sprs->tidr); - mtspr(SPRN_IAMR, host_os_sprs->iamr); - - if (host_os_sprs->amr != vcpu->arch.amr) - mtspr(SPRN_AMR, host_os_sprs->amr); - - if (host_os_sprs->fscr != vcpu->arch.fscr) - mtspr(SPRN_FSCR, host_os_sprs->fscr); - - /* Save guest CTRL register, set runlatch to 1 */ - if (!(vcpu->arch.ctrl & 1)) - mtspr(SPRN_CTRLT, 1); -} - static inline bool hcall_is_xics(unsigned long req) { return req == H_EOI || req == H_CPPR || req == H_IPI || req == H_IPOLL || req == H_XIRR || req == H_XIRR_X; } -/* - * Guest entry for POWER9 and later CPUs. - */ -static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit, - unsigned long lpcr) +static void vcpu_vpa_increment_dispatch(struct kvm_vcpu *vcpu) +{ + struct lppaca *lp = vcpu->arch.vpa.pinned_addr; + if (lp) { + u32 yield_count = be32_to_cpu(lp->yield_count) + 1; + lp->yield_count = cpu_to_be32(yield_count); + vcpu->arch.vpa.dirty = 1; + } +} + +/* call our hypervisor to load up HV regs and go */ +static int kvmhv_vcpu_entry_p9_nested(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpcr, u64 *tb) { struct kvmppc_vcore *vc = vcpu->arch.vcore; + unsigned long host_psscr; + unsigned long msr; + struct hv_guest_state hvregs; struct p9_host_os_sprs host_os_sprs; s64 dec; - u64 tb; - int trap, save_pmu; - - WARN_ON_ONCE(vcpu->arch.ceded); + int trap; - dec = mfspr(SPRN_DEC); - tb = mftb(); - if (dec < 0) - return BOOK3S_INTERRUPT_HV_DECREMENTER; - local_paca->kvm_hstate.dec_expires = dec + tb; - if (local_paca->kvm_hstate.dec_expires < time_limit) - time_limit = local_paca->kvm_hstate.dec_expires; + msr = mfmsr(); save_p9_host_os_sprs(&host_os_sprs); - kvmhv_save_host_pmu(); /* saves it to PACA kvm_hstate */ + /* + * We need to save and restore the guest visible part of the + * psscr (i.e. using SPRN_PSSCR_PR) since the hypervisor + * doesn't do this for us. Note only required if pseries since + * this is done in kvmhv_vcpu_entry_p9() below otherwise. + */ + host_psscr = mfspr(SPRN_PSSCR_PR); - kvmppc_subcore_enter_guest(); + kvmppc_msr_hard_disable_set_facilities(vcpu, msr); + if (lazy_irq_pending()) + return 0; - vc->entry_exit_map = 1; - vc->in_guest = 1; + if (unlikely(load_vcpu_state(vcpu, &host_os_sprs))) + msr = mfmsr(); /* TM restore can update msr */ - if (vcpu->arch.vpa.pinned_addr) { - struct lppaca *lp = vcpu->arch.vpa.pinned_addr; - u32 yield_count = be32_to_cpu(lp->yield_count) + 1; - lp->yield_count = cpu_to_be32(yield_count); - vcpu->arch.vpa.dirty = 1; - } - - if (cpu_has_feature(CPU_FTR_TM) || - cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST)) - kvmppc_restore_tm_hv(vcpu, vcpu->arch.shregs.msr, true); + if (vcpu->arch.psscr != host_psscr) + mtspr(SPRN_PSSCR_PR, vcpu->arch.psscr); -#ifdef CONFIG_PPC_PSERIES - if (kvmhv_on_pseries()) { - barrier(); - if (vcpu->arch.vpa.pinned_addr) { - struct lppaca *lp = vcpu->arch.vpa.pinned_addr; - get_lppaca()->pmcregs_in_use = lp->pmcregs_in_use; - } else { - get_lppaca()->pmcregs_in_use = 1; - } - barrier(); + kvmhv_save_hv_regs(vcpu, &hvregs); + hvregs.lpcr = lpcr; + vcpu->arch.regs.msr = vcpu->arch.shregs.msr; + hvregs.version = HV_GUEST_STATE_VERSION; + if (vcpu->arch.nested) { + hvregs.lpid = vcpu->arch.nested->shadow_lpid; + hvregs.vcpu_token = vcpu->arch.nested_vcpu_id; + } else { + hvregs.lpid = vcpu->kvm->arch.lpid; + hvregs.vcpu_token = vcpu->vcpu_id; } -#endif - kvmhv_load_guest_pmu(vcpu); - - msr_check_and_set(MSR_FP | MSR_VEC | MSR_VSX); - load_fp_state(&vcpu->arch.fp); -#ifdef CONFIG_ALTIVEC - load_vr_state(&vcpu->arch.vr); -#endif - mtspr(SPRN_VRSAVE, vcpu->arch.vrsave); - - load_spr_state(vcpu); + hvregs.hdec_expiry = time_limit; /* - * When setting DEC, we must always deal with irq_work_raise via NMI vs - * setting DEC. The problem occurs right as we switch into guest mode - * if a NMI hits and sets pending work and sets DEC, then that will - * apply to the guest and not bring us back to the host. + * When setting DEC, we must always deal with irq_work_raise + * via NMI vs setting DEC. The problem occurs right as we + * switch into guest mode if a NMI hits and sets pending work + * and sets DEC, then that will apply to the guest and not + * bring us back to the host. * - * irq_work_raise could check a flag (or possibly LPCR[HDICE] for - * example) and set HDEC to 1? That wouldn't solve the nested hv - * case which needs to abort the hcall or zero the time limit. + * irq_work_raise could check a flag (or possibly LPCR[HDICE] + * for example) and set HDEC to 1? That wouldn't solve the + * nested hv case which needs to abort the hcall or zero the + * time limit. * * XXX: Another day's problem. */ - mtspr(SPRN_DEC, vcpu->arch.dec_expires - mftb()); + mtspr(SPRN_DEC, kvmppc_dec_expires_host_tb(vcpu) - *tb); + + mtspr(SPRN_DAR, vcpu->arch.shregs.dar); + mtspr(SPRN_DSISR, vcpu->arch.shregs.dsisr); + switch_pmu_to_guest(vcpu, &host_os_sprs); + trap = plpar_hcall_norets(H_ENTER_NESTED, __pa(&hvregs), + __pa(&vcpu->arch.regs)); + kvmhv_restore_hv_return_state(vcpu, &hvregs); + switch_pmu_to_host(vcpu, &host_os_sprs); + vcpu->arch.shregs.msr = vcpu->arch.regs.msr; + vcpu->arch.shregs.dar = mfspr(SPRN_DAR); + vcpu->arch.shregs.dsisr = mfspr(SPRN_DSISR); + vcpu->arch.psscr = mfspr(SPRN_PSSCR_PR); + + store_vcpu_state(vcpu); - if (kvmhv_on_pseries()) { - /* - * We need to save and restore the guest visible part of the - * psscr (i.e. using SPRN_PSSCR_PR) since the hypervisor - * doesn't do this for us. Note only required if pseries since - * this is done in kvmhv_vcpu_entry_p9() below otherwise. - */ - unsigned long host_psscr; - /* call our hypervisor to load up HV regs and go */ - struct hv_guest_state hvregs; + dec = mfspr(SPRN_DEC); + if (!(lpcr & LPCR_LD)) /* Sign extend if not using large decrementer */ + dec = (s32) dec; + *tb = mftb(); + vcpu->arch.dec_expires = dec + (*tb + vc->tb_offset); - host_psscr = mfspr(SPRN_PSSCR_PR); - mtspr(SPRN_PSSCR_PR, vcpu->arch.psscr); - kvmhv_save_hv_regs(vcpu, &hvregs); - hvregs.lpcr = lpcr; - vcpu->arch.regs.msr = vcpu->arch.shregs.msr; - hvregs.version = HV_GUEST_STATE_VERSION; - if (vcpu->arch.nested) { - hvregs.lpid = vcpu->arch.nested->shadow_lpid; - hvregs.vcpu_token = vcpu->arch.nested_vcpu_id; - } else { - hvregs.lpid = vcpu->kvm->arch.lpid; - hvregs.vcpu_token = vcpu->vcpu_id; - } - hvregs.hdec_expiry = time_limit; - mtspr(SPRN_DAR, vcpu->arch.shregs.dar); - mtspr(SPRN_DSISR, vcpu->arch.shregs.dsisr); - trap = plpar_hcall_norets(H_ENTER_NESTED, __pa(&hvregs), - __pa(&vcpu->arch.regs)); - kvmhv_restore_hv_return_state(vcpu, &hvregs); - vcpu->arch.shregs.msr = vcpu->arch.regs.msr; - vcpu->arch.shregs.dar = mfspr(SPRN_DAR); - vcpu->arch.shregs.dsisr = mfspr(SPRN_DSISR); - vcpu->arch.psscr = mfspr(SPRN_PSSCR_PR); + timer_rearm_host_dec(*tb); + + restore_p9_host_os_sprs(vcpu, &host_os_sprs); + if (vcpu->arch.psscr != host_psscr) mtspr(SPRN_PSSCR_PR, host_psscr); + return trap; +} + +/* + * Guest entry for POWER9 and later CPUs. + */ +static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit, + unsigned long lpcr, u64 *tb) +{ + u64 next_timer; + int trap; + + next_timer = timer_get_next_tb(); + if (*tb >= next_timer) + return BOOK3S_INTERRUPT_HV_DECREMENTER; + if (next_timer < time_limit) + time_limit = next_timer; + else if (*tb >= time_limit) /* nested time limit */ + return BOOK3S_INTERRUPT_NESTED_HV_DECREMENTER; + + vcpu->arch.ceded = 0; + + vcpu_vpa_increment_dispatch(vcpu); + + if (kvmhv_on_pseries()) { + trap = kvmhv_vcpu_entry_p9_nested(vcpu, time_limit, lpcr, tb); + /* H_CEDE has to be handled now, not later */ if (trap == BOOK3S_INTERRUPT_SYSCALL && !vcpu->arch.nested && kvmppc_get_gpr(vcpu, 3) == H_CEDE) { @@ -3982,9 +4045,16 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit, kvmppc_set_gpr(vcpu, 3, 0); trap = 0; } + } else { + struct kvm *kvm = vcpu->kvm; + kvmppc_xive_push_vcpu(vcpu); - trap = kvmhv_vcpu_entry_p9(vcpu, time_limit, lpcr); + + __this_cpu_write(cpu_in_guest, kvm); + trap = kvmhv_vcpu_entry_p9(vcpu, time_limit, lpcr, tb); + __this_cpu_write(cpu_in_guest, NULL); + if (trap == BOOK3S_INTERRUPT_SYSCALL && !vcpu->arch.nested && !(vcpu->arch.shregs.msr & MSR_PR)) { unsigned long req = kvmppc_get_gpr(vcpu, 3); @@ -4009,65 +4079,11 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit, } kvmppc_xive_pull_vcpu(vcpu); - if (kvm_is_radix(vcpu->kvm)) + if (kvm_is_radix(kvm)) vcpu->arch.slb_max = 0; } - dec = mfspr(SPRN_DEC); - if (!(lpcr & LPCR_LD)) /* Sign extend if not using large decrementer */ - dec = (s32) dec; - tb = mftb(); - vcpu->arch.dec_expires = dec + tb; - vcpu->cpu = -1; - vcpu->arch.thread_cpu = -1; - - store_spr_state(vcpu); - - restore_p9_host_os_sprs(vcpu, &host_os_sprs); - - msr_check_and_set(MSR_FP | MSR_VEC | MSR_VSX); - store_fp_state(&vcpu->arch.fp); -#ifdef CONFIG_ALTIVEC - store_vr_state(&vcpu->arch.vr); -#endif - vcpu->arch.vrsave = mfspr(SPRN_VRSAVE); - - if (cpu_has_feature(CPU_FTR_TM) || - cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST)) - kvmppc_save_tm_hv(vcpu, vcpu->arch.shregs.msr, true); - - save_pmu = 1; - if (vcpu->arch.vpa.pinned_addr) { - struct lppaca *lp = vcpu->arch.vpa.pinned_addr; - u32 yield_count = be32_to_cpu(lp->yield_count) + 1; - lp->yield_count = cpu_to_be32(yield_count); - vcpu->arch.vpa.dirty = 1; - save_pmu = lp->pmcregs_in_use; - } - /* Must save pmu if this guest is capable of running nested guests */ - save_pmu |= nesting_enabled(vcpu->kvm); - - kvmhv_save_guest_pmu(vcpu, save_pmu); -#ifdef CONFIG_PPC_PSERIES - if (kvmhv_on_pseries()) { - barrier(); - get_lppaca()->pmcregs_in_use = ppc_get_pmu_inuse(); - barrier(); - } -#endif - - vc->entry_exit_map = 0x101; - vc->in_guest = 0; - - mtspr(SPRN_DEC, local_paca->kvm_hstate.dec_expires - mftb()); - /* We may have raced with new irq work */ - if (test_irq_work_pending()) - set_dec(1); - mtspr(SPRN_SPRG_VDSO_WRITE, local_paca->sprg_vdso); - - kvmhv_load_host_pmu(); - - kvmppc_subcore_exit_guest(); + vcpu_vpa_increment_dispatch(vcpu); return trap; } @@ -4132,6 +4148,13 @@ static bool kvmppc_vcpu_woken(struct kvm_vcpu *vcpu) return false; } +static bool kvmppc_vcpu_check_block(struct kvm_vcpu *vcpu) +{ + if (!vcpu->arch.ceded || kvmppc_vcpu_woken(vcpu)) + return true; + return false; +} + /* * Check to see if any of the runnable vcpus on the vcore have pending * exceptions or are no longer ceded @@ -4142,7 +4165,7 @@ static int kvmppc_vcore_check_block(struct kvmppc_vcore *vc) int i; for_each_runnable_thread(i, vcpu, vc) { - if (!vcpu->arch.ceded || kvmppc_vcpu_woken(vcpu)) + if (kvmppc_vcpu_check_block(vcpu)) return 1; } @@ -4159,6 +4182,8 @@ static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc) int do_sleep = 1; u64 block_ns; + WARN_ON_ONCE(cpu_has_feature(CPU_FTR_ARCH_300)); + /* Poll for pending exceptions and ceded state */ cur = start_poll = ktime_get(); if (vc->halt_poll_ns) { @@ -4355,7 +4380,7 @@ static int kvmppc_run_vcpu(struct kvm_vcpu *vcpu) for_each_runnable_thread(i, v, vc) { kvmppc_core_prepare_to_enter(v); if (signal_pending(v->arch.run_task)) { - kvmppc_remove_runnable(vc, v); + kvmppc_remove_runnable(vc, v, mftb()); v->stat.signal_exits++; v->run->exit_reason = KVM_EXIT_INTR; v->arch.ret = -EINTR; @@ -4396,7 +4421,7 @@ static int kvmppc_run_vcpu(struct kvm_vcpu *vcpu) kvmppc_vcore_end_preempt(vc); if (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE) { - kvmppc_remove_runnable(vc, vcpu); + kvmppc_remove_runnable(vc, vcpu, mftb()); vcpu->stat.signal_exits++; run->exit_reason = KVM_EXIT_INTR; vcpu->arch.ret = -EINTR; @@ -4417,12 +4442,15 @@ static int kvmppc_run_vcpu(struct kvm_vcpu *vcpu) int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpcr) { + struct rcuwait *wait = kvm_arch_vcpu_get_wait(vcpu); struct kvm_run *run = vcpu->run; int trap, r, pcpu; int srcu_idx; struct kvmppc_vcore *vc; struct kvm *kvm = vcpu->kvm; struct kvm_nested_guest *nested = vcpu->arch.nested; + unsigned long flags; + u64 tb; trace_kvmppc_run_vcpu_enter(vcpu); @@ -4433,16 +4461,11 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit, vc = vcpu->arch.vcore; vcpu->arch.ceded = 0; vcpu->arch.run_task = current; - vcpu->arch.stolen_logged = vcore_stolen_time(vc, mftb()); vcpu->arch.state = KVMPPC_VCPU_RUNNABLE; - vcpu->arch.busy_preempt = TB_NIL; vcpu->arch.last_inst = KVM_INST_FETCH_FAILED; - vc->runnable_threads[0] = vcpu; - vc->n_runnable = 1; - vc->runner = vcpu; /* See if the MMU is ready to go */ - if (!kvm->arch.mmu_ready) { + if (unlikely(!kvm->arch.mmu_ready)) { r = kvmhv_setup_mmu(vcpu); if (r) { run->exit_reason = KVM_EXIT_FAIL_ENTRY; @@ -4457,29 +4480,21 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit, kvmppc_update_vpas(vcpu); - init_vcore_to_run(vc); - vc->preempt_tb = TB_NIL; - preempt_disable(); pcpu = smp_processor_id(); - vc->pcpu = pcpu; if (kvm_is_radix(kvm)) kvmppc_prepare_radix_vcpu(vcpu, pcpu); - local_irq_disable(); - hard_irq_disable(); + /* flags save not required, but irq_pmu has no disable/enable API */ + powerpc_local_irq_pmu_save(flags); + if (signal_pending(current)) goto sigpend; - if (lazy_irq_pending() || need_resched() || !kvm->arch.mmu_ready) + if (need_resched() || !kvm->arch.mmu_ready) goto out; if (!nested) { kvmppc_core_prepare_to_enter(vcpu); - if (vcpu->arch.doorbell_request) { - vc->dpdes = 1; - smp_wmb(); - vcpu->arch.doorbell_request = 0; - } if (test_bit(BOOK3S_IRQPRIO_EXTERNAL, &vcpu->arch.pending_exceptions)) lpcr |= LPCR_MER; @@ -4490,16 +4505,23 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit, goto out; } - kvmppc_clear_host_core(pcpu); + if (vcpu->arch.timer_running) { + hrtimer_try_to_cancel(&vcpu->arch.dec_timer); + vcpu->arch.timer_running = 0; + } - local_paca->kvm_hstate.napping = 0; - local_paca->kvm_hstate.kvm_split_mode = NULL; - kvmppc_start_thread(vcpu, vc); - kvmppc_create_dtl_entry(vcpu, vc); - trace_kvm_guest_enter(vcpu); + tb = mftb(); - vc->vcore_state = VCORE_RUNNING; - trace_kvmppc_run_core(vc, 0); + vcpu->cpu = pcpu; + vcpu->arch.thread_cpu = pcpu; + vc->pcpu = pcpu; + local_paca->kvm_hstate.kvm_vcpu = vcpu; + local_paca->kvm_hstate.ptid = 0; + local_paca->kvm_hstate.fake_suspend = 0; + + __kvmppc_create_dtl_entry(vcpu, pcpu, tb + vc->tb_offset, 0); + + trace_kvm_guest_enter(vcpu); guest_enter_irqoff(); @@ -4510,7 +4532,7 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit, /* Tell lockdep that we're about to enable interrupts */ trace_hardirqs_on(); - trap = kvmhv_p9_guest_entry(vcpu, time_limit, lpcr); + trap = kvmhv_p9_guest_entry(vcpu, time_limit, lpcr, &tb); vcpu->arch.trap = trap; trace_hardirqs_off(); @@ -4521,8 +4543,6 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit, set_irq_happened(trap); - kvmppc_set_host_core(pcpu); - context_tracking_guest_exit(); if (!vtime_accounting_enabled_this_cpu()) { local_irq_enable(); @@ -4538,9 +4558,10 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit, } vtime_account_guest_exit(); - local_irq_enable(); + vcpu->cpu = -1; + vcpu->arch.thread_cpu = -1; - cpumask_clear_cpu(pcpu, &kvm->arch.cpu_in_guest); + powerpc_local_irq_pmu_restore(flags); preempt_enable(); @@ -4550,7 +4571,7 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit, * by L2 and the L1 decrementer is provided in hdec_expires */ if (kvmppc_core_pending_dec(vcpu) && - ((get_tb() < vcpu->arch.dec_expires) || + ((tb < kvmppc_dec_expires_host_tb(vcpu)) || (trap == BOOK3S_INTERRUPT_SYSCALL && kvmppc_get_gpr(vcpu, 3) == H_ENTER_NESTED))) kvmppc_core_dequeue_dec(vcpu); @@ -4565,28 +4586,31 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit, } vcpu->arch.ret = r; - if (is_kvmppc_resume_guest(r) && vcpu->arch.ceded && - !kvmppc_vcpu_woken(vcpu)) { + if (is_kvmppc_resume_guest(r) && !kvmppc_vcpu_check_block(vcpu)) { kvmppc_set_timer(vcpu); - while (vcpu->arch.ceded && !kvmppc_vcpu_woken(vcpu)) { + + prepare_to_rcuwait(wait); + for (;;) { + set_current_state(TASK_INTERRUPTIBLE); if (signal_pending(current)) { vcpu->stat.signal_exits++; run->exit_reason = KVM_EXIT_INTR; vcpu->arch.ret = -EINTR; break; } - spin_lock(&vc->lock); - kvmppc_vcore_blocked(vc); - spin_unlock(&vc->lock); + + if (kvmppc_vcpu_check_block(vcpu)) + break; + + trace_kvmppc_vcore_blocked(vc, 0); + schedule(); + trace_kvmppc_vcore_blocked(vc, 1); } + finish_rcuwait(wait); } vcpu->arch.ceded = 0; - vc->vcore_state = VCORE_INACTIVE; - trace_kvmppc_run_core(vc, 1); - done: - kvmppc_remove_runnable(vc, vcpu); trace_kvmppc_run_vcpu_exit(vcpu); return vcpu->arch.ret; @@ -4596,7 +4620,7 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit, run->exit_reason = KVM_EXIT_INTR; vcpu->arch.ret = -EINTR; out: - local_irq_enable(); + powerpc_local_irq_pmu_restore(flags); preempt_enable(); goto done; } @@ -4606,23 +4630,25 @@ static int kvmppc_vcpu_run_hv(struct kvm_vcpu *vcpu) struct kvm_run *run = vcpu->run; int r; int srcu_idx; - unsigned long ebb_regs[3] = {}; /* shut up GCC */ - unsigned long user_tar = 0; - unsigned int user_vrsave; struct kvm *kvm; + unsigned long msr; if (!vcpu->arch.sane) { run->exit_reason = KVM_EXIT_INTERNAL_ERROR; return -EINVAL; } + /* No need to go into the guest when all we'll do is come back out */ + if (signal_pending(current)) { + run->exit_reason = KVM_EXIT_INTR; + return -EINTR; + } + +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM /* * Don't allow entry with a suspended transaction, because * the guest entry/exit code will lose it. - * If the guest has TM enabled, save away their TM-related SPRs - * (they will get restored by the TM unavailable interrupt). */ -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM if (cpu_has_feature(CPU_FTR_TM) && current->thread.regs && (current->thread.regs->msr & MSR_TM)) { if (MSR_TM_ACTIVE(current->thread.regs->msr)) { @@ -4630,12 +4656,6 @@ static int kvmppc_vcpu_run_hv(struct kvm_vcpu *vcpu) run->fail_entry.hardware_entry_failure_reason = 0; return -EINVAL; } - /* Enable TM so we can read the TM SPRs */ - mtmsr(mfmsr() | MSR_TM); - current->thread.tm_tfhar = mfspr(SPRN_TFHAR); - current->thread.tm_tfiar = mfspr(SPRN_TFIAR); - current->thread.tm_texasr = mfspr(SPRN_TEXASR); - current->thread.regs->msr &= ~MSR_TM; } #endif @@ -4650,29 +4670,30 @@ static int kvmppc_vcpu_run_hv(struct kvm_vcpu *vcpu) kvmppc_core_prepare_to_enter(vcpu); - /* No need to go into the guest when all we'll do is come back out */ - if (signal_pending(current)) { - run->exit_reason = KVM_EXIT_INTR; - return -EINTR; - } - kvm = vcpu->kvm; atomic_inc(&kvm->arch.vcpus_running); /* Order vcpus_running vs. mmu_ready, see kvmppc_alloc_reset_hpt */ smp_mb(); - flush_all_to_thread(current); + msr = 0; + if (IS_ENABLED(CONFIG_PPC_FPU)) + msr |= MSR_FP; + if (cpu_has_feature(CPU_FTR_ALTIVEC)) + msr |= MSR_VEC; + if (cpu_has_feature(CPU_FTR_VSX)) + msr |= MSR_VSX; + if ((cpu_has_feature(CPU_FTR_TM) || + cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST)) && + (vcpu->arch.hfscr & HFSCR_TM)) + msr |= MSR_TM; + msr = msr_check_and_set(msr); - /* Save userspace EBB and other register values */ - if (cpu_has_feature(CPU_FTR_ARCH_207S)) { - ebb_regs[0] = mfspr(SPRN_EBBHR); - ebb_regs[1] = mfspr(SPRN_EBBRR); - ebb_regs[2] = mfspr(SPRN_BESCR); - user_tar = mfspr(SPRN_TAR); - } - user_vrsave = mfspr(SPRN_VRSAVE); + kvmppc_save_user_regs(); - vcpu->arch.waitp = &vcpu->arch.vcore->wait; + kvmppc_save_current_sprs(); + + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + vcpu->arch.waitp = &vcpu->arch.vcore->wait; vcpu->arch.pgdir = kvm->mm->pgd; vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST; @@ -4711,15 +4732,6 @@ static int kvmppc_vcpu_run_hv(struct kvm_vcpu *vcpu) } } while (is_kvmppc_resume_guest(r)); - /* Restore userspace EBB and other register values */ - if (cpu_has_feature(CPU_FTR_ARCH_207S)) { - mtspr(SPRN_EBBHR, ebb_regs[0]); - mtspr(SPRN_EBBRR, ebb_regs[1]); - mtspr(SPRN_BESCR, ebb_regs[2]); - mtspr(SPRN_TAR, user_tar); - } - mtspr(SPRN_VRSAVE, user_vrsave); - vcpu->arch.state = KVMPPC_VCPU_NOTREADY; atomic_dec(&kvm->arch.vcpus_running); @@ -4861,8 +4873,12 @@ static int kvmppc_core_prepare_memory_region_hv(struct kvm *kvm, unsigned long npages = mem->memory_size >> PAGE_SHIFT; if (change == KVM_MR_CREATE) { - slot->arch.rmap = vzalloc(array_size(npages, - sizeof(*slot->arch.rmap))); + unsigned long size = array_size(npages, sizeof(*slot->arch.rmap)); + + if ((size >> PAGE_SHIFT) > totalram_pages()) + return -ENOMEM; + + slot->arch.rmap = vzalloc(size); if (!slot->arch.rmap) return -ENOMEM; } @@ -5072,6 +5088,8 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu) */ int kvmppc_switch_mmu_to_hpt(struct kvm *kvm) { + unsigned long lpcr, lpcr_mask; + if (nesting_enabled(kvm)) kvmhv_release_all_nested(kvm); kvmppc_rmap_reset(kvm); @@ -5081,8 +5099,13 @@ int kvmppc_switch_mmu_to_hpt(struct kvm *kvm) kvm->arch.radix = 0; spin_unlock(&kvm->mmu_lock); kvmppc_free_radix(kvm); - kvmppc_update_lpcr(kvm, LPCR_VPM1, - LPCR_VPM1 | LPCR_UPRT | LPCR_GTSE | LPCR_HR); + + lpcr = LPCR_VPM1; + lpcr_mask = LPCR_VPM1 | LPCR_UPRT | LPCR_GTSE | LPCR_HR; + if (cpu_has_feature(CPU_FTR_ARCH_31)) + lpcr_mask |= LPCR_HAIL; + kvmppc_update_lpcr(kvm, lpcr, lpcr_mask); + return 0; } @@ -5092,6 +5115,7 @@ int kvmppc_switch_mmu_to_hpt(struct kvm *kvm) */ int kvmppc_switch_mmu_to_radix(struct kvm *kvm) { + unsigned long lpcr, lpcr_mask; int err; err = kvmppc_init_vm_radix(kvm); @@ -5103,8 +5127,17 @@ int kvmppc_switch_mmu_to_radix(struct kvm *kvm) kvm->arch.radix = 1; spin_unlock(&kvm->mmu_lock); kvmppc_free_hpt(&kvm->arch.hpt); - kvmppc_update_lpcr(kvm, LPCR_UPRT | LPCR_GTSE | LPCR_HR, - LPCR_VPM1 | LPCR_UPRT | LPCR_GTSE | LPCR_HR); + + lpcr = LPCR_UPRT | LPCR_GTSE | LPCR_HR; + lpcr_mask = LPCR_VPM1 | LPCR_UPRT | LPCR_GTSE | LPCR_HR; + if (cpu_has_feature(CPU_FTR_ARCH_31)) { + lpcr_mask |= LPCR_HAIL; + if (cpu_has_feature(CPU_FTR_HVMODE) && + (kvm->arch.host_lpcr & LPCR_HAIL)) + lpcr |= LPCR_HAIL; + } + kvmppc_update_lpcr(kvm, lpcr, lpcr_mask); + return 0; } @@ -5126,6 +5159,9 @@ void kvmppc_alloc_host_rm_ops(void) int cpu, core; int size; + if (cpu_has_feature(CPU_FTR_ARCH_300)) + return; + /* Not the first time here ? */ if (kvmppc_host_rm_ops_hv != NULL) return; @@ -5268,6 +5304,10 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm) kvm->arch.mmu_ready = 1; lpcr &= ~LPCR_VPM1; lpcr |= LPCR_UPRT | LPCR_GTSE | LPCR_HR; + if (cpu_has_feature(CPU_FTR_HVMODE) && + cpu_has_feature(CPU_FTR_ARCH_31) && + (kvm->arch.host_lpcr & LPCR_HAIL)) + lpcr |= LPCR_HAIL; ret = kvmppc_init_vm_radix(kvm); if (ret) { kvmppc_free_lpid(kvm->arch.lpid); @@ -6063,9 +6103,11 @@ static int kvmppc_book3s_init_hv(void) if (r) return r; - r = kvm_init_subcore_bitmap(); - if (r) - return r; + if (!cpu_has_feature(CPU_FTR_ARCH_300)) { + r = kvm_init_subcore_bitmap(); + if (r) + return r; + } /* * We need a way of accessing the XICS interrupt controller, diff --git a/arch/powerpc/kvm/book3s_hv.h b/arch/powerpc/kvm/book3s_hv.h new file mode 100644 index 000000000000..6b7f07d9026b --- /dev/null +++ b/arch/powerpc/kvm/book3s_hv.h @@ -0,0 +1,42 @@ +// SPDX-License-Identifier: GPL-2.0-only + +/* + * Privileged (non-hypervisor) host registers to save. + */ +struct p9_host_os_sprs { + unsigned long iamr; + unsigned long amr; + + unsigned int pmc1; + unsigned int pmc2; + unsigned int pmc3; + unsigned int pmc4; + unsigned int pmc5; + unsigned int pmc6; + unsigned long mmcr0; + unsigned long mmcr1; + unsigned long mmcr2; + unsigned long mmcr3; + unsigned long mmcra; + unsigned long siar; + unsigned long sier1; + unsigned long sier2; + unsigned long sier3; + unsigned long sdar; +}; + +static inline bool nesting_enabled(struct kvm *kvm) +{ + return kvm->arch.nested_enable && kvm_is_radix(kvm); +} + +bool load_vcpu_state(struct kvm_vcpu *vcpu, + struct p9_host_os_sprs *host_os_sprs); +void store_vcpu_state(struct kvm_vcpu *vcpu); +void save_p9_host_os_sprs(struct p9_host_os_sprs *host_os_sprs); +void restore_p9_host_os_sprs(struct kvm_vcpu *vcpu, + struct p9_host_os_sprs *host_os_sprs); +void switch_pmu_to_guest(struct kvm_vcpu *vcpu, + struct p9_host_os_sprs *host_os_sprs); +void switch_pmu_to_host(struct kvm_vcpu *vcpu, + struct p9_host_os_sprs *host_os_sprs); diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c index 70b7a8f97153..7d6d91338c3f 100644 --- a/arch/powerpc/kvm/book3s_hv_builtin.c +++ b/arch/powerpc/kvm/book3s_hv_builtin.c @@ -649,6 +649,8 @@ void kvmppc_guest_entry_inject_int(struct kvm_vcpu *vcpu) int ext; unsigned long lpcr; + WARN_ON_ONCE(cpu_has_feature(CPU_FTR_ARCH_300)); + /* Insert EXTERNAL bit into LPCR at the MER bit position */ ext = (vcpu->arch.pending_exceptions >> BOOK3S_IRQPRIO_EXTERNAL) & 1; lpcr = mfspr(SPRN_LPCR); @@ -682,60 +684,23 @@ static void flush_guest_tlb(struct kvm *kvm) unsigned long rb, set; rb = PPC_BIT(52); /* IS = 2 */ - if (kvm_is_radix(kvm)) { - /* R=1 PRS=1 RIC=2 */ + for (set = 0; set < kvm->arch.tlb_sets; ++set) { + /* R=0 PRS=0 RIC=0 */ asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1) - : : "r" (rb), "i" (1), "i" (1), "i" (2), + : : "r" (rb), "i" (0), "i" (0), "i" (0), "r" (0) : "memory"); - for (set = 1; set < kvm->arch.tlb_sets; ++set) { - rb += PPC_BIT(51); /* increment set number */ - /* R=1 PRS=1 RIC=0 */ - asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1) - : : "r" (rb), "i" (1), "i" (1), "i" (0), - "r" (0) : "memory"); - } - asm volatile("ptesync": : :"memory"); - // POWER9 congruence-class TLBIEL leaves ERAT. Flush it now. - asm volatile(PPC_RADIX_INVALIDATE_ERAT_GUEST : : :"memory"); - } else { - for (set = 0; set < kvm->arch.tlb_sets; ++set) { - /* R=0 PRS=0 RIC=0 */ - asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1) - : : "r" (rb), "i" (0), "i" (0), "i" (0), - "r" (0) : "memory"); - rb += PPC_BIT(51); /* increment set number */ - } - asm volatile("ptesync": : :"memory"); - // POWER9 congruence-class TLBIEL leaves ERAT. Flush it now. - if (cpu_has_feature(CPU_FTR_ARCH_300)) - asm volatile(PPC_ISA_3_0_INVALIDATE_ERAT : : :"memory"); + rb += PPC_BIT(51); /* increment set number */ } + asm volatile("ptesync": : :"memory"); } -void kvmppc_check_need_tlb_flush(struct kvm *kvm, int pcpu, - struct kvm_nested_guest *nested) +void kvmppc_check_need_tlb_flush(struct kvm *kvm, int pcpu) { - cpumask_t *need_tlb_flush; - - /* - * On POWER9, individual threads can come in here, but the - * TLB is shared between the 4 threads in a core, hence - * invalidating on one thread invalidates for all. - * Thus we make all 4 threads use the same bit. - */ - if (cpu_has_feature(CPU_FTR_ARCH_300)) - pcpu = cpu_first_tlb_thread_sibling(pcpu); - - if (nested) - need_tlb_flush = &nested->need_tlb_flush; - else - need_tlb_flush = &kvm->arch.need_tlb_flush; - - if (cpumask_test_cpu(pcpu, need_tlb_flush)) { + if (cpumask_test_cpu(pcpu, &kvm->arch.need_tlb_flush)) { flush_guest_tlb(kvm); /* Clear the bit after the TLB flush */ - cpumask_clear_cpu(pcpu, need_tlb_flush); + cpumask_clear_cpu(pcpu, &kvm->arch.need_tlb_flush); } } EXPORT_SYMBOL_GPL(kvmppc_check_need_tlb_flush); diff --git a/arch/powerpc/kvm/book3s_hv_hmi.c b/arch/powerpc/kvm/book3s_hv_hmi.c index 9af660476314..1ec50c69678b 100644 --- a/arch/powerpc/kvm/book3s_hv_hmi.c +++ b/arch/powerpc/kvm/book3s_hv_hmi.c @@ -20,10 +20,15 @@ void wait_for_subcore_guest_exit(void) /* * NULL bitmap pointer indicates that KVM module hasn't - * been loaded yet and hence no guests are running. + * been loaded yet and hence no guests are running, or running + * on POWER9 or newer CPU. + * * If no KVM is in use, no need to co-ordinate among threads * as all of them will always be in host and no one is going * to modify TB other than the opal hmi handler. + * + * POWER9 and newer don't need this synchronisation. + * * Hence, just return from here. */ if (!local_paca->sibling_subcore_state) diff --git a/arch/powerpc/kvm/book3s_hv_interrupts.S b/arch/powerpc/kvm/book3s_hv_interrupts.S index 4444f83cb133..59d89e4b154a 100644 --- a/arch/powerpc/kvm/book3s_hv_interrupts.S +++ b/arch/powerpc/kvm/book3s_hv_interrupts.S @@ -104,7 +104,10 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) mtlr r0 blr -_GLOBAL(kvmhv_save_host_pmu) +/* + * void kvmhv_save_host_pmu(void) + */ +kvmhv_save_host_pmu: BEGIN_FTR_SECTION /* Work around P8 PMAE bug */ li r3, -1 @@ -138,14 +141,6 @@ BEGIN_FTR_SECTION std r8, HSTATE_MMCR2(r13) std r9, HSTATE_SIER(r13) END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) -BEGIN_FTR_SECTION - mfspr r5, SPRN_MMCR3 - mfspr r6, SPRN_SIER2 - mfspr r7, SPRN_SIER3 - std r5, HSTATE_MMCR3(r13) - std r6, HSTATE_SIER2(r13) - std r7, HSTATE_SIER3(r13) -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_31) mfspr r3, SPRN_PMC1 mfspr r5, SPRN_PMC2 mfspr r6, SPRN_PMC3 diff --git a/arch/powerpc/kvm/book3s_hv_nested.c b/arch/powerpc/kvm/book3s_hv_nested.c index ed8a2c9f5629..a2e34efb8d31 100644 --- a/arch/powerpc/kvm/book3s_hv_nested.c +++ b/arch/powerpc/kvm/book3s_hv_nested.c @@ -358,6 +358,7 @@ long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu) /* convert TB values/offsets to host (L0) values */ hdec_exp = l2_hv.hdec_expiry - vc->tb_offset; vc->tb_offset += l2_hv.tb_offset; + vcpu->arch.dec_expires += l2_hv.tb_offset; /* set L1 state to L2 state */ vcpu->arch.nested = l2; @@ -374,11 +375,6 @@ long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu) vcpu->arch.ret = RESUME_GUEST; vcpu->arch.trap = 0; do { - if (mftb() >= hdec_exp) { - vcpu->arch.trap = BOOK3S_INTERRUPT_HV_DECREMENTER; - r = RESUME_HOST; - break; - } r = kvmhv_run_single_vcpu(vcpu, hdec_exp, lpcr); } while (is_kvmppc_resume_guest(r)); @@ -399,6 +395,8 @@ long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu) if (l2_regs.msr & MSR_TS_MASK) vcpu->arch.shregs.msr |= MSR_TS_S; vc->tb_offset = saved_l1_hv.tb_offset; + /* XXX: is this always the same delta as saved_l1_hv.tb_offset? */ + vcpu->arch.dec_expires -= l2_hv.tb_offset; restore_hv_regs(vcpu, &saved_l1_hv); vcpu->arch.purr += delta_purr; vcpu->arch.spurr += delta_spurr; @@ -582,7 +580,7 @@ long kvmhv_copy_tofrom_guest_nested(struct kvm_vcpu *vcpu) if (eaddr & (0xFFFUL << 52)) return H_PARAMETER; - buf = kzalloc(n, GFP_KERNEL); + buf = kzalloc(n, GFP_KERNEL | __GFP_NOWARN); if (!buf) return H_NO_MEM; diff --git a/arch/powerpc/kvm/book3s_hv_p9_entry.c b/arch/powerpc/kvm/book3s_hv_p9_entry.c index 961b3d70483c..a28e5b3daabd 100644 --- a/arch/powerpc/kvm/book3s_hv_p9_entry.c +++ b/arch/powerpc/kvm/book3s_hv_p9_entry.c @@ -4,8 +4,439 @@ #include <asm/asm-prototypes.h> #include <asm/dbell.h> #include <asm/kvm_ppc.h> +#include <asm/pmc.h> #include <asm/ppc-opcode.h> +#include "book3s_hv.h" + +static void freeze_pmu(unsigned long mmcr0, unsigned long mmcra) +{ + if (!(mmcr0 & MMCR0_FC)) + goto do_freeze; + if (mmcra & MMCRA_SAMPLE_ENABLE) + goto do_freeze; + if (cpu_has_feature(CPU_FTR_ARCH_31)) { + if (!(mmcr0 & MMCR0_PMCCEXT)) + goto do_freeze; + if (!(mmcra & MMCRA_BHRB_DISABLE)) + goto do_freeze; + } + return; + +do_freeze: + mmcr0 = MMCR0_FC; + mmcra = 0; + if (cpu_has_feature(CPU_FTR_ARCH_31)) { + mmcr0 |= MMCR0_PMCCEXT; + mmcra = MMCRA_BHRB_DISABLE; + } + + mtspr(SPRN_MMCR0, mmcr0); + mtspr(SPRN_MMCRA, mmcra); + isync(); +} + +void switch_pmu_to_guest(struct kvm_vcpu *vcpu, + struct p9_host_os_sprs *host_os_sprs) +{ + struct lppaca *lp; + int load_pmu = 1; + + lp = vcpu->arch.vpa.pinned_addr; + if (lp) + load_pmu = lp->pmcregs_in_use; + + /* Save host */ + if (ppc_get_pmu_inuse()) { + /* + * It might be better to put PMU handling (at least for the + * host) in the perf subsystem because it knows more about what + * is being used. + */ + + /* POWER9, POWER10 do not implement HPMC or SPMC */ + + host_os_sprs->mmcr0 = mfspr(SPRN_MMCR0); + host_os_sprs->mmcra = mfspr(SPRN_MMCRA); + + freeze_pmu(host_os_sprs->mmcr0, host_os_sprs->mmcra); + + host_os_sprs->pmc1 = mfspr(SPRN_PMC1); + host_os_sprs->pmc2 = mfspr(SPRN_PMC2); + host_os_sprs->pmc3 = mfspr(SPRN_PMC3); + host_os_sprs->pmc4 = mfspr(SPRN_PMC4); + host_os_sprs->pmc5 = mfspr(SPRN_PMC5); + host_os_sprs->pmc6 = mfspr(SPRN_PMC6); + host_os_sprs->mmcr1 = mfspr(SPRN_MMCR1); + host_os_sprs->mmcr2 = mfspr(SPRN_MMCR2); + host_os_sprs->sdar = mfspr(SPRN_SDAR); + host_os_sprs->siar = mfspr(SPRN_SIAR); + host_os_sprs->sier1 = mfspr(SPRN_SIER); + + if (cpu_has_feature(CPU_FTR_ARCH_31)) { + host_os_sprs->mmcr3 = mfspr(SPRN_MMCR3); + host_os_sprs->sier2 = mfspr(SPRN_SIER2); + host_os_sprs->sier3 = mfspr(SPRN_SIER3); + } + } + +#ifdef CONFIG_PPC_PSERIES + /* After saving PMU, before loading guest PMU, flip pmcregs_in_use */ + if (kvmhv_on_pseries()) { + barrier(); + get_lppaca()->pmcregs_in_use = load_pmu; + barrier(); + } +#endif + + /* + * Load guest. If the VPA said the PMCs are not in use but the guest + * tried to access them anyway, HFSCR[PM] will be set by the HFAC + * fault so we can make forward progress. + */ + if (load_pmu || (vcpu->arch.hfscr & HFSCR_PM)) { + mtspr(SPRN_PMC1, vcpu->arch.pmc[0]); + mtspr(SPRN_PMC2, vcpu->arch.pmc[1]); + mtspr(SPRN_PMC3, vcpu->arch.pmc[2]); + mtspr(SPRN_PMC4, vcpu->arch.pmc[3]); + mtspr(SPRN_PMC5, vcpu->arch.pmc[4]); + mtspr(SPRN_PMC6, vcpu->arch.pmc[5]); + mtspr(SPRN_MMCR1, vcpu->arch.mmcr[1]); + mtspr(SPRN_MMCR2, vcpu->arch.mmcr[2]); + mtspr(SPRN_SDAR, vcpu->arch.sdar); + mtspr(SPRN_SIAR, vcpu->arch.siar); + mtspr(SPRN_SIER, vcpu->arch.sier[0]); + + if (cpu_has_feature(CPU_FTR_ARCH_31)) { + mtspr(SPRN_MMCR3, vcpu->arch.mmcr[3]); + mtspr(SPRN_SIER2, vcpu->arch.sier[1]); + mtspr(SPRN_SIER3, vcpu->arch.sier[2]); + } + + /* Set MMCRA then MMCR0 last */ + mtspr(SPRN_MMCRA, vcpu->arch.mmcra); + mtspr(SPRN_MMCR0, vcpu->arch.mmcr[0]); + /* No isync necessary because we're starting counters */ + + if (!vcpu->arch.nested && + (vcpu->arch.hfscr_permitted & HFSCR_PM)) + vcpu->arch.hfscr |= HFSCR_PM; + } +} +EXPORT_SYMBOL_GPL(switch_pmu_to_guest); + +void switch_pmu_to_host(struct kvm_vcpu *vcpu, + struct p9_host_os_sprs *host_os_sprs) +{ + struct lppaca *lp; + int save_pmu = 1; + + lp = vcpu->arch.vpa.pinned_addr; + if (lp) + save_pmu = lp->pmcregs_in_use; + if (IS_ENABLED(CONFIG_KVM_BOOK3S_HV_NESTED_PMU_WORKAROUND)) { + /* + * Save pmu if this guest is capable of running nested guests. + * This is option is for old L1s that do not set their + * lppaca->pmcregs_in_use properly when entering their L2. + */ + save_pmu |= nesting_enabled(vcpu->kvm); + } + + if (save_pmu) { + vcpu->arch.mmcr[0] = mfspr(SPRN_MMCR0); + vcpu->arch.mmcra = mfspr(SPRN_MMCRA); + + freeze_pmu(vcpu->arch.mmcr[0], vcpu->arch.mmcra); + + vcpu->arch.pmc[0] = mfspr(SPRN_PMC1); + vcpu->arch.pmc[1] = mfspr(SPRN_PMC2); + vcpu->arch.pmc[2] = mfspr(SPRN_PMC3); + vcpu->arch.pmc[3] = mfspr(SPRN_PMC4); + vcpu->arch.pmc[4] = mfspr(SPRN_PMC5); + vcpu->arch.pmc[5] = mfspr(SPRN_PMC6); + vcpu->arch.mmcr[1] = mfspr(SPRN_MMCR1); + vcpu->arch.mmcr[2] = mfspr(SPRN_MMCR2); + vcpu->arch.sdar = mfspr(SPRN_SDAR); + vcpu->arch.siar = mfspr(SPRN_SIAR); + vcpu->arch.sier[0] = mfspr(SPRN_SIER); + + if (cpu_has_feature(CPU_FTR_ARCH_31)) { + vcpu->arch.mmcr[3] = mfspr(SPRN_MMCR3); + vcpu->arch.sier[1] = mfspr(SPRN_SIER2); + vcpu->arch.sier[2] = mfspr(SPRN_SIER3); + } + + } else if (vcpu->arch.hfscr & HFSCR_PM) { + /* + * The guest accessed PMC SPRs without specifying they should + * be preserved, or it cleared pmcregs_in_use after the last + * access. Just ensure they are frozen. + */ + freeze_pmu(mfspr(SPRN_MMCR0), mfspr(SPRN_MMCRA)); + + /* + * Demand-fault PMU register access in the guest. + * + * This is used to grab the guest's VPA pmcregs_in_use value + * and reflect it into the host's VPA in the case of a nested + * hypervisor. + * + * It also avoids having to zero-out SPRs after each guest + * exit to avoid side-channels when. + * + * This is cleared here when we exit the guest, so later HFSCR + * interrupt handling can add it back to run the guest with + * PM enabled next time. + */ + if (!vcpu->arch.nested) + vcpu->arch.hfscr &= ~HFSCR_PM; + } /* otherwise the PMU should still be frozen */ + +#ifdef CONFIG_PPC_PSERIES + if (kvmhv_on_pseries()) { + barrier(); + get_lppaca()->pmcregs_in_use = ppc_get_pmu_inuse(); + barrier(); + } +#endif + + if (ppc_get_pmu_inuse()) { + mtspr(SPRN_PMC1, host_os_sprs->pmc1); + mtspr(SPRN_PMC2, host_os_sprs->pmc2); + mtspr(SPRN_PMC3, host_os_sprs->pmc3); + mtspr(SPRN_PMC4, host_os_sprs->pmc4); + mtspr(SPRN_PMC5, host_os_sprs->pmc5); + mtspr(SPRN_PMC6, host_os_sprs->pmc6); + mtspr(SPRN_MMCR1, host_os_sprs->mmcr1); + mtspr(SPRN_MMCR2, host_os_sprs->mmcr2); + mtspr(SPRN_SDAR, host_os_sprs->sdar); + mtspr(SPRN_SIAR, host_os_sprs->siar); + mtspr(SPRN_SIER, host_os_sprs->sier1); + + if (cpu_has_feature(CPU_FTR_ARCH_31)) { + mtspr(SPRN_MMCR3, host_os_sprs->mmcr3); + mtspr(SPRN_SIER2, host_os_sprs->sier2); + mtspr(SPRN_SIER3, host_os_sprs->sier3); + } + + /* Set MMCRA then MMCR0 last */ + mtspr(SPRN_MMCRA, host_os_sprs->mmcra); + mtspr(SPRN_MMCR0, host_os_sprs->mmcr0); + isync(); + } +} +EXPORT_SYMBOL_GPL(switch_pmu_to_host); + +static void load_spr_state(struct kvm_vcpu *vcpu, + struct p9_host_os_sprs *host_os_sprs) +{ + /* TAR is very fast */ + mtspr(SPRN_TAR, vcpu->arch.tar); + +#ifdef CONFIG_ALTIVEC + if (cpu_has_feature(CPU_FTR_ALTIVEC) && + current->thread.vrsave != vcpu->arch.vrsave) + mtspr(SPRN_VRSAVE, vcpu->arch.vrsave); +#endif + + if (vcpu->arch.hfscr & HFSCR_EBB) { + if (current->thread.ebbhr != vcpu->arch.ebbhr) + mtspr(SPRN_EBBHR, vcpu->arch.ebbhr); + if (current->thread.ebbrr != vcpu->arch.ebbrr) + mtspr(SPRN_EBBRR, vcpu->arch.ebbrr); + if (current->thread.bescr != vcpu->arch.bescr) + mtspr(SPRN_BESCR, vcpu->arch.bescr); + } + + if (cpu_has_feature(CPU_FTR_P9_TIDR) && + current->thread.tidr != vcpu->arch.tid) + mtspr(SPRN_TIDR, vcpu->arch.tid); + if (host_os_sprs->iamr != vcpu->arch.iamr) + mtspr(SPRN_IAMR, vcpu->arch.iamr); + if (host_os_sprs->amr != vcpu->arch.amr) + mtspr(SPRN_AMR, vcpu->arch.amr); + if (vcpu->arch.uamor != 0) + mtspr(SPRN_UAMOR, vcpu->arch.uamor); + if (current->thread.fscr != vcpu->arch.fscr) + mtspr(SPRN_FSCR, vcpu->arch.fscr); + if (current->thread.dscr != vcpu->arch.dscr) + mtspr(SPRN_DSCR, vcpu->arch.dscr); + if (vcpu->arch.pspb != 0) + mtspr(SPRN_PSPB, vcpu->arch.pspb); + + /* + * DAR, DSISR, and for nested HV, SPRGs must be set with MSR[RI] + * clear (or hstate set appropriately to catch those registers + * being clobbered if we take a MCE or SRESET), so those are done + * later. + */ + + if (!(vcpu->arch.ctrl & 1)) + mtspr(SPRN_CTRLT, 0); +} + +static void store_spr_state(struct kvm_vcpu *vcpu) +{ + vcpu->arch.tar = mfspr(SPRN_TAR); + +#ifdef CONFIG_ALTIVEC + if (cpu_has_feature(CPU_FTR_ALTIVEC)) + vcpu->arch.vrsave = mfspr(SPRN_VRSAVE); +#endif + + if (vcpu->arch.hfscr & HFSCR_EBB) { + vcpu->arch.ebbhr = mfspr(SPRN_EBBHR); + vcpu->arch.ebbrr = mfspr(SPRN_EBBRR); + vcpu->arch.bescr = mfspr(SPRN_BESCR); + } + + if (cpu_has_feature(CPU_FTR_P9_TIDR)) + vcpu->arch.tid = mfspr(SPRN_TIDR); + vcpu->arch.iamr = mfspr(SPRN_IAMR); + vcpu->arch.amr = mfspr(SPRN_AMR); + vcpu->arch.uamor = mfspr(SPRN_UAMOR); + vcpu->arch.fscr = mfspr(SPRN_FSCR); + vcpu->arch.dscr = mfspr(SPRN_DSCR); + vcpu->arch.pspb = mfspr(SPRN_PSPB); + + vcpu->arch.ctrl = mfspr(SPRN_CTRLF); +} + +/* Returns true if current MSR and/or guest MSR may have changed */ +bool load_vcpu_state(struct kvm_vcpu *vcpu, + struct p9_host_os_sprs *host_os_sprs) +{ + bool ret = false; + +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + if (cpu_has_feature(CPU_FTR_TM) || + cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST)) { + unsigned long guest_msr = vcpu->arch.shregs.msr; + if (MSR_TM_ACTIVE(guest_msr)) { + kvmppc_restore_tm_hv(vcpu, guest_msr, true); + ret = true; + } else if (vcpu->arch.hfscr & HFSCR_TM) { + mtspr(SPRN_TEXASR, vcpu->arch.texasr); + mtspr(SPRN_TFHAR, vcpu->arch.tfhar); + mtspr(SPRN_TFIAR, vcpu->arch.tfiar); + } + } +#endif + + load_spr_state(vcpu, host_os_sprs); + + load_fp_state(&vcpu->arch.fp); +#ifdef CONFIG_ALTIVEC + load_vr_state(&vcpu->arch.vr); +#endif + + return ret; +} +EXPORT_SYMBOL_GPL(load_vcpu_state); + +void store_vcpu_state(struct kvm_vcpu *vcpu) +{ + store_spr_state(vcpu); + + store_fp_state(&vcpu->arch.fp); +#ifdef CONFIG_ALTIVEC + store_vr_state(&vcpu->arch.vr); +#endif + +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + if (cpu_has_feature(CPU_FTR_TM) || + cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST)) { + unsigned long guest_msr = vcpu->arch.shregs.msr; + if (MSR_TM_ACTIVE(guest_msr)) { + kvmppc_save_tm_hv(vcpu, guest_msr, true); + } else if (vcpu->arch.hfscr & HFSCR_TM) { + vcpu->arch.texasr = mfspr(SPRN_TEXASR); + vcpu->arch.tfhar = mfspr(SPRN_TFHAR); + vcpu->arch.tfiar = mfspr(SPRN_TFIAR); + + if (!vcpu->arch.nested) { + vcpu->arch.load_tm++; /* see load_ebb comment */ + if (!vcpu->arch.load_tm) + vcpu->arch.hfscr &= ~HFSCR_TM; + } + } + } +#endif +} +EXPORT_SYMBOL_GPL(store_vcpu_state); + +void save_p9_host_os_sprs(struct p9_host_os_sprs *host_os_sprs) +{ + host_os_sprs->iamr = mfspr(SPRN_IAMR); + host_os_sprs->amr = mfspr(SPRN_AMR); +} +EXPORT_SYMBOL_GPL(save_p9_host_os_sprs); + +/* vcpu guest regs must already be saved */ +void restore_p9_host_os_sprs(struct kvm_vcpu *vcpu, + struct p9_host_os_sprs *host_os_sprs) +{ + /* + * current->thread.xxx registers must all be restored to host + * values before a potential context switch, othrewise the context + * switch itself will overwrite current->thread.xxx with the values + * from the guest SPRs. + */ + + mtspr(SPRN_SPRG_VDSO_WRITE, local_paca->sprg_vdso); + + if (cpu_has_feature(CPU_FTR_P9_TIDR) && + current->thread.tidr != vcpu->arch.tid) + mtspr(SPRN_TIDR, current->thread.tidr); + if (host_os_sprs->iamr != vcpu->arch.iamr) + mtspr(SPRN_IAMR, host_os_sprs->iamr); + if (vcpu->arch.uamor != 0) + mtspr(SPRN_UAMOR, 0); + if (host_os_sprs->amr != vcpu->arch.amr) + mtspr(SPRN_AMR, host_os_sprs->amr); + if (current->thread.fscr != vcpu->arch.fscr) + mtspr(SPRN_FSCR, current->thread.fscr); + if (current->thread.dscr != vcpu->arch.dscr) + mtspr(SPRN_DSCR, current->thread.dscr); + if (vcpu->arch.pspb != 0) + mtspr(SPRN_PSPB, 0); + + /* Save guest CTRL register, set runlatch to 1 */ + if (!(vcpu->arch.ctrl & 1)) + mtspr(SPRN_CTRLT, 1); + +#ifdef CONFIG_ALTIVEC + if (cpu_has_feature(CPU_FTR_ALTIVEC) && + vcpu->arch.vrsave != current->thread.vrsave) + mtspr(SPRN_VRSAVE, current->thread.vrsave); +#endif + if (vcpu->arch.hfscr & HFSCR_EBB) { + if (vcpu->arch.bescr != current->thread.bescr) + mtspr(SPRN_BESCR, current->thread.bescr); + if (vcpu->arch.ebbhr != current->thread.ebbhr) + mtspr(SPRN_EBBHR, current->thread.ebbhr); + if (vcpu->arch.ebbrr != current->thread.ebbrr) + mtspr(SPRN_EBBRR, current->thread.ebbrr); + + if (!vcpu->arch.nested) { + /* + * This is like load_fp in context switching, turn off + * the facility after it wraps the u8 to try avoiding + * saving and restoring the registers each partition + * switch. + */ + vcpu->arch.load_ebb++; + if (!vcpu->arch.load_ebb) + vcpu->arch.hfscr &= ~HFSCR_EBB; + } + } + + if (vcpu->arch.tar != current->thread.tar) + mtspr(SPRN_TAR, current->thread.tar); +} +EXPORT_SYMBOL_GPL(restore_p9_host_os_sprs); + #ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING static void __start_timing(struct kvm_vcpu *vcpu, struct kvmhv_tb_accumulator *next) { @@ -56,10 +487,22 @@ static void __accumulate_time(struct kvm_vcpu *vcpu, struct kvmhv_tb_accumulator #define accumulate_time(vcpu, next) do {} while (0) #endif -static inline void mfslb(unsigned int idx, u64 *slbee, u64 *slbev) +static inline u64 mfslbv(unsigned int idx) { - asm volatile("slbmfev %0,%1" : "=r" (*slbev) : "r" (idx)); - asm volatile("slbmfee %0,%1" : "=r" (*slbee) : "r" (idx)); + u64 slbev; + + asm volatile("slbmfev %0,%1" : "=r" (slbev) : "r" (idx)); + + return slbev; +} + +static inline u64 mfslbe(unsigned int idx) +{ + u64 slbee; + + asm volatile("slbmfee %0,%1" : "=r" (slbee) : "r" (idx)); + + return slbee; } static inline void mtslb(u64 slbee, u64 slbev) @@ -100,17 +543,19 @@ static void switch_mmu_to_guest_radix(struct kvm *kvm, struct kvm_vcpu *vcpu, u6 lpid = nested ? nested->shadow_lpid : kvm->arch.lpid; /* - * All the isync()s are overkill but trivially follow the ISA - * requirements. Some can likely be replaced with justification - * comment for why they are not needed. + * Prior memory accesses to host PID Q3 must be completed before we + * start switching, and stores must be drained to avoid not-my-LPAR + * logic (see switch_mmu_to_host). */ + asm volatile("hwsync" ::: "memory"); isync(); mtspr(SPRN_LPID, lpid); - isync(); mtspr(SPRN_LPCR, lpcr); - isync(); mtspr(SPRN_PID, vcpu->arch.pid); - isync(); + /* + * isync not required here because we are HRFID'ing to guest before + * any guest context access, which is context synchronising. + */ } static void switch_mmu_to_guest_hpt(struct kvm *kvm, struct kvm_vcpu *vcpu, u64 lpcr) @@ -120,25 +565,41 @@ static void switch_mmu_to_guest_hpt(struct kvm *kvm, struct kvm_vcpu *vcpu, u64 lpid = kvm->arch.lpid; + /* + * See switch_mmu_to_guest_radix. ptesync should not be required here + * even if the host is in HPT mode because speculative accesses would + * not cause RC updates (we are in real mode). + */ + asm volatile("hwsync" ::: "memory"); + isync(); mtspr(SPRN_LPID, lpid); mtspr(SPRN_LPCR, lpcr); mtspr(SPRN_PID, vcpu->arch.pid); for (i = 0; i < vcpu->arch.slb_max; i++) mtslb(vcpu->arch.slb[i].orige, vcpu->arch.slb[i].origv); - - isync(); + /* + * isync not required here, see switch_mmu_to_guest_radix. + */ } static void switch_mmu_to_host(struct kvm *kvm, u32 pid) { + /* + * The guest has exited, so guest MMU context is no longer being + * non-speculatively accessed, but a hwsync is needed before the + * mtLPIDR / mtPIDR switch, in order to ensure all stores are drained, + * so the not-my-LPAR tlbie logic does not overlook them. + */ + asm volatile("hwsync" ::: "memory"); isync(); mtspr(SPRN_PID, pid); - isync(); mtspr(SPRN_LPID, kvm->arch.host_lpid); - isync(); mtspr(SPRN_LPCR, kvm->arch.host_lpcr); - isync(); + /* + * isync is not required after the switch, because mtmsrd with L=0 + * is performed after this switch, which is context synchronising. + */ if (!radix_enabled()) slb_restore_bolted_realmode(); @@ -171,8 +632,10 @@ static void save_clear_guest_mmu(struct kvm *kvm, struct kvm_vcpu *vcpu) */ for (i = 0; i < vcpu->arch.slb_nr; i++) { u64 slbee, slbev; - mfslb(i, &slbee, &slbev); + + slbee = mfslbe(i); if (slbee & SLB_ESID_V) { + slbev = mfslbv(i); vcpu->arch.slb[nr].orige = slbee | i; vcpu->arch.slb[nr].origv = slbev; nr++; @@ -183,15 +646,128 @@ static void save_clear_guest_mmu(struct kvm *kvm, struct kvm_vcpu *vcpu) } } -int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpcr) +static void flush_guest_tlb(struct kvm *kvm) { + unsigned long rb, set; + + rb = PPC_BIT(52); /* IS = 2 */ + if (kvm_is_radix(kvm)) { + /* R=1 PRS=1 RIC=2 */ + asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1) + : : "r" (rb), "i" (1), "i" (1), "i" (2), + "r" (0) : "memory"); + for (set = 1; set < kvm->arch.tlb_sets; ++set) { + rb += PPC_BIT(51); /* increment set number */ + /* R=1 PRS=1 RIC=0 */ + asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1) + : : "r" (rb), "i" (1), "i" (1), "i" (0), + "r" (0) : "memory"); + } + asm volatile("ptesync": : :"memory"); + // POWER9 congruence-class TLBIEL leaves ERAT. Flush it now. + asm volatile(PPC_RADIX_INVALIDATE_ERAT_GUEST : : :"memory"); + } else { + for (set = 0; set < kvm->arch.tlb_sets; ++set) { + /* R=0 PRS=0 RIC=0 */ + asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1) + : : "r" (rb), "i" (0), "i" (0), "i" (0), + "r" (0) : "memory"); + rb += PPC_BIT(51); /* increment set number */ + } + asm volatile("ptesync": : :"memory"); + // POWER9 congruence-class TLBIEL leaves ERAT. Flush it now. + asm volatile(PPC_ISA_3_0_INVALIDATE_ERAT : : :"memory"); + } +} + +static void check_need_tlb_flush(struct kvm *kvm, int pcpu, + struct kvm_nested_guest *nested) +{ + cpumask_t *need_tlb_flush; + bool all_set = true; + int i; + + if (nested) + need_tlb_flush = &nested->need_tlb_flush; + else + need_tlb_flush = &kvm->arch.need_tlb_flush; + + if (likely(!cpumask_test_cpu(pcpu, need_tlb_flush))) + return; + + /* + * Individual threads can come in here, but the TLB is shared between + * the 4 threads in a core, hence invalidating on one thread + * invalidates for all, so only invalidate the first time (if all bits + * were set. The others must still execute a ptesync. + * + * If a race occurs and two threads do the TLB flush, that is not a + * problem, just sub-optimal. + */ + for (i = cpu_first_tlb_thread_sibling(pcpu); + i <= cpu_last_tlb_thread_sibling(pcpu); + i += cpu_tlb_thread_sibling_step()) { + if (!cpumask_test_cpu(i, need_tlb_flush)) { + all_set = false; + break; + } + } + if (all_set) + flush_guest_tlb(kvm); + else + asm volatile("ptesync" ::: "memory"); + + /* Clear the bit after the TLB flush */ + cpumask_clear_cpu(pcpu, need_tlb_flush); +} + +unsigned long kvmppc_msr_hard_disable_set_facilities(struct kvm_vcpu *vcpu, unsigned long msr) +{ + unsigned long msr_needed = 0; + + msr &= ~MSR_EE; + + /* MSR bits may have been cleared by context switch so must recheck */ + if (IS_ENABLED(CONFIG_PPC_FPU)) + msr_needed |= MSR_FP; + if (cpu_has_feature(CPU_FTR_ALTIVEC)) + msr_needed |= MSR_VEC; + if (cpu_has_feature(CPU_FTR_VSX)) + msr_needed |= MSR_VSX; + if ((cpu_has_feature(CPU_FTR_TM) || + cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST)) && + (vcpu->arch.hfscr & HFSCR_TM)) + msr_needed |= MSR_TM; + + /* + * This could be combined with MSR[RI] clearing, but that expands + * the unrecoverable window. It would be better to cover unrecoverable + * with KVM bad interrupt handling rather than use MSR[RI] at all. + * + * Much more difficult and less worthwhile to combine with IR/DR + * disable. + */ + if ((msr & msr_needed) != msr_needed) { + msr |= msr_needed; + __mtmsrd(msr, 0); + } else { + __hard_irq_disable(); + } + local_paca->irq_happened |= PACA_IRQ_HARD_DIS; + + return msr; +} +EXPORT_SYMBOL_GPL(kvmppc_msr_hard_disable_set_facilities); + +int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpcr, u64 *tb) +{ + struct p9_host_os_sprs host_os_sprs; struct kvm *kvm = vcpu->kvm; struct kvm_nested_guest *nested = vcpu->arch.nested; struct kvmppc_vcore *vc = vcpu->arch.vcore; - s64 hdec; - u64 tb, purr, spurr; + s64 hdec, dec; + u64 purr, spurr; u64 *exsave; - bool ri_set; int trap; unsigned long msr; unsigned long host_hfscr; @@ -199,11 +775,13 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpc unsigned long host_dawr0; unsigned long host_dawrx0; unsigned long host_psscr; + unsigned long host_hpsscr; unsigned long host_pidr; unsigned long host_dawr1; unsigned long host_dawrx1; + unsigned long dpdes; - hdec = time_limit - mftb(); + hdec = time_limit - *tb; if (hdec < 0) return BOOK3S_INTERRUPT_HV_DECREMENTER; @@ -214,51 +792,84 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpc vcpu->arch.ceded = 0; - if (vc->tb_offset) { - u64 new_tb = mftb() + vc->tb_offset; - mtspr(SPRN_TBU40, new_tb); - tb = mftb(); - if ((tb & 0xffffff) < (new_tb & 0xffffff)) - mtspr(SPRN_TBU40, new_tb + 0x1000000); - vc->tb_offset_applied = vc->tb_offset; - } - - msr = mfmsr(); + /* Save MSR for restore, with EE clear. */ + msr = mfmsr() & ~MSR_EE; host_hfscr = mfspr(SPRN_HFSCR); host_ciabr = mfspr(SPRN_CIABR); - host_dawr0 = mfspr(SPRN_DAWR0); - host_dawrx0 = mfspr(SPRN_DAWRX0); - host_psscr = mfspr(SPRN_PSSCR); + host_psscr = mfspr(SPRN_PSSCR_PR); + if (cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST)) + host_hpsscr = mfspr(SPRN_PSSCR); host_pidr = mfspr(SPRN_PID); - if (cpu_has_feature(CPU_FTR_DAWR1)) { - host_dawr1 = mfspr(SPRN_DAWR1); - host_dawrx1 = mfspr(SPRN_DAWRX1); - } - if (vc->pcr) - mtspr(SPRN_PCR, vc->pcr | PCR_MASK); - mtspr(SPRN_DPDES, vc->dpdes); - mtspr(SPRN_VTB, vc->vtb); + if (dawr_enabled()) { + host_dawr0 = mfspr(SPRN_DAWR0); + host_dawrx0 = mfspr(SPRN_DAWRX0); + if (cpu_has_feature(CPU_FTR_DAWR1)) { + host_dawr1 = mfspr(SPRN_DAWR1); + host_dawrx1 = mfspr(SPRN_DAWRX1); + } + } local_paca->kvm_hstate.host_purr = mfspr(SPRN_PURR); local_paca->kvm_hstate.host_spurr = mfspr(SPRN_SPURR); + + save_p9_host_os_sprs(&host_os_sprs); + + msr = kvmppc_msr_hard_disable_set_facilities(vcpu, msr); + if (lazy_irq_pending()) { + trap = 0; + goto out; + } + + if (unlikely(load_vcpu_state(vcpu, &host_os_sprs))) + msr = mfmsr(); /* MSR may have been updated */ + + if (vc->tb_offset) { + u64 new_tb = *tb + vc->tb_offset; + mtspr(SPRN_TBU40, new_tb); + if ((mftb() & 0xffffff) < (new_tb & 0xffffff)) { + new_tb += 0x1000000; + mtspr(SPRN_TBU40, new_tb); + } + *tb = new_tb; + vc->tb_offset_applied = vc->tb_offset; + } + + mtspr(SPRN_VTB, vc->vtb); mtspr(SPRN_PURR, vcpu->arch.purr); mtspr(SPRN_SPURR, vcpu->arch.spurr); + if (vc->pcr) + mtspr(SPRN_PCR, vc->pcr | PCR_MASK); + if (vcpu->arch.doorbell_request) { + vcpu->arch.doorbell_request = 0; + mtspr(SPRN_DPDES, 1); + } + if (dawr_enabled()) { - mtspr(SPRN_DAWR0, vcpu->arch.dawr0); - mtspr(SPRN_DAWRX0, vcpu->arch.dawrx0); + if (vcpu->arch.dawr0 != host_dawr0) + mtspr(SPRN_DAWR0, vcpu->arch.dawr0); + if (vcpu->arch.dawrx0 != host_dawrx0) + mtspr(SPRN_DAWRX0, vcpu->arch.dawrx0); if (cpu_has_feature(CPU_FTR_DAWR1)) { - mtspr(SPRN_DAWR1, vcpu->arch.dawr1); - mtspr(SPRN_DAWRX1, vcpu->arch.dawrx1); + if (vcpu->arch.dawr1 != host_dawr1) + mtspr(SPRN_DAWR1, vcpu->arch.dawr1); + if (vcpu->arch.dawrx1 != host_dawrx1) + mtspr(SPRN_DAWRX1, vcpu->arch.dawrx1); } } - mtspr(SPRN_CIABR, vcpu->arch.ciabr); - mtspr(SPRN_IC, vcpu->arch.ic); + if (vcpu->arch.ciabr != host_ciabr) + mtspr(SPRN_CIABR, vcpu->arch.ciabr); - mtspr(SPRN_PSSCR, vcpu->arch.psscr | PSSCR_EC | - (local_paca->kvm_hstate.fake_suspend << PSSCR_FAKE_SUSPEND_LG)); + + if (cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST)) { + mtspr(SPRN_PSSCR, vcpu->arch.psscr | PSSCR_EC | + (local_paca->kvm_hstate.fake_suspend << PSSCR_FAKE_SUSPEND_LG)); + } else { + if (vcpu->arch.psscr != host_psscr) + mtspr(SPRN_PSSCR_PR, vcpu->arch.psscr); + } mtspr(SPRN_HFSCR, vcpu->arch.hfscr); @@ -276,18 +887,34 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpc * HDSI which should correctly update the HDSISR the second time HDSI * entry. * - * Just do this on all p9 processors for now. + * The "radix prefetch bug" test can be used to test for this bug, as + * it also exists fo DD2.1 and below. */ - mtspr(SPRN_HDSISR, HDSISR_CANARY); + if (cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG)) + mtspr(SPRN_HDSISR, HDSISR_CANARY); mtspr(SPRN_SPRG0, vcpu->arch.shregs.sprg0); mtspr(SPRN_SPRG1, vcpu->arch.shregs.sprg1); mtspr(SPRN_SPRG2, vcpu->arch.shregs.sprg2); mtspr(SPRN_SPRG3, vcpu->arch.shregs.sprg3); - mtspr(SPRN_AMOR, ~0UL); + /* + * It might be preferable to load_vcpu_state here, in order to get the + * GPR/FP register loads executing in parallel with the previous mtSPR + * instructions, but for now that can't be done because the TM handling + * in load_vcpu_state can change some SPRs and vcpu state (nip, msr). + * But TM could be split out if this would be a significant benefit. + */ - local_paca->kvm_hstate.in_guest = KVM_GUEST_MODE_HV_P9; + /* + * MSR[RI] does not need to be cleared (and is not, for radix guests + * with no prefetch bug), because in_guest is set. If we take a SRESET + * or MCE with in_guest set but still in HV mode, then + * kvmppc_p9_bad_interrupt handles the interrupt, which effectively + * clears MSR[RI] and doesn't return. + */ + WRITE_ONCE(local_paca->kvm_hstate.in_guest, KVM_GUEST_MODE_HV_P9); + barrier(); /* Open in_guest critical section */ /* * Hash host, hash guest, or radix guest with prefetch bug, all have @@ -299,17 +926,13 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpc save_clear_host_mmu(kvm); - if (kvm_is_radix(kvm)) { + if (kvm_is_radix(kvm)) switch_mmu_to_guest_radix(kvm, vcpu, lpcr); - if (!cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG)) - __mtmsrd(0, 1); /* clear RI */ - - } else { + else switch_mmu_to_guest_hpt(kvm, vcpu, lpcr); - } /* TLBIEL uses LPID=LPIDR, so run this after setting guest LPID */ - kvmppc_check_need_tlb_flush(kvm, vc->pcpu, nested); + check_need_tlb_flush(kvm, vc->pcpu, nested); /* * P9 suppresses the HDEC exception when LPCR[HDICE] = 0, @@ -317,6 +940,8 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpc */ mtspr(SPRN_HDEC, hdec); + mtspr(SPRN_DEC, vcpu->arch.dec_expires - *tb); + #ifdef CONFIG_PPC_TRANSACTIONAL_MEM tm_return_to_guest: #endif @@ -327,7 +952,9 @@ tm_return_to_guest: accumulate_time(vcpu, &vcpu->arch.guest_time); + switch_pmu_to_guest(vcpu, &host_os_sprs); kvmppc_p9_enter_guest(vcpu); + switch_pmu_to_host(vcpu, &host_os_sprs); accumulate_time(vcpu, &vcpu->arch.rm_intr); @@ -340,36 +967,27 @@ tm_return_to_guest: /* 0x2 bit for HSRR is only used by PR and P7/8 HV paths, clear it */ trap = local_paca->kvm_hstate.scratch0 & ~0x2; - /* HSRR interrupts leave MSR[RI] unchanged, SRR interrupts clear it. */ - ri_set = false; - if (likely(trap > BOOK3S_INTERRUPT_MACHINE_CHECK)) { - if (trap != BOOK3S_INTERRUPT_SYSCALL && - (vcpu->arch.shregs.msr & MSR_RI)) - ri_set = true; + if (likely(trap > BOOK3S_INTERRUPT_MACHINE_CHECK)) exsave = local_paca->exgen; - } else if (trap == BOOK3S_INTERRUPT_SYSTEM_RESET) { + else if (trap == BOOK3S_INTERRUPT_SYSTEM_RESET) exsave = local_paca->exnmi; - } else { /* trap == 0x200 */ + else /* trap == 0x200 */ exsave = local_paca->exmc; - } vcpu->arch.regs.gpr[1] = local_paca->kvm_hstate.scratch1; vcpu->arch.regs.gpr[3] = local_paca->kvm_hstate.scratch2; /* - * Only set RI after reading machine check regs (DAR, DSISR, SRR0/1) - * and hstate scratch (which we need to move into exsave to make - * re-entrant vs SRESET/MCE) + * After reading machine check regs (DAR, DSISR, SRR0/1) and hstate + * scratch (which we need to move into exsave to make re-entrant vs + * SRESET/MCE), register state is protected from reentrancy. However + * timebase, MMU, among other state is still set to guest, so don't + * enable MSR[RI] here. It gets enabled at the end, after in_guest + * is cleared. + * + * It is possible an NMI could come in here, which is why it is + * important to save the above state early so it can be debugged. */ - if (ri_set) { - if (unlikely(!(mfmsr() & MSR_RI))) { - __mtmsrd(MSR_RI, 1); - WARN_ON_ONCE(1); - } - } else { - WARN_ON_ONCE(mfmsr() & MSR_RI); - __mtmsrd(MSR_RI, 1); - } vcpu->arch.regs.gpr[9] = exsave[EX_R9/sizeof(u64)]; vcpu->arch.regs.gpr[10] = exsave[EX_R10/sizeof(u64)]; @@ -388,7 +1006,7 @@ tm_return_to_guest: kvmppc_realmode_machine_check(vcpu); } else if (unlikely(trap == BOOK3S_INTERRUPT_HMI)) { - kvmppc_realmode_hmi_handler(); + kvmppc_p9_realmode_hmi_handler(vcpu); } else if (trap == BOOK3S_INTERRUPT_H_EMUL_ASSIST) { vcpu->arch.emul_inst = mfspr(SPRN_HEIR); @@ -427,13 +1045,6 @@ tm_return_to_guest: */ mtspr(SPRN_HSRR0, vcpu->arch.regs.nip); mtspr(SPRN_HSRR1, vcpu->arch.shregs.msr); - - /* - * tm_return_to_guest re-loads SRR0/1, DAR, - * DSISR after RI is cleared, in case they had - * been clobbered by a MCE. - */ - __mtmsrd(0, 1); /* clear RI */ goto tm_return_to_guest; } } @@ -445,81 +1056,109 @@ tm_return_to_guest: /* Advance host PURR/SPURR by the amount used by guest */ purr = mfspr(SPRN_PURR); spurr = mfspr(SPRN_SPURR); - mtspr(SPRN_PURR, local_paca->kvm_hstate.host_purr + - purr - vcpu->arch.purr); - mtspr(SPRN_SPURR, local_paca->kvm_hstate.host_spurr + - spurr - vcpu->arch.spurr); + local_paca->kvm_hstate.host_purr += purr - vcpu->arch.purr; + local_paca->kvm_hstate.host_spurr += spurr - vcpu->arch.spurr; vcpu->arch.purr = purr; vcpu->arch.spurr = spurr; vcpu->arch.ic = mfspr(SPRN_IC); vcpu->arch.pid = mfspr(SPRN_PID); - vcpu->arch.psscr = mfspr(SPRN_PSSCR) & PSSCR_GUEST_VIS; + vcpu->arch.psscr = mfspr(SPRN_PSSCR_PR); vcpu->arch.shregs.sprg0 = mfspr(SPRN_SPRG0); vcpu->arch.shregs.sprg1 = mfspr(SPRN_SPRG1); vcpu->arch.shregs.sprg2 = mfspr(SPRN_SPRG2); vcpu->arch.shregs.sprg3 = mfspr(SPRN_SPRG3); - /* Preserve PSSCR[FAKE_SUSPEND] until we've called kvmppc_save_tm_hv */ - mtspr(SPRN_PSSCR, host_psscr | - (local_paca->kvm_hstate.fake_suspend << PSSCR_FAKE_SUSPEND_LG)); - mtspr(SPRN_HFSCR, host_hfscr); - mtspr(SPRN_CIABR, host_ciabr); - mtspr(SPRN_DAWR0, host_dawr0); - mtspr(SPRN_DAWRX0, host_dawrx0); - if (cpu_has_feature(CPU_FTR_DAWR1)) { - mtspr(SPRN_DAWR1, host_dawr1); - mtspr(SPRN_DAWRX1, host_dawrx1); - } - - if (kvm_is_radix(kvm)) { - /* - * Since this is radix, do a eieio; tlbsync; ptesync sequence - * in case we interrupted the guest between a tlbie and a - * ptesync. - */ - asm volatile("eieio; tlbsync; ptesync"); - } + dpdes = mfspr(SPRN_DPDES); + if (dpdes) + vcpu->arch.doorbell_request = 1; - /* - * cp_abort is required if the processor supports local copy-paste - * to clear the copy buffer that was under control of the guest. - */ - if (cpu_has_feature(CPU_FTR_ARCH_31)) - asm volatile(PPC_CP_ABORT); - - vc->dpdes = mfspr(SPRN_DPDES); vc->vtb = mfspr(SPRN_VTB); - mtspr(SPRN_DPDES, 0); - if (vc->pcr) - mtspr(SPRN_PCR, PCR_MASK); + + dec = mfspr(SPRN_DEC); + if (!(lpcr & LPCR_LD)) /* Sign extend if not using large decrementer */ + dec = (s32) dec; + *tb = mftb(); + vcpu->arch.dec_expires = dec + *tb; if (vc->tb_offset_applied) { - u64 new_tb = mftb() - vc->tb_offset_applied; + u64 new_tb = *tb - vc->tb_offset_applied; mtspr(SPRN_TBU40, new_tb); - tb = mftb(); - if ((tb & 0xffffff) < (new_tb & 0xffffff)) - mtspr(SPRN_TBU40, new_tb + 0x1000000); + if ((mftb() & 0xffffff) < (new_tb & 0xffffff)) { + new_tb += 0x1000000; + mtspr(SPRN_TBU40, new_tb); + } + *tb = new_tb; vc->tb_offset_applied = 0; } - mtspr(SPRN_HDEC, 0x7fffffff); - save_clear_guest_mmu(kvm, vcpu); switch_mmu_to_host(kvm, host_pidr); - local_paca->kvm_hstate.in_guest = KVM_GUEST_MODE_NONE; /* - * If we are in real mode, only switch MMU on after the MMU is - * switched to host, to avoid the P9_RADIX_PREFETCH_BUG. + * Enable MSR here in order to have facilities enabled to save + * guest registers. This enables MMU (if we were in realmode), so + * only switch MMU on after the MMU is switched to host, to avoid + * the P9_RADIX_PREFETCH_BUG or hash guest context. */ if (IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM) && - vcpu->arch.shregs.msr & MSR_TS_MASK) + vcpu->arch.shregs.msr & MSR_TS_MASK) msr |= MSR_TS_S; - __mtmsrd(msr, 0); + store_vcpu_state(vcpu); + + mtspr(SPRN_PURR, local_paca->kvm_hstate.host_purr); + mtspr(SPRN_SPURR, local_paca->kvm_hstate.host_spurr); + + if (cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST)) { + /* Preserve PSSCR[FAKE_SUSPEND] until we've called kvmppc_save_tm_hv */ + mtspr(SPRN_PSSCR, host_hpsscr | + (local_paca->kvm_hstate.fake_suspend << PSSCR_FAKE_SUSPEND_LG)); + } + + mtspr(SPRN_HFSCR, host_hfscr); + if (vcpu->arch.ciabr != host_ciabr) + mtspr(SPRN_CIABR, host_ciabr); + + if (dawr_enabled()) { + if (vcpu->arch.dawr0 != host_dawr0) + mtspr(SPRN_DAWR0, host_dawr0); + if (vcpu->arch.dawrx0 != host_dawrx0) + mtspr(SPRN_DAWRX0, host_dawrx0); + if (cpu_has_feature(CPU_FTR_DAWR1)) { + if (vcpu->arch.dawr1 != host_dawr1) + mtspr(SPRN_DAWR1, host_dawr1); + if (vcpu->arch.dawrx1 != host_dawrx1) + mtspr(SPRN_DAWRX1, host_dawrx1); + } + } + + if (dpdes) + mtspr(SPRN_DPDES, 0); + if (vc->pcr) + mtspr(SPRN_PCR, PCR_MASK); + + /* HDEC must be at least as large as DEC, so decrementer_max fits */ + mtspr(SPRN_HDEC, decrementer_max); + + timer_rearm_host_dec(*tb); + + restore_p9_host_os_sprs(vcpu, &host_os_sprs); + + barrier(); /* Close in_guest critical section */ + WRITE_ONCE(local_paca->kvm_hstate.in_guest, KVM_GUEST_MODE_NONE); + /* Interrupts are recoverable at this point */ + + /* + * cp_abort is required if the processor supports local copy-paste + * to clear the copy buffer that was under control of the guest. + */ + if (cpu_has_feature(CPU_FTR_ARCH_31)) + asm volatile(PPC_CP_ABORT); + +out: end_timing(vcpu); return trap; diff --git a/arch/powerpc/kvm/book3s_hv_ras.c b/arch/powerpc/kvm/book3s_hv_ras.c index d4bca93b79f6..ccfd96965630 100644 --- a/arch/powerpc/kvm/book3s_hv_ras.c +++ b/arch/powerpc/kvm/book3s_hv_ras.c @@ -136,6 +136,60 @@ void kvmppc_realmode_machine_check(struct kvm_vcpu *vcpu) vcpu->arch.mce_evt = mce_evt; } + +long kvmppc_p9_realmode_hmi_handler(struct kvm_vcpu *vcpu) +{ + struct kvmppc_vcore *vc = vcpu->arch.vcore; + long ret = 0; + + /* + * Unapply and clear the offset first. That way, if the TB was not + * resynced then it will remain in host-offset, and if it was resynced + * then it is brought into host-offset. Then the tb offset is + * re-applied before continuing with the KVM exit. + * + * This way, we don't need to actually know whether not OPAL resynced + * the timebase or do any of the complicated dance that the P7/8 + * path requires. + */ + if (vc->tb_offset_applied) { + u64 new_tb = mftb() - vc->tb_offset_applied; + mtspr(SPRN_TBU40, new_tb); + if ((mftb() & 0xffffff) < (new_tb & 0xffffff)) { + new_tb += 0x1000000; + mtspr(SPRN_TBU40, new_tb); + } + vc->tb_offset_applied = 0; + } + + local_paca->hmi_irqs++; + + if (hmi_handle_debugtrig(NULL) >= 0) { + ret = 1; + goto out; + } + + if (ppc_md.hmi_exception_early) + ppc_md.hmi_exception_early(NULL); + +out: + if (vc->tb_offset) { + u64 new_tb = mftb() + vc->tb_offset; + mtspr(SPRN_TBU40, new_tb); + if ((mftb() & 0xffffff) < (new_tb & 0xffffff)) { + new_tb += 0x1000000; + mtspr(SPRN_TBU40, new_tb); + } + vc->tb_offset_applied = vc->tb_offset; + } + + return ret; +} + +/* + * The following subcore HMI handling is all only for pre-POWER9 CPUs. + */ + /* Check if dynamic split is in force and return subcore size accordingly. */ static inline int kvmppc_cur_subcore_size(void) { diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index 2c1f3c6e72d1..2257fb18cb72 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c @@ -55,12 +55,6 @@ static int global_invalidates(struct kvm *kvm) smp_wmb(); cpumask_setall(&kvm->arch.need_tlb_flush); cpu = local_paca->kvm_hstate.kvm_vcore->pcpu; - /* - * On POWER9, threads are independent but the TLB is shared, - * so use the bit for the first thread to represent the core. - */ - if (cpu_has_feature(CPU_FTR_ARCH_300)) - cpu = cpu_first_tlb_thread_sibling(cpu); cpumask_clear_cpu(cpu, &kvm->arch.need_tlb_flush); } diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 32a4b4d412b9..d185dee26026 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -778,17 +778,14 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) /* Restore AMR and UAMOR, set AMOR to all 1s */ ld r5,VCPU_AMR(r4) ld r6,VCPU_UAMOR(r4) - li r7,-1 mtspr SPRN_AMR,r5 mtspr SPRN_UAMOR,r6 - mtspr SPRN_AMOR,r7 - /* Restore state of CTRL run bit; assume 1 on entry */ + /* Restore state of CTRL run bit; the host currently has it set to 1 */ lwz r5,VCPU_CTRL(r4) andi. r5,r5,1 bne 4f - mfspr r6,SPRN_CTRLF - clrrdi r6,r6,1 + li r6,0 mtspr SPRN_CTRLT,r6 4: /* Secondary threads wait for primary to have done partition switch */ @@ -817,10 +814,6 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) * Set the decrementer to the guest decrementer. */ ld r8,VCPU_DEC_EXPIRES(r4) - /* r8 is a host timebase value here, convert to guest TB */ - ld r5,HSTATE_KVM_VCORE(r13) - ld r6,VCORE_TB_OFFSET_APPL(r5) - add r8,r8,r6 mftb r7 subf r3,r7,r8 mtspr SPRN_DEC,r3 @@ -1195,9 +1188,6 @@ guest_bypass: mftb r6 extsw r5,r5 16: add r5,r5,r6 - /* r5 is a guest timebase value here, convert to host TB */ - ld r4,VCORE_TB_OFFSET_APPL(r3) - subf r5,r4,r5 std r5,VCPU_DEC_EXPIRES(r9) /* Increment exit count, poke other threads to exit */ @@ -1211,12 +1201,12 @@ guest_bypass: stw r0, VCPU_CPU(r9) stw r0, VCPU_THREAD_CPU(r9) - /* Save guest CTRL register, set runlatch to 1 */ + /* Save guest CTRL register, set runlatch to 1 if it was clear */ mfspr r6,SPRN_CTRLF stw r6,VCPU_CTRL(r9) andi. r0,r6,1 bne 4f - ori r6,r6,1 + li r6,1 mtspr SPRN_CTRLT,r6 4: /* @@ -2163,9 +2153,6 @@ END_FTR_SECTION_IFCLR(CPU_FTR_TM) /* save expiry time of guest decrementer */ add r3, r3, r5 ld r4, HSTATE_KVM_VCPU(r13) - ld r5, HSTATE_KVM_VCORE(r13) - ld r6, VCORE_TB_OFFSET_APPL(r5) - subf r3, r6, r3 /* convert to host TB value */ std r3, VCPU_DEC_EXPIRES(r4) #ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING @@ -2186,8 +2173,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_TM) * Also clear the runlatch bit before napping. */ kvm_do_nap: - mfspr r0, SPRN_CTRLF - clrrdi r0, r0, 1 + li r0,0 mtspr SPRN_CTRLT, r0 li r0,1 @@ -2206,8 +2192,7 @@ kvm_nap_sequence: /* desired LPCR value in r5 */ bl isa206_idle_insn_mayloss - mfspr r0, SPRN_CTRLF - ori r0, r0, 1 + li r0,1 mtspr SPRN_CTRLT, r0 mtspr SPRN_SRR1, r3 @@ -2264,9 +2249,6 @@ END_FTR_SECTION_IFCLR(CPU_FTR_TM) /* Restore guest decrementer */ ld r3, VCPU_DEC_EXPIRES(r4) - ld r5, HSTATE_KVM_VCORE(r13) - ld r6, VCORE_TB_OFFSET_APPL(r5) - add r3, r3, r6 /* convert host TB to guest TB value */ mftb r7 subf r3, r7, r3 mtspr SPRN_DEC, r3 @@ -2711,8 +2693,7 @@ kvmppc_bad_host_intr: std r0, GPR0(r1) std r9, GPR1(r1) std r2, GPR2(r1) - SAVE_4GPRS(3, r1) - SAVE_2GPRS(7, r1) + SAVE_GPRS(3, 8, r1) srdi r0, r12, 32 clrldi r12, r12, 32 std r0, _CCR(r1) @@ -2735,7 +2716,7 @@ kvmppc_bad_host_intr: ld r9, HSTATE_SCRATCH2(r13) ld r12, HSTATE_SCRATCH0(r13) GET_SCRATCH0(r0) - SAVE_4GPRS(9, r1) + SAVE_GPRS(9, 12, r1) std r0, GPR13(r1) SAVE_NVGPRS(r1) ld r5, HSTATE_CFAR(r13) @@ -2778,10 +2759,11 @@ kvmppc_msr_interrupt: blr /* + * void kvmhv_load_guest_pmu(struct kvm_vcpu *vcpu) + * * Load up guest PMU state. R3 points to the vcpu struct. */ -_GLOBAL(kvmhv_load_guest_pmu) -EXPORT_SYMBOL_GPL(kvmhv_load_guest_pmu) +kvmhv_load_guest_pmu: mr r4, r3 mflr r0 li r3, 1 @@ -2816,26 +2798,16 @@ END_FTR_SECTION_IFSET(CPU_FTR_PMAO_BUG) mtspr SPRN_SIAR, r7 mtspr SPRN_SDAR, r8 BEGIN_FTR_SECTION - ld r5, VCPU_MMCR + 24(r4) - ld r6, VCPU_SIER + 8(r4) - ld r7, VCPU_SIER + 16(r4) - mtspr SPRN_MMCR3, r5 - mtspr SPRN_SIER2, r6 - mtspr SPRN_SIER3, r7 -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_31) -BEGIN_FTR_SECTION ld r5, VCPU_MMCR + 16(r4) ld r6, VCPU_SIER(r4) mtspr SPRN_MMCR2, r5 mtspr SPRN_SIER, r6 -BEGIN_FTR_SECTION_NESTED(96) lwz r7, VCPU_PMC + 24(r4) lwz r8, VCPU_PMC + 28(r4) ld r9, VCPU_MMCRS(r4) mtspr SPRN_SPMC1, r7 mtspr SPRN_SPMC2, r8 mtspr SPRN_MMCRS, r9 -END_FTR_SECTION_NESTED(CPU_FTR_ARCH_300, 0, 96) END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) mtspr SPRN_MMCR0, r3 isync @@ -2843,10 +2815,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) blr /* + * void kvmhv_load_host_pmu(void) + * * Reload host PMU state saved in the PACA by kvmhv_save_host_pmu. */ -_GLOBAL(kvmhv_load_host_pmu) -EXPORT_SYMBOL_GPL(kvmhv_load_host_pmu) +kvmhv_load_host_pmu: mflr r0 lbz r4, PACA_PMCINUSE(r13) /* is the host using the PMU? */ cmpwi r4, 0 @@ -2884,25 +2857,18 @@ BEGIN_FTR_SECTION mtspr SPRN_MMCR2, r8 mtspr SPRN_SIER, r9 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) -BEGIN_FTR_SECTION - ld r5, HSTATE_MMCR3(r13) - ld r6, HSTATE_SIER2(r13) - ld r7, HSTATE_SIER3(r13) - mtspr SPRN_MMCR3, r5 - mtspr SPRN_SIER2, r6 - mtspr SPRN_SIER3, r7 -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_31) mtspr SPRN_MMCR0, r3 isync mtlr r0 23: blr /* + * void kvmhv_save_guest_pmu(struct kvm_vcpu *vcpu, bool pmu_in_use) + * * Save guest PMU state into the vcpu struct. * r3 = vcpu, r4 = full save flag (PMU in use flag set in VPA) */ -_GLOBAL(kvmhv_save_guest_pmu) -EXPORT_SYMBOL_GPL(kvmhv_save_guest_pmu) +kvmhv_save_guest_pmu: mr r9, r3 mr r8, r4 BEGIN_FTR_SECTION @@ -2951,14 +2917,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) BEGIN_FTR_SECTION std r10, VCPU_MMCR + 16(r9) END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) -BEGIN_FTR_SECTION - mfspr r5, SPRN_MMCR3 - mfspr r6, SPRN_SIER2 - mfspr r7, SPRN_SIER3 - std r5, VCPU_MMCR + 24(r9) - std r6, VCPU_SIER + 8(r9) - std r7, VCPU_SIER + 16(r9) -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_31) std r7, VCPU_SIAR(r9) std r8, VCPU_SDAR(r9) mfspr r3, SPRN_PMC1 @@ -2976,7 +2934,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_31) BEGIN_FTR_SECTION mfspr r5, SPRN_SIER std r5, VCPU_SIER(r9) -BEGIN_FTR_SECTION_NESTED(96) mfspr r6, SPRN_SPMC1 mfspr r7, SPRN_SPMC2 mfspr r8, SPRN_MMCRS @@ -2985,7 +2942,6 @@ BEGIN_FTR_SECTION_NESTED(96) std r8, VCPU_MMCRS(r9) lis r4, 0x8000 mtspr SPRN_MMCRS, r4 -END_FTR_SECTION_NESTED(CPU_FTR_ARCH_300, 0, 96) END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) 22: blr diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile index 9e5d0f413b71..5d1881d2e39a 100644 --- a/arch/powerpc/lib/Makefile +++ b/arch/powerpc/lib/Makefile @@ -19,7 +19,12 @@ CFLAGS_code-patching.o += -DDISABLE_BRANCH_PROFILING CFLAGS_feature-fixups.o += -DDISABLE_BRANCH_PROFILING endif -obj-y += alloc.o code-patching.o feature-fixups.o pmem.o test_code-patching.o +CFLAGS_code-patching.o += $(DISABLE_LATENT_ENTROPY_PLUGIN) +CFLAGS_feature-fixups.o += $(DISABLE_LATENT_ENTROPY_PLUGIN) + +obj-y += alloc.o code-patching.o feature-fixups.o pmem.o + +obj-$(CONFIG_CODE_PATCHING_SELFTEST) += test-code-patching.o ifndef CONFIG_KASAN obj-y += string.o memcmp_$(BITS).o diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c index c5ed98823835..906d43463366 100644 --- a/arch/powerpc/lib/code-patching.c +++ b/arch/powerpc/lib/code-patching.c @@ -3,22 +3,18 @@ * Copyright 2008 Michael Ellerman, IBM Corporation. */ -#include <linux/kernel.h> #include <linux/kprobes.h> #include <linux/vmalloc.h> #include <linux/init.h> -#include <linux/mm.h> #include <linux/cpuhotplug.h> -#include <linux/slab.h> #include <linux/uaccess.h> #include <asm/tlbflush.h> #include <asm/page.h> #include <asm/code-patching.h> -#include <asm/setup.h> #include <asm/inst.h> -static int __patch_instruction(u32 *exec_addr, struct ppc_inst instr, u32 *patch_addr) +static int __patch_instruction(u32 *exec_addr, ppc_inst_t instr, u32 *patch_addr) { if (!ppc_inst_prefixed(instr)) { u32 val = ppc_inst_val(instr); @@ -39,7 +35,7 @@ failed: return -EFAULT; } -int raw_patch_instruction(u32 *addr, struct ppc_inst instr) +int raw_patch_instruction(u32 *addr, ppc_inst_t instr) { return __patch_instruction(addr, instr, addr); } @@ -86,23 +82,16 @@ void __init poking_init(void) static int map_patch_area(void *addr, unsigned long text_poke_addr) { unsigned long pfn; - int err; if (is_vmalloc_or_module_addr(addr)) pfn = vmalloc_to_pfn(addr); else pfn = __pa_symbol(addr) >> PAGE_SHIFT; - err = map_kernel_page(text_poke_addr, (pfn << PAGE_SHIFT), PAGE_KERNEL); - - pr_devel("Mapped addr %lx with pfn %lx:%d\n", text_poke_addr, pfn, err); - if (err) - return -1; - - return 0; + return map_kernel_page(text_poke_addr, (pfn << PAGE_SHIFT), PAGE_KERNEL); } -static inline int unmap_patch_area(unsigned long addr) +static void unmap_patch_area(unsigned long addr) { pte_t *ptep; pmd_t *pmdp; @@ -111,43 +100,56 @@ static inline int unmap_patch_area(unsigned long addr) pgd_t *pgdp; pgdp = pgd_offset_k(addr); - if (unlikely(!pgdp)) - return -EINVAL; + if (WARN_ON(pgd_none(*pgdp))) + return; p4dp = p4d_offset(pgdp, addr); - if (unlikely(!p4dp)) - return -EINVAL; + if (WARN_ON(p4d_none(*p4dp))) + return; pudp = pud_offset(p4dp, addr); - if (unlikely(!pudp)) - return -EINVAL; + if (WARN_ON(pud_none(*pudp))) + return; pmdp = pmd_offset(pudp, addr); - if (unlikely(!pmdp)) - return -EINVAL; + if (WARN_ON(pmd_none(*pmdp))) + return; ptep = pte_offset_kernel(pmdp, addr); - if (unlikely(!ptep)) - return -EINVAL; - - pr_devel("clearing mm %p, pte %p, addr %lx\n", &init_mm, ptep, addr); + if (WARN_ON(pte_none(*ptep))) + return; /* * In hash, pte_clear flushes the tlb, in radix, we have to */ pte_clear(&init_mm, addr, ptep); flush_tlb_kernel_range(addr, addr + PAGE_SIZE); +} - return 0; +static int __do_patch_instruction(u32 *addr, ppc_inst_t instr) +{ + int err; + u32 *patch_addr; + unsigned long text_poke_addr; + + text_poke_addr = (unsigned long)__this_cpu_read(text_poke_area)->addr; + patch_addr = (u32 *)(text_poke_addr + offset_in_page(addr)); + + err = map_patch_area(addr, text_poke_addr); + if (err) + return err; + + err = __patch_instruction(addr, instr, patch_addr); + + unmap_patch_area(text_poke_addr); + + return err; } -static int do_patch_instruction(u32 *addr, struct ppc_inst instr) +static int do_patch_instruction(u32 *addr, ppc_inst_t instr) { int err; - u32 *patch_addr = NULL; unsigned long flags; - unsigned long text_poke_addr; - unsigned long kaddr = (unsigned long)addr; /* * During early early boot patch_instruction is called @@ -158,51 +160,37 @@ static int do_patch_instruction(u32 *addr, struct ppc_inst instr) return raw_patch_instruction(addr, instr); local_irq_save(flags); - - text_poke_addr = (unsigned long)__this_cpu_read(text_poke_area)->addr; - if (map_patch_area(addr, text_poke_addr)) { - err = -1; - goto out; - } - - patch_addr = (u32 *)(text_poke_addr + (kaddr & ~PAGE_MASK)); - - __patch_instruction(addr, instr, patch_addr); - - err = unmap_patch_area(text_poke_addr); - if (err) - pr_warn("failed to unmap %lx\n", text_poke_addr); - -out: + err = __do_patch_instruction(addr, instr); local_irq_restore(flags); return err; } #else /* !CONFIG_STRICT_KERNEL_RWX */ -static int do_patch_instruction(u32 *addr, struct ppc_inst instr) +static int do_patch_instruction(u32 *addr, ppc_inst_t instr) { return raw_patch_instruction(addr, instr); } #endif /* CONFIG_STRICT_KERNEL_RWX */ -int patch_instruction(u32 *addr, struct ppc_inst instr) +int patch_instruction(u32 *addr, ppc_inst_t instr) { /* Make sure we aren't patching a freed init section */ - if (init_mem_is_free && init_section_contains(addr, 4)) { - pr_debug("Skipping init section patching addr: 0x%px\n", addr); + if (system_state >= SYSTEM_FREEING_INITMEM && init_section_contains(addr, 4)) return 0; - } + return do_patch_instruction(addr, instr); } NOKPROBE_SYMBOL(patch_instruction); int patch_branch(u32 *addr, unsigned long target, int flags) { - struct ppc_inst instr; + ppc_inst_t instr; + + if (create_branch(&instr, addr, target, flags)) + return -ERANGE; - create_branch(&instr, addr, target, flags); return patch_instruction(addr, instr); } @@ -237,7 +225,7 @@ bool is_offset_in_cond_branch_range(long offset) * Helper to check if a given instruction is a conditional branch * Derived from the conditional checks in analyse_instr() */ -bool is_conditional_branch(struct ppc_inst instr) +bool is_conditional_branch(ppc_inst_t instr) { unsigned int opcode = ppc_inst_primary_opcode(instr); @@ -255,7 +243,7 @@ bool is_conditional_branch(struct ppc_inst instr) } NOKPROBE_SYMBOL(is_conditional_branch); -int create_branch(struct ppc_inst *instr, const u32 *addr, +int create_branch(ppc_inst_t *instr, const u32 *addr, unsigned long target, int flags) { long offset; @@ -275,7 +263,7 @@ int create_branch(struct ppc_inst *instr, const u32 *addr, return 0; } -int create_cond_branch(struct ppc_inst *instr, const u32 *addr, +int create_cond_branch(ppc_inst_t *instr, const u32 *addr, unsigned long target, int flags) { long offset; @@ -294,22 +282,7 @@ int create_cond_branch(struct ppc_inst *instr, const u32 *addr, return 0; } -static unsigned int branch_opcode(struct ppc_inst instr) -{ - return ppc_inst_primary_opcode(instr) & 0x3F; -} - -static int instr_is_branch_iform(struct ppc_inst instr) -{ - return branch_opcode(instr) == 18; -} - -static int instr_is_branch_bform(struct ppc_inst instr) -{ - return branch_opcode(instr) == 16; -} - -int instr_is_relative_branch(struct ppc_inst instr) +int instr_is_relative_branch(ppc_inst_t instr) { if (ppc_inst_val(instr) & BRANCH_ABSOLUTE) return 0; @@ -317,7 +290,7 @@ int instr_is_relative_branch(struct ppc_inst instr) return instr_is_branch_iform(instr) || instr_is_branch_bform(instr); } -int instr_is_relative_link_branch(struct ppc_inst instr) +int instr_is_relative_link_branch(ppc_inst_t instr) { return instr_is_relative_branch(instr) && (ppc_inst_val(instr) & BRANCH_SET_LINK); } @@ -364,7 +337,7 @@ unsigned long branch_target(const u32 *instr) return 0; } -int translate_branch(struct ppc_inst *instr, const u32 *dest, const u32 *src) +int translate_branch(ppc_inst_t *instr, const u32 *dest, const u32 *src) { unsigned long target; target = branch_target(src); @@ -378,375 +351,3 @@ int translate_branch(struct ppc_inst *instr, const u32 *dest, const u32 *src) return 1; } - -#ifdef CONFIG_PPC_BOOK3E_64 -void __patch_exception(int exc, unsigned long addr) -{ - extern unsigned int interrupt_base_book3e; - unsigned int *ibase = &interrupt_base_book3e; - - /* Our exceptions vectors start with a NOP and -then- a branch - * to deal with single stepping from userspace which stops on - * the second instruction. Thus we need to patch the second - * instruction of the exception, not the first one - */ - - patch_branch(ibase + (exc / 4) + 1, addr, 0); -} -#endif - -#ifdef CONFIG_CODE_PATCHING_SELFTEST - -static int instr_is_branch_to_addr(const u32 *instr, unsigned long addr) -{ - if (instr_is_branch_iform(ppc_inst_read(instr)) || - instr_is_branch_bform(ppc_inst_read(instr))) - return branch_target(instr) == addr; - - return 0; -} - -static void __init test_trampoline(void) -{ - asm ("nop;\n"); -} - -#define check(x) \ - if (!(x)) printk("code-patching: test failed at line %d\n", __LINE__); - -static void __init test_branch_iform(void) -{ - int err; - struct ppc_inst instr; - u32 tmp[2]; - u32 *iptr = tmp; - unsigned long addr = (unsigned long)tmp; - - /* The simplest case, branch to self, no flags */ - check(instr_is_branch_iform(ppc_inst(0x48000000))); - /* All bits of target set, and flags */ - check(instr_is_branch_iform(ppc_inst(0x4bffffff))); - /* High bit of opcode set, which is wrong */ - check(!instr_is_branch_iform(ppc_inst(0xcbffffff))); - /* Middle bits of opcode set, which is wrong */ - check(!instr_is_branch_iform(ppc_inst(0x7bffffff))); - - /* Simplest case, branch to self with link */ - check(instr_is_branch_iform(ppc_inst(0x48000001))); - /* All bits of targets set */ - check(instr_is_branch_iform(ppc_inst(0x4bfffffd))); - /* Some bits of targets set */ - check(instr_is_branch_iform(ppc_inst(0x4bff00fd))); - /* Must be a valid branch to start with */ - check(!instr_is_branch_iform(ppc_inst(0x7bfffffd))); - - /* Absolute branch to 0x100 */ - patch_instruction(iptr, ppc_inst(0x48000103)); - check(instr_is_branch_to_addr(iptr, 0x100)); - /* Absolute branch to 0x420fc */ - patch_instruction(iptr, ppc_inst(0x480420ff)); - check(instr_is_branch_to_addr(iptr, 0x420fc)); - /* Maximum positive relative branch, + 20MB - 4B */ - patch_instruction(iptr, ppc_inst(0x49fffffc)); - check(instr_is_branch_to_addr(iptr, addr + 0x1FFFFFC)); - /* Smallest negative relative branch, - 4B */ - patch_instruction(iptr, ppc_inst(0x4bfffffc)); - check(instr_is_branch_to_addr(iptr, addr - 4)); - /* Largest negative relative branch, - 32 MB */ - patch_instruction(iptr, ppc_inst(0x4a000000)); - check(instr_is_branch_to_addr(iptr, addr - 0x2000000)); - - /* Branch to self, with link */ - err = create_branch(&instr, iptr, addr, BRANCH_SET_LINK); - patch_instruction(iptr, instr); - check(instr_is_branch_to_addr(iptr, addr)); - - /* Branch to self - 0x100, with link */ - err = create_branch(&instr, iptr, addr - 0x100, BRANCH_SET_LINK); - patch_instruction(iptr, instr); - check(instr_is_branch_to_addr(iptr, addr - 0x100)); - - /* Branch to self + 0x100, no link */ - err = create_branch(&instr, iptr, addr + 0x100, 0); - patch_instruction(iptr, instr); - check(instr_is_branch_to_addr(iptr, addr + 0x100)); - - /* Maximum relative negative offset, - 32 MB */ - err = create_branch(&instr, iptr, addr - 0x2000000, BRANCH_SET_LINK); - patch_instruction(iptr, instr); - check(instr_is_branch_to_addr(iptr, addr - 0x2000000)); - - /* Out of range relative negative offset, - 32 MB + 4*/ - err = create_branch(&instr, iptr, addr - 0x2000004, BRANCH_SET_LINK); - check(err); - - /* Out of range relative positive offset, + 32 MB */ - err = create_branch(&instr, iptr, addr + 0x2000000, BRANCH_SET_LINK); - check(err); - - /* Unaligned target */ - err = create_branch(&instr, iptr, addr + 3, BRANCH_SET_LINK); - check(err); - - /* Check flags are masked correctly */ - err = create_branch(&instr, iptr, addr, 0xFFFFFFFC); - patch_instruction(iptr, instr); - check(instr_is_branch_to_addr(iptr, addr)); - check(ppc_inst_equal(instr, ppc_inst(0x48000000))); -} - -static void __init test_create_function_call(void) -{ - u32 *iptr; - unsigned long dest; - struct ppc_inst instr; - - /* Check we can create a function call */ - iptr = (u32 *)ppc_function_entry(test_trampoline); - dest = ppc_function_entry(test_create_function_call); - create_branch(&instr, iptr, dest, BRANCH_SET_LINK); - patch_instruction(iptr, instr); - check(instr_is_branch_to_addr(iptr, dest)); -} - -static void __init test_branch_bform(void) -{ - int err; - unsigned long addr; - struct ppc_inst instr; - u32 tmp[2]; - u32 *iptr = tmp; - unsigned int flags; - - addr = (unsigned long)iptr; - - /* The simplest case, branch to self, no flags */ - check(instr_is_branch_bform(ppc_inst(0x40000000))); - /* All bits of target set, and flags */ - check(instr_is_branch_bform(ppc_inst(0x43ffffff))); - /* High bit of opcode set, which is wrong */ - check(!instr_is_branch_bform(ppc_inst(0xc3ffffff))); - /* Middle bits of opcode set, which is wrong */ - check(!instr_is_branch_bform(ppc_inst(0x7bffffff))); - - /* Absolute conditional branch to 0x100 */ - patch_instruction(iptr, ppc_inst(0x43ff0103)); - check(instr_is_branch_to_addr(iptr, 0x100)); - /* Absolute conditional branch to 0x20fc */ - patch_instruction(iptr, ppc_inst(0x43ff20ff)); - check(instr_is_branch_to_addr(iptr, 0x20fc)); - /* Maximum positive relative conditional branch, + 32 KB - 4B */ - patch_instruction(iptr, ppc_inst(0x43ff7ffc)); - check(instr_is_branch_to_addr(iptr, addr + 0x7FFC)); - /* Smallest negative relative conditional branch, - 4B */ - patch_instruction(iptr, ppc_inst(0x43fffffc)); - check(instr_is_branch_to_addr(iptr, addr - 4)); - /* Largest negative relative conditional branch, - 32 KB */ - patch_instruction(iptr, ppc_inst(0x43ff8000)); - check(instr_is_branch_to_addr(iptr, addr - 0x8000)); - - /* All condition code bits set & link */ - flags = 0x3ff000 | BRANCH_SET_LINK; - - /* Branch to self */ - err = create_cond_branch(&instr, iptr, addr, flags); - patch_instruction(iptr, instr); - check(instr_is_branch_to_addr(iptr, addr)); - - /* Branch to self - 0x100 */ - err = create_cond_branch(&instr, iptr, addr - 0x100, flags); - patch_instruction(iptr, instr); - check(instr_is_branch_to_addr(iptr, addr - 0x100)); - - /* Branch to self + 0x100 */ - err = create_cond_branch(&instr, iptr, addr + 0x100, flags); - patch_instruction(iptr, instr); - check(instr_is_branch_to_addr(iptr, addr + 0x100)); - - /* Maximum relative negative offset, - 32 KB */ - err = create_cond_branch(&instr, iptr, addr - 0x8000, flags); - patch_instruction(iptr, instr); - check(instr_is_branch_to_addr(iptr, addr - 0x8000)); - - /* Out of range relative negative offset, - 32 KB + 4*/ - err = create_cond_branch(&instr, iptr, addr - 0x8004, flags); - check(err); - - /* Out of range relative positive offset, + 32 KB */ - err = create_cond_branch(&instr, iptr, addr + 0x8000, flags); - check(err); - - /* Unaligned target */ - err = create_cond_branch(&instr, iptr, addr + 3, flags); - check(err); - - /* Check flags are masked correctly */ - err = create_cond_branch(&instr, iptr, addr, 0xFFFFFFFC); - patch_instruction(iptr, instr); - check(instr_is_branch_to_addr(iptr, addr)); - check(ppc_inst_equal(instr, ppc_inst(0x43FF0000))); -} - -static void __init test_translate_branch(void) -{ - unsigned long addr; - void *p, *q; - struct ppc_inst instr; - void *buf; - - buf = vmalloc(PAGE_ALIGN(0x2000000 + 1)); - check(buf); - if (!buf) - return; - - /* Simple case, branch to self moved a little */ - p = buf; - addr = (unsigned long)p; - patch_branch(p, addr, 0); - check(instr_is_branch_to_addr(p, addr)); - q = p + 4; - translate_branch(&instr, q, p); - patch_instruction(q, instr); - check(instr_is_branch_to_addr(q, addr)); - - /* Maximum negative case, move b . to addr + 32 MB */ - p = buf; - addr = (unsigned long)p; - patch_branch(p, addr, 0); - q = buf + 0x2000000; - translate_branch(&instr, q, p); - patch_instruction(q, instr); - check(instr_is_branch_to_addr(p, addr)); - check(instr_is_branch_to_addr(q, addr)); - check(ppc_inst_equal(ppc_inst_read(q), ppc_inst(0x4a000000))); - - /* Maximum positive case, move x to x - 32 MB + 4 */ - p = buf + 0x2000000; - addr = (unsigned long)p; - patch_branch(p, addr, 0); - q = buf + 4; - translate_branch(&instr, q, p); - patch_instruction(q, instr); - check(instr_is_branch_to_addr(p, addr)); - check(instr_is_branch_to_addr(q, addr)); - check(ppc_inst_equal(ppc_inst_read(q), ppc_inst(0x49fffffc))); - - /* Jump to x + 16 MB moved to x + 20 MB */ - p = buf; - addr = 0x1000000 + (unsigned long)buf; - patch_branch(p, addr, BRANCH_SET_LINK); - q = buf + 0x1400000; - translate_branch(&instr, q, p); - patch_instruction(q, instr); - check(instr_is_branch_to_addr(p, addr)); - check(instr_is_branch_to_addr(q, addr)); - - /* Jump to x + 16 MB moved to x - 16 MB + 4 */ - p = buf + 0x1000000; - addr = 0x2000000 + (unsigned long)buf; - patch_branch(p, addr, 0); - q = buf + 4; - translate_branch(&instr, q, p); - patch_instruction(q, instr); - check(instr_is_branch_to_addr(p, addr)); - check(instr_is_branch_to_addr(q, addr)); - - - /* Conditional branch tests */ - - /* Simple case, branch to self moved a little */ - p = buf; - addr = (unsigned long)p; - create_cond_branch(&instr, p, addr, 0); - patch_instruction(p, instr); - check(instr_is_branch_to_addr(p, addr)); - q = buf + 4; - translate_branch(&instr, q, p); - patch_instruction(q, instr); - check(instr_is_branch_to_addr(q, addr)); - - /* Maximum negative case, move b . to addr + 32 KB */ - p = buf; - addr = (unsigned long)p; - create_cond_branch(&instr, p, addr, 0xFFFFFFFC); - patch_instruction(p, instr); - q = buf + 0x8000; - translate_branch(&instr, q, p); - patch_instruction(q, instr); - check(instr_is_branch_to_addr(p, addr)); - check(instr_is_branch_to_addr(q, addr)); - check(ppc_inst_equal(ppc_inst_read(q), ppc_inst(0x43ff8000))); - - /* Maximum positive case, move x to x - 32 KB + 4 */ - p = buf + 0x8000; - addr = (unsigned long)p; - create_cond_branch(&instr, p, addr, 0xFFFFFFFC); - patch_instruction(p, instr); - q = buf + 4; - translate_branch(&instr, q, p); - patch_instruction(q, instr); - check(instr_is_branch_to_addr(p, addr)); - check(instr_is_branch_to_addr(q, addr)); - check(ppc_inst_equal(ppc_inst_read(q), ppc_inst(0x43ff7ffc))); - - /* Jump to x + 12 KB moved to x + 20 KB */ - p = buf; - addr = 0x3000 + (unsigned long)buf; - create_cond_branch(&instr, p, addr, BRANCH_SET_LINK); - patch_instruction(p, instr); - q = buf + 0x5000; - translate_branch(&instr, q, p); - patch_instruction(q, instr); - check(instr_is_branch_to_addr(p, addr)); - check(instr_is_branch_to_addr(q, addr)); - - /* Jump to x + 8 KB moved to x - 8 KB + 4 */ - p = buf + 0x2000; - addr = 0x4000 + (unsigned long)buf; - create_cond_branch(&instr, p, addr, 0); - patch_instruction(p, instr); - q = buf + 4; - translate_branch(&instr, q, p); - patch_instruction(q, instr); - check(instr_is_branch_to_addr(p, addr)); - check(instr_is_branch_to_addr(q, addr)); - - /* Free the buffer we were using */ - vfree(buf); -} - -#ifdef CONFIG_PPC64 -static void __init test_prefixed_patching(void) -{ - extern unsigned int code_patching_test1[]; - extern unsigned int code_patching_test1_expected[]; - extern unsigned int end_code_patching_test1[]; - - __patch_instruction(code_patching_test1, - ppc_inst_prefix(OP_PREFIX << 26, 0x00000000), - code_patching_test1); - - check(!memcmp(code_patching_test1, - code_patching_test1_expected, - sizeof(unsigned int) * - (end_code_patching_test1 - code_patching_test1))); -} -#else -static inline void test_prefixed_patching(void) {} -#endif - -static int __init test_code_patching(void) -{ - printk(KERN_DEBUG "Running code patching self-tests ...\n"); - - test_branch_iform(); - test_branch_bform(); - test_create_function_call(); - test_translate_branch(); - test_prefixed_patching(); - - return 0; -} -late_initcall(test_code_patching); - -#endif /* CONFIG_CODE_PATCHING_SELFTEST */ diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c index c3e06922468b..343a78826035 100644 --- a/arch/powerpc/lib/feature-fixups.c +++ b/arch/powerpc/lib/feature-fixups.c @@ -47,7 +47,7 @@ static u32 *calc_addr(struct fixup_entry *fcur, long offset) static int patch_alt_instruction(u32 *src, u32 *dest, u32 *alt_start, u32 *alt_end) { int err; - struct ppc_inst instr; + ppc_inst_t instr; instr = ppc_inst_read(src); @@ -580,7 +580,7 @@ void do_barrier_nospec_fixups_range(bool enable, void *fixup_start, void *fixup_ printk(KERN_DEBUG "barrier-nospec: patched %d locations\n", i); } -static void patch_btb_flush_section(long *curr) +static void __init patch_btb_flush_section(long *curr) { unsigned int *start, *end; @@ -592,7 +592,7 @@ static void patch_btb_flush_section(long *curr) } } -void do_btb_flush_fixups(void) +void __init do_btb_flush_fixups(void) { long *start, *end; @@ -621,10 +621,10 @@ void do_lwsync_fixups(unsigned long value, void *fixup_start, void *fixup_end) } } -static void do_final_fixups(void) +static void __init do_final_fixups(void) { #if defined(CONFIG_PPC64) && defined(CONFIG_RELOCATABLE) - struct ppc_inst inst; + ppc_inst_t inst; u32 *src, *dest, *end; if (PHYSICAL_START == 0) @@ -715,12 +715,12 @@ late_initcall(check_features); /* This must be after the text it fixes up, vmlinux.lds.S enforces that atm */ static struct fixup_entry fixup; -static long calc_offset(struct fixup_entry *entry, unsigned int *p) +static long __init calc_offset(struct fixup_entry *entry, unsigned int *p) { return (unsigned long)p - (unsigned long)entry; } -static void test_basic_patching(void) +static void __init test_basic_patching(void) { extern unsigned int ftr_fixup_test1[]; extern unsigned int end_ftr_fixup_test1[]; @@ -751,7 +751,7 @@ static void test_basic_patching(void) check(memcmp(ftr_fixup_test1, ftr_fixup_test1_expected, size) == 0); } -static void test_alternative_patching(void) +static void __init test_alternative_patching(void) { extern unsigned int ftr_fixup_test2[]; extern unsigned int end_ftr_fixup_test2[]; @@ -784,7 +784,7 @@ static void test_alternative_patching(void) check(memcmp(ftr_fixup_test2, ftr_fixup_test2_expected, size) == 0); } -static void test_alternative_case_too_big(void) +static void __init test_alternative_case_too_big(void) { extern unsigned int ftr_fixup_test3[]; extern unsigned int end_ftr_fixup_test3[]; @@ -810,7 +810,7 @@ static void test_alternative_case_too_big(void) check(memcmp(ftr_fixup_test3, ftr_fixup_test3_orig, size) == 0); } -static void test_alternative_case_too_small(void) +static void __init test_alternative_case_too_small(void) { extern unsigned int ftr_fixup_test4[]; extern unsigned int end_ftr_fixup_test4[]; @@ -856,7 +856,7 @@ static void test_alternative_case_with_branch(void) check(memcmp(ftr_fixup_test5, ftr_fixup_test5_expected, size) == 0); } -static void test_alternative_case_with_external_branch(void) +static void __init test_alternative_case_with_external_branch(void) { extern unsigned int ftr_fixup_test6[]; extern unsigned int end_ftr_fixup_test6[]; @@ -866,7 +866,7 @@ static void test_alternative_case_with_external_branch(void) check(memcmp(ftr_fixup_test6, ftr_fixup_test6_expected, size) == 0); } -static void test_alternative_case_with_branch_to_end(void) +static void __init test_alternative_case_with_branch_to_end(void) { extern unsigned int ftr_fixup_test7[]; extern unsigned int end_ftr_fixup_test7[]; @@ -876,7 +876,7 @@ static void test_alternative_case_with_branch_to_end(void) check(memcmp(ftr_fixup_test7, ftr_fixup_test7_expected, size) == 0); } -static void test_cpu_macros(void) +static void __init test_cpu_macros(void) { extern u8 ftr_fixup_test_FTR_macros[]; extern u8 ftr_fixup_test_FTR_macros_expected[]; @@ -888,7 +888,7 @@ static void test_cpu_macros(void) ftr_fixup_test_FTR_macros_expected, size) == 0); } -static void test_fw_macros(void) +static void __init test_fw_macros(void) { #ifdef CONFIG_PPC64 extern u8 ftr_fixup_test_FW_FTR_macros[]; @@ -902,7 +902,7 @@ static void test_fw_macros(void) #endif } -static void test_lwsync_macros(void) +static void __init test_lwsync_macros(void) { extern u8 lwsync_fixup_test[]; extern u8 end_lwsync_fixup_test[]; diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index 86f49e3e7cf5..a94b0cd0bdc5 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -1354,7 +1354,7 @@ static nokprobe_inline int trap_compare(long v1, long v2) * otherwise. */ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, - struct ppc_inst instr) + ppc_inst_t instr) { #ifdef CONFIG_PPC64 unsigned int suffixopcode, prefixtype, prefix_r; @@ -3578,7 +3578,7 @@ NOKPROBE_SYMBOL(emulate_loadstore); * or -1 if the instruction is one that should not be stepped, * such as an rfid, or a mtmsrd that would clear MSR_RI. */ -int emulate_step(struct pt_regs *regs, struct ppc_inst instr) +int emulate_step(struct pt_regs *regs, ppc_inst_t instr) { struct instruction_op op; int r, err, type; diff --git a/arch/powerpc/lib/test-code-patching.c b/arch/powerpc/lib/test-code-patching.c new file mode 100644 index 000000000000..c44823292f73 --- /dev/null +++ b/arch/powerpc/lib/test-code-patching.c @@ -0,0 +1,362 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright 2008 Michael Ellerman, IBM Corporation. + */ + +#include <linux/vmalloc.h> +#include <linux/init.h> + +#include <asm/code-patching.h> + +static int __init instr_is_branch_to_addr(const u32 *instr, unsigned long addr) +{ + if (instr_is_branch_iform(ppc_inst_read(instr)) || + instr_is_branch_bform(ppc_inst_read(instr))) + return branch_target(instr) == addr; + + return 0; +} + +static void __init test_trampoline(void) +{ + asm ("nop;nop;\n"); +} + +#define check(x) do { \ + if (!(x)) \ + pr_err("code-patching: test failed at line %d\n", __LINE__); \ +} while (0) + +static void __init test_branch_iform(void) +{ + int err; + ppc_inst_t instr; + u32 tmp[2]; + u32 *iptr = tmp; + unsigned long addr = (unsigned long)tmp; + + /* The simplest case, branch to self, no flags */ + check(instr_is_branch_iform(ppc_inst(0x48000000))); + /* All bits of target set, and flags */ + check(instr_is_branch_iform(ppc_inst(0x4bffffff))); + /* High bit of opcode set, which is wrong */ + check(!instr_is_branch_iform(ppc_inst(0xcbffffff))); + /* Middle bits of opcode set, which is wrong */ + check(!instr_is_branch_iform(ppc_inst(0x7bffffff))); + + /* Simplest case, branch to self with link */ + check(instr_is_branch_iform(ppc_inst(0x48000001))); + /* All bits of targets set */ + check(instr_is_branch_iform(ppc_inst(0x4bfffffd))); + /* Some bits of targets set */ + check(instr_is_branch_iform(ppc_inst(0x4bff00fd))); + /* Must be a valid branch to start with */ + check(!instr_is_branch_iform(ppc_inst(0x7bfffffd))); + + /* Absolute branch to 0x100 */ + ppc_inst_write(iptr, ppc_inst(0x48000103)); + check(instr_is_branch_to_addr(iptr, 0x100)); + /* Absolute branch to 0x420fc */ + ppc_inst_write(iptr, ppc_inst(0x480420ff)); + check(instr_is_branch_to_addr(iptr, 0x420fc)); + /* Maximum positive relative branch, + 20MB - 4B */ + ppc_inst_write(iptr, ppc_inst(0x49fffffc)); + check(instr_is_branch_to_addr(iptr, addr + 0x1FFFFFC)); + /* Smallest negative relative branch, - 4B */ + ppc_inst_write(iptr, ppc_inst(0x4bfffffc)); + check(instr_is_branch_to_addr(iptr, addr - 4)); + /* Largest negative relative branch, - 32 MB */ + ppc_inst_write(iptr, ppc_inst(0x4a000000)); + check(instr_is_branch_to_addr(iptr, addr - 0x2000000)); + + /* Branch to self, with link */ + err = create_branch(&instr, iptr, addr, BRANCH_SET_LINK); + ppc_inst_write(iptr, instr); + check(instr_is_branch_to_addr(iptr, addr)); + + /* Branch to self - 0x100, with link */ + err = create_branch(&instr, iptr, addr - 0x100, BRANCH_SET_LINK); + ppc_inst_write(iptr, instr); + check(instr_is_branch_to_addr(iptr, addr - 0x100)); + + /* Branch to self + 0x100, no link */ + err = create_branch(&instr, iptr, addr + 0x100, 0); + ppc_inst_write(iptr, instr); + check(instr_is_branch_to_addr(iptr, addr + 0x100)); + + /* Maximum relative negative offset, - 32 MB */ + err = create_branch(&instr, iptr, addr - 0x2000000, BRANCH_SET_LINK); + ppc_inst_write(iptr, instr); + check(instr_is_branch_to_addr(iptr, addr - 0x2000000)); + + /* Out of range relative negative offset, - 32 MB + 4*/ + err = create_branch(&instr, iptr, addr - 0x2000004, BRANCH_SET_LINK); + check(err); + + /* Out of range relative positive offset, + 32 MB */ + err = create_branch(&instr, iptr, addr + 0x2000000, BRANCH_SET_LINK); + check(err); + + /* Unaligned target */ + err = create_branch(&instr, iptr, addr + 3, BRANCH_SET_LINK); + check(err); + + /* Check flags are masked correctly */ + err = create_branch(&instr, iptr, addr, 0xFFFFFFFC); + ppc_inst_write(iptr, instr); + check(instr_is_branch_to_addr(iptr, addr)); + check(ppc_inst_equal(instr, ppc_inst(0x48000000))); +} + +static void __init test_create_function_call(void) +{ + u32 *iptr; + unsigned long dest; + ppc_inst_t instr; + + /* Check we can create a function call */ + iptr = (u32 *)ppc_function_entry(test_trampoline); + dest = ppc_function_entry(test_create_function_call); + create_branch(&instr, iptr, dest, BRANCH_SET_LINK); + patch_instruction(iptr, instr); + check(instr_is_branch_to_addr(iptr, dest)); +} + +static void __init test_branch_bform(void) +{ + int err; + unsigned long addr; + ppc_inst_t instr; + u32 tmp[2]; + u32 *iptr = tmp; + unsigned int flags; + + addr = (unsigned long)iptr; + + /* The simplest case, branch to self, no flags */ + check(instr_is_branch_bform(ppc_inst(0x40000000))); + /* All bits of target set, and flags */ + check(instr_is_branch_bform(ppc_inst(0x43ffffff))); + /* High bit of opcode set, which is wrong */ + check(!instr_is_branch_bform(ppc_inst(0xc3ffffff))); + /* Middle bits of opcode set, which is wrong */ + check(!instr_is_branch_bform(ppc_inst(0x7bffffff))); + + /* Absolute conditional branch to 0x100 */ + ppc_inst_write(iptr, ppc_inst(0x43ff0103)); + check(instr_is_branch_to_addr(iptr, 0x100)); + /* Absolute conditional branch to 0x20fc */ + ppc_inst_write(iptr, ppc_inst(0x43ff20ff)); + check(instr_is_branch_to_addr(iptr, 0x20fc)); + /* Maximum positive relative conditional branch, + 32 KB - 4B */ + ppc_inst_write(iptr, ppc_inst(0x43ff7ffc)); + check(instr_is_branch_to_addr(iptr, addr + 0x7FFC)); + /* Smallest negative relative conditional branch, - 4B */ + ppc_inst_write(iptr, ppc_inst(0x43fffffc)); + check(instr_is_branch_to_addr(iptr, addr - 4)); + /* Largest negative relative conditional branch, - 32 KB */ + ppc_inst_write(iptr, ppc_inst(0x43ff8000)); + check(instr_is_branch_to_addr(iptr, addr - 0x8000)); + + /* All condition code bits set & link */ + flags = 0x3ff000 | BRANCH_SET_LINK; + + /* Branch to self */ + err = create_cond_branch(&instr, iptr, addr, flags); + ppc_inst_write(iptr, instr); + check(instr_is_branch_to_addr(iptr, addr)); + + /* Branch to self - 0x100 */ + err = create_cond_branch(&instr, iptr, addr - 0x100, flags); + ppc_inst_write(iptr, instr); + check(instr_is_branch_to_addr(iptr, addr - 0x100)); + + /* Branch to self + 0x100 */ + err = create_cond_branch(&instr, iptr, addr + 0x100, flags); + ppc_inst_write(iptr, instr); + check(instr_is_branch_to_addr(iptr, addr + 0x100)); + + /* Maximum relative negative offset, - 32 KB */ + err = create_cond_branch(&instr, iptr, addr - 0x8000, flags); + ppc_inst_write(iptr, instr); + check(instr_is_branch_to_addr(iptr, addr - 0x8000)); + + /* Out of range relative negative offset, - 32 KB + 4*/ + err = create_cond_branch(&instr, iptr, addr - 0x8004, flags); + check(err); + + /* Out of range relative positive offset, + 32 KB */ + err = create_cond_branch(&instr, iptr, addr + 0x8000, flags); + check(err); + + /* Unaligned target */ + err = create_cond_branch(&instr, iptr, addr + 3, flags); + check(err); + + /* Check flags are masked correctly */ + err = create_cond_branch(&instr, iptr, addr, 0xFFFFFFFC); + ppc_inst_write(iptr, instr); + check(instr_is_branch_to_addr(iptr, addr)); + check(ppc_inst_equal(instr, ppc_inst(0x43FF0000))); +} + +static void __init test_translate_branch(void) +{ + unsigned long addr; + void *p, *q; + ppc_inst_t instr; + void *buf; + + buf = vmalloc(PAGE_ALIGN(0x2000000 + 1)); + check(buf); + if (!buf) + return; + + /* Simple case, branch to self moved a little */ + p = buf; + addr = (unsigned long)p; + create_branch(&instr, p, addr, 0); + ppc_inst_write(p, instr); + check(instr_is_branch_to_addr(p, addr)); + q = p + 4; + translate_branch(&instr, q, p); + ppc_inst_write(q, instr); + check(instr_is_branch_to_addr(q, addr)); + + /* Maximum negative case, move b . to addr + 32 MB */ + p = buf; + addr = (unsigned long)p; + create_branch(&instr, p, addr, 0); + ppc_inst_write(p, instr); + q = buf + 0x2000000; + translate_branch(&instr, q, p); + ppc_inst_write(q, instr); + check(instr_is_branch_to_addr(p, addr)); + check(instr_is_branch_to_addr(q, addr)); + check(ppc_inst_equal(ppc_inst_read(q), ppc_inst(0x4a000000))); + + /* Maximum positive case, move x to x - 32 MB + 4 */ + p = buf + 0x2000000; + addr = (unsigned long)p; + create_branch(&instr, p, addr, 0); + ppc_inst_write(p, instr); + q = buf + 4; + translate_branch(&instr, q, p); + ppc_inst_write(q, instr); + check(instr_is_branch_to_addr(p, addr)); + check(instr_is_branch_to_addr(q, addr)); + check(ppc_inst_equal(ppc_inst_read(q), ppc_inst(0x49fffffc))); + + /* Jump to x + 16 MB moved to x + 20 MB */ + p = buf; + addr = 0x1000000 + (unsigned long)buf; + create_branch(&instr, p, addr, BRANCH_SET_LINK); + ppc_inst_write(p, instr); + q = buf + 0x1400000; + translate_branch(&instr, q, p); + ppc_inst_write(q, instr); + check(instr_is_branch_to_addr(p, addr)); + check(instr_is_branch_to_addr(q, addr)); + + /* Jump to x + 16 MB moved to x - 16 MB + 4 */ + p = buf + 0x1000000; + addr = 0x2000000 + (unsigned long)buf; + create_branch(&instr, p, addr, 0); + ppc_inst_write(p, instr); + q = buf + 4; + translate_branch(&instr, q, p); + ppc_inst_write(q, instr); + check(instr_is_branch_to_addr(p, addr)); + check(instr_is_branch_to_addr(q, addr)); + + + /* Conditional branch tests */ + + /* Simple case, branch to self moved a little */ + p = buf; + addr = (unsigned long)p; + create_cond_branch(&instr, p, addr, 0); + ppc_inst_write(p, instr); + check(instr_is_branch_to_addr(p, addr)); + q = buf + 4; + translate_branch(&instr, q, p); + ppc_inst_write(q, instr); + check(instr_is_branch_to_addr(q, addr)); + + /* Maximum negative case, move b . to addr + 32 KB */ + p = buf; + addr = (unsigned long)p; + create_cond_branch(&instr, p, addr, 0xFFFFFFFC); + ppc_inst_write(p, instr); + q = buf + 0x8000; + translate_branch(&instr, q, p); + ppc_inst_write(q, instr); + check(instr_is_branch_to_addr(p, addr)); + check(instr_is_branch_to_addr(q, addr)); + check(ppc_inst_equal(ppc_inst_read(q), ppc_inst(0x43ff8000))); + + /* Maximum positive case, move x to x - 32 KB + 4 */ + p = buf + 0x8000; + addr = (unsigned long)p; + create_cond_branch(&instr, p, addr, 0xFFFFFFFC); + ppc_inst_write(p, instr); + q = buf + 4; + translate_branch(&instr, q, p); + ppc_inst_write(q, instr); + check(instr_is_branch_to_addr(p, addr)); + check(instr_is_branch_to_addr(q, addr)); + check(ppc_inst_equal(ppc_inst_read(q), ppc_inst(0x43ff7ffc))); + + /* Jump to x + 12 KB moved to x + 20 KB */ + p = buf; + addr = 0x3000 + (unsigned long)buf; + create_cond_branch(&instr, p, addr, BRANCH_SET_LINK); + ppc_inst_write(p, instr); + q = buf + 0x5000; + translate_branch(&instr, q, p); + ppc_inst_write(q, instr); + check(instr_is_branch_to_addr(p, addr)); + check(instr_is_branch_to_addr(q, addr)); + + /* Jump to x + 8 KB moved to x - 8 KB + 4 */ + p = buf + 0x2000; + addr = 0x4000 + (unsigned long)buf; + create_cond_branch(&instr, p, addr, 0); + ppc_inst_write(p, instr); + q = buf + 4; + translate_branch(&instr, q, p); + ppc_inst_write(q, instr); + check(instr_is_branch_to_addr(p, addr)); + check(instr_is_branch_to_addr(q, addr)); + + /* Free the buffer we were using */ + vfree(buf); +} + +static void __init test_prefixed_patching(void) +{ + u32 *iptr = (u32 *)ppc_function_entry(test_trampoline); + u32 expected[2] = {OP_PREFIX << 26, 0}; + ppc_inst_t inst = ppc_inst_prefix(OP_PREFIX << 26, 0); + + if (!IS_ENABLED(CONFIG_PPC64)) + return; + + patch_instruction(iptr, inst); + + check(!memcmp(iptr, expected, sizeof(expected))); +} + +static int __init test_code_patching(void) +{ + pr_info("Running code patching self-tests ...\n"); + + test_branch_iform(); + test_branch_bform(); + test_create_function_call(); + test_translate_branch(); + test_prefixed_patching(); + + return 0; +} +late_initcall(test_code_patching); diff --git a/arch/powerpc/lib/test_code-patching.S b/arch/powerpc/lib/test_code-patching.S deleted file mode 100644 index a9be6107844e..000000000000 --- a/arch/powerpc/lib/test_code-patching.S +++ /dev/null @@ -1,20 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Copyright (C) 2020 IBM Corporation - */ -#include <asm/ppc-opcode.h> - - .text - -#define globl(x) \ - .globl x; \ -x: - -globl(code_patching_test1) - nop - nop -globl(end_code_patching_test1) - -globl(code_patching_test1_expected) - .long OP_PREFIX << 26 - .long 0x0000000 diff --git a/arch/powerpc/lib/test_emulate_step.c b/arch/powerpc/lib/test_emulate_step.c index 8b4f6b3e96c4..4f141daafcff 100644 --- a/arch/powerpc/lib/test_emulate_step.c +++ b/arch/powerpc/lib/test_emulate_step.c @@ -792,7 +792,7 @@ static void __init test_lxvpx_stxvpx(void) #ifdef CONFIG_VSX static void __init test_plxvp_pstxvp(void) { - struct ppc_inst instr; + ppc_inst_t instr; struct pt_regs regs; union { vector128 a; @@ -906,7 +906,7 @@ struct compute_test { struct { char *descr; unsigned long flags; - struct ppc_inst instr; + ppc_inst_t instr; struct pt_regs regs; } subtests[MAX_SUBTESTS + 1]; }; @@ -1600,7 +1600,7 @@ static struct compute_test compute_tests[] = { }; static int __init emulate_compute_instr(struct pt_regs *regs, - struct ppc_inst instr, + ppc_inst_t instr, bool negative) { int analysed; @@ -1627,7 +1627,7 @@ static int __init emulate_compute_instr(struct pt_regs *regs, } static int __init execute_compute_instr(struct pt_regs *regs, - struct ppc_inst instr) + ppc_inst_t instr) { extern int exec_instr(struct pt_regs *regs); @@ -1658,7 +1658,7 @@ static void __init run_tests_compute(void) struct compute_test *test; struct pt_regs *regs, exp, got; unsigned int i, j, k; - struct ppc_inst instr; + ppc_inst_t instr; bool ignore_gpr, ignore_xer, ignore_ccr, passed, rc, negative; for (i = 0; i < ARRAY_SIZE(compute_tests); i++) { diff --git a/arch/powerpc/lib/test_emulate_step_exec_instr.S b/arch/powerpc/lib/test_emulate_step_exec_instr.S index 9ef941d958d8..5473f9d03df3 100644 --- a/arch/powerpc/lib/test_emulate_step_exec_instr.S +++ b/arch/powerpc/lib/test_emulate_step_exec_instr.S @@ -37,7 +37,7 @@ _GLOBAL(exec_instr) * The stack pointer (GPR1) and the thread pointer (GPR13) are not * saved as these should not be modified anyway. */ - SAVE_2GPRS(2, r1) + SAVE_GPRS(2, 3, r1) SAVE_NVGPRS(r1) /* @@ -75,8 +75,7 @@ _GLOBAL(exec_instr) /* Load GPRs from pt_regs */ REST_GPR(0, r31) - REST_10GPRS(2, r31) - REST_GPR(12, r31) + REST_GPRS(2, 12, r31) REST_NVGPRS(r31) /* Placeholder for the test instruction */ @@ -99,8 +98,7 @@ _GLOBAL(exec_instr) subi r3, r3, GPR0 SAVE_GPR(0, r3) SAVE_GPR(2, r3) - SAVE_8GPRS(4, r3) - SAVE_GPR(12, r3) + SAVE_GPRS(4, 12, r3) SAVE_NVGPRS(r3) /* Save resulting LR to pt_regs */ diff --git a/arch/powerpc/mm/book3s32/Makefile b/arch/powerpc/mm/book3s32/Makefile index 15f4773643d2..50dd8f6bdf46 100644 --- a/arch/powerpc/mm/book3s32/Makefile +++ b/arch/powerpc/mm/book3s32/Makefile @@ -9,5 +9,4 @@ endif obj-y += mmu.o mmu_context.o obj-$(CONFIG_PPC_BOOK3S_603) += nohash_low.o obj-$(CONFIG_PPC_BOOK3S_604) += hash_low.o tlb.o -obj-$(CONFIG_PPC_KUEP) += kuep.o obj-$(CONFIG_PPC_KUAP) += kuap.o diff --git a/arch/powerpc/mm/book3s32/kuap.c b/arch/powerpc/mm/book3s32/kuap.c index 0f920f09af57..28676cabb005 100644 --- a/arch/powerpc/mm/book3s32/kuap.c +++ b/arch/powerpc/mm/book3s32/kuap.c @@ -20,8 +20,11 @@ EXPORT_SYMBOL(kuap_unlock_all_ool); void setup_kuap(bool disabled) { - if (!disabled) + if (!disabled) { kuap_lock_all_ool(); + init_mm.context.sr0 |= SR_KS; + current->thread.sr0 |= SR_KS; + } if (smp_processor_id() != boot_cpuid) return; diff --git a/arch/powerpc/mm/book3s32/kuep.c b/arch/powerpc/mm/book3s32/kuep.c deleted file mode 100644 index c20733d6e02c..000000000000 --- a/arch/powerpc/mm/book3s32/kuep.c +++ /dev/null @@ -1,20 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later - -#include <asm/kup.h> -#include <asm/smp.h> - -struct static_key_false disable_kuep_key; - -void setup_kuep(bool disabled) -{ - if (!disabled) - kuep_lock(); - - if (smp_processor_id() != boot_cpuid) - return; - - if (disabled) - static_branch_enable(&disable_kuep_key); - else - pr_info("Activating Kernel Userspace Execution Prevention\n"); -} diff --git a/arch/powerpc/mm/book3s32/mmu.c b/arch/powerpc/mm/book3s32/mmu.c index 27061583a010..94045b265b6b 100644 --- a/arch/powerpc/mm/book3s32/mmu.c +++ b/arch/powerpc/mm/book3s32/mmu.c @@ -76,7 +76,7 @@ unsigned long p_block_mapped(phys_addr_t pa) return 0; } -static int find_free_bat(void) +static int __init find_free_bat(void) { int b; int n = mmu_has_feature(MMU_FTR_USE_HIGH_BATS) ? 8 : 4; @@ -196,18 +196,17 @@ void mmu_mark_initmem_nx(void) int nb = mmu_has_feature(MMU_FTR_USE_HIGH_BATS) ? 8 : 4; int i; unsigned long base = (unsigned long)_stext - PAGE_OFFSET; - unsigned long top = (unsigned long)_etext - PAGE_OFFSET; + unsigned long top = ALIGN((unsigned long)_etext - PAGE_OFFSET, SZ_128K); unsigned long border = (unsigned long)__init_begin - PAGE_OFFSET; unsigned long size; - for (i = 0; i < nb - 1 && base < top && top - base > (128 << 10);) { + for (i = 0; i < nb - 1 && base < top;) { size = block_size(base, top); setibat(i++, PAGE_OFFSET + base, base, size, PAGE_KERNEL_TEXT); base += size; } if (base < top) { size = block_size(base, top); - size = max(size, 128UL << 10); if ((top - base) > size) { size <<= 1; if (strict_kernel_rwx_enabled() && base + size > border) diff --git a/arch/powerpc/mm/book3s32/mmu_context.c b/arch/powerpc/mm/book3s32/mmu_context.c index e2708e387dc3..269a3eb25a73 100644 --- a/arch/powerpc/mm/book3s32/mmu_context.c +++ b/arch/powerpc/mm/book3s32/mmu_context.c @@ -69,6 +69,12 @@ EXPORT_SYMBOL_GPL(__init_new_context); int init_new_context(struct task_struct *t, struct mm_struct *mm) { mm->context.id = __init_new_context(); + mm->context.sr0 = CTX_TO_VSID(mm->context.id, 0); + + if (!kuep_is_disabled()) + mm->context.sr0 |= SR_NX; + if (!kuap_is_disabled()) + mm->context.sr0 |= SR_KS; return 0; } @@ -108,20 +114,13 @@ void __init mmu_context_init(void) void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk) { long id = next->context.id; - unsigned long val; if (id < 0) panic("mm_struct %p has no context ID", next); isync(); - val = CTX_TO_VSID(id, 0); - if (!kuep_is_disabled()) - val |= SR_NX; - if (!kuap_is_disabled()) - val |= SR_KS; - - update_user_segments(val); + update_user_segments(next->context.sr0); if (IS_ENABLED(CONFIG_BDI_SWITCH)) abatron_pteptrs[1] = next->pgd; diff --git a/arch/powerpc/mm/book3s64/Makefile b/arch/powerpc/mm/book3s64/Makefile index 1b56d3af47d4..2d50cac499c5 100644 --- a/arch/powerpc/mm/book3s64/Makefile +++ b/arch/powerpc/mm/book3s64/Makefile @@ -2,20 +2,23 @@ ccflags-y := $(NO_MINIMAL_TOC) +obj-y += mmu_context.o pgtable.o trace.o +ifdef CONFIG_PPC_64S_HASH_MMU CFLAGS_REMOVE_slb.o = $(CC_FLAGS_FTRACE) - -obj-y += hash_pgtable.o hash_utils.o slb.o \ - mmu_context.o pgtable.o hash_tlb.o -obj-$(CONFIG_PPC_NATIVE) += hash_native.o -obj-$(CONFIG_PPC_RADIX_MMU) += radix_pgtable.o radix_tlb.o +obj-y += hash_pgtable.o hash_utils.o hash_tlb.o slb.o +obj-$(CONFIG_PPC_HASH_MMU_NATIVE) += hash_native.o obj-$(CONFIG_PPC_4K_PAGES) += hash_4k.o obj-$(CONFIG_PPC_64K_PAGES) += hash_64k.o -obj-$(CONFIG_HUGETLB_PAGE) += hash_hugetlbpage.o +obj-$(CONFIG_TRANSPARENT_HUGEPAGE) += hash_hugepage.o +obj-$(CONFIG_PPC_SUBPAGE_PROT) += subpage_prot.o +endif + +obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o + +obj-$(CONFIG_PPC_RADIX_MMU) += radix_pgtable.o radix_tlb.o ifdef CONFIG_HUGETLB_PAGE obj-$(CONFIG_PPC_RADIX_MMU) += radix_hugetlbpage.o endif -obj-$(CONFIG_TRANSPARENT_HUGEPAGE) += hash_hugepage.o -obj-$(CONFIG_PPC_SUBPAGE_PROT) += subpage_prot.o obj-$(CONFIG_SPAPR_TCE_IOMMU) += iommu_api.o obj-$(CONFIG_PPC_PKEY) += pkeys.o diff --git a/arch/powerpc/mm/book3s64/hash_native.c b/arch/powerpc/mm/book3s64/hash_native.c index d8279bfe68ea..623a7b7ab38b 100644 --- a/arch/powerpc/mm/book3s64/hash_native.c +++ b/arch/powerpc/mm/book3s64/hash_native.c @@ -43,110 +43,6 @@ static DEFINE_RAW_SPINLOCK(native_tlbie_lock); -static inline void tlbiel_hash_set_isa206(unsigned int set, unsigned int is) -{ - unsigned long rb; - - rb = (set << PPC_BITLSHIFT(51)) | (is << PPC_BITLSHIFT(53)); - - asm volatile("tlbiel %0" : : "r" (rb)); -} - -/* - * tlbiel instruction for hash, set invalidation - * i.e., r=1 and is=01 or is=10 or is=11 - */ -static __always_inline void tlbiel_hash_set_isa300(unsigned int set, unsigned int is, - unsigned int pid, - unsigned int ric, unsigned int prs) -{ - unsigned long rb; - unsigned long rs; - unsigned int r = 0; /* hash format */ - - rb = (set << PPC_BITLSHIFT(51)) | (is << PPC_BITLSHIFT(53)); - rs = ((unsigned long)pid << PPC_BITLSHIFT(31)); - - asm volatile(PPC_TLBIEL(%0, %1, %2, %3, %4) - : : "r"(rb), "r"(rs), "i"(ric), "i"(prs), "i"(r) - : "memory"); -} - - -static void tlbiel_all_isa206(unsigned int num_sets, unsigned int is) -{ - unsigned int set; - - asm volatile("ptesync": : :"memory"); - - for (set = 0; set < num_sets; set++) - tlbiel_hash_set_isa206(set, is); - - ppc_after_tlbiel_barrier(); -} - -static void tlbiel_all_isa300(unsigned int num_sets, unsigned int is) -{ - unsigned int set; - - asm volatile("ptesync": : :"memory"); - - /* - * Flush the partition table cache if this is HV mode. - */ - if (early_cpu_has_feature(CPU_FTR_HVMODE)) - tlbiel_hash_set_isa300(0, is, 0, 2, 0); - - /* - * Now invalidate the process table cache. UPRT=0 HPT modes (what - * current hardware implements) do not use the process table, but - * add the flushes anyway. - * - * From ISA v3.0B p. 1078: - * The following forms are invalid. - * * PRS=1, R=0, and RIC!=2 (The only process-scoped - * HPT caching is of the Process Table.) - */ - tlbiel_hash_set_isa300(0, is, 0, 2, 1); - - /* - * Then flush the sets of the TLB proper. Hash mode uses - * partition scoped TLB translations, which may be flushed - * in !HV mode. - */ - for (set = 0; set < num_sets; set++) - tlbiel_hash_set_isa300(set, is, 0, 0, 0); - - ppc_after_tlbiel_barrier(); - - asm volatile(PPC_ISA_3_0_INVALIDATE_ERAT "; isync" : : :"memory"); -} - -void hash__tlbiel_all(unsigned int action) -{ - unsigned int is; - - switch (action) { - case TLB_INVAL_SCOPE_GLOBAL: - is = 3; - break; - case TLB_INVAL_SCOPE_LPID: - is = 2; - break; - default: - BUG(); - } - - if (early_cpu_has_feature(CPU_FTR_ARCH_300)) - tlbiel_all_isa300(POWER9_TLB_SETS_HASH, is); - else if (early_cpu_has_feature(CPU_FTR_ARCH_207S)) - tlbiel_all_isa206(POWER8_TLB_SETS, is); - else if (early_cpu_has_feature(CPU_FTR_ARCH_206)) - tlbiel_all_isa206(POWER7_TLB_SETS, is); - else - WARN(1, "%s called on pre-POWER7 CPU\n", __func__); -} - static inline unsigned long ___tlbie(unsigned long vpn, int psize, int apsize, int ssize) { @@ -267,7 +163,7 @@ static inline void __tlbiel(unsigned long vpn, int psize, int apsize, int ssize) va |= ssize << 8; sllp = get_sllp_encoding(apsize); va |= sllp << 5; - asm volatile(ASM_FTR_IFSET("tlbiel %0", "tlbiel %0,0", %1) + asm volatile(ASM_FTR_IFSET("tlbiel %0", PPC_TLBIEL_v205(%0, 0), %1) : : "r" (va), "i" (CPU_FTR_ARCH_206) : "memory"); break; @@ -286,7 +182,7 @@ static inline void __tlbiel(unsigned long vpn, int psize, int apsize, int ssize) */ va |= (vpn & 0xfe); va |= 1; /* L */ - asm volatile(ASM_FTR_IFSET("tlbiel %0", "tlbiel %0,1", %1) + asm volatile(ASM_FTR_IFSET("tlbiel %0", PPC_TLBIEL_v205(%0, 1), %1) : : "r" (va), "i" (CPU_FTR_ARCH_206) : "memory"); break; diff --git a/arch/powerpc/mm/book3s64/hash_pgtable.c b/arch/powerpc/mm/book3s64/hash_pgtable.c index ad5eff097d31..7ce8914992e3 100644 --- a/arch/powerpc/mm/book3s64/hash_pgtable.c +++ b/arch/powerpc/mm/book3s64/hash_pgtable.c @@ -16,7 +16,6 @@ #include <mm/mmu_decl.h> -#define CREATE_TRACE_POINTS #include <trace/events/thp.h> #if H_PGTABLE_RANGE > (USER_VSID_RANGE * (TASK_SIZE_USER64 / TASK_CONTEXT_SIZE)) diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c index cfd45245d009..7abf82a698d3 100644 --- a/arch/powerpc/mm/book3s64/hash_utils.c +++ b/arch/powerpc/mm/book3s64/hash_utils.c @@ -99,8 +99,6 @@ */ static unsigned long _SDR1; -struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT]; -EXPORT_SYMBOL_GPL(mmu_psize_defs); u8 hpte_page_sizes[1 << LP_BITS]; EXPORT_SYMBOL_GPL(hpte_page_sizes); @@ -114,9 +112,6 @@ EXPORT_SYMBOL_GPL(mmu_linear_psize); int mmu_virtual_psize = MMU_PAGE_4K; int mmu_vmalloc_psize = MMU_PAGE_4K; EXPORT_SYMBOL_GPL(mmu_vmalloc_psize); -#ifdef CONFIG_SPARSEMEM_VMEMMAP -int mmu_vmemmap_psize = MMU_PAGE_4K; -#endif int mmu_io_psize = MMU_PAGE_4K; int mmu_kernel_ssize = MMU_SEGSIZE_256M; EXPORT_SYMBOL_GPL(mmu_kernel_ssize); @@ -175,6 +170,110 @@ static struct mmu_psize_def mmu_psize_defaults_gp[] = { }, }; +static inline void tlbiel_hash_set_isa206(unsigned int set, unsigned int is) +{ + unsigned long rb; + + rb = (set << PPC_BITLSHIFT(51)) | (is << PPC_BITLSHIFT(53)); + + asm volatile("tlbiel %0" : : "r" (rb)); +} + +/* + * tlbiel instruction for hash, set invalidation + * i.e., r=1 and is=01 or is=10 or is=11 + */ +static __always_inline void tlbiel_hash_set_isa300(unsigned int set, unsigned int is, + unsigned int pid, + unsigned int ric, unsigned int prs) +{ + unsigned long rb; + unsigned long rs; + unsigned int r = 0; /* hash format */ + + rb = (set << PPC_BITLSHIFT(51)) | (is << PPC_BITLSHIFT(53)); + rs = ((unsigned long)pid << PPC_BITLSHIFT(31)); + + asm volatile(PPC_TLBIEL(%0, %1, %2, %3, %4) + : : "r"(rb), "r"(rs), "i"(ric), "i"(prs), "i"(r) + : "memory"); +} + + +static void tlbiel_all_isa206(unsigned int num_sets, unsigned int is) +{ + unsigned int set; + + asm volatile("ptesync": : :"memory"); + + for (set = 0; set < num_sets; set++) + tlbiel_hash_set_isa206(set, is); + + ppc_after_tlbiel_barrier(); +} + +static void tlbiel_all_isa300(unsigned int num_sets, unsigned int is) +{ + unsigned int set; + + asm volatile("ptesync": : :"memory"); + + /* + * Flush the partition table cache if this is HV mode. + */ + if (early_cpu_has_feature(CPU_FTR_HVMODE)) + tlbiel_hash_set_isa300(0, is, 0, 2, 0); + + /* + * Now invalidate the process table cache. UPRT=0 HPT modes (what + * current hardware implements) do not use the process table, but + * add the flushes anyway. + * + * From ISA v3.0B p. 1078: + * The following forms are invalid. + * * PRS=1, R=0, and RIC!=2 (The only process-scoped + * HPT caching is of the Process Table.) + */ + tlbiel_hash_set_isa300(0, is, 0, 2, 1); + + /* + * Then flush the sets of the TLB proper. Hash mode uses + * partition scoped TLB translations, which may be flushed + * in !HV mode. + */ + for (set = 0; set < num_sets; set++) + tlbiel_hash_set_isa300(set, is, 0, 0, 0); + + ppc_after_tlbiel_barrier(); + + asm volatile(PPC_ISA_3_0_INVALIDATE_ERAT "; isync" : : :"memory"); +} + +void hash__tlbiel_all(unsigned int action) +{ + unsigned int is; + + switch (action) { + case TLB_INVAL_SCOPE_GLOBAL: + is = 3; + break; + case TLB_INVAL_SCOPE_LPID: + is = 2; + break; + default: + BUG(); + } + + if (early_cpu_has_feature(CPU_FTR_ARCH_300)) + tlbiel_all_isa300(POWER9_TLB_SETS_HASH, is); + else if (early_cpu_has_feature(CPU_FTR_ARCH_207S)) + tlbiel_all_isa206(POWER8_TLB_SETS, is); + else if (early_cpu_has_feature(CPU_FTR_ARCH_206)) + tlbiel_all_isa206(POWER7_TLB_SETS, is); + else + WARN(1, "%s called on pre-POWER7 CPU\n", __func__); +} + /* * 'R' and 'C' update notes: * - Under pHyp or KVM, the updatepp path will not set C, thus it *will* @@ -563,7 +662,7 @@ static int __init htab_dt_scan_hugepage_blocks(unsigned long node, } #endif /* CONFIG_HUGETLB_PAGE */ -static void mmu_psize_set_default_penc(void) +static void __init mmu_psize_set_default_penc(void) { int bpsize, apsize; for (bpsize = 0; bpsize < MMU_PAGE_COUNT; bpsize++) @@ -573,7 +672,7 @@ static void mmu_psize_set_default_penc(void) #ifdef CONFIG_PPC_64K_PAGES -static bool might_have_hea(void) +static bool __init might_have_hea(void) { /* * The HEA ethernet adapter requires awareness of the @@ -644,7 +743,7 @@ static void __init htab_scan_page_sizes(void) * low-order N bits as the encoding for the 2^(12+N) byte page size * (if it exists). */ -static void init_hpte_page_sizes(void) +static void __init init_hpte_page_sizes(void) { long int ap, bp; long int shift, penc; @@ -1091,7 +1190,7 @@ void __init hash__early_init_mmu(void) ps3_early_mm_init(); else if (firmware_has_feature(FW_FEATURE_LPAR)) hpte_init_pseries(); - else if (IS_ENABLED(CONFIG_PPC_NATIVE)) + else if (IS_ENABLED(CONFIG_PPC_HASH_MMU_NATIVE)) hpte_init_native(); if (!mmu_hash_ops.hpte_insert) diff --git a/arch/powerpc/mm/book3s64/hash_hugetlbpage.c b/arch/powerpc/mm/book3s64/hugetlbpage.c index a688e1324ae5..ea8f83afb0ae 100644 --- a/arch/powerpc/mm/book3s64/hash_hugetlbpage.c +++ b/arch/powerpc/mm/book3s64/hugetlbpage.c @@ -16,6 +16,7 @@ unsigned int hpage_shift; EXPORT_SYMBOL(hpage_shift); +#ifdef CONFIG_PPC_64S_HASH_MMU int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid, pte_t *ptep, unsigned long trap, unsigned long flags, int ssize, unsigned int shift, unsigned int mmu_psize) @@ -122,6 +123,7 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid, *ptep = __pte(new_pte & ~H_PAGE_BUSY); return 0; } +#endif pte_t huge_ptep_modify_prot_start(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) @@ -148,7 +150,7 @@ void huge_ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr set_huge_pte_at(vma->vm_mm, addr, ptep, pte); } -void hugetlbpage_init_default(void) +void __init hugetlbpage_init_default(void) { /* Set default large page size. Currently, we pick 16M or 1M * depending on what is available diff --git a/arch/powerpc/mm/book3s64/mmu_context.c b/arch/powerpc/mm/book3s64/mmu_context.c index c10fc8a72fb3..c766e4c26e42 100644 --- a/arch/powerpc/mm/book3s64/mmu_context.c +++ b/arch/powerpc/mm/book3s64/mmu_context.c @@ -31,7 +31,8 @@ static int alloc_context_id(int min_id, int max_id) return ida_alloc_range(&mmu_context_ida, min_id, max_id, GFP_KERNEL); } -void hash__reserve_context_id(int id) +#ifdef CONFIG_PPC_64S_HASH_MMU +void __init hash__reserve_context_id(int id) { int result = ida_alloc_range(&mmu_context_ida, id, id, GFP_KERNEL); @@ -50,7 +51,9 @@ int hash__alloc_context_id(void) return alloc_context_id(MIN_USER_CONTEXT, max); } EXPORT_SYMBOL_GPL(hash__alloc_context_id); +#endif +#ifdef CONFIG_PPC_64S_HASH_MMU static int realloc_context_ids(mm_context_t *ctx) { int i, id; @@ -150,6 +153,13 @@ void hash__setup_new_exec(void) slb_setup_new_exec(); } +#else +static inline int hash__init_new_context(struct mm_struct *mm) +{ + BUILD_BUG(); + return 0; +} +#endif static int radix__init_new_context(struct mm_struct *mm) { @@ -175,7 +185,9 @@ static int radix__init_new_context(struct mm_struct *mm) */ asm volatile("ptesync;isync" : : : "memory"); +#ifdef CONFIG_PPC_64S_HASH_MMU mm->context.hash_context = NULL; +#endif return index; } @@ -213,14 +225,22 @@ EXPORT_SYMBOL_GPL(__destroy_context); static void destroy_contexts(mm_context_t *ctx) { - int index, context_id; + if (radix_enabled()) { + ida_free(&mmu_context_ida, ctx->id); + } else { +#ifdef CONFIG_PPC_64S_HASH_MMU + int index, context_id; - for (index = 0; index < ARRAY_SIZE(ctx->extended_id); index++) { - context_id = ctx->extended_id[index]; - if (context_id) - ida_free(&mmu_context_ida, context_id); + for (index = 0; index < ARRAY_SIZE(ctx->extended_id); index++) { + context_id = ctx->extended_id[index]; + if (context_id) + ida_free(&mmu_context_ida, context_id); + } + kfree(ctx->hash_context); +#else + BUILD_BUG(); // radix_enabled() should be constant true +#endif } - kfree(ctx->hash_context); } static void pmd_frag_destroy(void *pmd_frag) diff --git a/arch/powerpc/mm/book3s64/pgtable.c b/arch/powerpc/mm/book3s64/pgtable.c index 9e16c7b1a6c5..79ce3c22a29d 100644 --- a/arch/powerpc/mm/book3s64/pgtable.c +++ b/arch/powerpc/mm/book3s64/pgtable.c @@ -22,6 +22,13 @@ #include "internal.h" +struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT]; +EXPORT_SYMBOL_GPL(mmu_psize_defs); + +#ifdef CONFIG_SPARSEMEM_VMEMMAP +int mmu_vmemmap_psize = MMU_PAGE_4K; +#endif + unsigned long __pmd_frag_nr; EXPORT_SYMBOL(__pmd_frag_nr); unsigned long __pmd_frag_size_shift; @@ -207,17 +214,12 @@ void __init mmu_partition_table_init(void) unsigned long patb_size = 1UL << PATB_SIZE_SHIFT; unsigned long ptcr; - BUILD_BUG_ON_MSG((PATB_SIZE_SHIFT > 36), "Partition table size too large."); /* Initialize the Partition Table with no entries */ partition_tb = memblock_alloc(patb_size, patb_size); if (!partition_tb) panic("%s: Failed to allocate %lu bytes align=0x%lx\n", __func__, patb_size, patb_size); - /* - * update partition table control register, - * 64 K size. - */ ptcr = __pa(partition_tb) | (PATB_SIZE_SHIFT - 12); set_ptcr_when_no_uv(ptcr); powernv_set_nmmu_ptcr(ptcr); @@ -526,3 +528,23 @@ static int __init pgtable_debugfs_setup(void) return 0; } arch_initcall(pgtable_debugfs_setup); + +#if defined(CONFIG_ZONE_DEVICE) && defined(CONFIG_ARCH_HAS_MEMREMAP_COMPAT_ALIGN) +/* + * Override the generic version in mm/memremap.c. + * + * With hash translation, the direct-map range is mapped with just one + * page size selected by htab_init_page_sizes(). Consult + * mmu_psize_defs[] to determine the minimum page size alignment. +*/ +unsigned long memremap_compat_align(void) +{ + if (!radix_enabled()) { + unsigned int shift = mmu_psize_defs[mmu_linear_psize].shift; + return max(SUBSECTION_SIZE, 1UL << shift); + } + + return SUBSECTION_SIZE; +} +EXPORT_SYMBOL_GPL(memremap_compat_align); +#endif diff --git a/arch/powerpc/mm/book3s64/pkeys.c b/arch/powerpc/mm/book3s64/pkeys.c index a2d9ad138709..753e62ba67af 100644 --- a/arch/powerpc/mm/book3s64/pkeys.c +++ b/arch/powerpc/mm/book3s64/pkeys.c @@ -66,7 +66,7 @@ static int __init dt_scan_storage_keys(unsigned long node, return 1; } -static int scan_pkey_feature(void) +static int __init scan_pkey_feature(void) { int ret; int pkeys_total = 0; diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c index 3a600bd7fbc6..def04631a74d 100644 --- a/arch/powerpc/mm/book3s64/radix_pgtable.c +++ b/arch/powerpc/mm/book3s64/radix_pgtable.c @@ -33,7 +33,6 @@ #include <trace/events/thp.h> -unsigned int mmu_pid_bits; unsigned int mmu_base_pid; unsigned long radix_mem_block_size __ro_after_init; @@ -335,7 +334,7 @@ static void __init radix_init_pgtable(void) u64 i; /* We don't support slb for radix */ - mmu_slb_size = 0; + slb_set_size(0); /* * Create the linear mapping @@ -357,18 +356,13 @@ static void __init radix_init_pgtable(void) -1, PAGE_KERNEL)); } - /* Find out how many PID bits are supported */ if (!cpu_has_feature(CPU_FTR_HVMODE) && cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG)) { /* * Older versions of KVM on these machines perfer if the * guest only uses the low 19 PID bits. */ - if (!mmu_pid_bits) - mmu_pid_bits = 19; - } else { - if (!mmu_pid_bits) - mmu_pid_bits = 20; + mmu_pid_bits = 19; } mmu_base_pid = 1; @@ -449,11 +443,6 @@ static int __init radix_dt_scan_page_sizes(unsigned long node, if (type == NULL || strcmp(type, "cpu") != 0) return 0; - /* Find MMU PID size */ - prop = of_get_flat_dt_prop(node, "ibm,mmu-pid-bits", &size); - if (prop && size == 4) - mmu_pid_bits = be32_to_cpup(prop); - /* Grab page size encodings */ prop = of_get_flat_dt_prop(node, "ibm,processor-radix-AP-encodings", &size); if (!prop) @@ -510,7 +499,7 @@ static int __init probe_memory_block_size(unsigned long node, const char *uname, return 1; } -static unsigned long radix_memory_block_size(void) +static unsigned long __init radix_memory_block_size(void) { unsigned long mem_block_size = MIN_MEMORY_BLOCK_SIZE; @@ -528,7 +517,7 @@ static unsigned long radix_memory_block_size(void) #else /* CONFIG_MEMORY_HOTPLUG */ -static unsigned long radix_memory_block_size(void) +static unsigned long __init radix_memory_block_size(void) { return 1UL * 1024 * 1024 * 1024; } @@ -572,22 +561,11 @@ void __init radix__early_init_devtree(void) return; } -static void radix_init_amor(void) -{ - /* - * In HV mode, we init AMOR (Authority Mask Override Register) so that - * the hypervisor and guest can setup IAMR (Instruction Authority Mask - * Register), enable key 0 and set it to 1. - * - * AMOR = 0b1100 .... 0000 (Mask for key 0 is 11) - */ - mtspr(SPRN_AMOR, (3ul << 62)); -} - void __init radix__early_init_mmu(void) { unsigned long lpcr; +#ifdef CONFIG_PPC_64S_HASH_MMU #ifdef CONFIG_PPC_64K_PAGES /* PAGE_SIZE mappings */ mmu_virtual_psize = MMU_PAGE_64K; @@ -605,6 +583,7 @@ void __init radix__early_init_mmu(void) } else mmu_vmemmap_psize = mmu_virtual_psize; #endif +#endif /* * initialize page table size */ @@ -644,7 +623,6 @@ void __init radix__early_init_mmu(void) lpcr = mfspr(SPRN_LPCR); mtspr(SPRN_LPCR, lpcr | LPCR_UPRT | LPCR_HR); radix_init_partition_table(); - radix_init_amor(); } else { radix_init_pseries(); } @@ -668,8 +646,6 @@ void radix__early_init_mmu_secondary(void) set_ptcr_when_no_uv(__pa(partition_tb) | (PATB_SIZE_SHIFT - 12)); - - radix_init_amor(); } radix__switch_mmu_context(NULL, &init_mm); @@ -1100,7 +1076,7 @@ int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot) int pud_clear_huge(pud_t *pud) { - if (pud_huge(*pud)) { + if (pud_is_leaf(*pud)) { pud_clear(pud); return 1; } @@ -1147,7 +1123,7 @@ int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot) int pmd_clear_huge(pmd_t *pmd) { - if (pmd_huge(*pmd)) { + if (pmd_is_leaf(*pmd)) { pmd_clear(pmd); return 1; } diff --git a/arch/powerpc/mm/book3s64/slb.c b/arch/powerpc/mm/book3s64/slb.c index f0037bcc47a0..31f4cef3adac 100644 --- a/arch/powerpc/mm/book3s64/slb.c +++ b/arch/powerpc/mm/book3s64/slb.c @@ -868,19 +868,3 @@ DEFINE_INTERRUPT_HANDLER_RAW(do_slb_fault) return err; } } - -DEFINE_INTERRUPT_HANDLER(do_bad_slb_fault) -{ - int err = regs->result; - - if (err == -EFAULT) { - if (user_mode(regs)) - _exception(SIGSEGV, regs, SEGV_BNDERR, regs->dar); - else - bad_page_fault(regs, SIGSEGV); - } else if (err == -EINVAL) { - unrecoverable_exception(regs); - } else { - BUG(); - } -} diff --git a/arch/powerpc/mm/book3s64/trace.c b/arch/powerpc/mm/book3s64/trace.c new file mode 100644 index 000000000000..b86e7b906257 --- /dev/null +++ b/arch/powerpc/mm/book3s64/trace.c @@ -0,0 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * This file is for defining trace points and trace related helpers. + */ +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +#define CREATE_TRACE_POINTS +#include <trace/events/thp.h> +#endif diff --git a/arch/powerpc/mm/copro_fault.c b/arch/powerpc/mm/copro_fault.c index 8acd00178956..c1cb21a00884 100644 --- a/arch/powerpc/mm/copro_fault.c +++ b/arch/powerpc/mm/copro_fault.c @@ -82,6 +82,7 @@ out_unlock: } EXPORT_SYMBOL_GPL(copro_handle_mm_fault); +#ifdef CONFIG_PPC_64S_HASH_MMU int copro_calculate_slb(struct mm_struct *mm, u64 ea, struct copro_slb *slb) { u64 vsid, vsidkey; @@ -146,3 +147,4 @@ void copro_flush_all_slbs(struct mm_struct *mm) cxl_slbia(mm); } EXPORT_SYMBOL_GPL(copro_flush_all_slbs); +#endif diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index a8d0ce85d39a..2d4a411c7c85 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -35,6 +35,7 @@ #include <linux/kfence.h> #include <linux/pkeys.h> +#include <asm/asm-prototypes.h> #include <asm/firmware.h> #include <asm/interrupt.h> #include <asm/page.h> @@ -620,4 +621,27 @@ DEFINE_INTERRUPT_HANDLER(do_bad_page_fault_segv) { bad_page_fault(regs, SIGSEGV); } + +/* + * In radix, segment interrupts indicate the EA is not addressable by the + * page table geometry, so they are always sent here. + * + * In hash, this is called if do_slb_fault returns error. Typically it is + * because the EA was outside the region allowed by software. + */ +DEFINE_INTERRUPT_HANDLER(do_bad_segment_interrupt) +{ + int err = regs->result; + + if (err == -EFAULT) { + if (user_mode(regs)) + _exception(SIGSEGV, regs, SEGV_BNDERR, regs->dar); + else + bad_page_fault(regs, SIGSEGV); + } else if (err == -EINVAL) { + unrecoverable_exception(regs); + } else { + BUG(); + } +} #endif diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index 82d8b368ca6d..ddead41e2194 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -542,20 +542,26 @@ retry: return page; } -#ifdef CONFIG_PPC_MM_SLICES +#ifdef HAVE_ARCH_HUGETLB_UNMAPPED_AREA +static inline int file_to_psize(struct file *file) +{ + struct hstate *hstate = hstate_file(file); + return shift_to_mmu_psize(huge_page_shift(hstate)); +} + unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags) { - struct hstate *hstate = hstate_file(file); - int mmu_psize = shift_to_mmu_psize(huge_page_shift(hstate)); - #ifdef CONFIG_PPC_RADIX_MMU if (radix_enabled()) return radix__hugetlb_get_unmapped_area(file, addr, len, pgoff, flags); #endif - return slice_get_unmapped_area(addr, len, flags, mmu_psize, 1); +#ifdef CONFIG_PPC_MM_SLICES + return slice_get_unmapped_area(addr, len, flags, file_to_psize(file), 1); +#endif + BUG(); } #endif diff --git a/arch/powerpc/mm/init-common.c b/arch/powerpc/mm/init-common.c index 3a82f89827a5..119ef491f797 100644 --- a/arch/powerpc/mm/init-common.c +++ b/arch/powerpc/mm/init-common.c @@ -20,6 +20,7 @@ #include <linux/pgtable.h> #include <asm/pgalloc.h> #include <asm/kup.h> +#include <asm/smp.h> phys_addr_t memstart_addr __ro_after_init = (phys_addr_t)~0ull; EXPORT_SYMBOL_GPL(memstart_addr); @@ -33,6 +34,9 @@ bool disable_kuap = !IS_ENABLED(CONFIG_PPC_KUAP); static int __init parse_nosmep(char *p) { + if (!IS_ENABLED(CONFIG_PPC_BOOK3S_64)) + return 0; + disable_kuep = true; pr_warn("Disabling Kernel Userspace Execution Prevention\n"); return 0; @@ -47,6 +51,23 @@ static int __init parse_nosmap(char *p) } early_param("nosmap", parse_nosmap); +void __weak setup_kuep(bool disabled) +{ + if (!IS_ENABLED(CONFIG_PPC_KUEP) || disabled) + return; + + if (smp_processor_id() != boot_cpuid) + return; + + pr_info("Activating Kernel Userspace Execution Prevention\n"); +} + +void setup_kup(void) +{ + setup_kuap(disable_kuap); + setup_kuep(disable_kuep); +} + #define CTOR(shift) static void ctor_##shift(void *addr) \ { \ memset(addr, 0, sizeof(void *) << (shift)); \ diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index 386be136026e..35f46bf54281 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c @@ -370,6 +370,9 @@ void register_page_bootmem_memmap(unsigned long section_nr, #endif /* CONFIG_SPARSEMEM_VMEMMAP */ #ifdef CONFIG_PPC_BOOK3S_64 +unsigned int mmu_lpid_bits; +unsigned int mmu_pid_bits; + static bool disable_radix = !IS_ENABLED(CONFIG_PPC_RADIX_MMU_DEFAULT); static int __init parse_disable_radix(char *p) @@ -437,11 +440,56 @@ static void __init early_check_vec5(void) } } +static int __init dt_scan_mmu_pid_width(unsigned long node, + const char *uname, int depth, + void *data) +{ + int size = 0; + const __be32 *prop; + const char *type = of_get_flat_dt_prop(node, "device_type", NULL); + + /* We are scanning "cpu" nodes only */ + if (type == NULL || strcmp(type, "cpu") != 0) + return 0; + + /* Find MMU LPID, PID register size */ + prop = of_get_flat_dt_prop(node, "ibm,mmu-lpid-bits", &size); + if (prop && size == 4) + mmu_lpid_bits = be32_to_cpup(prop); + + prop = of_get_flat_dt_prop(node, "ibm,mmu-pid-bits", &size); + if (prop && size == 4) + mmu_pid_bits = be32_to_cpup(prop); + + if (!mmu_pid_bits && !mmu_lpid_bits) + return 0; + + return 1; +} + void __init mmu_early_init_devtree(void) { + bool hvmode = !!(mfmsr() & MSR_HV); + /* Disable radix mode based on kernel command line. */ - if (disable_radix) - cur_cpu_spec->mmu_features &= ~MMU_FTR_TYPE_RADIX; + if (disable_radix) { + if (IS_ENABLED(CONFIG_PPC_64S_HASH_MMU)) + cur_cpu_spec->mmu_features &= ~MMU_FTR_TYPE_RADIX; + else + pr_warn("WARNING: Ignoring cmdline option disable_radix\n"); + } + + of_scan_flat_dt(dt_scan_mmu_pid_width, NULL); + if (hvmode && !mmu_lpid_bits) { + if (early_cpu_has_feature(CPU_FTR_ARCH_207S)) + mmu_lpid_bits = 12; /* POWER8-10 */ + else + mmu_lpid_bits = 10; /* POWER7 */ + } + if (!mmu_pid_bits) { + if (early_cpu_has_feature(CPU_FTR_ARCH_300)) + mmu_pid_bits = 20; /* POWER9-10 */ + } /* * Check /chosen/ibm,architecture-vec-5 if running as a guest. @@ -449,11 +497,12 @@ void __init mmu_early_init_devtree(void) * even though the ibm,architecture-vec-5 property created by * skiboot doesn't have the necessary bits set. */ - if (!(mfmsr() & MSR_HV)) + if (!hvmode) early_check_vec5(); if (early_radix_enabled()) { radix__early_init_devtree(); + /* * We have finalized the translation we are going to use by now. * Radix mode is not limited by RMA / VRMA addressing. @@ -463,5 +512,9 @@ void __init mmu_early_init_devtree(void) memblock_set_current_limit(MEMBLOCK_ALLOC_ANYWHERE); } else hash__early_init_devtree(); + + if (!(cur_cpu_spec->mmu_features & MMU_FTR_HPTE_TABLE) && + !(cur_cpu_spec->mmu_features & MMU_FTR_TYPE_RADIX)) + panic("kernel does not support any MMU type offered by platform"); } #endif /* CONFIG_PPC_BOOK3S_64 */ diff --git a/arch/powerpc/mm/ioremap.c b/arch/powerpc/mm/ioremap.c index 57342154d2b0..4f12504fb405 100644 --- a/arch/powerpc/mm/ioremap.c +++ b/arch/powerpc/mm/ioremap.c @@ -98,23 +98,3 @@ void __iomem *do_ioremap(phys_addr_t pa, phys_addr_t offset, unsigned long size, return NULL; } - -#ifdef CONFIG_ZONE_DEVICE -/* - * Override the generic version in mm/memremap.c. - * - * With hash translation, the direct-map range is mapped with just one - * page size selected by htab_init_page_sizes(). Consult - * mmu_psize_defs[] to determine the minimum page size alignment. -*/ -unsigned long memremap_compat_align(void) -{ - unsigned int shift = mmu_psize_defs[mmu_linear_psize].shift; - - if (radix_enabled()) - return SUBSECTION_SIZE; - return max(SUBSECTION_SIZE, 1UL << shift); - -} -EXPORT_SYMBOL_GPL(memremap_compat_align); -#endif diff --git a/arch/powerpc/mm/kasan/book3s_32.c b/arch/powerpc/mm/kasan/book3s_32.c index 202bd260a009..35b287b0a8da 100644 --- a/arch/powerpc/mm/kasan/book3s_32.c +++ b/arch/powerpc/mm/kasan/book3s_32.c @@ -19,7 +19,8 @@ int __init kasan_init_region(void *start, size_t size) block = memblock_alloc(k_size, k_size_base); if (block && k_size_base >= SZ_128K && k_start == ALIGN(k_start, k_size_base)) { - int k_size_more = 1 << (ffs(k_size - k_size_base) - 1); + int shift = ffs(k_size - k_size_base); + int k_size_more = shift ? 1 << (shift - 1) : 0; setbat(-1, k_start, __pa(block), k_size_base, PAGE_KERNEL); if (k_size_more >= SZ_128K) diff --git a/arch/powerpc/mm/maccess.c b/arch/powerpc/mm/maccess.c index aad7c47e0030..ea821d0ffe16 100644 --- a/arch/powerpc/mm/maccess.c +++ b/arch/powerpc/mm/maccess.c @@ -11,20 +11,3 @@ bool copy_from_kernel_nofault_allowed(const void *unsafe_src, size_t size) { return is_kernel_addr((unsigned long)unsafe_src); } - -int copy_inst_from_kernel_nofault(struct ppc_inst *inst, u32 *src) -{ - unsigned int val, suffix; - int err; - - err = copy_from_kernel_nofault(&val, src, sizeof(val)); - if (err) - return err; - if (IS_ENABLED(CONFIG_PPC64) && get_op(val) == OP_PREFIX) { - err = copy_from_kernel_nofault(&suffix, src + 1, sizeof(suffix)); - *inst = ppc_inst_prefix(val, suffix); - } else { - *inst = ppc_inst(val); - } - return err; -} diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index bd5d91a31183..8e301cd8925b 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -26,7 +26,6 @@ #include <mm/mmu_decl.h> unsigned long long memory_limit; -bool init_mem_is_free; unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)] __page_aligned_bss; EXPORT_SYMBOL(empty_zero_page); @@ -312,7 +311,6 @@ void free_initmem(void) { ppc_md.progress = ppc_printk_progress; mark_initmem_nx(); - init_mem_is_free = true; free_initmem_default(POISON_FREE_INITMEM); } diff --git a/arch/powerpc/mm/mmap.c b/arch/powerpc/mm/mmap.c index ae683fdc716c..c475cf810aa8 100644 --- a/arch/powerpc/mm/mmap.c +++ b/arch/powerpc/mm/mmap.c @@ -80,6 +80,7 @@ static inline unsigned long mmap_base(unsigned long rnd, return PAGE_ALIGN(DEFAULT_MAP_WINDOW - gap - rnd); } +#ifdef HAVE_ARCH_UNMAPPED_AREA #ifdef CONFIG_PPC_RADIX_MMU /* * Same function as generic code used only for radix, because we don't need to overload @@ -181,11 +182,42 @@ radix__arch_get_unmapped_area_topdown(struct file *filp, */ return radix__arch_get_unmapped_area(filp, addr0, len, pgoff, flags); } +#endif + +unsigned long arch_get_unmapped_area(struct file *filp, + unsigned long addr, + unsigned long len, + unsigned long pgoff, + unsigned long flags) +{ +#ifdef CONFIG_PPC_MM_SLICES + return slice_get_unmapped_area(addr, len, flags, + mm_ctx_user_psize(¤t->mm->context), 0); +#else + BUG(); +#endif +} + +unsigned long arch_get_unmapped_area_topdown(struct file *filp, + const unsigned long addr0, + const unsigned long len, + const unsigned long pgoff, + const unsigned long flags) +{ +#ifdef CONFIG_PPC_MM_SLICES + return slice_get_unmapped_area(addr0, len, flags, + mm_ctx_user_psize(¤t->mm->context), 1); +#else + BUG(); +#endif +} +#endif /* HAVE_ARCH_UNMAPPED_AREA */ static void radix__arch_pick_mmap_layout(struct mm_struct *mm, unsigned long random_factor, struct rlimit *rlim_stack) { +#ifdef CONFIG_PPC_RADIX_MMU if (mmap_is_legacy(rlim_stack)) { mm->mmap_base = TASK_UNMAPPED_BASE; mm->get_unmapped_area = radix__arch_get_unmapped_area; @@ -193,13 +225,9 @@ static void radix__arch_pick_mmap_layout(struct mm_struct *mm, mm->mmap_base = mmap_base(random_factor, rlim_stack); mm->get_unmapped_area = radix__arch_get_unmapped_area_topdown; } -} -#else -/* dummy */ -extern void radix__arch_pick_mmap_layout(struct mm_struct *mm, - unsigned long random_factor, - struct rlimit *rlim_stack); #endif +} + /* * This function, called very early during the creation of a new * process VM image, sets up which VM layout function to use: diff --git a/arch/powerpc/mm/mmu_context.c b/arch/powerpc/mm/mmu_context.c index 74246536b832..1fb9c99f8679 100644 --- a/arch/powerpc/mm/mmu_context.c +++ b/arch/powerpc/mm/mmu_context.c @@ -18,6 +18,12 @@ static inline void switch_mm_pgdir(struct task_struct *tsk, { /* 32-bit keeps track of the current PGDIR in the thread struct */ tsk->thread.pgdir = mm->pgd; +#ifdef CONFIG_PPC_BOOK3S_32 + tsk->thread.sr0 = mm->context.sr0; +#endif +#if defined(CONFIG_BOOKE_OR_40x) && defined(CONFIG_PPC_KUAP) + tsk->thread.pid = mm->context.id; +#endif } #elif defined(CONFIG_PPC_BOOK3E_64) static inline void switch_mm_pgdir(struct task_struct *tsk, @@ -25,6 +31,9 @@ static inline void switch_mm_pgdir(struct task_struct *tsk, { /* 64-bit Book3E keeps track of current PGD in the PACA */ get_paca()->pgd = mm->pgd; +#ifdef CONFIG_PPC_KUAP + tsk->thread.pid = mm->context.id; +#endif } #else static inline void switch_mm_pgdir(struct task_struct *tsk, @@ -81,7 +90,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, * context */ if (cpu_has_feature(CPU_FTR_ALTIVEC)) - asm volatile ("dssall"); + asm volatile (PPC_DSSALL); if (!new_on_cpu) membarrier_arch_switch_mm(prev, next, tsk); diff --git a/arch/powerpc/mm/nohash/44x.c b/arch/powerpc/mm/nohash/44x.c index e079f26b267e..1beae802bb1c 100644 --- a/arch/powerpc/mm/nohash/44x.c +++ b/arch/powerpc/mm/nohash/44x.c @@ -38,7 +38,7 @@ int icache_44x_need_flush; unsigned long tlb_47x_boltmap[1024/8]; -static void ppc44x_update_tlb_hwater(void) +static void __init ppc44x_update_tlb_hwater(void) { /* The TLB miss handlers hard codes the watermark in a cmpli * instruction to improve performances rather than loading it @@ -122,7 +122,7 @@ static void __init ppc47x_update_boltmap(void) /* * "Pins" a 256MB TLB entry in AS0 for kernel lowmem for 47x type MMU */ -static void ppc47x_pin_tlb(unsigned int virt, unsigned int phys) +static void __init ppc47x_pin_tlb(unsigned int virt, unsigned int phys) { unsigned int rA; int bolted; @@ -240,19 +240,3 @@ void __init mmu_init_secondary(int cpu) } } #endif /* CONFIG_SMP */ - -#ifdef CONFIG_PPC_KUEP -void setup_kuep(bool disabled) -{ - if (smp_processor_id() != boot_cpuid) - return; - - if (disabled) - patch_instruction_site(&patch__tlb_44x_kuep, ppc_inst(PPC_RAW_NOP())); - else - pr_info("Activating Kernel Userspace Execution Prevention\n"); - - if (IS_ENABLED(CONFIG_PPC_47x) && disabled) - patch_instruction_site(&patch__tlb_47x_kuep, ppc_inst(PPC_RAW_NOP())); -} -#endif diff --git a/arch/powerpc/mm/nohash/8xx.c b/arch/powerpc/mm/nohash/8xx.c index 0df9fe29dd56..27f9186ae374 100644 --- a/arch/powerpc/mm/nohash/8xx.c +++ b/arch/powerpc/mm/nohash/8xx.c @@ -8,11 +8,7 @@ */ #include <linux/memblock.h> -#include <linux/mmu_context.h> #include <linux/hugetlb.h> -#include <asm/fixmap.h> -#include <asm/code-patching.h> -#include <asm/inst.h> #include <mm/mmu_decl.h> @@ -212,35 +208,6 @@ void __init setup_initial_memory_limit(phys_addr_t first_memblock_base, memblock_set_current_limit(min_t(u64, first_memblock_size, SZ_32M)); } -#ifdef CONFIG_PPC_KUEP -void __init setup_kuep(bool disabled) -{ - if (disabled) - return; - - pr_info("Activating Kernel Userspace Execution Prevention\n"); - - mtspr(SPRN_MI_AP, MI_APG_KUEP); -} -#endif - -#ifdef CONFIG_PPC_KUAP -struct static_key_false disable_kuap_key; -EXPORT_SYMBOL(disable_kuap_key); - -void __init setup_kuap(bool disabled) -{ - if (disabled) { - static_branch_enable(&disable_kuap_key); - return; - } - - pr_info("Activating Kernel Userspace Access Protection\n"); - - mtspr(SPRN_MD_AP, MD_APG_KUAP); -} -#endif - int pud_clear_huge(pud_t *pud) { return 0; diff --git a/arch/powerpc/mm/nohash/Makefile b/arch/powerpc/mm/nohash/Makefile index b1f630d423d8..b467a25ee155 100644 --- a/arch/powerpc/mm/nohash/Makefile +++ b/arch/powerpc/mm/nohash/Makefile @@ -2,7 +2,7 @@ ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC) -obj-y += mmu_context.o tlb.o tlb_low.o +obj-y += mmu_context.o tlb.o tlb_low.o kup.o obj-$(CONFIG_PPC_BOOK3E_64) += tlb_low_64e.o book3e_pgtable.o obj-$(CONFIG_40x) += 40x.o obj-$(CONFIG_44x) += 44x.o diff --git a/arch/powerpc/mm/nohash/book3e_pgtable.c b/arch/powerpc/mm/nohash/book3e_pgtable.c index 77884e24281d..7d4368d055a6 100644 --- a/arch/powerpc/mm/nohash/book3e_pgtable.c +++ b/arch/powerpc/mm/nohash/book3e_pgtable.c @@ -10,6 +10,7 @@ #include <asm/pgalloc.h> #include <asm/tlb.h> #include <asm/dma.h> +#include <asm/code-patching.h> #include <mm/mmu_decl.h> @@ -115,3 +116,17 @@ int __ref map_kernel_page(unsigned long ea, unsigned long pa, pgprot_t prot) smp_wmb(); return 0; } + +void __patch_exception(int exc, unsigned long addr) +{ + unsigned int *ibase = &interrupt_base_book3e; + + /* + * Our exceptions vectors start with a NOP and -then- a branch + * to deal with single stepping from userspace which stops on + * the second instruction. Thus we need to patch the second + * instruction of the exception, not the first one. + */ + + patch_branch(ibase + (exc / 4) + 1, addr, 0); +} diff --git a/arch/powerpc/mm/nohash/fsl_book3e.c b/arch/powerpc/mm/nohash/fsl_book3e.c index b231a54f540c..dfe715e0f70a 100644 --- a/arch/powerpc/mm/nohash/fsl_book3e.c +++ b/arch/powerpc/mm/nohash/fsl_book3e.c @@ -60,11 +60,6 @@ struct tlbcamrange { phys_addr_t phys; } tlbcam_addrs[NUM_TLBCAMS]; -unsigned long tlbcam_sz(int idx) -{ - return tlbcam_addrs[idx].limit - tlbcam_addrs[idx].start + 1; -} - #ifdef CONFIG_FSL_BOOKE /* * Return PA for this VA if it is mapped by a CAM, or 0 @@ -264,6 +259,11 @@ void __init MMU_init_hw(void) flush_instruction_cache(); } +static unsigned long __init tlbcam_sz(int idx) +{ + return tlbcam_addrs[idx].limit - tlbcam_addrs[idx].start + 1; +} + void __init adjust_total_lowmem(void) { unsigned long ram; diff --git a/arch/powerpc/mm/nohash/kup.c b/arch/powerpc/mm/nohash/kup.c new file mode 100644 index 000000000000..552becf90e97 --- /dev/null +++ b/arch/powerpc/mm/nohash/kup.c @@ -0,0 +1,33 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * This file contains the routines for initializing kernel userspace protection + */ + +#include <linux/export.h> +#include <linux/init.h> +#include <linux/jump_label.h> +#include <linux/printk.h> +#include <linux/smp.h> + +#include <asm/kup.h> +#include <asm/smp.h> + +#ifdef CONFIG_PPC_KUAP +struct static_key_false disable_kuap_key; +EXPORT_SYMBOL(disable_kuap_key); + +void setup_kuap(bool disabled) +{ + if (disabled) { + if (IS_ENABLED(CONFIG_40x)) + disable_kuep = true; + if (smp_processor_id() == boot_cpuid) + static_branch_enable(&disable_kuap_key); + return; + } + + pr_info("Activating Kernel Userspace Access Protection\n"); + + __prevent_user_access(KUAP_READ_WRITE); +} +#endif diff --git a/arch/powerpc/mm/nohash/mmu_context.c b/arch/powerpc/mm/nohash/mmu_context.c index 44b2b5e7cabe..85b048f04c56 100644 --- a/arch/powerpc/mm/nohash/mmu_context.c +++ b/arch/powerpc/mm/nohash/mmu_context.c @@ -33,6 +33,7 @@ #include <asm/mmu_context.h> #include <asm/tlbflush.h> #include <asm/smp.h> +#include <asm/kup.h> #include <mm/mmu_decl.h> @@ -217,7 +218,7 @@ static void set_context(unsigned long id, pgd_t *pgd) /* sync */ mb(); - } else { + } else if (kuap_is_disabled()) { if (IS_ENABLED(CONFIG_40x)) mb(); /* sync */ @@ -305,6 +306,9 @@ void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next, if (IS_ENABLED(CONFIG_BDI_SWITCH)) abatron_pteptrs[1] = next->pgd; set_context(id, next->pgd); +#if defined(CONFIG_BOOKE_OR_40x) && defined(CONFIG_PPC_KUAP) + tsk->thread.pid = id; +#endif raw_spin_unlock(&context_lock); } diff --git a/arch/powerpc/mm/nohash/tlb.c b/arch/powerpc/mm/nohash/tlb.c index 647bf454a0fa..fd2c77af5c55 100644 --- a/arch/powerpc/mm/nohash/tlb.c +++ b/arch/powerpc/mm/nohash/tlb.c @@ -150,7 +150,6 @@ static inline int mmu_get_tsize(int psize) */ #ifdef CONFIG_PPC64 -int mmu_linear_psize; /* Page size used for the linear mapping */ int mmu_pte_psize; /* Page size used for PTE pages */ int mmu_vmemmap_psize; /* Page size used for the virtual mem map */ int book3e_htw_mode; /* HW tablewalk? Value is PPC_HTW_* */ @@ -433,7 +432,7 @@ void tlb_flush_pgtable(struct mmu_gather *tlb, unsigned long address) } } -static void setup_page_sizes(void) +static void __init setup_page_sizes(void) { unsigned int tlb0cfg; unsigned int tlb0ps; @@ -571,7 +570,7 @@ out: } } -static void setup_mmu_htw(void) +static void __init setup_mmu_htw(void) { /* * If we want to use HW tablewalk, enable it by patching the TLB miss @@ -657,14 +656,6 @@ static void early_init_this_mmu(void) static void __init early_init_mmu_global(void) { - /* XXX This will have to be decided at runtime, but right - * now our boot and TLB miss code hard wires it. Ideally - * we should find out a suitable page size and patch the - * TLB miss code (either that or use the PACA to store - * the value we want) - */ - mmu_linear_psize = MMU_PAGE_1G; - /* XXX This should be decided at runtime based on supported * page sizes in the TLB, but for now let's assume 16M is * always there and a good fit (which it probably is) diff --git a/arch/powerpc/mm/nohash/tlb_low_64e.S b/arch/powerpc/mm/nohash/tlb_low_64e.S index 9235e720e357..8b97c4acfebf 100644 --- a/arch/powerpc/mm/nohash/tlb_low_64e.S +++ b/arch/powerpc/mm/nohash/tlb_low_64e.S @@ -128,6 +128,13 @@ END_BTB_FLUSH_SECTION bne tlb_miss_kernel_bolted +tlb_miss_user_bolted: +#ifdef CONFIG_PPC_KUAP + mfspr r10,SPRN_MAS1 + rlwinm. r10,r10,0,0x3fff0000 + beq- tlb_miss_fault_bolted /* KUAP fault */ +#endif + tlb_miss_common_bolted: /* * This is the guts of the TLB miss handler for bolted-linear. @@ -246,7 +253,7 @@ itlb_miss_fault_bolted: cmpldi cr0,r15,0 /* Check for user region */ oris r11,r11,_PAGE_ACCESSED@h - beq tlb_miss_common_bolted + beq tlb_miss_user_bolted b itlb_miss_kernel_bolted #ifdef CONFIG_PPC_FSL_BOOK3E @@ -676,6 +683,11 @@ finish_normal_tlb_miss: /* Check if required permissions are met */ andc. r15,r11,r14 bne- normal_tlb_miss_access_fault +#ifdef CONFIG_PPC_KUAP + mfspr r11,SPRN_MAS1 + rlwinm. r10,r11,0,0x3fff0000 + beq- normal_tlb_miss_access_fault /* KUAP fault */ +#endif /* Now we build the MAS: * @@ -689,15 +701,17 @@ finish_normal_tlb_miss: * * TODO: mix up code below for better scheduling */ - clrrdi r11,r16,12 /* Clear low crap in EA */ - rlwimi r11,r14,32-19,27,31 /* Insert WIMGE */ - mtspr SPRN_MAS2,r11 + clrrdi r10,r16,12 /* Clear low crap in EA */ + rlwimi r10,r14,32-19,27,31 /* Insert WIMGE */ + mtspr SPRN_MAS2,r10 /* Check page size, if not standard, update MAS1 */ - rldicl r11,r14,64-8,64-8 - cmpldi cr0,r11,BOOK3E_PAGESZ_4K + rldicl r10,r14,64-8,64-8 + cmpldi cr0,r10,BOOK3E_PAGESZ_4K beq- 1f +#ifndef CONFIG_PPC_KUAP mfspr r11,SPRN_MAS1 +#endif rlwimi r11,r14,31,21,24 rlwinm r11,r11,0,21,19 mtspr SPRN_MAS1,r11 @@ -786,7 +800,16 @@ virt_page_table_tlb_miss: mfspr r10,SPRN_MAS1 rlwinm r10,r10,0,16,1 /* Clear TID */ mtspr SPRN_MAS1,r10 +#ifdef CONFIG_PPC_KUAP + b 2f +1: + mfspr r10,SPRN_MAS1 + rlwinm. r10,r10,0,0x3fff0000 + beq- virt_page_table_tlb_miss_fault /* KUAP fault */ +2: +#else 1: +#endif BEGIN_MMU_FTR_SECTION /* Search if we already have a TLB entry for that virtual address, and * if we do, bail out. @@ -1027,6 +1050,11 @@ virt_page_table_tlb_miss_whacko_fault: * avoid too much complication, it will save/restore things for us */ htw_tlb_miss: +#ifdef CONFIG_PPC_KUAP + mfspr r10,SPRN_MAS1 + rlwinm. r10,r10,0,0x3fff0000 + beq- htw_tlb_miss_fault /* KUAP fault */ +#endif /* Search if we already have a TLB entry for that virtual address, and * if we do, bail out. * diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 59d3cfcd7887..9d5f710d2c20 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -134,7 +134,7 @@ static int __init fake_numa_create_new_node(unsigned long end_pfn, return 0; } -static void reset_numa_cpu_lookup_table(void) +static void __init reset_numa_cpu_lookup_table(void) { unsigned int cpu; @@ -372,7 +372,7 @@ void update_numa_distance(struct device_node *node) * ibm,numa-lookup-index-table= {N, domainid1, domainid2, ..... domainidN} * ibm,numa-distance-table = { N, 1, 2, 4, 5, 1, 6, .... N elements} */ -static void initialize_form2_numa_distance_lookup_table(void) +static void __init initialize_form2_numa_distance_lookup_table(void) { int i, j; struct device_node *root; @@ -581,7 +581,7 @@ static int of_get_assoc_arrays(struct assoc_arrays *aa) return 0; } -static int get_nid_and_numa_distance(struct drmem_lmb *lmb) +static int __init get_nid_and_numa_distance(struct drmem_lmb *lmb) { struct assoc_arrays aa = { .arrays = NULL }; int default_nid = NUMA_NO_NODE; diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c index ce9482383144..abb3198bd277 100644 --- a/arch/powerpc/mm/pgtable.c +++ b/arch/powerpc/mm/pgtable.c @@ -81,9 +81,6 @@ static struct page *maybe_pte_to_page(pte_t pte) static pte_t set_pte_filter_hash(pte_t pte) { - if (radix_enabled()) - return pte; - pte = __pte(pte_val(pte) & ~_PAGE_HPTEFLAGS); if (pte_looks_normal(pte) && !(cpu_has_feature(CPU_FTR_COHERENT_ICACHE) || cpu_has_feature(CPU_FTR_NOEXECUTE))) { @@ -112,6 +109,9 @@ static inline pte_t set_pte_filter(pte_t pte) { struct page *pg; + if (radix_enabled()) + return pte; + if (mmu_has_feature(MMU_FTR_HPTE_TABLE)) return set_pte_filter_hash(pte); @@ -144,6 +144,9 @@ static pte_t set_access_flags_filter(pte_t pte, struct vm_area_struct *vma, { struct page *pg; + if (IS_ENABLED(CONFIG_PPC_BOOK3S_64)) + return pte; + if (mmu_has_feature(MMU_FTR_HPTE_TABLE)) return pte; diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index 78c8cf01db5f..175aabf101e8 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -102,7 +102,8 @@ EXPORT_SYMBOL(__pte_frag_size_shift); struct page *p4d_page(p4d_t p4d) { if (p4d_is_leaf(p4d)) { - VM_WARN_ON(!p4d_huge(p4d)); + if (!IS_ENABLED(CONFIG_HAVE_ARCH_HUGE_VMAP)) + VM_WARN_ON(!p4d_huge(p4d)); return pte_page(p4d_pte(p4d)); } return virt_to_page(p4d_pgtable(p4d)); @@ -112,7 +113,8 @@ struct page *p4d_page(p4d_t p4d) struct page *pud_page(pud_t pud) { if (pud_is_leaf(pud)) { - VM_WARN_ON(!pud_huge(pud)); + if (!IS_ENABLED(CONFIG_HAVE_ARCH_HUGE_VMAP)) + VM_WARN_ON(!pud_huge(pud)); return pte_page(pud_pte(pud)); } return virt_to_page(pud_pgtable(pud)); @@ -125,7 +127,13 @@ struct page *pud_page(pud_t pud) struct page *pmd_page(pmd_t pmd) { if (pmd_is_leaf(pmd)) { - VM_WARN_ON(!(pmd_large(pmd) || pmd_huge(pmd))); + /* + * vmalloc_to_page may be called on any vmap address (not only + * vmalloc), and it uses pmd_page() etc., when huge vmap is + * enabled so these checks can't be used. + */ + if (!IS_ENABLED(CONFIG_HAVE_ARCH_HUGE_VMAP)) + VM_WARN_ON(!(pmd_large(pmd) || pmd_huge(pmd))); return pte_page(pmd_pte(pmd)); } return virt_to_page(pmd_page_vaddr(pmd)); diff --git a/arch/powerpc/mm/ptdump/Makefile b/arch/powerpc/mm/ptdump/Makefile index 4050cbb55acf..b533caaf0910 100644 --- a/arch/powerpc/mm/ptdump/Makefile +++ b/arch/powerpc/mm/ptdump/Makefile @@ -10,5 +10,5 @@ obj-$(CONFIG_PPC_BOOK3S_64) += book3s64.o ifdef CONFIG_PTDUMP_DEBUGFS obj-$(CONFIG_PPC_BOOK3S_32) += bats.o segment_regs.o -obj-$(CONFIG_PPC_BOOK3S_64) += hashpagetable.o +obj-$(CONFIG_PPC_64S_HASH_MMU) += hashpagetable.o endif diff --git a/arch/powerpc/mm/ptdump/ptdump.c b/arch/powerpc/mm/ptdump/ptdump.c index 32bfb215c485..8c846982766f 100644 --- a/arch/powerpc/mm/ptdump/ptdump.c +++ b/arch/powerpc/mm/ptdump/ptdump.c @@ -123,7 +123,7 @@ static struct ptdump_range ptdump_range[] __ro_after_init = { void pt_dump_size(struct seq_file *m, unsigned long size) { - static const char units[] = "KMGTPE"; + static const char units[] = " KMGTPE"; const char *unit = units; /* Work out what appropriate unit to use */ @@ -176,7 +176,7 @@ static void dump_addr(struct pg_state *st, unsigned long addr) pt_dump_seq_printf(st->seq, REG "-" REG " ", st->start_address, addr - 1); pt_dump_seq_printf(st->seq, " " REG " ", st->start_pa); - pt_dump_size(st->seq, (addr - st->start_address) >> 10); + pt_dump_size(st->seq, addr - st->start_address); } static void note_prot_wx(struct pg_state *st, unsigned long addr) @@ -315,7 +315,7 @@ static int ptdump_show(struct seq_file *m, void *v) DEFINE_SHOW_ATTRIBUTE(ptdump); -static void build_pgtable_complete_mask(void) +static void __init build_pgtable_complete_mask(void) { unsigned int i, j; diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c index 82b45b1cb973..f42711f865f3 100644 --- a/arch/powerpc/mm/slice.c +++ b/arch/powerpc/mm/slice.c @@ -639,26 +639,6 @@ return_addr: } EXPORT_SYMBOL_GPL(slice_get_unmapped_area); -unsigned long arch_get_unmapped_area(struct file *filp, - unsigned long addr, - unsigned long len, - unsigned long pgoff, - unsigned long flags) -{ - return slice_get_unmapped_area(addr, len, flags, - mm_ctx_user_psize(¤t->mm->context), 0); -} - -unsigned long arch_get_unmapped_area_topdown(struct file *filp, - const unsigned long addr0, - const unsigned long len, - const unsigned long pgoff, - const unsigned long flags) -{ - return slice_get_unmapped_area(addr0, len, flags, - mm_ctx_user_psize(¤t->mm->context), 1); -} - unsigned int notrace get_slice_psize(struct mm_struct *mm, unsigned long addr) { unsigned char *psizes; diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h index 7e9b978b768e..b20a2a83a6e7 100644 --- a/arch/powerpc/net/bpf_jit.h +++ b/arch/powerpc/net/bpf_jit.h @@ -31,7 +31,7 @@ pr_err_ratelimited("Branch offset 0x%lx (@%u) out of range\n", offset, ctx->idx); \ return -ERANGE; \ } \ - EMIT(PPC_INST_BRANCH | (offset & 0x03fffffc)); \ + EMIT(PPC_RAW_BRANCH(offset)); \ } while (0) /* blr; (unconditional 'branch' with link) to absolute address */ @@ -125,8 +125,7 @@ #define COND_LE (CR0_GT | COND_CMP_FALSE) #define SEEN_FUNC 0x20000000 /* might call external helpers */ -#define SEEN_STACK 0x40000000 /* uses BPF stack */ -#define SEEN_TAILCALL 0x80000000 /* uses tail calls */ +#define SEEN_TAILCALL 0x40000000 /* uses tail calls */ #define SEEN_VREG_MASK 0x1ff80000 /* Volatile registers r3-r12 */ #define SEEN_NVREG_MASK 0x0003ffff /* Non volatile registers r14-r31 */ @@ -151,8 +150,15 @@ struct codegen_context { unsigned int idx; unsigned int stack_size; int b2p[ARRAY_SIZE(b2p)]; + unsigned int exentry_idx; }; +#ifdef CONFIG_PPC32 +#define BPF_FIXUP_LEN 3 /* Three instructions => 12 bytes */ +#else +#define BPF_FIXUP_LEN 2 /* Two instructions => 8 bytes */ +#endif + static inline void bpf_flush_icache(void *start, void *end) { smp_wmb(); /* smp write barrier */ @@ -176,11 +182,14 @@ static inline void bpf_clear_seen_register(struct codegen_context *ctx, int i) void bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 func); int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *ctx, - u32 *addrs, bool extra_pass); + u32 *addrs, int pass); void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx); void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx); void bpf_jit_realloc_regs(struct codegen_context *ctx); +int bpf_add_extable_entry(struct bpf_prog *fp, u32 *image, int pass, struct codegen_context *ctx, + int insn_idx, int jmp_off, int dst_reg); + #endif #endif diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c index 90ce75f0f1e2..d6ffdd0f2309 100644 --- a/arch/powerpc/net/bpf_jit_comp.c +++ b/arch/powerpc/net/bpf_jit_comp.c @@ -101,6 +101,8 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) struct bpf_prog *tmp_fp; bool bpf_blinded = false; bool extra_pass = false; + u32 extable_len; + u32 fixup_len; if (!fp->jit_requested) return org_fp; @@ -131,7 +133,6 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) image = jit_data->image; bpf_hdr = jit_data->header; proglen = jit_data->proglen; - alloclen = proglen + FUNCTION_DESCR_SIZE; extra_pass = true; goto skip_init_ctx; } @@ -149,7 +150,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) cgctx.stack_size = round_up(fp->aux->stack_depth, 16); /* Scouting faux-generate pass 0 */ - if (bpf_jit_build_body(fp, 0, &cgctx, addrs, false)) { + if (bpf_jit_build_body(fp, 0, &cgctx, addrs, 0)) { /* We hit something illegal or unsupported. */ fp = org_fp; goto out_addrs; @@ -162,7 +163,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) */ if (cgctx.seen & SEEN_TAILCALL) { cgctx.idx = 0; - if (bpf_jit_build_body(fp, 0, &cgctx, addrs, false)) { + if (bpf_jit_build_body(fp, 0, &cgctx, addrs, 0)) { fp = org_fp; goto out_addrs; } @@ -177,8 +178,11 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) bpf_jit_build_prologue(0, &cgctx); bpf_jit_build_epilogue(0, &cgctx); + fixup_len = fp->aux->num_exentries * BPF_FIXUP_LEN * 4; + extable_len = fp->aux->num_exentries * sizeof(struct exception_table_entry); + proglen = cgctx.idx * 4; - alloclen = proglen + FUNCTION_DESCR_SIZE; + alloclen = proglen + FUNCTION_DESCR_SIZE + fixup_len + extable_len; bpf_hdr = bpf_jit_binary_alloc(alloclen, &image, 4, bpf_jit_fill_ill_insns); if (!bpf_hdr) { @@ -186,6 +190,9 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) goto out_addrs; } + if (extable_len) + fp->aux->extable = (void *)image + FUNCTION_DESCR_SIZE + proglen + fixup_len; + skip_init_ctx: code_base = (u32 *)(image + FUNCTION_DESCR_SIZE); @@ -210,7 +217,7 @@ skip_init_ctx: /* Now build the prologue, body code & epilogue for real. */ cgctx.idx = 0; bpf_jit_build_prologue(code_base, &cgctx); - if (bpf_jit_build_body(fp, code_base, &cgctx, addrs, extra_pass)) { + if (bpf_jit_build_body(fp, code_base, &cgctx, addrs, pass)) { bpf_jit_binary_free(bpf_hdr); fp = org_fp; goto out_addrs; @@ -238,7 +245,7 @@ skip_codegen_passes: fp->bpf_func = (void *)image; fp->jited = 1; - fp->jited_len = alloclen; + fp->jited_len = proglen + FUNCTION_DESCR_SIZE; bpf_flush_icache(bpf_hdr, (u8 *)bpf_hdr + (bpf_hdr->pages * PAGE_SIZE)); if (!fp->is_func || extra_pass) { @@ -262,3 +269,52 @@ out: return fp; } + +/* + * The caller should check for (BPF_MODE(code) == BPF_PROBE_MEM) before calling + * this function, as this only applies to BPF_PROBE_MEM, for now. + */ +int bpf_add_extable_entry(struct bpf_prog *fp, u32 *image, int pass, struct codegen_context *ctx, + int insn_idx, int jmp_off, int dst_reg) +{ + off_t offset; + unsigned long pc; + struct exception_table_entry *ex; + u32 *fixup; + + /* Populate extable entries only in the last pass */ + if (pass != 2) + return 0; + + if (!fp->aux->extable || + WARN_ON_ONCE(ctx->exentry_idx >= fp->aux->num_exentries)) + return -EINVAL; + + pc = (unsigned long)&image[insn_idx]; + + fixup = (void *)fp->aux->extable - + (fp->aux->num_exentries * BPF_FIXUP_LEN * 4) + + (ctx->exentry_idx * BPF_FIXUP_LEN * 4); + + fixup[0] = PPC_RAW_LI(dst_reg, 0); + if (IS_ENABLED(CONFIG_PPC32)) + fixup[1] = PPC_RAW_LI(dst_reg - 1, 0); /* clear higher 32-bit register too */ + + fixup[BPF_FIXUP_LEN - 1] = + PPC_RAW_BRANCH((long)(pc + jmp_off) - (long)&fixup[BPF_FIXUP_LEN - 1]); + + ex = &fp->aux->extable[ctx->exentry_idx]; + + offset = pc - (long)&ex->insn; + if (WARN_ON_ONCE(offset >= 0 || offset < INT_MIN)) + return -ERANGE; + ex->insn = offset; + + offset = (long)fixup - (long)&ex->fixup; + if (WARN_ON_ONCE(offset >= 0 || offset < INT_MIN)) + return -ERANGE; + ex->fixup = offset; + + ctx->exentry_idx++; + return 0; +} diff --git a/arch/powerpc/net/bpf_jit_comp32.c b/arch/powerpc/net/bpf_jit_comp32.c index 8a4faa05f9e4..faaebd446cad 100644 --- a/arch/powerpc/net/bpf_jit_comp32.c +++ b/arch/powerpc/net/bpf_jit_comp32.c @@ -268,7 +268,7 @@ static int bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 o /* Assemble the body code between the prologue & epilogue */ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *ctx, - u32 *addrs, bool extra_pass) + u32 *addrs, int pass) { const struct bpf_insn *insn = fp->insnsi; int flen = fp->len; @@ -284,6 +284,7 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * u32 src_reg = bpf_to_ppc(ctx, insn[i].src_reg); u32 src_reg_h = src_reg - 1; u32 tmp_reg = bpf_to_ppc(ctx, TMP_REG); + u32 size = BPF_SIZE(code); s16 off = insn[i].off; s32 imm = insn[i].imm; bool func_addr_fixed; @@ -812,23 +813,91 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * * BPF_LDX */ case BPF_LDX | BPF_MEM | BPF_B: /* dst = *(u8 *)(ul) (src + off) */ - EMIT(PPC_RAW_LBZ(dst_reg, src_reg, off)); - if (!fp->aux->verifier_zext) - EMIT(PPC_RAW_LI(dst_reg_h, 0)); - break; + case BPF_LDX | BPF_PROBE_MEM | BPF_B: case BPF_LDX | BPF_MEM | BPF_H: /* dst = *(u16 *)(ul) (src + off) */ - EMIT(PPC_RAW_LHZ(dst_reg, src_reg, off)); - if (!fp->aux->verifier_zext) - EMIT(PPC_RAW_LI(dst_reg_h, 0)); - break; + case BPF_LDX | BPF_PROBE_MEM | BPF_H: case BPF_LDX | BPF_MEM | BPF_W: /* dst = *(u32 *)(ul) (src + off) */ - EMIT(PPC_RAW_LWZ(dst_reg, src_reg, off)); - if (!fp->aux->verifier_zext) - EMIT(PPC_RAW_LI(dst_reg_h, 0)); - break; + case BPF_LDX | BPF_PROBE_MEM | BPF_W: case BPF_LDX | BPF_MEM | BPF_DW: /* dst = *(u64 *)(ul) (src + off) */ - EMIT(PPC_RAW_LWZ(dst_reg_h, src_reg, off)); - EMIT(PPC_RAW_LWZ(dst_reg, src_reg, off + 4)); + case BPF_LDX | BPF_PROBE_MEM | BPF_DW: + /* + * As PTR_TO_BTF_ID that uses BPF_PROBE_MEM mode could either be a valid + * kernel pointer or NULL but not a userspace address, execute BPF_PROBE_MEM + * load only if addr is kernel address (see is_kernel_addr()), otherwise + * set dst_reg=0 and move on. + */ + if (BPF_MODE(code) == BPF_PROBE_MEM) { + PPC_LI32(_R0, TASK_SIZE - off); + EMIT(PPC_RAW_CMPLW(src_reg, _R0)); + PPC_BCC(COND_GT, (ctx->idx + 5) * 4); + EMIT(PPC_RAW_LI(dst_reg, 0)); + /* + * For BPF_DW case, "li reg_h,0" would be needed when + * !fp->aux->verifier_zext. Emit NOP otherwise. + * + * Note that "li reg_h,0" is emitted for BPF_B/H/W case, + * if necessary. So, jump there insted of emitting an + * additional "li reg_h,0" instruction. + */ + if (size == BPF_DW && !fp->aux->verifier_zext) + EMIT(PPC_RAW_LI(dst_reg_h, 0)); + else + EMIT(PPC_RAW_NOP()); + /* + * Need to jump two instructions instead of one for BPF_DW case + * as there are two load instructions for dst_reg_h & dst_reg + * respectively. + */ + if (size == BPF_DW) + PPC_JMP((ctx->idx + 3) * 4); + else + PPC_JMP((ctx->idx + 2) * 4); + } + + switch (size) { + case BPF_B: + EMIT(PPC_RAW_LBZ(dst_reg, src_reg, off)); + break; + case BPF_H: + EMIT(PPC_RAW_LHZ(dst_reg, src_reg, off)); + break; + case BPF_W: + EMIT(PPC_RAW_LWZ(dst_reg, src_reg, off)); + break; + case BPF_DW: + EMIT(PPC_RAW_LWZ(dst_reg_h, src_reg, off)); + EMIT(PPC_RAW_LWZ(dst_reg, src_reg, off + 4)); + break; + } + + if (size != BPF_DW && !fp->aux->verifier_zext) + EMIT(PPC_RAW_LI(dst_reg_h, 0)); + + if (BPF_MODE(code) == BPF_PROBE_MEM) { + int insn_idx = ctx->idx - 1; + int jmp_off = 4; + + /* + * In case of BPF_DW, two lwz instructions are emitted, one + * for higher 32-bit and another for lower 32-bit. So, set + * ex->insn to the first of the two and jump over both + * instructions in fixup. + * + * Similarly, with !verifier_zext, two instructions are + * emitted for BPF_B/H/W case. So, set ex->insn to the + * instruction that could fault and skip over both + * instructions. + */ + if (size == BPF_DW || !fp->aux->verifier_zext) { + insn_idx -= 1; + jmp_off += 4; + } + + ret = bpf_add_extable_entry(fp, image, pass, ctx, insn_idx, + jmp_off, dst_reg); + if (ret) + return ret; + } break; /* @@ -862,7 +931,7 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * case BPF_JMP | BPF_CALL: ctx->seen |= SEEN_FUNC; - ret = bpf_jit_get_func_addr(fp, &insn[i], extra_pass, + ret = bpf_jit_get_func_addr(fp, &insn[i], false, &func_addr, &func_addr_fixed); if (ret < 0) return ret; diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c index 8571aafcc9e1..9eae8d8ed340 100644 --- a/arch/powerpc/net/bpf_jit_comp64.c +++ b/arch/powerpc/net/bpf_jit_comp64.c @@ -297,7 +297,7 @@ asm ( /* Assemble the body code between the prologue & epilogue */ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *ctx, - u32 *addrs, bool extra_pass) + u32 *addrs, int pass) { enum stf_barrier_type stf_barrier = stf_barrier_type_get(); const struct bpf_insn *insn = fp->insnsi; @@ -311,6 +311,7 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * u32 code = insn[i].code; u32 dst_reg = b2p[insn[i].dst_reg]; u32 src_reg = b2p[insn[i].src_reg]; + u32 size = BPF_SIZE(code); s16 off = insn[i].off; s32 imm = insn[i].imm; bool func_addr_fixed; @@ -778,25 +779,66 @@ emit_clear: */ /* dst = *(u8 *)(ul) (src + off) */ case BPF_LDX | BPF_MEM | BPF_B: - EMIT(PPC_RAW_LBZ(dst_reg, src_reg, off)); - if (insn_is_zext(&insn[i + 1])) - addrs[++i] = ctx->idx * 4; - break; + case BPF_LDX | BPF_PROBE_MEM | BPF_B: /* dst = *(u16 *)(ul) (src + off) */ case BPF_LDX | BPF_MEM | BPF_H: - EMIT(PPC_RAW_LHZ(dst_reg, src_reg, off)); - if (insn_is_zext(&insn[i + 1])) - addrs[++i] = ctx->idx * 4; - break; + case BPF_LDX | BPF_PROBE_MEM | BPF_H: /* dst = *(u32 *)(ul) (src + off) */ case BPF_LDX | BPF_MEM | BPF_W: - EMIT(PPC_RAW_LWZ(dst_reg, src_reg, off)); - if (insn_is_zext(&insn[i + 1])) - addrs[++i] = ctx->idx * 4; - break; + case BPF_LDX | BPF_PROBE_MEM | BPF_W: /* dst = *(u64 *)(ul) (src + off) */ case BPF_LDX | BPF_MEM | BPF_DW: - PPC_BPF_LL(dst_reg, src_reg, off); + case BPF_LDX | BPF_PROBE_MEM | BPF_DW: + /* + * As PTR_TO_BTF_ID that uses BPF_PROBE_MEM mode could either be a valid + * kernel pointer or NULL but not a userspace address, execute BPF_PROBE_MEM + * load only if addr is kernel address (see is_kernel_addr()), otherwise + * set dst_reg=0 and move on. + */ + if (BPF_MODE(code) == BPF_PROBE_MEM) { + EMIT(PPC_RAW_ADDI(b2p[TMP_REG_1], src_reg, off)); + if (IS_ENABLED(CONFIG_PPC_BOOK3E_64)) + PPC_LI64(b2p[TMP_REG_2], 0x8000000000000000ul); + else /* BOOK3S_64 */ + PPC_LI64(b2p[TMP_REG_2], PAGE_OFFSET); + EMIT(PPC_RAW_CMPLD(b2p[TMP_REG_1], b2p[TMP_REG_2])); + PPC_BCC(COND_GT, (ctx->idx + 4) * 4); + EMIT(PPC_RAW_LI(dst_reg, 0)); + /* + * Check if 'off' is word aligned because PPC_BPF_LL() + * (BPF_DW case) generates two instructions if 'off' is not + * word-aligned and one instruction otherwise. + */ + if (BPF_SIZE(code) == BPF_DW && (off & 3)) + PPC_JMP((ctx->idx + 3) * 4); + else + PPC_JMP((ctx->idx + 2) * 4); + } + + switch (size) { + case BPF_B: + EMIT(PPC_RAW_LBZ(dst_reg, src_reg, off)); + break; + case BPF_H: + EMIT(PPC_RAW_LHZ(dst_reg, src_reg, off)); + break; + case BPF_W: + EMIT(PPC_RAW_LWZ(dst_reg, src_reg, off)); + break; + case BPF_DW: + PPC_BPF_LL(dst_reg, src_reg, off); + break; + } + + if (size != BPF_DW && insn_is_zext(&insn[i + 1])) + addrs[++i] = ctx->idx * 4; + + if (BPF_MODE(code) == BPF_PROBE_MEM) { + ret = bpf_add_extable_entry(fp, image, pass, ctx, ctx->idx - 1, + 4, dst_reg); + if (ret) + return ret; + } break; /* @@ -831,7 +873,7 @@ emit_clear: case BPF_JMP | BPF_CALL: ctx->seen |= SEEN_FUNC; - ret = bpf_jit_get_func_addr(fp, &insn[i], extra_pass, + ret = bpf_jit_get_func_addr(fp, &insn[i], false, &func_addr, &func_addr_fixed); if (ret < 0) return ret; diff --git a/arch/powerpc/perf/8xx-pmu.c b/arch/powerpc/perf/8xx-pmu.c index f970d1510d3d..4738c4dbf567 100644 --- a/arch/powerpc/perf/8xx-pmu.c +++ b/arch/powerpc/perf/8xx-pmu.c @@ -153,7 +153,7 @@ static void mpc8xx_pmu_read(struct perf_event *event) static void mpc8xx_pmu_del(struct perf_event *event, int flags) { - struct ppc_inst insn = ppc_inst(PPC_RAW_MFSPR(10, SPRN_SPRG_SCRATCH2)); + ppc_inst_t insn = ppc_inst(PPC_RAW_MFSPR(10, SPRN_SPRG_SCRATCH2)); mpc8xx_pmu_read(event); diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 73e62e9b179b..a684901b6965 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -17,6 +17,7 @@ #include <asm/firmware.h> #include <asm/ptrace.h> #include <asm/code-patching.h> +#include <asm/hw_irq.h> #include <asm/interrupt.h> #ifdef CONFIG_PPC64 @@ -857,6 +858,19 @@ static void write_pmc(int idx, unsigned long val) } } +static int any_pmc_overflown(struct cpu_hw_events *cpuhw) +{ + int i, idx; + + for (i = 0; i < cpuhw->n_events; i++) { + idx = cpuhw->event[i]->hw.idx; + if ((idx) && ((int)read_pmc(idx) < 0)) + return idx; + } + + return 0; +} + /* Called from sysrq_handle_showregs() */ void perf_event_print_debug(void) { @@ -1281,11 +1295,13 @@ static void power_pmu_disable(struct pmu *pmu) /* * Set the 'freeze counters' bit, clear EBE/BHRBA/PMCC/PMAO/FC56 + * Also clear PMXE to disable PMI's getting triggered in some + * corner cases during PMU disable. */ val = mmcr0 = mfspr(SPRN_MMCR0); val |= MMCR0_FC; val &= ~(MMCR0_EBE | MMCR0_BHRBA | MMCR0_PMCC | MMCR0_PMAO | - MMCR0_FC56); + MMCR0_PMXE | MMCR0_FC56); /* Set mmcr0 PMCCEXT for p10 */ if (ppmu->flags & PPMU_ARCH_31) val |= MMCR0_PMCCEXT; @@ -1299,6 +1315,23 @@ static void power_pmu_disable(struct pmu *pmu) mb(); isync(); + /* + * Some corner cases could clear the PMU counter overflow + * while a masked PMI is pending. One such case is when + * a PMI happens during interrupt replay and perf counter + * values are cleared by PMU callbacks before replay. + * + * If any PMC corresponding to the active PMU events are + * overflown, disable the interrupt by clearing the paca + * bit for PMI since we are disabling the PMU now. + * Otherwise provide a warning if there is PMI pending, but + * no counter is found overflown. + */ + if (any_pmc_overflown(cpuhw)) + clear_pmi_irq_pending(); + else + WARN_ON(pmi_irq_pending()); + val = mmcra = cpuhw->mmcr.mmcra; /* @@ -1390,6 +1423,15 @@ static void power_pmu_enable(struct pmu *pmu) * (possibly updated for removal of events). */ if (!cpuhw->n_added) { + /* + * If there is any active event with an overflown PMC + * value, set back PACA_IRQ_PMI which would have been + * cleared in power_pmu_disable(). + */ + hard_irq_disable(); + if (any_pmc_overflown(cpuhw)) + set_pmi_irq_pending(); + mtspr(SPRN_MMCRA, cpuhw->mmcr.mmcra & ~MMCRA_SAMPLE_ENABLE); mtspr(SPRN_MMCR1, cpuhw->mmcr.mmcr1); if (ppmu->flags & PPMU_ARCH_31) @@ -2337,6 +2379,14 @@ static void __perf_event_interrupt(struct pt_regs *regs) break; } } + + /* + * Clear PACA_IRQ_PMI in case it was set by + * set_pmi_irq_pending() when PMU was enabled + * after accounting for interrupts. + */ + clear_pmi_irq_pending(); + if (!active) /* reset non active counters that have overflowed */ write_pmc(i + 1, 0); @@ -2356,6 +2406,13 @@ static void __perf_event_interrupt(struct pt_regs *regs) } } } + + /* + * During system wide profling or while specific CPU is monitored for an + * event, some corner cases could cause PMC to overflow in idle path. This + * will trigger a PMI after waking up from idle. Since counter values are _not_ + * saved/restored in idle path, can lead to below "Can't find PMC" message. + */ if (unlikely(!found) && !arch_irq_disabled_regs(regs)) printk_ratelimited(KERN_WARNING "Can't find PMC that caused IRQ\n"); @@ -2381,6 +2438,36 @@ static void perf_event_interrupt(struct pt_regs *regs) perf_sample_event_took(sched_clock() - start_clock); } +/* + * If the perf subsystem wants performance monitor interrupts as soon as + * possible (e.g., to sample the instruction address and stack chain), + * this should return true. The IRQ masking code can then enable MSR[EE] + * in some places (e.g., interrupt handlers) that allows PMI interrupts + * though to improve accuracy of profiles, at the cost of some performance. + * + * The PMU counters can be enabled by other means (e.g., sysfs raw SPR + * access), but in that case there is no need for prompt PMI handling. + * + * This currently returns true if any perf counter is being used. It + * could possibly return false if only events are being counted rather than + * samples being taken, but for now this is good enough. + */ +bool power_pmu_wants_prompt_pmi(void) +{ + struct cpu_hw_events *cpuhw; + + /* + * This could simply test local_paca->pmcregs_in_use if that were not + * under ifdef KVM. + */ + + if (!ppmu) + return false; + + cpuhw = this_cpu_ptr(&cpu_hw_events); + return cpuhw->n_events; +} + static int power_pmu_prepare_cpu(unsigned int cpu) { struct cpu_hw_events *cpuhw = &per_cpu(cpu_hw_events, cpu); @@ -2392,7 +2479,7 @@ static int power_pmu_prepare_cpu(unsigned int cpu) return 0; } -int register_power_pmu(struct power_pmu *pmu) +int __init register_power_pmu(struct power_pmu *pmu) { if (ppmu) return -EBUSY; /* something's already registered */ @@ -2419,8 +2506,24 @@ int register_power_pmu(struct power_pmu *pmu) } #ifdef CONFIG_PPC64 +static bool pmu_override = false; +static unsigned long pmu_override_val; +static void do_pmu_override(void *data) +{ + ppc_set_pmu_inuse(1); + if (pmu_override_val) + mtspr(SPRN_MMCR1, pmu_override_val); + mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) & ~MMCR0_FC); +} + static int __init init_ppc64_pmu(void) { + if (cpu_has_feature(CPU_FTR_HVMODE) && pmu_override) { + pr_warn("disabling perf due to pmu_override= command line option.\n"); + on_each_cpu(do_pmu_override, NULL, 1); + return 0; + } + /* run through all the pmu drivers one at a time */ if (!init_power5_pmu()) return 0; @@ -2442,4 +2545,23 @@ static int __init init_ppc64_pmu(void) return init_generic_compat_pmu(); } early_initcall(init_ppc64_pmu); + +static int __init pmu_setup(char *str) +{ + unsigned long val; + + if (!early_cpu_has_feature(CPU_FTR_HVMODE)) + return 0; + + pmu_override = true; + + if (kstrtoul(str, 0, &val)) + val = 0; + + pmu_override_val = val; + + return 1; +} +__setup("pmu_override=", pmu_setup); + #endif diff --git a/arch/powerpc/perf/generic-compat-pmu.c b/arch/powerpc/perf/generic-compat-pmu.c index 695975227e60..b6e25f75109d 100644 --- a/arch/powerpc/perf/generic-compat-pmu.c +++ b/arch/powerpc/perf/generic-compat-pmu.c @@ -307,7 +307,7 @@ static struct power_pmu generic_compat_pmu = { .attr_groups = generic_compat_pmu_attr_groups, }; -int init_generic_compat_pmu(void) +int __init init_generic_compat_pmu(void) { int rc = 0; diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c index 1816f560a465..1e8aa934e37e 100644 --- a/arch/powerpc/perf/hv-24x7.c +++ b/arch/powerpc/perf/hv-24x7.c @@ -756,7 +756,7 @@ static ssize_t catalog_event_len_validate(struct hv_24x7_event_data *event, } if (calc_ev_end > ev_end) { - pr_warn("event %zu exceeds it's own length: event=%pK, end=%pK, offset=%zu, calc_ev_end=%pK\n", + pr_warn("event %zu exceeds its own length: event=%pK, end=%pK, offset=%zu, calc_ev_end=%pK\n", event_idx, event, ev_end, offset, calc_ev_end); return -1; } diff --git a/arch/powerpc/perf/internal.h b/arch/powerpc/perf/internal.h index 80bbf72bfec2..4c18b5504326 100644 --- a/arch/powerpc/perf/internal.h +++ b/arch/powerpc/perf/internal.h @@ -2,12 +2,12 @@ // // Copyright 2019 Madhavan Srinivasan, IBM Corporation. -extern int init_ppc970_pmu(void); -extern int init_power5_pmu(void); -extern int init_power5p_pmu(void); -extern int init_power6_pmu(void); -extern int init_power7_pmu(void); -extern int init_power8_pmu(void); -extern int init_power9_pmu(void); -extern int init_power10_pmu(void); -extern int init_generic_compat_pmu(void); +int __init init_ppc970_pmu(void); +int __init init_power5_pmu(void); +int __init init_power5p_pmu(void); +int __init init_power6_pmu(void); +int __init init_power7_pmu(void); +int __init init_power8_pmu(void); +int __init init_power9_pmu(void); +int __init init_power10_pmu(void); +int __init init_generic_compat_pmu(void); diff --git a/arch/powerpc/perf/isa207-common.c b/arch/powerpc/perf/isa207-common.c index 7ea873ab2e6f..4037ea652522 100644 --- a/arch/powerpc/perf/isa207-common.c +++ b/arch/powerpc/perf/isa207-common.c @@ -220,22 +220,37 @@ static inline u64 isa207_find_source(u64 idx, u32 sub_idx) /* Nothing to do */ break; case 1: - ret = PH(LVL, L1); + ret = PH(LVL, L1) | LEVEL(L1) | P(SNOOP, HIT); break; case 2: - ret = PH(LVL, L2); + ret = PH(LVL, L2) | LEVEL(L2) | P(SNOOP, HIT); break; case 3: - ret = PH(LVL, L3); + ret = PH(LVL, L3) | LEVEL(L3) | P(SNOOP, HIT); break; case 4: - if (sub_idx <= 1) - ret = PH(LVL, LOC_RAM); - else if (sub_idx > 1 && sub_idx <= 2) - ret = PH(LVL, REM_RAM1); - else - ret = PH(LVL, REM_RAM2); - ret |= P(SNOOP, HIT); + if (cpu_has_feature(CPU_FTR_ARCH_31)) { + ret = P(SNOOP, HIT); + + if (sub_idx == 1) + ret |= PH(LVL, LOC_RAM) | LEVEL(RAM); + else if (sub_idx == 2 || sub_idx == 3) + ret |= P(LVL, HIT) | LEVEL(PMEM); + else if (sub_idx == 4) + ret |= PH(LVL, REM_RAM1) | REM | LEVEL(RAM) | P(HOPS, 2); + else if (sub_idx == 5 || sub_idx == 7) + ret |= P(LVL, HIT) | LEVEL(PMEM) | REM; + else if (sub_idx == 6) + ret |= PH(LVL, REM_RAM2) | REM | LEVEL(RAM) | P(HOPS, 3); + } else { + if (sub_idx <= 1) + ret = PH(LVL, LOC_RAM); + else if (sub_idx > 1 && sub_idx <= 2) + ret = PH(LVL, REM_RAM1); + else + ret = PH(LVL, REM_RAM2); + ret |= P(SNOOP, HIT); + } break; case 5: if (cpu_has_feature(CPU_FTR_ARCH_31)) { @@ -261,11 +276,26 @@ static inline u64 isa207_find_source(u64 idx, u32 sub_idx) } break; case 6: - ret = PH(LVL, REM_CCE2); - if ((sub_idx == 0) || (sub_idx == 2)) - ret |= P(SNOOP, HIT); - else if ((sub_idx == 1) || (sub_idx == 3)) - ret |= P(SNOOP, HITM); + if (cpu_has_feature(CPU_FTR_ARCH_31)) { + if (sub_idx == 0) + ret = PH(LVL, REM_CCE1) | LEVEL(ANY_CACHE) | REM | + P(SNOOP, HIT) | P(HOPS, 2); + else if (sub_idx == 1) + ret = PH(LVL, REM_CCE1) | LEVEL(ANY_CACHE) | REM | + P(SNOOP, HITM) | P(HOPS, 2); + else if (sub_idx == 2) + ret = PH(LVL, REM_CCE2) | LEVEL(ANY_CACHE) | REM | + P(SNOOP, HIT) | P(HOPS, 3); + else if (sub_idx == 3) + ret = PH(LVL, REM_CCE2) | LEVEL(ANY_CACHE) | REM | + P(SNOOP, HITM) | P(HOPS, 3); + } else { + ret = PH(LVL, REM_CCE2); + if (sub_idx == 0 || sub_idx == 2) + ret |= P(SNOOP, HIT); + else if (sub_idx == 1 || sub_idx == 3) + ret |= P(SNOOP, HITM); + } break; case 7: ret = PM(LVL, L1); diff --git a/arch/powerpc/perf/power10-pmu.c b/arch/powerpc/perf/power10-pmu.c index 9dd75f385837..0975ad0b42c4 100644 --- a/arch/powerpc/perf/power10-pmu.c +++ b/arch/powerpc/perf/power10-pmu.c @@ -592,7 +592,7 @@ static struct power_pmu power10_pmu = { .check_attr_config = power10_check_attr_config, }; -int init_power10_pmu(void) +int __init init_power10_pmu(void) { unsigned int pvr; int rc; diff --git a/arch/powerpc/perf/power5+-pmu.c b/arch/powerpc/perf/power5+-pmu.c index 18732267993a..753b4740ef64 100644 --- a/arch/powerpc/perf/power5+-pmu.c +++ b/arch/powerpc/perf/power5+-pmu.c @@ -677,7 +677,7 @@ static struct power_pmu power5p_pmu = { .cache_events = &power5p_cache_events, }; -int init_power5p_pmu(void) +int __init init_power5p_pmu(void) { if (!cur_cpu_spec->oprofile_cpu_type || (strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power5+") diff --git a/arch/powerpc/perf/power5-pmu.c b/arch/powerpc/perf/power5-pmu.c index cb611c1e7abe..1f83c4cba0aa 100644 --- a/arch/powerpc/perf/power5-pmu.c +++ b/arch/powerpc/perf/power5-pmu.c @@ -618,7 +618,7 @@ static struct power_pmu power5_pmu = { .flags = PPMU_HAS_SSLOT, }; -int init_power5_pmu(void) +int __init init_power5_pmu(void) { if (!cur_cpu_spec->oprofile_cpu_type || strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power5")) diff --git a/arch/powerpc/perf/power6-pmu.c b/arch/powerpc/perf/power6-pmu.c index 69ef38216418..aec746f86804 100644 --- a/arch/powerpc/perf/power6-pmu.c +++ b/arch/powerpc/perf/power6-pmu.c @@ -539,7 +539,7 @@ static struct power_pmu power6_pmu = { .cache_events = &power6_cache_events, }; -int init_power6_pmu(void) +int __init init_power6_pmu(void) { if (!cur_cpu_spec->oprofile_cpu_type || strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power6")) diff --git a/arch/powerpc/perf/power7-pmu.c b/arch/powerpc/perf/power7-pmu.c index 894c17f9a762..99b5ba314ea7 100644 --- a/arch/powerpc/perf/power7-pmu.c +++ b/arch/powerpc/perf/power7-pmu.c @@ -445,7 +445,7 @@ static struct power_pmu power7_pmu = { .cache_events = &power7_cache_events, }; -int init_power7_pmu(void) +int __init init_power7_pmu(void) { if (!cur_cpu_spec->oprofile_cpu_type || strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power7")) diff --git a/arch/powerpc/perf/power8-pmu.c b/arch/powerpc/perf/power8-pmu.c index 5282e8415ddf..f21194b5604a 100644 --- a/arch/powerpc/perf/power8-pmu.c +++ b/arch/powerpc/perf/power8-pmu.c @@ -378,7 +378,7 @@ static struct power_pmu power8_pmu = { .bhrb_nr = 32, }; -int init_power8_pmu(void) +int __init init_power8_pmu(void) { int rc; diff --git a/arch/powerpc/perf/power9-pmu.c b/arch/powerpc/perf/power9-pmu.c index ff3382140d7e..4b7c17e36100 100644 --- a/arch/powerpc/perf/power9-pmu.c +++ b/arch/powerpc/perf/power9-pmu.c @@ -452,7 +452,7 @@ static struct power_pmu power9_pmu = { .check_attr_config = power9_check_attr_config, }; -int init_power9_pmu(void) +int __init init_power9_pmu(void) { int rc = 0; unsigned int pvr = mfspr(SPRN_PVR); diff --git a/arch/powerpc/perf/ppc970-pmu.c b/arch/powerpc/perf/ppc970-pmu.c index 1f8263785286..09802482ba72 100644 --- a/arch/powerpc/perf/ppc970-pmu.c +++ b/arch/powerpc/perf/ppc970-pmu.c @@ -489,7 +489,7 @@ static struct power_pmu ppc970_pmu = { .flags = PPMU_NO_SIPR | PPMU_NO_CONT_SAMPLING, }; -int init_ppc970_pmu(void) +int __init init_ppc970_pmu(void) { if (!cur_cpu_spec->oprofile_cpu_type || (strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/970") diff --git a/arch/powerpc/platforms/44x/fsp2.c b/arch/powerpc/platforms/44x/fsp2.c index 823397c802de..af13a59d2f60 100644 --- a/arch/powerpc/platforms/44x/fsp2.c +++ b/arch/powerpc/platforms/44x/fsp2.c @@ -197,7 +197,7 @@ static irqreturn_t rst_wrn_handler(int irq, void *data) { } } -static void node_irq_request(const char *compat, irq_handler_t errirq_handler) +static void __init node_irq_request(const char *compat, irq_handler_t errirq_handler) { struct device_node *np; unsigned int irq; @@ -222,7 +222,7 @@ static void node_irq_request(const char *compat, irq_handler_t errirq_handler) } } -static void critical_irq_setup(void) +static void __init critical_irq_setup(void) { node_irq_request(FSP2_CMU_ERR, cmu_err_handler); node_irq_request(FSP2_BUS_ERR, bus_err_handler); diff --git a/arch/powerpc/platforms/4xx/cpm.c b/arch/powerpc/platforms/4xx/cpm.c index ae8b812c9202..2571841625a2 100644 --- a/arch/powerpc/platforms/4xx/cpm.c +++ b/arch/powerpc/platforms/4xx/cpm.c @@ -163,7 +163,7 @@ static ssize_t cpm_idle_store(struct kobject *kobj, static struct kobj_attribute cpm_idle_attr = __ATTR(idle, 0644, cpm_idle_show, cpm_idle_store); -static void cpm_idle_config_sysfs(void) +static void __init cpm_idle_config_sysfs(void) { struct device *dev; unsigned long ret; @@ -231,7 +231,7 @@ static const struct platform_suspend_ops cpm_suspend_ops = { .enter = cpm_suspend_enter, }; -static int cpm_get_uint_property(struct device_node *np, +static int __init cpm_get_uint_property(struct device_node *np, const char *name) { int len; diff --git a/arch/powerpc/platforms/4xx/pci.c b/arch/powerpc/platforms/4xx/pci.c index c13d64c3b019..24f41e178cbc 100644 --- a/arch/powerpc/platforms/4xx/pci.c +++ b/arch/powerpc/platforms/4xx/pci.c @@ -1273,7 +1273,7 @@ static int __init ppc405ex_pciex_core_init(struct device_node *np) return 2; } -static void ppc405ex_pcie_phy_reset(struct ppc4xx_pciex_port *port) +static void __init ppc405ex_pcie_phy_reset(struct ppc4xx_pciex_port *port) { /* Assert the PE0_PHY reset */ mtdcri(SDR0, port->sdr_base + PESDRn_RCSSET, 0x01010000); diff --git a/arch/powerpc/platforms/512x/clock-commonclk.c b/arch/powerpc/platforms/512x/clock-commonclk.c index 30342b60aa63..0b03d812baae 100644 --- a/arch/powerpc/platforms/512x/clock-commonclk.c +++ b/arch/powerpc/platforms/512x/clock-commonclk.c @@ -97,7 +97,7 @@ static enum soc_type { MPC512x_SOC_MPC5125, } soc; -static void mpc512x_clk_determine_soc(void) +static void __init mpc512x_clk_determine_soc(void) { if (of_machine_is_compatible("fsl,mpc5121")) { soc = MPC512x_SOC_MPC5121; @@ -113,98 +113,98 @@ static void mpc512x_clk_determine_soc(void) } } -static bool soc_has_mbx(void) +static bool __init soc_has_mbx(void) { if (soc == MPC512x_SOC_MPC5121) return true; return false; } -static bool soc_has_axe(void) +static bool __init soc_has_axe(void) { if (soc == MPC512x_SOC_MPC5125) return false; return true; } -static bool soc_has_viu(void) +static bool __init soc_has_viu(void) { if (soc == MPC512x_SOC_MPC5125) return false; return true; } -static bool soc_has_spdif(void) +static bool __init soc_has_spdif(void) { if (soc == MPC512x_SOC_MPC5125) return false; return true; } -static bool soc_has_pata(void) +static bool __init soc_has_pata(void) { if (soc == MPC512x_SOC_MPC5125) return false; return true; } -static bool soc_has_sata(void) +static bool __init soc_has_sata(void) { if (soc == MPC512x_SOC_MPC5125) return false; return true; } -static bool soc_has_pci(void) +static bool __init soc_has_pci(void) { if (soc == MPC512x_SOC_MPC5125) return false; return true; } -static bool soc_has_fec2(void) +static bool __init soc_has_fec2(void) { if (soc == MPC512x_SOC_MPC5125) return true; return false; } -static int soc_max_pscnum(void) +static int __init soc_max_pscnum(void) { if (soc == MPC512x_SOC_MPC5125) return 10; return 12; } -static bool soc_has_sdhc2(void) +static bool __init soc_has_sdhc2(void) { if (soc == MPC512x_SOC_MPC5125) return true; return false; } -static bool soc_has_nfc_5125(void) +static bool __init soc_has_nfc_5125(void) { if (soc == MPC512x_SOC_MPC5125) return true; return false; } -static bool soc_has_outclk(void) +static bool __init soc_has_outclk(void) { if (soc == MPC512x_SOC_MPC5125) return true; return false; } -static bool soc_has_cpmf_0_bypass(void) +static bool __init soc_has_cpmf_0_bypass(void) { if (soc == MPC512x_SOC_MPC5125) return true; return false; } -static bool soc_has_mclk_mux0_canin(void) +static bool __init soc_has_mclk_mux0_canin(void) { if (soc == MPC512x_SOC_MPC5125) return true; @@ -294,7 +294,7 @@ static inline int get_bit_field(uint32_t __iomem *reg, uint8_t pos, uint8_t len) } /* get the SPMF and translate it into the "sys pll" multiplier */ -static int get_spmf_mult(void) +static int __init get_spmf_mult(void) { static int spmf_to_mult[] = { 68, 1, 12, 16, 20, 24, 28, 32, @@ -312,7 +312,7 @@ static int get_spmf_mult(void) * values returned from here are a multiple of the real factor since the * divide ratio is fractional */ -static int get_sys_div_x2(void) +static int __init get_sys_div_x2(void) { static int sysdiv_code_to_x2[] = { 4, 5, 6, 7, 8, 9, 10, 14, @@ -333,7 +333,7 @@ static int get_sys_div_x2(void) * values returned from here are a multiple of the real factor since the * multiplier ratio is fractional */ -static int get_cpmf_mult_x2(void) +static int __init get_cpmf_mult_x2(void) { static int cpmf_to_mult_x36[] = { /* 0b000 is "times 36" */ @@ -379,7 +379,7 @@ static const struct clk_div_table divtab_1234[] = { { .div = 0, }, }; -static int get_freq_from_dt(char *propname) +static int __init get_freq_from_dt(char *propname) { struct device_node *np; const unsigned int *prop; @@ -396,7 +396,7 @@ static int get_freq_from_dt(char *propname) return val; } -static void mpc512x_clk_preset_data(void) +static void __init mpc512x_clk_preset_data(void) { size_t i; @@ -418,7 +418,7 @@ static void mpc512x_clk_preset_data(void) * SYS -> CSB -> IPS) from the REF clock rate and the returned mul/div * values */ -static void mpc512x_clk_setup_ref_clock(struct device_node *np, int bus_freq, +static void __init mpc512x_clk_setup_ref_clock(struct device_node *np, int bus_freq, int *sys_mul, int *sys_div, int *ips_div) { @@ -592,7 +592,7 @@ static struct mclk_setup_data mclk_outclk_data[] = { }; /* setup the MCLK clock subtree of an individual PSC/MSCAN/SPDIF */ -static void mpc512x_clk_setup_mclk(struct mclk_setup_data *entry, size_t idx) +static void __init mpc512x_clk_setup_mclk(struct mclk_setup_data *entry, size_t idx) { size_t clks_idx_pub, clks_idx_int; u32 __iomem *mccr_reg; /* MCLK control register (mux, en, div) */ @@ -701,7 +701,7 @@ static void mpc512x_clk_setup_mclk(struct mclk_setup_data *entry, size_t idx) /* }}} MCLK helpers */ -static void mpc512x_clk_setup_clock_tree(struct device_node *np, int busfreq) +static void __init mpc512x_clk_setup_clock_tree(struct device_node *np, int busfreq) { int sys_mul, sys_div, ips_div; int mul, div; @@ -937,7 +937,7 @@ static void mpc512x_clk_setup_clock_tree(struct device_node *np, int busfreq) * registers the set of public clocks (those listed in the dt-bindings/ * header file) for OF lookups, keeps the intermediates private to us */ -static void mpc5121_clk_register_of_provider(struct device_node *np) +static void __init mpc5121_clk_register_of_provider(struct device_node *np) { clk_data.clks = clks; clk_data.clk_num = MPC512x_CLK_LAST_PUBLIC + 1; /* _not_ ARRAY_SIZE() */ @@ -948,7 +948,7 @@ static void mpc5121_clk_register_of_provider(struct device_node *np) * temporary support for the period of time between introduction of CCF * support and the adjustment of peripheral drivers to OF based lookups */ -static void mpc5121_clk_provide_migration_support(void) +static void __init mpc5121_clk_provide_migration_support(void) { /* @@ -1009,7 +1009,7 @@ static void mpc5121_clk_provide_migration_support(void) * case of not yet adjusted device tree data, where clock related specs * are missing) */ -static void mpc5121_clk_provide_backwards_compat(void) +static void __init mpc5121_clk_provide_backwards_compat(void) { enum did_reg_flags { DID_REG_PSC = BIT(0), diff --git a/arch/powerpc/platforms/512x/mpc512x.h b/arch/powerpc/platforms/512x/mpc512x.h index fff225901e2f..2f3c60e373e1 100644 --- a/arch/powerpc/platforms/512x/mpc512x.h +++ b/arch/powerpc/platforms/512x/mpc512x.h @@ -12,8 +12,8 @@ extern void __init mpc512x_init_early(void); extern void __init mpc512x_init(void); extern void __init mpc512x_setup_arch(void); extern int __init mpc5121_clk_init(void); -extern const char *mpc512x_select_psc_compat(void); -extern const char *mpc512x_select_reset_compat(void); +const char *__init mpc512x_select_psc_compat(void); +const char *__init mpc512x_select_reset_compat(void); extern void __noreturn mpc512x_restart(char *cmd); #endif /* __MPC512X_H__ */ diff --git a/arch/powerpc/platforms/512x/mpc512x_shared.c b/arch/powerpc/platforms/512x/mpc512x_shared.c index 7a9ae9591d60..e3411663edad 100644 --- a/arch/powerpc/platforms/512x/mpc512x_shared.c +++ b/arch/powerpc/platforms/512x/mpc512x_shared.c @@ -352,7 +352,7 @@ static void __init mpc512x_declare_of_platform_devices(void) #define DEFAULT_FIFO_SIZE 16 -const char *mpc512x_select_psc_compat(void) +const char *__init mpc512x_select_psc_compat(void) { if (of_machine_is_compatible("fsl,mpc5121")) return "fsl,mpc5121-psc"; @@ -363,7 +363,7 @@ const char *mpc512x_select_psc_compat(void) return NULL; } -const char *mpc512x_select_reset_compat(void) +const char *__init mpc512x_select_reset_compat(void) { if (of_machine_is_compatible("fsl,mpc5121")) return "fsl,mpc5121-reset"; diff --git a/arch/powerpc/platforms/52xx/Kconfig b/arch/powerpc/platforms/52xx/Kconfig index 99d60acc20c8..b72ed2950ca8 100644 --- a/arch/powerpc/platforms/52xx/Kconfig +++ b/arch/powerpc/platforms/52xx/Kconfig @@ -34,7 +34,7 @@ config PPC_EFIKA bool "bPlan Efika 5k2. MPC5200B based computer" depends on PPC_MPC52xx select PPC_RTAS - select PPC_NATIVE + select PPC_HASH_MMU_NATIVE config PPC_LITE5200 bool "Freescale Lite5200 Eval Board" diff --git a/arch/powerpc/platforms/83xx/km83xx.c b/arch/powerpc/platforms/83xx/km83xx.c index 108e1e4d2683..d9eed0decb28 100644 --- a/arch/powerpc/platforms/83xx/km83xx.c +++ b/arch/powerpc/platforms/83xx/km83xx.c @@ -39,7 +39,7 @@ #define SVR_REV(svr) (((svr) >> 0) & 0xFFFF) /* Revision field */ -static void quirk_mpc8360e_qe_enet10(void) +static void __init quirk_mpc8360e_qe_enet10(void) { /* * handle mpc8360E Erratum QE_ENET10: diff --git a/arch/powerpc/platforms/83xx/mpc834x_mds.c b/arch/powerpc/platforms/83xx/mpc834x_mds.c index 6d91bdce0a18..0713deffb40c 100644 --- a/arch/powerpc/platforms/83xx/mpc834x_mds.c +++ b/arch/powerpc/platforms/83xx/mpc834x_mds.c @@ -35,7 +35,7 @@ #include "mpc83xx.h" #define BCSR5_INT_USB 0x02 -static int mpc834xemds_usb_cfg(void) +static int __init mpc834xemds_usb_cfg(void) { struct device_node *np; void __iomem *bcsr_regs = NULL; diff --git a/arch/powerpc/platforms/83xx/mpc837x_mds.c b/arch/powerpc/platforms/83xx/mpc837x_mds.c index f28d166ea7db..fc88ab97f6e3 100644 --- a/arch/powerpc/platforms/83xx/mpc837x_mds.c +++ b/arch/powerpc/platforms/83xx/mpc837x_mds.c @@ -23,7 +23,7 @@ #define BCSR12_USB_SER_PIN 0x80 #define BCSR12_USB_SER_DEVICE 0x02 -static int mpc837xmds_usb_cfg(void) +static int __init mpc837xmds_usb_cfg(void) { struct device_node *np; const void *phy_type, *mode; diff --git a/arch/powerpc/platforms/83xx/mpc837x_rdb.c b/arch/powerpc/platforms/83xx/mpc837x_rdb.c index 7fb7684c256b..5d48c6842098 100644 --- a/arch/powerpc/platforms/83xx/mpc837x_rdb.c +++ b/arch/powerpc/platforms/83xx/mpc837x_rdb.c @@ -18,7 +18,7 @@ #include "mpc83xx.h" -static void mpc837x_rdb_sd_cfg(void) +static void __init mpc837x_rdb_sd_cfg(void) { void __iomem *im; diff --git a/arch/powerpc/platforms/83xx/mpc83xx.h b/arch/powerpc/platforms/83xx/mpc83xx.h index a30d30588cf6..aea803ba3a15 100644 --- a/arch/powerpc/platforms/83xx/mpc83xx.h +++ b/arch/powerpc/platforms/83xx/mpc83xx.h @@ -68,9 +68,9 @@ extern void __noreturn mpc83xx_restart(char *cmd); extern long mpc83xx_time_init(void); -extern int mpc837x_usb_cfg(void); -extern int mpc834x_usb_cfg(void); -extern int mpc831x_usb_cfg(void); +int __init mpc837x_usb_cfg(void); +int __init mpc834x_usb_cfg(void); +int __init mpc831x_usb_cfg(void); extern void mpc83xx_ipic_init_IRQ(void); #ifdef CONFIG_PCI diff --git a/arch/powerpc/platforms/83xx/usb.c b/arch/powerpc/platforms/83xx/usb.c index 3d247d726ed5..b0bda20aaccf 100644 --- a/arch/powerpc/platforms/83xx/usb.c +++ b/arch/powerpc/platforms/83xx/usb.c @@ -20,7 +20,7 @@ #ifdef CONFIG_PPC_MPC834x -int mpc834x_usb_cfg(void) +int __init mpc834x_usb_cfg(void) { unsigned long sccr, sicrl, sicrh; void __iomem *immap; @@ -96,7 +96,7 @@ int mpc834x_usb_cfg(void) #endif /* CONFIG_PPC_MPC834x */ #ifdef CONFIG_PPC_MPC831x -int mpc831x_usb_cfg(void) +int __init mpc831x_usb_cfg(void) { u32 temp; void __iomem *immap, *usb_regs; @@ -209,7 +209,7 @@ out: #endif /* CONFIG_PPC_MPC831x */ #ifdef CONFIG_PPC_MPC837x -int mpc837x_usb_cfg(void) +int __init mpc837x_usb_cfg(void) { void __iomem *immap; struct device_node *np = NULL; diff --git a/arch/powerpc/platforms/85xx/c293pcie.c b/arch/powerpc/platforms/85xx/c293pcie.c index 8d9a2503dd0f..58a398c89e97 100644 --- a/arch/powerpc/platforms/85xx/c293pcie.c +++ b/arch/powerpc/platforms/85xx/c293pcie.c @@ -19,7 +19,7 @@ #include "mpc85xx.h" -void __init c293_pcie_pic_init(void) +static void __init c293_pcie_pic_init(void) { struct mpic *mpic = mpic_alloc(NULL, 0, MPIC_BIG_ENDIAN | MPIC_SINGLE_DEST_CPU, 0, 256, " OpenPIC "); diff --git a/arch/powerpc/platforms/85xx/ge_imp3a.c b/arch/powerpc/platforms/85xx/ge_imp3a.c index 83a0f7a1f0de..743c65e4d8e4 100644 --- a/arch/powerpc/platforms/85xx/ge_imp3a.c +++ b/arch/powerpc/platforms/85xx/ge_imp3a.c @@ -78,7 +78,7 @@ void __init ge_imp3a_pic_init(void) of_node_put(cascade_node); } -static void ge_imp3a_pci_assign_primary(void) +static void __init ge_imp3a_pci_assign_primary(void) { #ifdef CONFIG_PCI struct device_node *np; diff --git a/arch/powerpc/platforms/85xx/mpc85xx_cds.c b/arch/powerpc/platforms/85xx/mpc85xx_cds.c index 172d2b7cfeb7..5bd487030256 100644 --- a/arch/powerpc/platforms/85xx/mpc85xx_cds.c +++ b/arch/powerpc/platforms/85xx/mpc85xx_cds.c @@ -282,7 +282,7 @@ machine_device_initcall(mpc85xx_cds, mpc85xx_cds_8259_attach); #endif /* CONFIG_PPC_I8259 */ -static void mpc85xx_cds_pci_assign_primary(void) +static void __init mpc85xx_cds_pci_assign_primary(void) { #ifdef CONFIG_PCI struct device_node *np; diff --git a/arch/powerpc/platforms/85xx/mpc85xx_pm_ops.c b/arch/powerpc/platforms/85xx/mpc85xx_pm_ops.c index 4a8af80011a6..f7ac92a8ae97 100644 --- a/arch/powerpc/platforms/85xx/mpc85xx_pm_ops.c +++ b/arch/powerpc/platforms/85xx/mpc85xx_pm_ops.c @@ -15,6 +15,8 @@ #include <asm/io.h> #include <asm/fsl_pm.h> +#include "smp.h" + static struct ccsr_guts __iomem *guts; #ifdef CONFIG_FSL_PMC diff --git a/arch/powerpc/platforms/85xx/smp.c b/arch/powerpc/platforms/85xx/smp.c index d7081e9af65c..a1c6a7827c8f 100644 --- a/arch/powerpc/platforms/85xx/smp.c +++ b/arch/powerpc/platforms/85xx/smp.c @@ -366,7 +366,7 @@ struct smp_ops_t smp_85xx_ops = { #ifdef CONFIG_PPC32 atomic_t kexec_down_cpus = ATOMIC_INIT(0); -void mpc85xx_smp_kexec_cpu_down(int crash_shutdown, int secondary) +static void mpc85xx_smp_kexec_cpu_down(int crash_shutdown, int secondary) { local_irq_disable(); @@ -384,7 +384,7 @@ static void mpc85xx_smp_kexec_down(void *arg) ppc_md.kexec_cpu_down(0,1); } #else -void mpc85xx_smp_kexec_cpu_down(int crash_shutdown, int secondary) +static void mpc85xx_smp_kexec_cpu_down(int crash_shutdown, int secondary) { int cpu = smp_processor_id(); int sibling = cpu_last_thread_sibling(cpu); diff --git a/arch/powerpc/platforms/85xx/socrates_fpga_pic.c b/arch/powerpc/platforms/85xx/socrates_fpga_pic.c index 199a137c0ddb..3768c86b9629 100644 --- a/arch/powerpc/platforms/85xx/socrates_fpga_pic.c +++ b/arch/powerpc/platforms/85xx/socrates_fpga_pic.c @@ -271,7 +271,7 @@ static const struct irq_domain_ops socrates_fpga_pic_host_ops = { .xlate = socrates_fpga_pic_host_xlate, }; -void socrates_fpga_pic_init(struct device_node *pic) +void __init socrates_fpga_pic_init(struct device_node *pic) { unsigned long flags; int i; diff --git a/arch/powerpc/platforms/85xx/socrates_fpga_pic.h b/arch/powerpc/platforms/85xx/socrates_fpga_pic.h index c592b8bc94dd..c50b23794a06 100644 --- a/arch/powerpc/platforms/85xx/socrates_fpga_pic.h +++ b/arch/powerpc/platforms/85xx/socrates_fpga_pic.h @@ -6,6 +6,6 @@ #ifndef SOCRATES_FPGA_PIC_H #define SOCRATES_FPGA_PIC_H -void socrates_fpga_pic_init(struct device_node *pic); +void __init socrates_fpga_pic_init(struct device_node *pic); #endif diff --git a/arch/powerpc/platforms/85xx/xes_mpc85xx.c b/arch/powerpc/platforms/85xx/xes_mpc85xx.c index d54e1ae56997..397e158c1edb 100644 --- a/arch/powerpc/platforms/85xx/xes_mpc85xx.c +++ b/arch/powerpc/platforms/85xx/xes_mpc85xx.c @@ -45,7 +45,7 @@ void __init xes_mpc85xx_pic_init(void) mpic_init(mpic); } -static void xes_mpc85xx_configure_l2(void __iomem *l2_base) +static void __init xes_mpc85xx_configure_l2(void __iomem *l2_base) { volatile uint32_t ctl, tmp; @@ -72,7 +72,7 @@ static void xes_mpc85xx_configure_l2(void __iomem *l2_base) asm volatile("msync; isync"); } -static void xes_mpc85xx_fixups(void) +static void __init xes_mpc85xx_fixups(void) { struct device_node *np; int err; diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig index e02d29a9d12f..d41dad227de8 100644 --- a/arch/powerpc/platforms/Kconfig +++ b/arch/powerpc/platforms/Kconfig @@ -40,9 +40,9 @@ config EPAPR_PARAVIRT In case of doubt, say Y -config PPC_NATIVE +config PPC_HASH_MMU_NATIVE bool - depends on PPC_BOOK3S_32 || PPC64 + depends on PPC_BOOK3S help Support for running natively on the hardware, i.e. without a hypervisor. This option is not user-selectable but should diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index a208997ade88..87bc1929ee5a 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -30,8 +30,6 @@ config PPC_BOOK3S_32 bool "512x/52xx/6xx/7xx/74xx/82xx/83xx/86xx" imply PPC_FPU select PPC_HAVE_PMU_SUPPORT - select PPC_HAVE_KUEP - select PPC_HAVE_KUAP select HAVE_ARCH_VMAP_STACK config PPC_85xx @@ -42,8 +40,7 @@ config PPC_8xx bool "Freescale 8xx" select ARCH_SUPPORTS_HUGETLBFS select FSL_SOC - select PPC_HAVE_KUEP - select PPC_HAVE_KUAP + select PPC_KUEP select HAVE_ARCH_VMAP_STACK select HUGETLBFS @@ -53,6 +50,7 @@ config 40x select PPC_UDBG_16550 select 4xx_SOC select HAVE_PCI + select PPC_KUEP if PPC_KUAP config 44x bool "AMCC 44x, 46x or 47x" @@ -61,7 +59,7 @@ config 44x select 4xx_SOC select HAVE_PCI select PHYS_64BIT - select PPC_HAVE_KUEP + select PPC_KUEP endchoice @@ -105,9 +103,7 @@ config PPC_BOOK3S_64 select HAVE_MOVE_PMD select HAVE_MOVE_PUD select IRQ_WORK - select PPC_MM_SLICES - select PPC_HAVE_KUEP - select PPC_HAVE_KUAP + select PPC_64S_HASH_MMU if !PPC_RADIX_MMU config PPC_BOOK3E_64 bool "Embedded processors" @@ -130,11 +126,13 @@ choice config GENERIC_CPU bool "Generic (POWER4 and above)" depends on PPC64 && !CPU_LITTLE_ENDIAN + select PPC_64S_HASH_MMU if PPC_BOOK3S_64 config GENERIC_CPU bool "Generic (POWER8 and above)" depends on PPC64 && CPU_LITTLE_ENDIAN select ARCH_HAS_FAST_MULTIPLIER + select PPC_64S_HASH_MMU config GENERIC_CPU bool "Generic 32 bits powerpc" @@ -143,24 +141,29 @@ config GENERIC_CPU config CELL_CPU bool "Cell Broadband Engine" depends on PPC_BOOK3S_64 && !CPU_LITTLE_ENDIAN + select PPC_64S_HASH_MMU config POWER5_CPU bool "POWER5" depends on PPC_BOOK3S_64 && !CPU_LITTLE_ENDIAN + select PPC_64S_HASH_MMU config POWER6_CPU bool "POWER6" depends on PPC_BOOK3S_64 && !CPU_LITTLE_ENDIAN + select PPC_64S_HASH_MMU config POWER7_CPU bool "POWER7" depends on PPC_BOOK3S_64 select ARCH_HAS_FAST_MULTIPLIER + select PPC_64S_HASH_MMU config POWER8_CPU bool "POWER8" depends on PPC_BOOK3S_64 select ARCH_HAS_FAST_MULTIPLIER + select PPC_64S_HASH_MMU config POWER9_CPU bool "POWER9" @@ -278,6 +281,11 @@ config BOOKE depends on E500 || 44x || PPC_BOOK3E default y +config BOOKE_OR_40x + bool + depends on BOOKE || 40x + default y + config FSL_BOOKE bool depends on E500 && PPC32 @@ -290,6 +298,7 @@ config PPC_FSL_BOOK3E select FSL_EMB_PERFMON select PPC_SMP_MUXED_IPI select PPC_DOORBELL + select PPC_KUEP default y if FSL_BOOKE config PTE_64BIT @@ -364,6 +373,22 @@ config SPE If in doubt, say Y here. +config PPC_64S_HASH_MMU + bool "Hash MMU Support" + depends on PPC_BOOK3S_64 + select PPC_MM_SLICES + default y + help + Enable support for the Power ISA Hash style MMU. This is implemented + by all IBM Power and other 64-bit Book3S CPUs before ISA v3.0. The + OpenPOWER ISA does not mandate the hash MMU and some CPUs do not + implement it (e.g., Microwatt). + + Note that POWER9 PowerVM platforms only support the hash + MMU. From POWER10 radix is also supported by PowerVM. + + If you're unsure, say Y. + config PPC_RADIX_MMU bool "Radix MMU Support" depends on PPC_BOOK3S_64 @@ -375,7 +400,8 @@ config PPC_RADIX_MMU you can probably disable this. config PPC_RADIX_MMU_DEFAULT - bool "Default to using the Radix MMU when possible" + bool "Default to using the Radix MMU when possible" if PPC_64S_HASH_MMU + depends on PPC_BOOK3S_64 depends on PPC_RADIX_MMU default y help @@ -387,24 +413,16 @@ config PPC_RADIX_MMU_DEFAULT If you're unsure, say Y. -config PPC_HAVE_KUEP - bool - config PPC_KUEP - bool "Kernel Userspace Execution Prevention" - depends on PPC_HAVE_KUEP - default y + bool "Kernel Userspace Execution Prevention" if !40x + default y if !40x help Enable support for Kernel Userspace Execution Prevention (KUEP) If you're unsure, say Y. -config PPC_HAVE_KUAP - bool - config PPC_KUAP bool "Kernel Userspace Access Protection" - depends on PPC_HAVE_KUAP default y help Enable support for Kernel Userspace Access Protection (KUAP) @@ -413,7 +431,7 @@ config PPC_KUAP config PPC_KUAP_DEBUG bool "Extra debugging for Kernel Userspace Access Protection" - depends on PPC_KUAP && (PPC_RADIX_MMU || PPC32) + depends on PPC_KUAP help Add extra debugging for Kernel Userspace Access Protection (KUAP) If you're unsure, say N. diff --git a/arch/powerpc/platforms/cell/Kconfig b/arch/powerpc/platforms/cell/Kconfig index cb70c5f25bc6..34669b060f36 100644 --- a/arch/powerpc/platforms/cell/Kconfig +++ b/arch/powerpc/platforms/cell/Kconfig @@ -1,5 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 config PPC_CELL + select PPC_64S_HASH_MMU if PPC64 bool config PPC_CELL_COMMON @@ -8,7 +9,7 @@ config PPC_CELL_COMMON select PPC_DCR_MMIO select PPC_INDIRECT_PIO select PPC_INDIRECT_MMIO - select PPC_NATIVE + select PPC_HASH_MMU_NATIVE select PPC_RTAS select IRQ_EDGE_EOI_HANDLER diff --git a/arch/powerpc/platforms/cell/cbe_regs.c b/arch/powerpc/platforms/cell/cbe_regs.c index c2a0678d85db..1c4c53bec66c 100644 --- a/arch/powerpc/platforms/cell/cbe_regs.c +++ b/arch/powerpc/platforms/cell/cbe_regs.c @@ -165,7 +165,7 @@ u32 cbe_node_to_cpu(int node) } EXPORT_SYMBOL_GPL(cbe_node_to_cpu); -static struct device_node *cbe_get_be_node(int cpu_id) +static struct device_node *__init cbe_get_be_node(int cpu_id) { struct device_node *np; diff --git a/arch/powerpc/platforms/cell/iommu.c b/arch/powerpc/platforms/cell/iommu.c index fa08699aedeb..25e726bf0172 100644 --- a/arch/powerpc/platforms/cell/iommu.c +++ b/arch/powerpc/platforms/cell/iommu.c @@ -253,7 +253,7 @@ static irqreturn_t ioc_interrupt(int irq, void *data) return IRQ_HANDLED; } -static int cell_iommu_find_ioc(int nid, unsigned long *base) +static int __init cell_iommu_find_ioc(int nid, unsigned long *base) { struct device_node *np; struct resource r; @@ -293,7 +293,7 @@ static int cell_iommu_find_ioc(int nid, unsigned long *base) return -ENODEV; } -static void cell_iommu_setup_stab(struct cbe_iommu *iommu, +static void __init cell_iommu_setup_stab(struct cbe_iommu *iommu, unsigned long dbase, unsigned long dsize, unsigned long fbase, unsigned long fsize) { @@ -313,7 +313,7 @@ static void cell_iommu_setup_stab(struct cbe_iommu *iommu, memset(iommu->stab, 0, stab_size); } -static unsigned long *cell_iommu_alloc_ptab(struct cbe_iommu *iommu, +static unsigned long *__init cell_iommu_alloc_ptab(struct cbe_iommu *iommu, unsigned long base, unsigned long size, unsigned long gap_base, unsigned long gap_size, unsigned long page_shift) { @@ -373,7 +373,7 @@ static unsigned long *cell_iommu_alloc_ptab(struct cbe_iommu *iommu, return ptab; } -static void cell_iommu_enable_hardware(struct cbe_iommu *iommu) +static void __init cell_iommu_enable_hardware(struct cbe_iommu *iommu) { int ret; unsigned long reg, xlate_base; @@ -413,7 +413,7 @@ static void cell_iommu_enable_hardware(struct cbe_iommu *iommu) out_be64(iommu->cmd_regs + IOC_IOCmd_Cfg, reg); } -static void cell_iommu_setup_hardware(struct cbe_iommu *iommu, +static void __init cell_iommu_setup_hardware(struct cbe_iommu *iommu, unsigned long base, unsigned long size) { cell_iommu_setup_stab(iommu, base, size, 0, 0); @@ -858,7 +858,7 @@ static bool cell_pci_iommu_bypass_supported(struct pci_dev *pdev, u64 mask) cell_iommu_get_fixed_address(&pdev->dev) != OF_BAD_ADDR; } -static void insert_16M_pte(unsigned long addr, unsigned long *ptab, +static void __init insert_16M_pte(unsigned long addr, unsigned long *ptab, unsigned long base_pte) { unsigned long segment, offset; @@ -873,7 +873,7 @@ static void insert_16M_pte(unsigned long addr, unsigned long *ptab, ptab[offset] = base_pte | (__pa(addr) & CBE_IOPTE_RPN_Mask); } -static void cell_iommu_setup_fixed_ptab(struct cbe_iommu *iommu, +static void __init cell_iommu_setup_fixed_ptab(struct cbe_iommu *iommu, struct device_node *np, unsigned long dbase, unsigned long dsize, unsigned long fbase, unsigned long fsize) { @@ -977,6 +977,7 @@ static int __init cell_iommu_fixed_mapping_init(void) if (hbase < dbase || (hend > (dbase + dsize))) { pr_debug("iommu: hash window doesn't fit in" "real DMA window\n"); + of_node_put(np); return -1; } } diff --git a/arch/powerpc/platforms/cell/pervasive.c b/arch/powerpc/platforms/cell/pervasive.c index 5b9a7e9f144b..dff8d5e7ab82 100644 --- a/arch/powerpc/platforms/cell/pervasive.c +++ b/arch/powerpc/platforms/cell/pervasive.c @@ -78,6 +78,7 @@ static int cbe_system_reset_exception(struct pt_regs *regs) switch (regs->msr & SRR1_WAKEMASK) { case SRR1_WAKEDEC: set_dec(1); + break; case SRR1_WAKEEE: /* * Handle these when interrupts get re-enabled and we take diff --git a/arch/powerpc/platforms/cell/spu_base.c b/arch/powerpc/platforms/cell/spu_base.c index bc48234443b6..83cea9e7ee72 100644 --- a/arch/powerpc/platforms/cell/spu_base.c +++ b/arch/powerpc/platforms/cell/spu_base.c @@ -387,7 +387,7 @@ spu_irq_class_2(int irq, void *data) return stat ? IRQ_HANDLED : IRQ_NONE; } -static int spu_request_irqs(struct spu *spu) +static int __init spu_request_irqs(struct spu *spu) { int ret = 0; @@ -540,7 +540,7 @@ void spu_remove_dev_attr_group(struct attribute_group *attrs) } EXPORT_SYMBOL_GPL(spu_remove_dev_attr_group); -static int spu_create_dev(struct spu *spu) +static int __init spu_create_dev(struct spu *spu) { int ret; @@ -711,7 +711,7 @@ static void crash_kexec_stop_spus(void) } } -static void crash_register_spus(struct list_head *list) +static void __init crash_register_spus(struct list_head *list) { struct spu *spu; int ret; diff --git a/arch/powerpc/platforms/cell/spu_manage.c b/arch/powerpc/platforms/cell/spu_manage.c index 8e9ef65240c3..ddf8742f09a3 100644 --- a/arch/powerpc/platforms/cell/spu_manage.c +++ b/arch/powerpc/platforms/cell/spu_manage.c @@ -186,7 +186,7 @@ err: return -EINVAL; } -static int spu_map_resource(struct spu *spu, int nr, +static int __init spu_map_resource(struct spu *spu, int nr, void __iomem** virt, unsigned long *phys) { struct device_node *np = spu->devnode; @@ -361,7 +361,7 @@ static void disable_spu_by_master_run(struct spu_context *ctx) static int qs20_reg_idxs[QS20_SPES_PER_BE] = { 0, 2, 4, 6, 7, 5, 3, 1 }; static int qs20_reg_memory[QS20_SPES_PER_BE] = { 1, 1, 0, 0, 0, 0, 0, 0 }; -static struct spu *spu_lookup_reg(int node, u32 reg) +static struct spu *__init spu_lookup_reg(int node, u32 reg) { struct spu *spu; const u32 *spu_reg; @@ -374,7 +374,7 @@ static struct spu *spu_lookup_reg(int node, u32 reg) return NULL; } -static void init_affinity_qs20_harcoded(void) +static void __init init_affinity_qs20_harcoded(void) { int node, i; struct spu *last_spu, *spu; @@ -396,7 +396,7 @@ static void init_affinity_qs20_harcoded(void) } } -static int of_has_vicinity(void) +static int __init of_has_vicinity(void) { struct device_node *dn; @@ -409,7 +409,7 @@ static int of_has_vicinity(void) return 0; } -static struct spu *devnode_spu(int cbe, struct device_node *dn) +static struct spu *__init devnode_spu(int cbe, struct device_node *dn) { struct spu *spu; @@ -419,7 +419,7 @@ static struct spu *devnode_spu(int cbe, struct device_node *dn) return NULL; } -static struct spu * +static struct spu * __init neighbour_spu(int cbe, struct device_node *target, struct device_node *avoid) { struct spu *spu; @@ -440,7 +440,7 @@ neighbour_spu(int cbe, struct device_node *target, struct device_node *avoid) return NULL; } -static void init_affinity_node(int cbe) +static void __init init_affinity_node(int cbe) { struct spu *spu, *last_spu; struct device_node *vic_dn, *last_spu_dn; @@ -494,7 +494,7 @@ static void init_affinity_node(int cbe) } } -static void init_affinity_fw(void) +static void __init init_affinity_fw(void) { int cbe; diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c index cb25acccd746..4c702192412f 100644 --- a/arch/powerpc/platforms/cell/spufs/inode.c +++ b/arch/powerpc/platforms/cell/spufs/inode.c @@ -648,7 +648,7 @@ static void spufs_exit_isolated_loader(void) get_order(isolated_loader_size)); } -static void +static void __init spufs_init_isolated_loader(void) { struct device_node *dn; diff --git a/arch/powerpc/platforms/chrp/Kconfig b/arch/powerpc/platforms/chrp/Kconfig index 9b5c5505718a..ff30ed579a39 100644 --- a/arch/powerpc/platforms/chrp/Kconfig +++ b/arch/powerpc/platforms/chrp/Kconfig @@ -11,6 +11,6 @@ config PPC_CHRP select RTAS_ERROR_LOGGING select PPC_MPC106 select PPC_UDBG_16550 - select PPC_NATIVE + select PPC_HASH_MMU_NATIVE select FORCE_PCI default y diff --git a/arch/powerpc/platforms/chrp/pegasos_eth.c b/arch/powerpc/platforms/chrp/pegasos_eth.c index 485cf5ef73d4..5c4f1a9ca154 100644 --- a/arch/powerpc/platforms/chrp/pegasos_eth.c +++ b/arch/powerpc/platforms/chrp/pegasos_eth.c @@ -113,7 +113,7 @@ static struct platform_device *mv643xx_eth_pd_devs[] __initdata = { static void __iomem *mv643xx_reg_base; -static int Enable_SRAM(void) +static int __init Enable_SRAM(void) { u32 ALong; diff --git a/arch/powerpc/platforms/embedded6xx/Kconfig b/arch/powerpc/platforms/embedded6xx/Kconfig index 4c6d703a4284..c54786f8461e 100644 --- a/arch/powerpc/platforms/embedded6xx/Kconfig +++ b/arch/powerpc/platforms/embedded6xx/Kconfig @@ -55,7 +55,7 @@ config MVME5100 select FORCE_PCI select PPC_INDIRECT_PCI select PPC_I8259 - select PPC_NATIVE + select PPC_HASH_MMU_NATIVE select PPC_UDBG_16550 help This option enables support for the Motorola (now Emerson) MVME5100 diff --git a/arch/powerpc/platforms/embedded6xx/hlwd-pic.c b/arch/powerpc/platforms/embedded6xx/hlwd-pic.c index 15396333a90b..380b4285cce4 100644 --- a/arch/powerpc/platforms/embedded6xx/hlwd-pic.c +++ b/arch/powerpc/platforms/embedded6xx/hlwd-pic.c @@ -153,7 +153,7 @@ static void __hlwd_quiesce(void __iomem *io_base) out_be32(io_base + HW_BROADWAY_ICR, 0xffffffff); } -static struct irq_domain *hlwd_pic_init(struct device_node *np) +static struct irq_domain *__init hlwd_pic_init(struct device_node *np) { struct irq_domain *irq_domain; struct resource res; @@ -197,7 +197,7 @@ unsigned int hlwd_pic_get_irq(void) * */ -void hlwd_pic_probe(void) +void __init hlwd_pic_probe(void) { struct irq_domain *host; struct device_node *np; @@ -214,6 +214,7 @@ void hlwd_pic_probe(void) irq_set_chained_handler(cascade_virq, hlwd_pic_irq_cascade); hlwd_irq_host = host; + of_node_put(np); break; } } diff --git a/arch/powerpc/platforms/embedded6xx/hlwd-pic.h b/arch/powerpc/platforms/embedded6xx/hlwd-pic.h index f18eeeef0815..c2fa42e191dc 100644 --- a/arch/powerpc/platforms/embedded6xx/hlwd-pic.h +++ b/arch/powerpc/platforms/embedded6xx/hlwd-pic.h @@ -11,7 +11,7 @@ #define __HLWD_PIC_H extern unsigned int hlwd_pic_get_irq(void); -extern void hlwd_pic_probe(void); +void __init hlwd_pic_probe(void); extern void hlwd_quiesce(void); #endif diff --git a/arch/powerpc/platforms/embedded6xx/holly.c b/arch/powerpc/platforms/embedded6xx/holly.c index 7a85b117f7a4..07e71ba3e846 100644 --- a/arch/powerpc/platforms/embedded6xx/holly.c +++ b/arch/powerpc/platforms/embedded6xx/holly.c @@ -50,7 +50,7 @@ static int holly_exclude_device(struct pci_controller *hose, u_char bus, return PCIBIOS_SUCCESSFUL; } -static void holly_remap_bridge(void) +static void __init holly_remap_bridge(void) { u32 lut_val, lut_addr; int i; diff --git a/arch/powerpc/platforms/embedded6xx/usbgecko_udbg.c b/arch/powerpc/platforms/embedded6xx/usbgecko_udbg.c index ed45db70a781..5aea46566233 100644 --- a/arch/powerpc/platforms/embedded6xx/usbgecko_udbg.c +++ b/arch/powerpc/platforms/embedded6xx/usbgecko_udbg.c @@ -194,7 +194,7 @@ static int ug_udbg_getc_poll(void) /* * Retrieves and prepares the virtual address needed to access the hardware. */ -static void __iomem *ug_udbg_setup_exi_io_base(struct device_node *np) +static void __iomem *__init ug_udbg_setup_exi_io_base(struct device_node *np) { void __iomem *exi_io_base = NULL; phys_addr_t paddr; @@ -212,7 +212,7 @@ static void __iomem *ug_udbg_setup_exi_io_base(struct device_node *np) /* * Checks if a USB Gecko adapter is inserted in any memory card slot. */ -static void __iomem *ug_udbg_probe(void __iomem *exi_io_base) +static void __iomem *__init ug_udbg_probe(void __iomem *exi_io_base) { int i; diff --git a/arch/powerpc/platforms/embedded6xx/wii.c b/arch/powerpc/platforms/embedded6xx/wii.c index a802ef957d63..f60ade584bb2 100644 --- a/arch/powerpc/platforms/embedded6xx/wii.c +++ b/arch/powerpc/platforms/embedded6xx/wii.c @@ -69,7 +69,7 @@ static void __noreturn wii_spin(void) cpu_relax(); } -static void __iomem *wii_ioremap_hw_regs(char *name, char *compatible) +static void __iomem *__init wii_ioremap_hw_regs(char *name, char *compatible) { void __iomem *hw_regs = NULL; struct device_node *np; diff --git a/arch/powerpc/platforms/maple/Kconfig b/arch/powerpc/platforms/maple/Kconfig index 86ae210bee9a..4c058cc57c90 100644 --- a/arch/powerpc/platforms/maple/Kconfig +++ b/arch/powerpc/platforms/maple/Kconfig @@ -9,7 +9,8 @@ config PPC_MAPLE select GENERIC_TBSYNC select PPC_UDBG_16550 select PPC_970_NAP - select PPC_NATIVE + select PPC_64S_HASH_MMU + select PPC_HASH_MMU_NATIVE select PPC_RTAS select MMIO_NVRAM select ATA_NONSTANDARD if ATA diff --git a/arch/powerpc/platforms/microwatt/Kconfig b/arch/powerpc/platforms/microwatt/Kconfig index 8f6a81978461..5e320f49583a 100644 --- a/arch/powerpc/platforms/microwatt/Kconfig +++ b/arch/powerpc/platforms/microwatt/Kconfig @@ -5,7 +5,6 @@ config PPC_MICROWATT select PPC_XICS select PPC_ICS_NATIVE select PPC_ICP_NATIVE - select PPC_NATIVE select PPC_UDBG_16550 select ARCH_RANDOM help diff --git a/arch/powerpc/platforms/microwatt/rng.c b/arch/powerpc/platforms/microwatt/rng.c index 3d8ee6eb7dad..7bc4d1cbfaf0 100644 --- a/arch/powerpc/platforms/microwatt/rng.c +++ b/arch/powerpc/platforms/microwatt/rng.c @@ -14,7 +14,7 @@ #define DARN_ERR 0xFFFFFFFFFFFFFFFFul -int microwatt_get_random_darn(unsigned long *v) +static int microwatt_get_random_darn(unsigned long *v) { unsigned long val; diff --git a/arch/powerpc/platforms/pasemi/Kconfig b/arch/powerpc/platforms/pasemi/Kconfig index c52731a7773f..85ae18ddd911 100644 --- a/arch/powerpc/platforms/pasemi/Kconfig +++ b/arch/powerpc/platforms/pasemi/Kconfig @@ -5,7 +5,8 @@ config PPC_PASEMI select MPIC select FORCE_PCI select PPC_UDBG_16550 - select PPC_NATIVE + select PPC_64S_HASH_MMU + select PPC_HASH_MMU_NATIVE select MPIC_BROKEN_REGREAD help This option enables support for PA Semi's PWRficient line diff --git a/arch/powerpc/platforms/pasemi/msi.c b/arch/powerpc/platforms/pasemi/msi.c index e8af72282682..ea1e41451408 100644 --- a/arch/powerpc/platforms/pasemi/msi.c +++ b/arch/powerpc/platforms/pasemi/msi.c @@ -130,7 +130,7 @@ static int pasemi_msi_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) return 0; } -int mpic_pasemi_msi_init(struct mpic *mpic) +int __init mpic_pasemi_msi_init(struct mpic *mpic) { int rc; struct pci_controller *phb; diff --git a/arch/powerpc/platforms/pasemi/pasemi.h b/arch/powerpc/platforms/pasemi/pasemi.h index 70b56048ed1b..3f277a200fd8 100644 --- a/arch/powerpc/platforms/pasemi/pasemi.h +++ b/arch/powerpc/platforms/pasemi/pasemi.h @@ -7,7 +7,7 @@ extern void pas_pci_init(void); extern void pas_pci_irq_fixup(struct pci_dev *dev); extern void pas_pci_dma_dev_setup(struct pci_dev *dev); -extern void __iomem *pasemi_pci_getcfgaddr(struct pci_dev *dev, int offset); +void __iomem *__init pasemi_pci_getcfgaddr(struct pci_dev *dev, int offset); extern void __init pasemi_map_registers(void); diff --git a/arch/powerpc/platforms/pasemi/pci.c b/arch/powerpc/platforms/pasemi/pci.c index 8779b107d872..d4b922759d6e 100644 --- a/arch/powerpc/platforms/pasemi/pci.c +++ b/arch/powerpc/platforms/pasemi/pci.c @@ -287,7 +287,7 @@ void __init pas_pci_init(void) } } -void __iomem *pasemi_pci_getcfgaddr(struct pci_dev *dev, int offset) +void __iomem *__init pasemi_pci_getcfgaddr(struct pci_dev *dev, int offset) { struct pci_controller *hose; diff --git a/arch/powerpc/platforms/pasemi/setup.c b/arch/powerpc/platforms/pasemi/setup.c index 376797eb7894..f974bfe7fde1 100644 --- a/arch/powerpc/platforms/pasemi/setup.c +++ b/arch/powerpc/platforms/pasemi/setup.c @@ -212,7 +212,7 @@ static void sb600_8259_cascade(struct irq_desc *desc) chip->irq_eoi(&desc->irq_data); } -static void nemo_init_IRQ(struct mpic *mpic) +static void __init nemo_init_IRQ(struct mpic *mpic) { struct device_node *np; int gpio_virq; diff --git a/arch/powerpc/platforms/powermac/Kconfig b/arch/powerpc/platforms/powermac/Kconfig index b97bf12801eb..130707ec9f99 100644 --- a/arch/powerpc/platforms/powermac/Kconfig +++ b/arch/powerpc/platforms/powermac/Kconfig @@ -6,7 +6,8 @@ config PPC_PMAC select FORCE_PCI select PPC_INDIRECT_PCI if PPC32 select PPC_MPC106 if PPC32 - select PPC_NATIVE + select PPC_64S_HASH_MMU if PPC64 + select PPC_HASH_MMU_NATIVE select ZONE_DMA if PPC32 default y diff --git a/arch/powerpc/platforms/powermac/cache.S b/arch/powerpc/platforms/powermac/cache.S index ced225415486..b8ae56e9f414 100644 --- a/arch/powerpc/platforms/powermac/cache.S +++ b/arch/powerpc/platforms/powermac/cache.S @@ -48,7 +48,7 @@ flush_disable_75x: /* Stop DST streams */ BEGIN_FTR_SECTION - DSSALL + PPC_DSSALL sync END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) @@ -197,7 +197,7 @@ flush_disable_745x: isync /* Stop prefetch streams */ - DSSALL + PPC_DSSALL sync /* Disable L2 prefetching */ diff --git a/arch/powerpc/platforms/powermac/feature.c b/arch/powerpc/platforms/powermac/feature.c index 5c77b9a24c0e..e67c624f35a2 100644 --- a/arch/powerpc/platforms/powermac/feature.c +++ b/arch/powerpc/platforms/powermac/feature.c @@ -1530,7 +1530,7 @@ static long g5_reset_cpu(struct device_node *node, long param, long value) * This takes the second CPU off the bus on dual CPU machines * running UP */ -void g5_phy_disable_cpu1(void) +void __init g5_phy_disable_cpu1(void) { if (uninorth_maj == 3) UN_OUT(U3_API_PHY_CONFIG_1, 0); diff --git a/arch/powerpc/platforms/powermac/low_i2c.c b/arch/powerpc/platforms/powermac/low_i2c.c index f77a59b5c2e1..df89d916236d 100644 --- a/arch/powerpc/platforms/powermac/low_i2c.c +++ b/arch/powerpc/platforms/powermac/low_i2c.c @@ -582,6 +582,7 @@ static void __init kw_i2c_add(struct pmac_i2c_host_kw *host, bus->close = kw_i2c_close; bus->xfer = kw_i2c_xfer; mutex_init(&bus->mutex); + lockdep_register_key(&bus->lock_key); lockdep_set_class(&bus->mutex, &bus->lock_key); if (controller == busnode) bus->flags = pmac_i2c_multibus; @@ -810,6 +811,7 @@ static void __init pmu_i2c_probe(void) bus->hostdata = bus + 1; bus->xfer = pmu_i2c_xfer; mutex_init(&bus->mutex); + lockdep_register_key(&bus->lock_key); lockdep_set_class(&bus->mutex, &bus->lock_key); bus->flags = pmac_i2c_multibus; list_add(&bus->link, &pmac_i2c_busses); @@ -933,6 +935,7 @@ static void __init smu_i2c_probe(void) bus->hostdata = bus + 1; bus->xfer = smu_i2c_xfer; mutex_init(&bus->mutex); + lockdep_register_key(&bus->lock_key); lockdep_set_class(&bus->mutex, &bus->lock_key); bus->flags = 0; list_add(&bus->link, &pmac_i2c_busses); diff --git a/arch/powerpc/platforms/powermac/nvram.c b/arch/powerpc/platforms/powermac/nvram.c index 853ccc4480e2..de8fcb607290 100644 --- a/arch/powerpc/platforms/powermac/nvram.c +++ b/arch/powerpc/platforms/powermac/nvram.c @@ -258,7 +258,7 @@ static u32 core99_calc_adler(u8 *buffer) return (high << 16) | low; } -static u32 core99_check(u8* datas) +static u32 __init core99_check(u8 *datas) { struct core99_header* hdr99 = (struct core99_header*)datas; diff --git a/arch/powerpc/platforms/powermac/pfunc_base.c b/arch/powerpc/platforms/powermac/pfunc_base.c index f5422506d4b0..9c2947a3edd5 100644 --- a/arch/powerpc/platforms/powermac/pfunc_base.c +++ b/arch/powerpc/platforms/powermac/pfunc_base.c @@ -93,7 +93,7 @@ static struct pmf_handlers macio_gpio_handlers = { .delay = macio_do_delay, }; -static void macio_gpio_init_one(struct macio_chip *macio) +static void __init macio_gpio_init_one(struct macio_chip *macio) { struct device_node *gparent, *gp; @@ -265,7 +265,7 @@ static struct pmf_handlers macio_mmio_handlers = { .delay = macio_do_delay, }; -static void macio_mmio_init_one(struct macio_chip *macio) +static void __init macio_mmio_init_one(struct macio_chip *macio) { DBG("Installing MMIO functions for macio %pOF\n", macio->of_node); @@ -294,7 +294,7 @@ static struct pmf_handlers unin_mmio_handlers = { .delay = macio_do_delay, }; -static void uninorth_install_pfunc(void) +static void __init uninorth_install_pfunc(void) { struct device_node *np; diff --git a/arch/powerpc/platforms/powermac/pic.c b/arch/powerpc/platforms/powermac/pic.c index 4921bccf0376..bb0566633af5 100644 --- a/arch/powerpc/platforms/powermac/pic.c +++ b/arch/powerpc/platforms/powermac/pic.c @@ -18,6 +18,7 @@ #include <linux/interrupt.h> #include <linux/syscore_ops.h> #include <linux/adb.h> +#include <linux/minmax.h> #include <linux/pmu.h> #include <asm/sections.h> @@ -311,11 +312,8 @@ static void __init pmac_pic_probe_oldstyle(void) /* Check ordering of master & slave */ if (of_device_is_compatible(master, "gatwick")) { - struct device_node *tmp; BUG_ON(slave == NULL); - tmp = master; - master = slave; - slave = tmp; + swap(master, slave); } /* We found a slave */ diff --git a/arch/powerpc/platforms/powermac/setup.c b/arch/powerpc/platforms/powermac/setup.c index 13e8a8a9841c..974d4b49867b 100644 --- a/arch/powerpc/platforms/powermac/setup.c +++ b/arch/powerpc/platforms/powermac/setup.c @@ -166,7 +166,7 @@ static void pmac_show_cpuinfo(struct seq_file *m) } #ifndef CONFIG_ADB_CUDA -int find_via_cuda(void) +int __init find_via_cuda(void) { struct device_node *dn = of_find_node_by_name(NULL, "via-cuda"); @@ -180,7 +180,7 @@ int find_via_cuda(void) #endif #ifndef CONFIG_ADB_PMU -int find_via_pmu(void) +int __init find_via_pmu(void) { struct device_node *dn = of_find_node_by_name(NULL, "via-pmu"); @@ -194,7 +194,7 @@ int find_via_pmu(void) #endif #ifndef CONFIG_PMAC_SMU -int smu_init(void) +int __init smu_init(void) { /* should check and warn if SMU is present */ return 0; diff --git a/arch/powerpc/platforms/powermac/smp.c b/arch/powerpc/platforms/powermac/smp.c index 3256a316e884..da1efdc30d6c 100644 --- a/arch/powerpc/platforms/powermac/smp.c +++ b/arch/powerpc/platforms/powermac/smp.c @@ -186,7 +186,7 @@ static const struct irq_domain_ops psurge_host_ops = { .map = psurge_host_map, }; -static int psurge_secondary_ipi_init(void) +static int __init psurge_secondary_ipi_init(void) { int rc = -ENOMEM; @@ -875,7 +875,7 @@ static int smp_core99_cpu_online(unsigned int cpu) static void __init smp_core99_bringup_done(void) { - extern void g5_phy_disable_cpu1(void); + extern void __init g5_phy_disable_cpu1(void); /* Close i2c bus if it was used for tb sync */ if (pmac_tb_clock_chip_host) diff --git a/arch/powerpc/platforms/powermac/udbg_scc.c b/arch/powerpc/platforms/powermac/udbg_scc.c index f286bdfe8346..965827ac2e9c 100644 --- a/arch/powerpc/platforms/powermac/udbg_scc.c +++ b/arch/powerpc/platforms/powermac/udbg_scc.c @@ -62,7 +62,7 @@ static unsigned char scc_inittab[] = { 3, 0xc1, /* rx enable, 8 bits */ }; -void udbg_scc_init(int force_scc) +void __init udbg_scc_init(int force_scc) { const u32 *reg; unsigned long addr; diff --git a/arch/powerpc/platforms/powernv/Kconfig b/arch/powerpc/platforms/powernv/Kconfig index 043eefbbdd28..161dfe024085 100644 --- a/arch/powerpc/platforms/powernv/Kconfig +++ b/arch/powerpc/platforms/powernv/Kconfig @@ -2,7 +2,7 @@ config PPC_POWERNV depends on PPC64 && PPC_BOOK3S bool "IBM PowerNV (Non-Virtualized) platform support" - select PPC_NATIVE + select PPC_HASH_MMU_NATIVE if PPC_64S_HASH_MMU select PPC_XICS select PPC_ICP_NATIVE select PPC_XIVE_NATIVE diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c index e3ffdc8e8567..9942289f379b 100644 --- a/arch/powerpc/platforms/powernv/idle.c +++ b/arch/powerpc/platforms/powernv/idle.c @@ -62,7 +62,7 @@ static bool deepest_stop_found; static unsigned long power7_offline_type; -static int pnv_save_sprs_for_deep_states(void) +static int __init pnv_save_sprs_for_deep_states(void) { int cpu; int rc; @@ -146,9 +146,13 @@ EXPORT_SYMBOL_GPL(pnv_get_supported_cpuidle_states); static void pnv_fastsleep_workaround_apply(void *info) { + int cpu = smp_processor_id(); int rc; int *err = info; + if (cpu_first_thread_sibling(cpu) != cpu) + return; + rc = opal_config_cpu_idle_state(OPAL_CONFIG_IDLE_FASTSLEEP, OPAL_CONFIG_IDLE_APPLY); if (rc) @@ -175,7 +179,6 @@ static ssize_t store_fastsleep_workaround_applyonce(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { - cpumask_t primary_thread_mask; int err; u8 val; @@ -200,10 +203,7 @@ static ssize_t store_fastsleep_workaround_applyonce(struct device *dev, power7_fastsleep_workaround_exit = false; cpus_read_lock(); - primary_thread_mask = cpu_online_cores_map(); - on_each_cpu_mask(&primary_thread_mask, - pnv_fastsleep_workaround_apply, - &err, 1); + on_each_cpu(pnv_fastsleep_workaround_apply, &err, 1); cpus_read_unlock(); if (err) { pr_err("fastsleep_workaround_applyonce change failed while running pnv_fastsleep_workaround_apply"); @@ -306,8 +306,8 @@ struct p7_sprs { /* per thread SPRs that get lost in shallow states */ u64 amr; u64 iamr; - u64 amor; u64 uamor; + /* amor is restored to constant ~0 */ }; static unsigned long power7_idle_insn(unsigned long type) @@ -378,7 +378,6 @@ static unsigned long power7_idle_insn(unsigned long type) if (cpu_has_feature(CPU_FTR_ARCH_207S)) { sprs.amr = mfspr(SPRN_AMR); sprs.iamr = mfspr(SPRN_IAMR); - sprs.amor = mfspr(SPRN_AMOR); sprs.uamor = mfspr(SPRN_UAMOR); } @@ -397,7 +396,7 @@ static unsigned long power7_idle_insn(unsigned long type) */ mtspr(SPRN_AMR, sprs.amr); mtspr(SPRN_IAMR, sprs.iamr); - mtspr(SPRN_AMOR, sprs.amor); + mtspr(SPRN_AMOR, ~0); mtspr(SPRN_UAMOR, sprs.uamor); } } @@ -492,12 +491,14 @@ subcore_woken: mtspr(SPRN_SPRG3, local_paca->sprg_vdso); +#ifdef CONFIG_PPC_64S_HASH_MMU /* * The SLB has to be restored here, but it sometimes still * contains entries, so the __ variant must be used to prevent * multi hits. */ __slb_restore_bolted_realmode(); +#endif return srr1; } @@ -589,7 +590,6 @@ struct p9_sprs { u64 purr; u64 spurr; u64 dscr; - u64 wort; u64 ciabr; u64 mmcra; @@ -687,7 +687,6 @@ static unsigned long power9_idle_stop(unsigned long psscr) sprs.amr = mfspr(SPRN_AMR); sprs.iamr = mfspr(SPRN_IAMR); - sprs.amor = mfspr(SPRN_AMOR); sprs.uamor = mfspr(SPRN_UAMOR); srr1 = isa300_idle_stop_mayloss(psscr); /* go idle */ @@ -708,7 +707,7 @@ static unsigned long power9_idle_stop(unsigned long psscr) */ mtspr(SPRN_AMR, sprs.amr); mtspr(SPRN_IAMR, sprs.iamr); - mtspr(SPRN_AMOR, sprs.amor); + mtspr(SPRN_AMOR, ~0); mtspr(SPRN_UAMOR, sprs.uamor); /* @@ -1124,7 +1123,7 @@ unsigned long pnv_cpu_offline(unsigned int cpu) * stop instruction */ -int validate_psscr_val_mask(u64 *psscr_val, u64 *psscr_mask, u32 flags) +int __init validate_psscr_val_mask(u64 *psscr_val, u64 *psscr_mask, u32 flags) { int err = 0; @@ -1318,7 +1317,7 @@ static void __init pnv_probe_idle_states(void) * which is the number of cpuidle states discovered through device-tree. */ -static int pnv_parse_cpuidle_dt(void) +static int __init pnv_parse_cpuidle_dt(void) { struct device_node *np; int nr_idle_states, i; diff --git a/arch/powerpc/platforms/powernv/opal-core.c b/arch/powerpc/platforms/powernv/opal-core.c index 5b9736bbc2aa..0331f1973f0e 100644 --- a/arch/powerpc/platforms/powernv/opal-core.c +++ b/arch/powerpc/platforms/powernv/opal-core.c @@ -89,7 +89,7 @@ static inline int is_opalcore_usable(void) return (oc_conf && oc_conf->opalcorebuf != NULL) ? 1 : 0; } -static Elf64_Word *append_elf64_note(Elf64_Word *buf, char *name, +static Elf64_Word *__init append_elf64_note(Elf64_Word *buf, char *name, u32 type, void *data, size_t data_len) { @@ -108,7 +108,7 @@ static Elf64_Word *append_elf64_note(Elf64_Word *buf, char *name, return buf; } -static void fill_prstatus(struct elf_prstatus *prstatus, int pir, +static void __init fill_prstatus(struct elf_prstatus *prstatus, int pir, struct pt_regs *regs) { memset(prstatus, 0, sizeof(struct elf_prstatus)); @@ -134,7 +134,7 @@ static void fill_prstatus(struct elf_prstatus *prstatus, int pir, } } -static Elf64_Word *auxv_to_elf64_notes(Elf64_Word *buf, +static Elf64_Word *__init auxv_to_elf64_notes(Elf64_Word *buf, u64 opal_boot_entry) { Elf64_Off *bufp = (Elf64_Off *)oc_conf->auxv_buf; diff --git a/arch/powerpc/platforms/powernv/opal-dump.c b/arch/powerpc/platforms/powernv/opal-dump.c index 717d1d30ade5..410ed5b9de29 100644 --- a/arch/powerpc/platforms/powernv/opal-dump.c +++ b/arch/powerpc/platforms/powernv/opal-dump.c @@ -208,11 +208,12 @@ static struct attribute *dump_default_attrs[] = { &ack_attribute.attr, NULL, }; +ATTRIBUTE_GROUPS(dump_default); static struct kobj_type dump_ktype = { .sysfs_ops = &dump_sysfs_ops, .release = &dump_release, - .default_attrs = dump_default_attrs, + .default_groups = dump_default_groups, }; static int64_t dump_read_info(uint32_t *dump_id, uint32_t *dump_size, uint32_t *dump_type) diff --git a/arch/powerpc/platforms/powernv/opal-elog.c b/arch/powerpc/platforms/powernv/opal-elog.c index 5821b0fa8614..554fdd7f88b8 100644 --- a/arch/powerpc/platforms/powernv/opal-elog.c +++ b/arch/powerpc/platforms/powernv/opal-elog.c @@ -144,11 +144,12 @@ static struct attribute *elog_default_attrs[] = { &ack_attribute.attr, NULL, }; +ATTRIBUTE_GROUPS(elog_default); static struct kobj_type elog_ktype = { .sysfs_ops = &elog_sysfs_ops, .release = &elog_release, - .default_attrs = elog_default_attrs, + .default_groups = elog_default_groups, }; /* Maximum size of a single log on FSP is 16KB */ diff --git a/arch/powerpc/platforms/powernv/opal-fadump.c b/arch/powerpc/platforms/powernv/opal-fadump.c index 9a360ced663b..c8ad057c7221 100644 --- a/arch/powerpc/platforms/powernv/opal-fadump.c +++ b/arch/powerpc/platforms/powernv/opal-fadump.c @@ -112,7 +112,7 @@ static void opal_fadump_update_config(struct fw_dump *fadump_conf, * This function is called in the capture kernel to get configuration details * from metadata setup by the first kernel. */ -static void opal_fadump_get_config(struct fw_dump *fadump_conf, +static void __init opal_fadump_get_config(struct fw_dump *fadump_conf, const struct opal_fadump_mem_struct *fdm) { unsigned long base, size, last_end, hole_size; diff --git a/arch/powerpc/platforms/powernv/opal-imc.c b/arch/powerpc/platforms/powernv/opal-imc.c index 05d3832019b9..3fea5da6d1b3 100644 --- a/arch/powerpc/platforms/powernv/opal-imc.c +++ b/arch/powerpc/platforms/powernv/opal-imc.c @@ -200,13 +200,13 @@ static void disable_nest_pmu_counters(void) static void disable_core_pmu_counters(void) { - cpumask_t cores_map; int cpu, rc; cpus_read_lock(); /* Disable the IMC Core functions */ - cores_map = cpu_online_cores_map(); - for_each_cpu(cpu, &cores_map) { + for_each_online_cpu(cpu) { + if (cpu_first_thread_sibling(cpu) != cpu) + continue; rc = opal_imc_counters_stop(OPAL_IMC_COUNTERS_CORE, get_hard_smp_processor_id(cpu)); if (rc) diff --git a/arch/powerpc/platforms/powernv/opal-lpc.c b/arch/powerpc/platforms/powernv/opal-lpc.c index 1e5d51db40f8..5390c888db16 100644 --- a/arch/powerpc/platforms/powernv/opal-lpc.c +++ b/arch/powerpc/platforms/powernv/opal-lpc.c @@ -396,6 +396,7 @@ void __init opal_lpc_init(void) if (!of_get_property(np, "primary", NULL)) continue; opal_lpc_chip_id = of_get_ibm_chip_id(np); + of_node_put(np); break; } if (opal_lpc_chip_id < 0) diff --git a/arch/powerpc/platforms/powernv/opal-msglog.c b/arch/powerpc/platforms/powernv/opal-msglog.c index d3b6e135c18b..22d6efe17b0d 100644 --- a/arch/powerpc/platforms/powernv/opal-msglog.c +++ b/arch/powerpc/platforms/powernv/opal-msglog.c @@ -105,7 +105,7 @@ static struct bin_attribute opal_msglog_attr = { .read = opal_msglog_read }; -struct memcons *memcons_init(struct device_node *node, const char *mc_prop_name) +struct memcons *__init memcons_init(struct device_node *node, const char *mc_prop_name) { u64 mcaddr; struct memcons *mc; @@ -133,7 +133,7 @@ out_err: return NULL; } -u32 memcons_get_size(struct memcons *mc) +u32 __init memcons_get_size(struct memcons *mc) { return be32_to_cpu(mc->ibuf_size) + be32_to_cpu(mc->obuf_size); } diff --git a/arch/powerpc/platforms/powernv/opal-power.c b/arch/powerpc/platforms/powernv/opal-power.c index 2a3717fc24ea..db99ffcb7b82 100644 --- a/arch/powerpc/platforms/powernv/opal-power.c +++ b/arch/powerpc/platforms/powernv/opal-power.c @@ -53,7 +53,7 @@ static bool detect_epow(void) } /* Check for existing EPOW, DPO events */ -static bool poweroff_pending(void) +static bool __init poweroff_pending(void) { int rc; __be64 opal_dpo_timeout; diff --git a/arch/powerpc/platforms/powernv/opal-powercap.c b/arch/powerpc/platforms/powernv/opal-powercap.c index c16d44f6f1d1..64506b46e77b 100644 --- a/arch/powerpc/platforms/powernv/opal-powercap.c +++ b/arch/powerpc/platforms/powernv/opal-powercap.c @@ -129,7 +129,7 @@ out_token: return ret; } -static void powercap_add_attr(int handle, const char *name, +static void __init powercap_add_attr(int handle, const char *name, struct powercap_attr *attr) { attr->handle = handle; diff --git a/arch/powerpc/platforms/powernv/opal-rtc.c b/arch/powerpc/platforms/powernv/opal-rtc.c index 44d7dacb33a2..a9bcf9217e64 100644 --- a/arch/powerpc/platforms/powernv/opal-rtc.c +++ b/arch/powerpc/platforms/powernv/opal-rtc.c @@ -18,7 +18,7 @@ #include <asm/firmware.h> #include <asm/machdep.h> -static void opal_to_tm(u32 y_m_d, u64 h_m_s_ms, struct rtc_time *tm) +static void __init opal_to_tm(u32 y_m_d, u64 h_m_s_ms, struct rtc_time *tm) { tm->tm_year = ((bcd2bin(y_m_d >> 24) * 100) + bcd2bin((y_m_d >> 16) & 0xff)) - 1900; diff --git a/arch/powerpc/platforms/powernv/opal-sensor-groups.c b/arch/powerpc/platforms/powernv/opal-sensor-groups.c index f8ae1fb0c102..8fba7d25ae56 100644 --- a/arch/powerpc/platforms/powernv/opal-sensor-groups.c +++ b/arch/powerpc/platforms/powernv/opal-sensor-groups.c @@ -126,7 +126,7 @@ static void add_attr(int handle, struct sg_attr *attr, int index) attr->attr.store = ops_info[index].store; } -static int add_attr_group(const __be32 *ops, int len, struct sensor_group *sg, +static int __init add_attr_group(const __be32 *ops, int len, struct sensor_group *sg, u32 handle) { int i, j; @@ -144,7 +144,7 @@ static int add_attr_group(const __be32 *ops, int len, struct sensor_group *sg, return sysfs_create_group(sg_kobj, &sg->sg); } -static int get_nr_attrs(const __be32 *ops, int len) +static int __init get_nr_attrs(const __be32 *ops, int len) { int i, j; int nr_attrs = 0; diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index e9d18519e650..55a8fbfdb5b2 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -73,7 +73,7 @@ static struct task_struct *kopald_tsk; static struct opal_msg *opal_msg; static u32 opal_msg_size __ro_after_init; -void opal_configure_cores(void) +void __init opal_configure_cores(void) { u64 reinit_flags = 0; @@ -779,7 +779,7 @@ out: return !!recover_addr; } -static int opal_sysfs_init(void) +static int __init opal_sysfs_init(void) { opal_kobj = kobject_create_and_add("opal", firmware_kobj); if (!opal_kobj) { @@ -937,7 +937,7 @@ static void __init opal_dump_region_init(void) "rc = %d\n", rc); } -static void opal_pdev_init(const char *compatible) +static void __init opal_pdev_init(const char *compatible) { struct device_node *np; @@ -981,7 +981,7 @@ void opal_wake_poller(void) wake_up_process(kopald_tsk); } -static void opal_init_heartbeat(void) +static void __init opal_init_heartbeat(void) { /* Old firwmware, we assume the HVC heartbeat is sufficient */ if (of_property_read_u32(opal_node, "ibm,heartbeat-ms", diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 8913c86009d9..b722ac902269 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -2265,7 +2265,7 @@ static const struct irq_domain_ops pnv_irq_domain_ops = { .free = pnv_irq_domain_free, }; -static int pnv_msi_allocate_domains(struct pci_controller *hose, unsigned int count) +static int __init pnv_msi_allocate_domains(struct pci_controller *hose, unsigned int count) { struct pnv_phb *phb = hose->private_data; struct irq_domain *parent = irq_get_default_host(); @@ -2298,7 +2298,7 @@ static int pnv_msi_allocate_domains(struct pci_controller *hose, unsigned int co return 0; } -static void pnv_pci_init_ioda_msis(struct pnv_phb *phb) +static void __init pnv_pci_init_ioda_msis(struct pnv_phb *phb) { unsigned int count; const __be32 *prop = of_get_property(phb->hose->dn, diff --git a/arch/powerpc/platforms/powernv/powernv.h b/arch/powerpc/platforms/powernv/powernv.h index 11df4e16a1cc..e297bf4abfcb 100644 --- a/arch/powerpc/platforms/powernv/powernv.h +++ b/arch/powerpc/platforms/powernv/powernv.h @@ -39,7 +39,7 @@ bool cpu_core_split_required(void); struct memcons; ssize_t memcons_copy(struct memcons *mc, char *to, loff_t pos, size_t count); -u32 memcons_get_size(struct memcons *mc); -struct memcons *memcons_init(struct device_node *node, const char *mc_prop_name); +u32 __init memcons_get_size(struct memcons *mc); +struct memcons *__init memcons_init(struct device_node *node, const char *mc_prop_name); #endif /* _POWERNV_H */ diff --git a/arch/powerpc/platforms/powernv/rng.c b/arch/powerpc/platforms/powernv/rng.c index 72c25295c1c2..b4386714494a 100644 --- a/arch/powerpc/platforms/powernv/rng.c +++ b/arch/powerpc/platforms/powernv/rng.c @@ -80,7 +80,7 @@ static int powernv_get_random_darn(unsigned long *v) return 1; } -static int initialise_darn(void) +static int __init initialise_darn(void) { unsigned long val; int i; diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c index ad56a54ac9c5..105d889abd51 100644 --- a/arch/powerpc/platforms/powernv/setup.c +++ b/arch/powerpc/platforms/powernv/setup.c @@ -40,7 +40,7 @@ #include "powernv.h" -static bool fw_feature_is(const char *state, const char *name, +static bool __init fw_feature_is(const char *state, const char *name, struct device_node *fw_features) { struct device_node *np; @@ -55,7 +55,7 @@ static bool fw_feature_is(const char *state, const char *name, return rc; } -static void init_fw_feat_flags(struct device_node *np) +static void __init init_fw_feat_flags(struct device_node *np) { if (fw_feature_is("enabled", "inst-spec-barrier-ori31,31,0", np)) security_ftr_set(SEC_FTR_SPEC_BAR_ORI31); @@ -98,7 +98,7 @@ static void init_fw_feat_flags(struct device_node *np) security_ftr_clear(SEC_FTR_BNDS_CHK_SPEC_BAR); } -static void pnv_setup_security_mitigations(void) +static void __init pnv_setup_security_mitigations(void) { struct device_node *np, *fw_features; enum l1d_flush_type type; @@ -123,10 +123,14 @@ static void pnv_setup_security_mitigations(void) } /* - * If we are non-Power9 bare metal, we don't need to flush on kernel - * entry or after user access: they fix a P9 specific vulnerability. + * The issues addressed by the entry and uaccess flush don't affect P7 + * or P8, so on bare metal disable them explicitly in case firmware does + * not include the features to disable them. POWER9 and newer processors + * should have the appropriate firmware flags. */ - if (!pvr_version_is(PVR_POWER9)) { + if (pvr_version_is(PVR_POWER7) || pvr_version_is(PVR_POWER7p) || + pvr_version_is(PVR_POWER8E) || pvr_version_is(PVR_POWER8NVL) || + pvr_version_is(PVR_POWER8)) { security_ftr_clear(SEC_FTR_L1D_FLUSH_ENTRY); security_ftr_clear(SEC_FTR_L1D_FLUSH_UACCESS); } @@ -207,6 +211,7 @@ static void __init pnv_init(void) #endif add_preferred_console("hvc", 0, NULL); +#ifdef CONFIG_PPC_64S_HASH_MMU if (!radix_enabled()) { size_t size = sizeof(struct slb_entry) * mmu_slb_size; int i; @@ -219,6 +224,7 @@ static void __init pnv_init(void) cpu_to_node(i)); } } +#endif } static void __init pnv_init_IRQ(void) diff --git a/arch/powerpc/platforms/ps3/gelic_udbg.c b/arch/powerpc/platforms/ps3/gelic_udbg.c index cba4f8f5b8d7..6b298010fd84 100644 --- a/arch/powerpc/platforms/ps3/gelic_udbg.c +++ b/arch/powerpc/platforms/ps3/gelic_udbg.c @@ -113,7 +113,7 @@ static int unmap_dma_mem(int bus_id, int dev_id, u64 bus_addr, size_t len) return lv1_free_device_dma_region(bus_id, dev_id, real_bus_addr); } -static void gelic_debug_init(void) +static void __init gelic_debug_init(void) { s64 result; u64 v2; diff --git a/arch/powerpc/platforms/ps3/mm.c b/arch/powerpc/platforms/ps3/mm.c index 9c44f335c0b9..5ce924611b94 100644 --- a/arch/powerpc/platforms/ps3/mm.c +++ b/arch/powerpc/platforms/ps3/mm.c @@ -41,7 +41,7 @@ enum { PAGE_SHIFT_16M = 24U, }; -static unsigned long make_page_sizes(unsigned long a, unsigned long b) +static unsigned long __init make_page_sizes(unsigned long a, unsigned long b) { return (a << 56) | (b << 48); } @@ -215,7 +215,7 @@ notrace void ps3_mm_vas_destroy(void) } } -static int ps3_mm_get_repository_highmem(struct mem_region *r) +static int __init ps3_mm_get_repository_highmem(struct mem_region *r) { int result; diff --git a/arch/powerpc/platforms/ps3/os-area.c b/arch/powerpc/platforms/ps3/os-area.c index e8530371aed6..cb844e0add2b 100644 --- a/arch/powerpc/platforms/ps3/os-area.c +++ b/arch/powerpc/platforms/ps3/os-area.c @@ -501,7 +501,7 @@ static int db_set_64(struct os_area_db *db, const struct os_area_db_id *id, return -1; } -static int db_get_64(const struct os_area_db *db, +static int __init db_get_64(const struct os_area_db *db, const struct os_area_db_id *id, uint64_t *value) { struct db_iterator i; @@ -517,7 +517,7 @@ static int db_get_64(const struct os_area_db *db, return -1; } -static int db_get_rtc_diff(const struct os_area_db *db, int64_t *rtc_diff) +static int __init db_get_rtc_diff(const struct os_area_db *db, int64_t *rtc_diff) { return db_get_64(db, &os_area_db_id_rtc_diff, (uint64_t*)rtc_diff); } diff --git a/arch/powerpc/platforms/ps3/platform.h b/arch/powerpc/platforms/ps3/platform.h index 07bd39ef71ff..6beecdb0d51f 100644 --- a/arch/powerpc/platforms/ps3/platform.h +++ b/arch/powerpc/platforms/ps3/platform.h @@ -35,7 +35,7 @@ void __init ps3_register_ipi_irq(unsigned int cpu, unsigned int virq); /* smp */ -void smp_init_ps3(void); +void __init smp_init_ps3(void); #ifdef CONFIG_SMP void ps3_smp_cleanup_cpu(int cpu); #else @@ -134,9 +134,9 @@ struct ps3_repository_device { int ps3_repository_find_device(struct ps3_repository_device *repo); int ps3_repository_find_device_by_id(struct ps3_repository_device *repo, u64 bus_id, u64 dev_id); -int ps3_repository_find_devices(enum ps3_bus_type bus_type, +int __init ps3_repository_find_devices(enum ps3_bus_type bus_type, int (*callback)(const struct ps3_repository_device *repo)); -int ps3_repository_find_bus(enum ps3_bus_type bus_type, unsigned int from, +int __init ps3_repository_find_bus(enum ps3_bus_type bus_type, unsigned int from, unsigned int *bus_index); int ps3_repository_find_interrupt(const struct ps3_repository_device *repo, enum ps3_interrupt_type intr_type, unsigned int *interrupt_id); @@ -211,8 +211,8 @@ static inline int ps3_repository_delete_highmem_info(unsigned int region_index) int ps3_repository_read_num_be(unsigned int *num_be); int ps3_repository_read_be_node_id(unsigned int be_index, u64 *node_id); int ps3_repository_read_be_id(u64 node_id, u64 *be_id); -int ps3_repository_read_tb_freq(u64 node_id, u64 *tb_freq); -int ps3_repository_read_be_tb_freq(unsigned int be_index, u64 *tb_freq); +int __init ps3_repository_read_tb_freq(u64 node_id, u64 *tb_freq); +int __init ps3_repository_read_be_tb_freq(unsigned int be_index, u64 *tb_freq); /* repository performance monitor info */ @@ -247,7 +247,7 @@ int ps3_repository_read_spu_resource_id(unsigned int res_index, /* repository vuart info */ -int ps3_repository_read_vuart_av_port(unsigned int *port); -int ps3_repository_read_vuart_sysmgr_port(unsigned int *port); +int __init ps3_repository_read_vuart_av_port(unsigned int *port); +int __init ps3_repository_read_vuart_sysmgr_port(unsigned int *port); #endif diff --git a/arch/powerpc/platforms/ps3/repository.c b/arch/powerpc/platforms/ps3/repository.c index 21712964e76f..205763061a2d 100644 --- a/arch/powerpc/platforms/ps3/repository.c +++ b/arch/powerpc/platforms/ps3/repository.c @@ -413,7 +413,7 @@ found_dev: return 0; } -int ps3_repository_find_devices(enum ps3_bus_type bus_type, +int __init ps3_repository_find_devices(enum ps3_bus_type bus_type, int (*callback)(const struct ps3_repository_device *repo)) { int result = 0; @@ -455,7 +455,7 @@ int ps3_repository_find_devices(enum ps3_bus_type bus_type, return result; } -int ps3_repository_find_bus(enum ps3_bus_type bus_type, unsigned int from, +int __init ps3_repository_find_bus(enum ps3_bus_type bus_type, unsigned int from, unsigned int *bus_index) { unsigned int i; @@ -908,7 +908,7 @@ int ps3_repository_read_boot_dat_size(unsigned int *size) return result; } -int ps3_repository_read_vuart_av_port(unsigned int *port) +int __init ps3_repository_read_vuart_av_port(unsigned int *port) { int result; u64 v1 = 0; @@ -923,7 +923,7 @@ int ps3_repository_read_vuart_av_port(unsigned int *port) return result; } -int ps3_repository_read_vuart_sysmgr_port(unsigned int *port) +int __init ps3_repository_read_vuart_sysmgr_port(unsigned int *port) { int result; u64 v1 = 0; @@ -1005,7 +1005,7 @@ int ps3_repository_read_be_id(u64 node_id, u64 *be_id) be_id, NULL); } -int ps3_repository_read_tb_freq(u64 node_id, u64 *tb_freq) +int __init ps3_repository_read_tb_freq(u64 node_id, u64 *tb_freq) { return read_node(PS3_LPAR_ID_PME, make_first_field("be", 0), @@ -1015,7 +1015,7 @@ int ps3_repository_read_tb_freq(u64 node_id, u64 *tb_freq) tb_freq, NULL); } -int ps3_repository_read_be_tb_freq(unsigned int be_index, u64 *tb_freq) +int __init ps3_repository_read_be_tb_freq(unsigned int be_index, u64 *tb_freq) { int result; u64 node_id; @@ -1178,7 +1178,7 @@ int ps3_repository_delete_highmem_info(unsigned int region_index) #if defined(DEBUG) -int ps3_repository_dump_resource_info(const struct ps3_repository_device *repo) +int __init ps3_repository_dump_resource_info(const struct ps3_repository_device *repo) { int result = 0; unsigned int res_index; @@ -1231,7 +1231,7 @@ int ps3_repository_dump_resource_info(const struct ps3_repository_device *repo) return result; } -static int dump_stor_dev_info(struct ps3_repository_device *repo) +static int __init dump_stor_dev_info(struct ps3_repository_device *repo) { int result = 0; unsigned int num_regions, region_index; @@ -1279,7 +1279,7 @@ out: return result; } -static int dump_device_info(struct ps3_repository_device *repo, +static int __init dump_device_info(struct ps3_repository_device *repo, unsigned int num_dev) { int result = 0; @@ -1323,7 +1323,7 @@ static int dump_device_info(struct ps3_repository_device *repo, return result; } -int ps3_repository_dump_bus_info(void) +int __init ps3_repository_dump_bus_info(void) { int result = 0; struct ps3_repository_device repo; diff --git a/arch/powerpc/platforms/ps3/smp.c b/arch/powerpc/platforms/ps3/smp.c index 93b1e73b3529..85295756005a 100644 --- a/arch/powerpc/platforms/ps3/smp.c +++ b/arch/powerpc/platforms/ps3/smp.c @@ -112,7 +112,7 @@ static struct smp_ops_t ps3_smp_ops = { .kick_cpu = smp_generic_kick_cpu, }; -void smp_init_ps3(void) +void __init smp_init_ps3(void) { DBG(" -> %s\n", __func__); smp_ops = &ps3_smp_ops; diff --git a/arch/powerpc/platforms/ps3/spu.c b/arch/powerpc/platforms/ps3/spu.c index 0c252478e556..4a2520ec6d7f 100644 --- a/arch/powerpc/platforms/ps3/spu.c +++ b/arch/powerpc/platforms/ps3/spu.c @@ -137,7 +137,7 @@ u64 ps3_get_spe_id(void *arg) } EXPORT_SYMBOL_GPL(ps3_get_spe_id); -static unsigned long get_vas_id(void) +static unsigned long __init get_vas_id(void) { u64 id; diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig index 2e57391e0778..f7fd91d153a4 100644 --- a/arch/powerpc/platforms/pseries/Kconfig +++ b/arch/powerpc/platforms/pseries/Kconfig @@ -17,7 +17,6 @@ config PPC_PSERIES select PPC_RTAS_DAEMON select RTAS_ERROR_LOGGING select PPC_UDBG_16550 - select PPC_NATIVE select PPC_DOORBELL select HOTPLUG_CPU select ARCH_RANDOM @@ -61,10 +60,6 @@ config PSERIES_ENERGY Provides: /sys/devices/system/cpu/pseries_(de)activation_hint_list and /sys/devices/system/cpu/cpuN/pseries_(de)activation_hint -config SCANLOG - tristate "Scanlog dump interface" - depends on RTAS_PROC && PPC_PSERIES - config IO_EVENT_IRQ bool "IO Event Interrupt support" depends on PPC_PSERIES diff --git a/arch/powerpc/platforms/pseries/Makefile b/arch/powerpc/platforms/pseries/Makefile index 41d8aee98da4..ee60b59024b4 100644 --- a/arch/powerpc/platforms/pseries/Makefile +++ b/arch/powerpc/platforms/pseries/Makefile @@ -8,7 +8,6 @@ obj-y := lpar.o hvCall.o nvram.o reconfig.o \ firmware.o power.o dlpar.o mobility.o rng.o \ pci.o pci_dlpar.o eeh_pseries.o msi.o obj-$(CONFIG_SMP) += smp.o -obj-$(CONFIG_SCANLOG) += scanlog.o obj-$(CONFIG_KEXEC_CORE) += kexec.o obj-$(CONFIG_PSERIES_ENERGY) += pseries_energy.o diff --git a/arch/powerpc/platforms/pseries/event_sources.c b/arch/powerpc/platforms/pseries/event_sources.c index be661e919c76..623dfe0d8e1c 100644 --- a/arch/powerpc/platforms/pseries/event_sources.c +++ b/arch/powerpc/platforms/pseries/event_sources.c @@ -8,7 +8,7 @@ #include "pseries.h" -void request_event_sources_irqs(struct device_node *np, +void __init request_event_sources_irqs(struct device_node *np, irq_handler_t handler, const char *name) { diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c index 5ab44600c8d3..b81fc846d99c 100644 --- a/arch/powerpc/platforms/pseries/hotplug-cpu.c +++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c @@ -864,12 +864,13 @@ static int __init pseries_cpu_hotplug_init(void) /* Processors can be added/removed only on LPAR */ if (firmware_has_feature(FW_FEATURE_LPAR)) { for_each_node(node) { - alloc_bootmem_cpumask_var(&node_recorded_ids_map[node]); + if (!alloc_cpumask_var_node(&node_recorded_ids_map[node], + GFP_KERNEL, node)) + return -ENOMEM; /* Record ids of CPU added at boot time */ - cpumask_or(node_recorded_ids_map[node], - node_recorded_ids_map[node], - cpumask_of_node(node)); + cpumask_copy(node_recorded_ids_map[node], + cpumask_of_node(node)); } of_reconfig_notifier_register(&pseries_smp_nb); diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index 8f998e55735b..4d991cf840d9 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -1654,7 +1654,7 @@ static struct notifier_block iommu_reconfig_nb = { }; /* These are called very early. */ -void iommu_init_early_pSeries(void) +void __init iommu_init_early_pSeries(void) { if (of_chosen && of_get_property(of_chosen, "linux,iommu-off", NULL)) return; diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index 3df6bdfea475..f8899d506ea4 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -58,6 +58,7 @@ EXPORT_SYMBOL(plpar_hcall); EXPORT_SYMBOL(plpar_hcall9); EXPORT_SYMBOL(plpar_hcall_norets); +#ifdef CONFIG_PPC_64S_HASH_MMU /* * H_BLOCK_REMOVE supported block size for this page size in segment who's base * page size is that page size. @@ -66,6 +67,7 @@ EXPORT_SYMBOL(plpar_hcall_norets); * page size. */ static int hblkrm_size[MMU_PAGE_COUNT][MMU_PAGE_COUNT] __ro_after_init; +#endif /* * Due to the involved complexity, and that the current hypervisor is only @@ -689,7 +691,7 @@ void vpa_init(int cpu) return; } -#ifdef CONFIG_PPC_BOOK3S_64 +#ifdef CONFIG_PPC_64S_HASH_MMU /* * PAPR says this feature is SLB-Buffer but firmware never * reports that. All SPLPAR support SLB shadow buffer. @@ -702,7 +704,7 @@ void vpa_init(int cpu) "cpu %d (hw %d) of area %lx failed with %ld\n", cpu, hwcpu, addr, ret); } -#endif /* CONFIG_PPC_BOOK3S_64 */ +#endif /* CONFIG_PPC_64S_HASH_MMU */ /* * Register dispatch trace log, if one has been allocated. @@ -712,6 +714,36 @@ void vpa_init(int cpu) #ifdef CONFIG_PPC_BOOK3S_64 +static int __init pseries_lpar_register_process_table(unsigned long base, + unsigned long page_size, unsigned long table_size) +{ + long rc; + unsigned long flags = 0; + + if (table_size) + flags |= PROC_TABLE_NEW; + if (radix_enabled()) { + flags |= PROC_TABLE_RADIX; + if (mmu_has_feature(MMU_FTR_GTSE)) + flags |= PROC_TABLE_GTSE; + } else + flags |= PROC_TABLE_HPT_SLB; + for (;;) { + rc = plpar_hcall_norets(H_REGISTER_PROC_TBL, flags, base, + page_size, table_size); + if (!H_IS_LONG_BUSY(rc)) + break; + mdelay(get_longbusy_msecs(rc)); + } + if (rc != H_SUCCESS) { + pr_err("Failed to register process table (rc=%ld)\n", rc); + BUG(); + } + return rc; +} + +#ifdef CONFIG_PPC_64S_HASH_MMU + static long pSeries_lpar_hpte_insert(unsigned long hpte_group, unsigned long vpn, unsigned long pa, unsigned long rflags, unsigned long vflags, @@ -1680,34 +1712,6 @@ static int pseries_lpar_resize_hpt(unsigned long shift) return 0; } -static int pseries_lpar_register_process_table(unsigned long base, - unsigned long page_size, unsigned long table_size) -{ - long rc; - unsigned long flags = 0; - - if (table_size) - flags |= PROC_TABLE_NEW; - if (radix_enabled()) { - flags |= PROC_TABLE_RADIX; - if (mmu_has_feature(MMU_FTR_GTSE)) - flags |= PROC_TABLE_GTSE; - } else - flags |= PROC_TABLE_HPT_SLB; - for (;;) { - rc = plpar_hcall_norets(H_REGISTER_PROC_TBL, flags, base, - page_size, table_size); - if (!H_IS_LONG_BUSY(rc)) - break; - mdelay(get_longbusy_msecs(rc)); - } - if (rc != H_SUCCESS) { - pr_err("Failed to register process table (rc=%ld)\n", rc); - BUG(); - } - return rc; -} - void __init hpte_init_pseries(void) { mmu_hash_ops.hpte_invalidate = pSeries_lpar_hpte_invalidate; @@ -1730,9 +1734,10 @@ void __init hpte_init_pseries(void) if (cpu_has_feature(CPU_FTR_ARCH_300)) pseries_lpar_register_process_table(0, 0, 0); } +#endif /* CONFIG_PPC_64S_HASH_MMU */ #ifdef CONFIG_PPC_RADIX_MMU -void radix_init_pseries(void) +void __init radix_init_pseries(void) { pr_info("Using radix MMU under hypervisor\n"); @@ -1932,7 +1937,8 @@ int h_get_mpp_x(struct hvcall_mpp_x_data *mpp_x_data) return rc; } -static unsigned long vsid_unscramble(unsigned long vsid, int ssize) +#ifdef CONFIG_PPC_64S_HASH_MMU +static unsigned long __init vsid_unscramble(unsigned long vsid, int ssize) { unsigned long protovsid; unsigned long va_bits = VA_BITS; @@ -1992,6 +1998,7 @@ static int __init reserve_vrma_context_id(void) return 0; } machine_device_initcall(pseries, reserve_vrma_context_id); +#endif #ifdef CONFIG_DEBUG_FS /* debugfs file interface for vpa data */ diff --git a/arch/powerpc/platforms/pseries/lparcfg.c b/arch/powerpc/platforms/pseries/lparcfg.c index f71eac74ea92..c7940fcfc911 100644 --- a/arch/powerpc/platforms/pseries/lparcfg.c +++ b/arch/powerpc/platforms/pseries/lparcfg.c @@ -531,8 +531,9 @@ static int pseries_lparcfg_data(struct seq_file *m, void *v) seq_printf(m, "shared_processor_mode=%d\n", lppaca_shared_proc(get_lppaca())); -#ifdef CONFIG_PPC_BOOK3S_64 - seq_printf(m, "slb_size=%d\n", mmu_slb_size); +#ifdef CONFIG_PPC_64S_HASH_MMU + if (!radix_enabled()) + seq_printf(m, "slb_size=%d\n", mmu_slb_size); #endif parse_em_data(m); maxmem_data(m); diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c index 210a37a065fb..85033f392c78 100644 --- a/arch/powerpc/platforms/pseries/mobility.c +++ b/arch/powerpc/platforms/pseries/mobility.c @@ -451,11 +451,15 @@ static void prod_others(void) static u16 clamp_slb_size(void) { +#ifdef CONFIG_PPC_64S_HASH_MMU u16 prev = mmu_slb_size; slb_set_size(SLB_MIN_SIZE); return prev; +#else + return 0; +#endif } static int do_suspend(void) diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h index 3544778e06d0..56c9ef9052e9 100644 --- a/arch/powerpc/platforms/pseries/pseries.h +++ b/arch/powerpc/platforms/pseries/pseries.h @@ -11,7 +11,7 @@ struct device_node; -extern void request_event_sources_irqs(struct device_node *np, +void __init request_event_sources_irqs(struct device_node *np, irq_handler_t handler, const char *name); #include <linux/of.h> @@ -113,6 +113,11 @@ int dlpar_workqueue_init(void); extern u32 pseries_security_flavor; void pseries_setup_security_mitigations(void); + +#ifdef CONFIG_PPC_64S_HASH_MMU void pseries_lpar_read_hblkrm_characteristics(void); +#else +static inline void pseries_lpar_read_hblkrm_characteristics(void) { } +#endif #endif /* _PSERIES_PSERIES_H */ diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c index 56092dccfdb8..74c9b1b5bc66 100644 --- a/arch/powerpc/platforms/pseries/ras.c +++ b/arch/powerpc/platforms/pseries/ras.c @@ -526,6 +526,7 @@ static int mce_handle_err_realmode(int disposition, u8 error_type) disposition = RTAS_DISP_FULLY_RECOVERED; break; case MC_ERROR_TYPE_SLB: +#ifdef CONFIG_PPC_64S_HASH_MMU /* * Store the old slb content in paca before flushing. * Print this when we go to virtual mode. @@ -538,6 +539,7 @@ static int mce_handle_err_realmode(int disposition, u8 error_type) slb_save_contents(local_paca->mce_faulty_slbs); flush_and_reload_slb(); disposition = RTAS_DISP_FULLY_RECOVERED; +#endif break; default: break; diff --git a/arch/powerpc/platforms/pseries/rtas-fadump.c b/arch/powerpc/platforms/pseries/rtas-fadump.c index f8f73b47b107..35f9cb602c30 100644 --- a/arch/powerpc/platforms/pseries/rtas-fadump.c +++ b/arch/powerpc/platforms/pseries/rtas-fadump.c @@ -39,7 +39,7 @@ static void rtas_fadump_update_config(struct fw_dump *fadump_conf, * This function is called in the capture kernel to get configuration details * setup in the first kernel and passed to the f/w. */ -static void rtas_fadump_get_config(struct fw_dump *fadump_conf, +static void __init rtas_fadump_get_config(struct fw_dump *fadump_conf, const struct rtas_fadump_mem_struct *fdm) { fadump_conf->boot_mem_addr[0] = @@ -247,7 +247,7 @@ static inline int rtas_fadump_gpr_index(u64 id) return i; } -static void rtas_fadump_set_regval(struct pt_regs *regs, u64 reg_id, u64 reg_val) +static void __init rtas_fadump_set_regval(struct pt_regs *regs, u64 reg_id, u64 reg_val) { int i; @@ -272,7 +272,7 @@ static void rtas_fadump_set_regval(struct pt_regs *regs, u64 reg_id, u64 reg_val regs->dsisr = (unsigned long)reg_val; } -static struct rtas_fadump_reg_entry* +static struct rtas_fadump_reg_entry* __init rtas_fadump_read_regs(struct rtas_fadump_reg_entry *reg_entry, struct pt_regs *regs) { diff --git a/arch/powerpc/platforms/pseries/scanlog.c b/arch/powerpc/platforms/pseries/scanlog.c deleted file mode 100644 index 2879c4f0ceb7..000000000000 --- a/arch/powerpc/platforms/pseries/scanlog.c +++ /dev/null @@ -1,195 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * c 2001 PPC 64 Team, IBM Corp - * - * scan-log-data driver for PPC64 Todd Inglett <tinglett@vnet.ibm.com> - * - * When ppc64 hardware fails the service processor dumps internal state - * of the system. After a reboot the operating system can access a dump - * of this data using this driver. A dump exists if the device-tree - * /chosen/ibm,scan-log-data property exists. - * - * This driver exports /proc/powerpc/scan-log-dump which can be read. - * The driver supports only sequential reads. - * - * The driver looks at a write to the driver for the single word "reset". - * If given, the driver will reset the scanlog so the platform can free it. - */ - -#include <linux/module.h> -#include <linux/types.h> -#include <linux/errno.h> -#include <linux/proc_fs.h> -#include <linux/init.h> -#include <linux/delay.h> -#include <linux/slab.h> -#include <linux/uaccess.h> -#include <asm/rtas.h> -#include <asm/prom.h> - -#define MODULE_VERS "1.0" -#define MODULE_NAME "scanlog" - -/* Status returns from ibm,scan-log-dump */ -#define SCANLOG_COMPLETE 0 -#define SCANLOG_HWERROR -1 -#define SCANLOG_CONTINUE 1 - - -static unsigned int ibm_scan_log_dump; /* RTAS token */ -static unsigned int *scanlog_buffer; /* The data buffer */ - -static ssize_t scanlog_read(struct file *file, char __user *buf, - size_t count, loff_t *ppos) -{ - unsigned int *data = scanlog_buffer; - int status; - unsigned long len, off; - unsigned int wait_time; - - if (count > RTAS_DATA_BUF_SIZE) - count = RTAS_DATA_BUF_SIZE; - - if (count < 1024) { - /* This is the min supported by this RTAS call. Rather - * than do all the buffering we insist the user code handle - * larger reads. As long as cp works... :) - */ - printk(KERN_ERR "scanlog: cannot perform a small read (%ld)\n", count); - return -EINVAL; - } - - if (!access_ok(buf, count)) - return -EFAULT; - - for (;;) { - wait_time = 500; /* default wait if no data */ - spin_lock(&rtas_data_buf_lock); - memcpy(rtas_data_buf, data, RTAS_DATA_BUF_SIZE); - status = rtas_call(ibm_scan_log_dump, 2, 1, NULL, - (u32) __pa(rtas_data_buf), (u32) count); - memcpy(data, rtas_data_buf, RTAS_DATA_BUF_SIZE); - spin_unlock(&rtas_data_buf_lock); - - pr_debug("scanlog: status=%d, data[0]=%x, data[1]=%x, " \ - "data[2]=%x\n", status, data[0], data[1], data[2]); - switch (status) { - case SCANLOG_COMPLETE: - pr_debug("scanlog: hit eof\n"); - return 0; - case SCANLOG_HWERROR: - pr_debug("scanlog: hardware error reading data\n"); - return -EIO; - case SCANLOG_CONTINUE: - /* We may or may not have data yet */ - len = data[1]; - off = data[2]; - if (len > 0) { - if (copy_to_user(buf, ((char *)data)+off, len)) - return -EFAULT; - return len; - } - /* Break to sleep default time */ - break; - default: - /* Assume extended busy */ - wait_time = rtas_busy_delay_time(status); - if (!wait_time) { - printk(KERN_ERR "scanlog: unknown error " \ - "from rtas: %d\n", status); - return -EIO; - } - } - /* Apparently no data yet. Wait and try again. */ - msleep_interruptible(wait_time); - } - /*NOTREACHED*/ -} - -static ssize_t scanlog_write(struct file * file, const char __user * buf, - size_t count, loff_t *ppos) -{ - char stkbuf[20]; - int status; - - if (count > 19) count = 19; - if (copy_from_user (stkbuf, buf, count)) { - return -EFAULT; - } - stkbuf[count] = 0; - - if (buf) { - if (strncmp(stkbuf, "reset", 5) == 0) { - pr_debug("scanlog: reset scanlog\n"); - status = rtas_call(ibm_scan_log_dump, 2, 1, NULL, 0, 0); - pr_debug("scanlog: rtas returns %d\n", status); - } - } - return count; -} - -static int scanlog_open(struct inode * inode, struct file * file) -{ - unsigned int *data = scanlog_buffer; - - if (data[0] != 0) { - /* This imperfect test stops a second copy of the - * data (or a reset while data is being copied) - */ - return -EBUSY; - } - - data[0] = 0; /* re-init so we restart the scan */ - - return 0; -} - -static int scanlog_release(struct inode * inode, struct file * file) -{ - unsigned int *data = scanlog_buffer; - - data[0] = 0; - return 0; -} - -static const struct proc_ops scanlog_proc_ops = { - .proc_read = scanlog_read, - .proc_write = scanlog_write, - .proc_open = scanlog_open, - .proc_release = scanlog_release, - .proc_lseek = noop_llseek, -}; - -static int __init scanlog_init(void) -{ - struct proc_dir_entry *ent; - int err = -ENOMEM; - - ibm_scan_log_dump = rtas_token("ibm,scan-log-dump"); - if (ibm_scan_log_dump == RTAS_UNKNOWN_SERVICE) - return -ENODEV; - - /* Ideally we could allocate a buffer < 4G */ - scanlog_buffer = kzalloc(RTAS_DATA_BUF_SIZE, GFP_KERNEL); - if (!scanlog_buffer) - goto err; - - ent = proc_create("powerpc/rtas/scan-log-dump", 0400, NULL, - &scanlog_proc_ops); - if (!ent) - goto err; - return 0; -err: - kfree(scanlog_buffer); - return err; -} - -static void __exit scanlog_cleanup(void) -{ - remove_proc_entry("powerpc/rtas/scan-log-dump", NULL); - kfree(scanlog_buffer); -} - -module_init(scanlog_init); -module_exit(scanlog_cleanup); -MODULE_LICENSE("GPL"); diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 8a62af5b9c24..83a04d967a59 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -112,7 +112,7 @@ static void __init fwnmi_init(void) u8 *mce_data_buf; unsigned int i; int nr_cpus = num_possible_cpus(); -#ifdef CONFIG_PPC_BOOK3S_64 +#ifdef CONFIG_PPC_64S_HASH_MMU struct slb_entry *slb_ptr; size_t size; #endif @@ -152,7 +152,7 @@ static void __init fwnmi_init(void) (RTAS_ERROR_LOG_MAX * i); } -#ifdef CONFIG_PPC_BOOK3S_64 +#ifdef CONFIG_PPC_64S_HASH_MMU if (!radix_enabled()) { /* Allocate per cpu area to save old slb contents during MCE */ size = sizeof(struct slb_entry) * mmu_slb_size * nr_cpus; @@ -447,7 +447,7 @@ void pseries_big_endian_exceptions(void) panic("Could not enable big endian exceptions"); } -void pseries_little_endian_exceptions(void) +void __init pseries_little_endian_exceptions(void) { long rc; @@ -801,7 +801,9 @@ static void __init pSeries_setup_arch(void) fwnmi_init(); pseries_setup_security_mitigations(); +#ifdef CONFIG_PPC_64S_HASH_MMU pseries_lpar_read_hblkrm_characteristics(); +#endif /* By default, only probe PCI (can be overridden by rtas_pci) */ pci_add_flags(PCI_PROBE_ONLY); @@ -905,7 +907,7 @@ void pSeries_coalesce_init(void) * fw_cmo_feature_init - FW_FEATURE_CMO is not stored in ibm,hypertas-functions, * handle that here. (Stolen from parse_system_parameter_string) */ -static void pSeries_cmo_feature_init(void) +static void __init pSeries_cmo_feature_init(void) { char *ptr, *key, *value, *end; int call_status; diff --git a/arch/powerpc/platforms/pseries/vas.c b/arch/powerpc/platforms/pseries/vas.c index b043e3936d21..d243ddc58827 100644 --- a/arch/powerpc/platforms/pseries/vas.c +++ b/arch/powerpc/platforms/pseries/vas.c @@ -151,8 +151,15 @@ int h_query_vas_capabilities(const u64 hcall, u8 query_type, u64 result) if (rc == H_SUCCESS) return 0; - pr_err("HCALL(%llx) error %ld, query_type %u, result buffer 0x%llx\n", - hcall, rc, query_type, result); + /* H_FUNCTION means HV does not support VAS so don't print an error */ + if (rc != H_FUNCTION) { + pr_err("%s error %ld, query_type %u, result buffer 0x%llx\n", + (hcall == H_QUERY_VAS_CAPABILITIES) ? + "H_QUERY_VAS_CAPABILITIES" : + "H_QUERY_NX_CAPABILITIES", + rc, query_type, result); + } + return -EIO; } EXPORT_SYMBOL_GPL(h_query_vas_capabilities); @@ -482,7 +489,7 @@ EXPORT_SYMBOL_GPL(vas_unregister_api_pseries); * Get the specific capabilities based on the feature type. * Right now supports GZIP default and GZIP QoS capabilities. */ -static int get_vas_capabilities(u8 feat, enum vas_cop_feat_type type, +static int __init get_vas_capabilities(u8 feat, enum vas_cop_feat_type type, struct hv_vas_cop_feat_caps *hv_caps) { struct vas_cop_feat_caps *caps; diff --git a/arch/powerpc/platforms/pseries/vio.c b/arch/powerpc/platforms/pseries/vio.c index feafcb582e1b..c9f9be4ea26a 100644 --- a/arch/powerpc/platforms/pseries/vio.c +++ b/arch/powerpc/platforms/pseries/vio.c @@ -1061,7 +1061,7 @@ static struct attribute *vio_bus_attrs[] = { }; ATTRIBUTE_GROUPS(vio_bus); -static void vio_cmo_sysfs_init(void) +static void __init vio_cmo_sysfs_init(void) { vio_bus_type.dev_groups = vio_cmo_dev_groups; vio_bus_type.bus_groups = vio_bus_groups; @@ -1073,7 +1073,7 @@ static int vio_cmo_bus_probe(struct vio_dev *viodev) { return 0; } static void vio_cmo_bus_remove(struct vio_dev *viodev) {} static void vio_cmo_set_dma_ops(struct vio_dev *viodev) {} static void vio_cmo_bus_init(void) {} -static void vio_cmo_sysfs_init(void) { } +static void __init vio_cmo_sysfs_init(void) { } #endif /* CONFIG_PPC_SMLPAR */ EXPORT_SYMBOL(vio_cmo_entitlement_update); EXPORT_SYMBOL(vio_cmo_set_dev_desired); @@ -1479,7 +1479,7 @@ EXPORT_SYMBOL(vio_register_device_node); * Starting from the root node provide, register the device node for * each child beneath the root. */ -static void vio_bus_scan_register_devices(char *root_name) +static void __init vio_bus_scan_register_devices(char *root_name) { struct device_node *node_root, *node_child; diff --git a/arch/powerpc/sysdev/cpm2.c b/arch/powerpc/sysdev/cpm2.c index 68538b8329f7..3f130312b6e9 100644 --- a/arch/powerpc/sysdev/cpm2.c +++ b/arch/powerpc/sysdev/cpm2.c @@ -135,7 +135,7 @@ void __cpm2_setbrg(uint brg, uint rate, uint clk, int div16, int src) } EXPORT_SYMBOL(__cpm2_setbrg); -int cpm2_clk_setup(enum cpm_clk_target target, int clock, int mode) +int __init cpm2_clk_setup(enum cpm_clk_target target, int clock, int mode) { int ret = 0; int shift; @@ -265,7 +265,7 @@ int cpm2_clk_setup(enum cpm_clk_target target, int clock, int mode) return ret; } -int cpm2_smc_clk_setup(enum cpm_clk_target target, int clock) +int __init cpm2_smc_clk_setup(enum cpm_clk_target target, int clock) { int ret = 0; int shift; @@ -326,7 +326,7 @@ struct cpm2_ioports { u32 res[3]; }; -void cpm2_set_pin(int port, int pin, int flags) +void __init cpm2_set_pin(int port, int pin, int flags) { struct cpm2_ioports __iomem *iop = (struct cpm2_ioports __iomem *)&cpm2_immr->im_ioport; diff --git a/arch/powerpc/sysdev/dart_iommu.c b/arch/powerpc/sysdev/dart_iommu.c index 1d33b7a5ea83..be6b99b1b352 100644 --- a/arch/powerpc/sysdev/dart_iommu.c +++ b/arch/powerpc/sysdev/dart_iommu.c @@ -226,7 +226,7 @@ static void dart_free(struct iommu_table *tbl, long index, long npages) dart_cache_sync(orig_dp, orig_npages); } -static void allocate_dart(void) +static void __init allocate_dart(void) { unsigned long tmp; diff --git a/arch/powerpc/sysdev/fsl_mpic_err.c b/arch/powerpc/sysdev/fsl_mpic_err.c index 9a98bb212922..df06bb6b838f 100644 --- a/arch/powerpc/sysdev/fsl_mpic_err.c +++ b/arch/powerpc/sysdev/fsl_mpic_err.c @@ -58,7 +58,7 @@ static struct irq_chip fsl_mpic_err_chip = { .irq_unmask = fsl_mpic_unmask_err, }; -int mpic_setup_error_int(struct mpic *mpic, int intvec) +int __init mpic_setup_error_int(struct mpic *mpic, int intvec) { int i; @@ -121,7 +121,7 @@ static irqreturn_t fsl_error_int_handler(int irq, void *data) return IRQ_HANDLED; } -void mpic_err_int_init(struct mpic *mpic, irq_hw_number_t irqnum) +void __init mpic_err_int_init(struct mpic *mpic, irq_hw_number_t irqnum) { unsigned int virq; int ret; diff --git a/arch/powerpc/sysdev/fsl_pci.c b/arch/powerpc/sysdev/fsl_pci.c index b8f76f3fd994..674f047b7820 100644 --- a/arch/powerpc/sysdev/fsl_pci.c +++ b/arch/powerpc/sysdev/fsl_pci.c @@ -1106,7 +1106,7 @@ static const struct of_device_id pci_ids[] = { struct device_node *fsl_pci_primary; -void fsl_pci_assign_primary(void) +void __init fsl_pci_assign_primary(void) { struct device_node *np; diff --git a/arch/powerpc/sysdev/fsl_pci.h b/arch/powerpc/sysdev/fsl_pci.h index 1d7a41205695..cdbde2e0c96e 100644 --- a/arch/powerpc/sysdev/fsl_pci.h +++ b/arch/powerpc/sysdev/fsl_pci.h @@ -120,7 +120,7 @@ u64 fsl_pci_immrbar_base(struct pci_controller *hose); extern struct device_node *fsl_pci_primary; #ifdef CONFIG_PCI -void fsl_pci_assign_primary(void); +void __init fsl_pci_assign_primary(void); #else static inline void fsl_pci_assign_primary(void) {} #endif diff --git a/arch/powerpc/sysdev/i8259.c b/arch/powerpc/sysdev/i8259.c index dc1a151c63d7..3b1ae98e3ce9 100644 --- a/arch/powerpc/sysdev/i8259.c +++ b/arch/powerpc/sysdev/i8259.c @@ -208,7 +208,7 @@ static const struct irq_domain_ops i8259_host_ops = { .xlate = i8259_host_xlate, }; -struct irq_domain *i8259_get_host(void) +struct irq_domain *__init i8259_get_host(void) { return i8259_host; } diff --git a/arch/powerpc/sysdev/ipic.c b/arch/powerpc/sysdev/ipic.c index 7638a50a7c38..3f10c9fc3b68 100644 --- a/arch/powerpc/sysdev/ipic.c +++ b/arch/powerpc/sysdev/ipic.c @@ -767,7 +767,7 @@ struct ipic * __init ipic_init(struct device_node *node, unsigned int flags) return ipic; } -void ipic_set_default_priority(void) +void __init ipic_set_default_priority(void) { ipic_write(primary_ipic->regs, IPIC_SIPRR_A, IPIC_PRIORITY_DEFAULT); ipic_write(primary_ipic->regs, IPIC_SIPRR_B, IPIC_PRIORITY_DEFAULT); diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c index 995fb2ada507..d5cb48b61bbd 100644 --- a/arch/powerpc/sysdev/mpic.c +++ b/arch/powerpc/sysdev/mpic.c @@ -1323,8 +1323,7 @@ struct mpic * __init mpic_alloc(struct device_node *node, psrc = of_get_property(mpic->node, "protected-sources", &psize); if (psrc) { /* Allocate a bitmap with one bit per interrupt */ - unsigned int mapsize = BITS_TO_LONGS(intvec_top + 1); - mpic->protected = kcalloc(mapsize, sizeof(long), GFP_KERNEL); + mpic->protected = bitmap_zalloc(intvec_top + 1, GFP_KERNEL); BUG_ON(mpic->protected == NULL); for (i = 0; i < psize/sizeof(u32); i++) { if (psrc[i] > intvec_top) @@ -1840,7 +1839,7 @@ unsigned int mpic_get_mcirq(void) } #ifdef CONFIG_SMP -void mpic_request_ipis(void) +void __init mpic_request_ipis(void) { struct mpic *mpic = mpic_primary; int i; diff --git a/arch/powerpc/sysdev/mpic.h b/arch/powerpc/sysdev/mpic.h index 73a31a429d46..bb460ff57a06 100644 --- a/arch/powerpc/sysdev/mpic.h +++ b/arch/powerpc/sysdev/mpic.h @@ -8,8 +8,8 @@ #ifdef CONFIG_PCI_MSI extern void mpic_msi_reserve_hwirq(struct mpic *mpic, irq_hw_number_t hwirq); -extern int mpic_msi_init_allocator(struct mpic *mpic); -extern int mpic_u3msi_init(struct mpic *mpic); +int __init mpic_msi_init_allocator(struct mpic *mpic); +int __init mpic_u3msi_init(struct mpic *mpic); #else static inline void mpic_msi_reserve_hwirq(struct mpic *mpic, irq_hw_number_t hwirq) @@ -24,7 +24,7 @@ static inline int mpic_u3msi_init(struct mpic *mpic) #endif #if defined(CONFIG_PCI_MSI) && defined(CONFIG_PPC_PASEMI) -int mpic_pasemi_msi_init(struct mpic *mpic); +int __init mpic_pasemi_msi_init(struct mpic *mpic); #else static inline int mpic_pasemi_msi_init(struct mpic *mpic) { return -1; } #endif @@ -37,8 +37,8 @@ extern void mpic_reset_core(int cpu); #ifdef CONFIG_FSL_SOC extern int mpic_map_error_int(struct mpic *mpic, unsigned int virq, irq_hw_number_t hw); -extern void mpic_err_int_init(struct mpic *mpic, irq_hw_number_t irqnum); -extern int mpic_setup_error_int(struct mpic *mpic, int intvec); +void __init mpic_err_int_init(struct mpic *mpic, irq_hw_number_t irqnum); +int __init mpic_setup_error_int(struct mpic *mpic, int intvec); #else static inline int mpic_map_error_int(struct mpic *mpic, unsigned int virq, irq_hw_number_t hw) { diff --git a/arch/powerpc/sysdev/mpic_msi.c b/arch/powerpc/sysdev/mpic_msi.c index 4695c04320ae..f412d6ad0b66 100644 --- a/arch/powerpc/sysdev/mpic_msi.c +++ b/arch/powerpc/sysdev/mpic_msi.c @@ -24,7 +24,7 @@ void mpic_msi_reserve_hwirq(struct mpic *mpic, irq_hw_number_t hwirq) } #ifdef CONFIG_MPIC_U3_HT_IRQS -static int mpic_msi_reserve_u3_hwirqs(struct mpic *mpic) +static int __init mpic_msi_reserve_u3_hwirqs(struct mpic *mpic) { irq_hw_number_t hwirq; const struct irq_domain_ops *ops = mpic->irqhost->ops; @@ -68,13 +68,13 @@ static int mpic_msi_reserve_u3_hwirqs(struct mpic *mpic) return 0; } #else -static int mpic_msi_reserve_u3_hwirqs(struct mpic *mpic) +static int __init mpic_msi_reserve_u3_hwirqs(struct mpic *mpic) { return -1; } #endif -int mpic_msi_init_allocator(struct mpic *mpic) +int __init mpic_msi_init_allocator(struct mpic *mpic) { int rc; diff --git a/arch/powerpc/sysdev/mpic_timer.c b/arch/powerpc/sysdev/mpic_timer.c index a42a20280035..444e9ce42d0a 100644 --- a/arch/powerpc/sysdev/mpic_timer.c +++ b/arch/powerpc/sysdev/mpic_timer.c @@ -384,7 +384,7 @@ struct mpic_timer *mpic_request_timer(irq_handler_t fn, void *dev, } EXPORT_SYMBOL(mpic_request_timer); -static int timer_group_get_freq(struct device_node *np, +static int __init timer_group_get_freq(struct device_node *np, struct timer_group_priv *priv) { u32 div; @@ -411,7 +411,7 @@ static int timer_group_get_freq(struct device_node *np, return 0; } -static int timer_group_get_irq(struct device_node *np, +static int __init timer_group_get_irq(struct device_node *np, struct timer_group_priv *priv) { const u32 all_timer[] = { 0, TIMERS_PER_GROUP }; @@ -459,7 +459,7 @@ static int timer_group_get_irq(struct device_node *np, return 0; } -static void timer_group_init(struct device_node *np) +static void __init timer_group_init(struct device_node *np) { struct timer_group_priv *priv; unsigned int i = 0; diff --git a/arch/powerpc/sysdev/mpic_u3msi.c b/arch/powerpc/sysdev/mpic_u3msi.c index fa53b6d85ef9..3f4841dfefb5 100644 --- a/arch/powerpc/sysdev/mpic_u3msi.c +++ b/arch/powerpc/sysdev/mpic_u3msi.c @@ -169,7 +169,7 @@ static int u3msi_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) return 0; } -int mpic_u3msi_init(struct mpic *mpic) +int __init mpic_u3msi_init(struct mpic *mpic) { int rc; struct pci_controller *phb; diff --git a/arch/powerpc/sysdev/tsi108_dev.c b/arch/powerpc/sysdev/tsi108_dev.c index 4c4a6efd5e5f..9e13fb35ed5c 100644 --- a/arch/powerpc/sysdev/tsi108_dev.c +++ b/arch/powerpc/sysdev/tsi108_dev.c @@ -51,13 +51,12 @@ phys_addr_t get_csrbase(void) } return tsi108_csr_base; } +EXPORT_SYMBOL(get_csrbase); u32 get_vir_csrbase(void) { return (u32) (ioremap(get_csrbase(), 0x10000)); } - -EXPORT_SYMBOL(get_csrbase); EXPORT_SYMBOL(get_vir_csrbase); static int __init tsi108_eth_of_init(void) diff --git a/arch/powerpc/sysdev/tsi108_pci.c b/arch/powerpc/sysdev/tsi108_pci.c index 042bb38fa5c2..1070220f15d5 100644 --- a/arch/powerpc/sysdev/tsi108_pci.c +++ b/arch/powerpc/sysdev/tsi108_pci.c @@ -257,7 +257,7 @@ static void tsi108_pci_int_unmask(u_int irq) mb(); } -static void init_pci_source(void) +static void __init init_pci_source(void) { tsi108_write_reg(TSI108_PCI_OFFSET + TSI108_PCI_IRP_CFG_CTL, 0x0000ff00); diff --git a/arch/powerpc/sysdev/udbg_memcons.c b/arch/powerpc/sysdev/udbg_memcons.c index d38bbeed219b..5020044400dc 100644 --- a/arch/powerpc/sysdev/udbg_memcons.c +++ b/arch/powerpc/sysdev/udbg_memcons.c @@ -92,7 +92,7 @@ int memcons_getc(void) return c; } -void udbg_init_memcons(void) +void __init udbg_init_memcons(void) { udbg_putc = memcons_putc; udbg_getc = memcons_getc; diff --git a/arch/powerpc/sysdev/xics/icp-hv.c b/arch/powerpc/sysdev/xics/icp-hv.c index 6765d9e264a3..cf8db19a4f7d 100644 --- a/arch/powerpc/sysdev/xics/icp-hv.c +++ b/arch/powerpc/sysdev/xics/icp-hv.c @@ -162,7 +162,7 @@ static const struct icp_ops icp_hv_ops = { #endif }; -int icp_hv_init(void) +int __init icp_hv_init(void) { struct device_node *np; diff --git a/arch/powerpc/sysdev/xics/icp-opal.c b/arch/powerpc/sysdev/xics/icp-opal.c index 675d708863d5..bda4c32582d9 100644 --- a/arch/powerpc/sysdev/xics/icp-opal.c +++ b/arch/powerpc/sysdev/xics/icp-opal.c @@ -184,7 +184,7 @@ static const struct icp_ops icp_opal_ops = { #endif }; -int icp_opal_init(void) +int __init icp_opal_init(void) { struct device_node *np; diff --git a/arch/powerpc/sysdev/xics/xics-common.c b/arch/powerpc/sysdev/xics/xics-common.c index 244a727c6ba4..f3fb2a12124c 100644 --- a/arch/powerpc/sysdev/xics/xics-common.c +++ b/arch/powerpc/sysdev/xics/xics-common.c @@ -121,7 +121,7 @@ void xics_mask_unknown_vec(unsigned int vec) #ifdef CONFIG_SMP -static void xics_request_ipi(void) +static void __init xics_request_ipi(void) { unsigned int ipi; diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c index 7b69299c2912..1ca5564bda9d 100644 --- a/arch/powerpc/sysdev/xive/common.c +++ b/arch/powerpc/sysdev/xive/common.c @@ -85,6 +85,16 @@ static DEFINE_PER_CPU(struct xive_cpu *, xive_cpu); #define XIVE_INVALID_TARGET (-1) /* + * Global toggle to switch on/off StoreEOI + */ +static bool xive_store_eoi = true; + +static bool xive_is_store_eoi(struct xive_irq_data *xd) +{ + return xd->flags & XIVE_IRQ_FLAG_STORE_EOI && xive_store_eoi; +} + +/* * Read the next entry in a queue, return its content if it's valid * or 0 if there is no new entry. * @@ -208,7 +218,7 @@ static notrace u8 xive_esb_read(struct xive_irq_data *xd, u32 offset) { u64 val; - if (offset == XIVE_ESB_SET_PQ_10 && xd->flags & XIVE_IRQ_FLAG_STORE_EOI) + if (offset == XIVE_ESB_SET_PQ_10 && xive_is_store_eoi(xd)) offset |= XIVE_ESB_LD_ST_MO; if ((xd->flags & XIVE_IRQ_FLAG_H_INT_ESB) && xive_ops->esb_rw) @@ -227,6 +237,21 @@ static void xive_esb_write(struct xive_irq_data *xd, u32 offset, u64 data) out_be64(xd->eoi_mmio + offset, data); } +#if defined(CONFIG_XMON) || defined(CONFIG_DEBUG_FS) +static void xive_irq_data_dump(struct xive_irq_data *xd, char *buffer, size_t size) +{ + u64 val = xive_esb_read(xd, XIVE_ESB_GET); + + snprintf(buffer, size, "flags=%c%c%c PQ=%c%c 0x%016llx 0x%016llx", + xive_is_store_eoi(xd) ? 'S' : ' ', + xd->flags & XIVE_IRQ_FLAG_LSI ? 'L' : ' ', + xd->flags & XIVE_IRQ_FLAG_H_INT_ESB ? 'H' : ' ', + val & XIVE_ESB_VAL_P ? 'P' : '-', + val & XIVE_ESB_VAL_Q ? 'Q' : '-', + xd->trig_page, xd->eoi_page); +} +#endif + #ifdef CONFIG_XMON static notrace void xive_dump_eq(const char *name, struct xive_q *q) { @@ -252,11 +277,10 @@ notrace void xmon_xive_do_dump(int cpu) #ifdef CONFIG_SMP { - u64 val = xive_esb_read(&xc->ipi_data, XIVE_ESB_GET); + char buffer[128]; - xmon_printf("IPI=0x%08x PQ=%c%c ", xc->hw_ipi, - val & XIVE_ESB_VAL_P ? 'P' : '-', - val & XIVE_ESB_VAL_Q ? 'Q' : '-'); + xive_irq_data_dump(&xc->ipi_data, buffer, sizeof(buffer)); + xmon_printf("IPI=0x%08x %s", xc->hw_ipi, buffer); } #endif xive_dump_eq("EQ", &xc->queue[xive_irq_priority]); @@ -291,15 +315,11 @@ int xmon_xive_get_irq_config(u32 hw_irq, struct irq_data *d) d = xive_get_irq_data(hw_irq); if (d) { - struct xive_irq_data *xd = irq_data_get_irq_handler_data(d); - u64 val = xive_esb_read(xd, XIVE_ESB_GET); - - xmon_printf("flags=%c%c%c PQ=%c%c", - xd->flags & XIVE_IRQ_FLAG_STORE_EOI ? 'S' : ' ', - xd->flags & XIVE_IRQ_FLAG_LSI ? 'L' : ' ', - xd->flags & XIVE_IRQ_FLAG_H_INT_ESB ? 'H' : ' ', - val & XIVE_ESB_VAL_P ? 'P' : '-', - val & XIVE_ESB_VAL_Q ? 'Q' : '-'); + char buffer[128]; + + xive_irq_data_dump(irq_data_get_irq_handler_data(d), + buffer, sizeof(buffer)); + xmon_printf("%s", buffer); } xmon_printf("\n"); @@ -385,7 +405,7 @@ static void xive_do_source_eoi(struct xive_irq_data *xd) xd->stale_p = false; /* If the XIVE supports the new "store EOI facility, use it */ - if (xd->flags & XIVE_IRQ_FLAG_STORE_EOI) { + if (xive_is_store_eoi(xd)) { xive_esb_write(xd, XIVE_ESB_STORE_EOI, 0); return; } @@ -451,6 +471,8 @@ static void xive_do_source_set_mask(struct xive_irq_data *xd, { u64 val; + pr_debug("%s: HW 0x%x %smask\n", __func__, xd->hw_irq, mask ? "" : "un"); + /* * If the interrupt had P set, it may be in a queue. * @@ -612,8 +634,8 @@ static unsigned int xive_irq_startup(struct irq_data *d) xd->saved_p = false; xd->stale_p = false; - pr_devel("xive_irq_startup: irq %d [0x%x] data @%p\n", - d->irq, hw_irq, d); + + pr_debug("%s: irq %d [0x%x] data @%p\n", __func__, d->irq, hw_irq, d); /* Pick a target */ target = xive_pick_irq_target(d, irq_data_get_affinity_mask(d)); @@ -654,8 +676,7 @@ static void xive_irq_shutdown(struct irq_data *d) struct xive_irq_data *xd = irq_data_get_irq_handler_data(d); unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d); - pr_devel("xive_irq_shutdown: irq %d [0x%x] data @%p\n", - d->irq, hw_irq, d); + pr_debug("%s: irq %d [0x%x] data @%p\n", __func__, d->irq, hw_irq, d); if (WARN_ON(xd->target == XIVE_INVALID_TARGET)) return; @@ -679,7 +700,7 @@ static void xive_irq_unmask(struct irq_data *d) { struct xive_irq_data *xd = irq_data_get_irq_handler_data(d); - pr_devel("xive_irq_unmask: irq %d data @%p\n", d->irq, xd); + pr_debug("%s: irq %d data @%p\n", __func__, d->irq, xd); xive_do_source_set_mask(xd, false); } @@ -688,7 +709,7 @@ static void xive_irq_mask(struct irq_data *d) { struct xive_irq_data *xd = irq_data_get_irq_handler_data(d); - pr_devel("xive_irq_mask: irq %d data @%p\n", d->irq, xd); + pr_debug("%s: irq %d data @%p\n", __func__, d->irq, xd); xive_do_source_set_mask(xd, true); } @@ -702,7 +723,7 @@ static int xive_irq_set_affinity(struct irq_data *d, u32 target, old_target; int rc = 0; - pr_debug("%s: irq %d/%x\n", __func__, d->irq, hw_irq); + pr_debug("%s: irq %d/0x%x\n", __func__, d->irq, hw_irq); /* Is this valid ? */ if (cpumask_any_and(cpumask, cpu_online_mask) >= nr_cpu_ids) @@ -975,7 +996,7 @@ EXPORT_SYMBOL_GPL(is_xive_irq); void xive_cleanup_irq_data(struct xive_irq_data *xd) { - pr_debug("%s for HW %x\n", __func__, xd->hw_irq); + pr_debug("%s for HW 0x%x\n", __func__, xd->hw_irq); if (xd->eoi_mmio) { iounmap(xd->eoi_mmio); @@ -1211,8 +1232,8 @@ static int xive_setup_cpu_ipi(unsigned int cpu) pr_err("Failed to map IPI CPU %d\n", cpu); return -EIO; } - pr_devel("CPU %d HW IPI %x, virq %d, trig_mmio=%p\n", cpu, - xc->hw_ipi, xive_ipi_irq, xc->ipi_data.trig_mmio); + pr_debug("CPU %d HW IPI 0x%x, virq %d, trig_mmio=%p\n", cpu, + xc->hw_ipi, xive_ipi_irq, xc->ipi_data.trig_mmio); /* Unmask it */ xive_do_source_set_mask(&xc->ipi_data, false); @@ -1390,7 +1411,7 @@ static int xive_irq_domain_alloc(struct irq_domain *domain, unsigned int virq, if (rc) return rc; - pr_debug("%s %d/%lx #%d\n", __func__, virq, hwirq, nr_irqs); + pr_debug("%s %d/0x%lx #%d\n", __func__, virq, hwirq, nr_irqs); for (i = 0; i < nr_irqs; i++) { /* TODO: call xive_irq_domain_map() */ @@ -1504,7 +1525,7 @@ static void xive_setup_cpu(void) #ifdef CONFIG_SMP void xive_smp_setup_cpu(void) { - pr_devel("SMP setup CPU %d\n", smp_processor_id()); + pr_debug("SMP setup CPU %d\n", smp_processor_id()); /* This will have already been done on the boot CPU */ if (smp_processor_id() != boot_cpuid) @@ -1650,10 +1671,10 @@ bool __init xive_core_init(struct device_node *np, const struct xive_ops *ops, ppc_md.get_irq = xive_get_irq; __xive_enabled = true; - pr_devel("Initializing host..\n"); + pr_debug("Initializing host..\n"); xive_init_host(np); - pr_devel("Initializing boot CPU..\n"); + pr_debug("Initializing boot CPU..\n"); /* Allocate per-CPU data and queues */ xive_prepare_cpu(smp_processor_id()); @@ -1691,36 +1712,36 @@ static int __init xive_off(char *arg) } __setup("xive=off", xive_off); -static void xive_debug_show_cpu(struct seq_file *m, int cpu) +static int __init xive_store_eoi_cmdline(char *arg) +{ + if (!arg) + return -EINVAL; + + if (strncmp(arg, "off", 3) == 0) { + pr_info("StoreEOI disabled on kernel command line\n"); + xive_store_eoi = false; + } + return 0; +} +__setup("xive.store-eoi=", xive_store_eoi_cmdline); + +#ifdef CONFIG_DEBUG_FS +static void xive_debug_show_ipi(struct seq_file *m, int cpu) { struct xive_cpu *xc = per_cpu(xive_cpu, cpu); - seq_printf(m, "CPU %d:", cpu); + seq_printf(m, "CPU %d: ", cpu); if (xc) { seq_printf(m, "pp=%02x CPPR=%02x ", xc->pending_prio, xc->cppr); #ifdef CONFIG_SMP { - u64 val = xive_esb_read(&xc->ipi_data, XIVE_ESB_GET); + char buffer[128]; - seq_printf(m, "IPI=0x%08x PQ=%c%c ", xc->hw_ipi, - val & XIVE_ESB_VAL_P ? 'P' : '-', - val & XIVE_ESB_VAL_Q ? 'Q' : '-'); + xive_irq_data_dump(&xc->ipi_data, buffer, sizeof(buffer)); + seq_printf(m, "IPI=0x%08x %s", xc->hw_ipi, buffer); } #endif - { - struct xive_q *q = &xc->queue[xive_irq_priority]; - u32 i0, i1, idx; - - if (q->qpage) { - idx = q->idx; - i0 = be32_to_cpup(q->qpage + idx); - idx = (idx + 1) & q->msk; - i1 = be32_to_cpup(q->qpage + idx); - seq_printf(m, "EQ idx=%d T=%d %08x %08x ...", - q->idx, q->toggle, i0, i1); - } - } } seq_puts(m, "\n"); } @@ -1732,8 +1753,7 @@ static void xive_debug_show_irq(struct seq_file *m, struct irq_data *d) u32 target; u8 prio; u32 lirq; - struct xive_irq_data *xd; - u64 val; + char buffer[128]; rc = xive_ops->get_irq_config(hw_irq, &target, &prio, &lirq); if (rc) { @@ -1744,43 +1764,101 @@ static void xive_debug_show_irq(struct seq_file *m, struct irq_data *d) seq_printf(m, "IRQ 0x%08x : target=0x%x prio=%02x lirq=0x%x ", hw_irq, target, prio, lirq); - xd = irq_data_get_irq_handler_data(d); - val = xive_esb_read(xd, XIVE_ESB_GET); - seq_printf(m, "flags=%c%c%c PQ=%c%c", - xd->flags & XIVE_IRQ_FLAG_STORE_EOI ? 'S' : ' ', - xd->flags & XIVE_IRQ_FLAG_LSI ? 'L' : ' ', - xd->flags & XIVE_IRQ_FLAG_H_INT_ESB ? 'H' : ' ', - val & XIVE_ESB_VAL_P ? 'P' : '-', - val & XIVE_ESB_VAL_Q ? 'Q' : '-'); + xive_irq_data_dump(irq_data_get_irq_handler_data(d), buffer, sizeof(buffer)); + seq_puts(m, buffer); seq_puts(m, "\n"); } -static int xive_core_debug_show(struct seq_file *m, void *private) +static int xive_irq_debug_show(struct seq_file *m, void *private) { unsigned int i; struct irq_desc *desc; + + for_each_irq_desc(i, desc) { + struct irq_data *d = irq_domain_get_irq_data(xive_irq_domain, i); + + if (d) + xive_debug_show_irq(m, d); + } + return 0; +} +DEFINE_SHOW_ATTRIBUTE(xive_irq_debug); + +static int xive_ipi_debug_show(struct seq_file *m, void *private) +{ int cpu; if (xive_ops->debug_show) xive_ops->debug_show(m, private); for_each_possible_cpu(cpu) - xive_debug_show_cpu(m, cpu); + xive_debug_show_ipi(m, cpu); + return 0; +} +DEFINE_SHOW_ATTRIBUTE(xive_ipi_debug); - for_each_irq_desc(i, desc) { - struct irq_data *d = irq_domain_get_irq_data(xive_irq_domain, i); +static void xive_eq_debug_show_one(struct seq_file *m, struct xive_q *q, u8 prio) +{ + int i; - if (d) - xive_debug_show_irq(m, d); + seq_printf(m, "EQ%d idx=%d T=%d\n", prio, q->idx, q->toggle); + if (q->qpage) { + for (i = 0; i < q->msk + 1; i++) { + if (!(i % 8)) + seq_printf(m, "%05d ", i); + seq_printf(m, "%08x%s", be32_to_cpup(q->qpage + i), + (i + 1) % 8 ? " " : "\n"); + } } + seq_puts(m, "\n"); +} + +static int xive_eq_debug_show(struct seq_file *m, void *private) +{ + int cpu = (long)m->private; + struct xive_cpu *xc = per_cpu(xive_cpu, cpu); + + if (xc) + xive_eq_debug_show_one(m, &xc->queue[xive_irq_priority], + xive_irq_priority); return 0; } -DEFINE_SHOW_ATTRIBUTE(xive_core_debug); +DEFINE_SHOW_ATTRIBUTE(xive_eq_debug); + +static void xive_core_debugfs_create(void) +{ + struct dentry *xive_dir; + struct dentry *xive_eq_dir; + long cpu; + char name[16]; + + xive_dir = debugfs_create_dir("xive", arch_debugfs_dir); + if (IS_ERR(xive_dir)) + return; + + debugfs_create_file("ipis", 0400, xive_dir, + NULL, &xive_ipi_debug_fops); + debugfs_create_file("interrupts", 0400, xive_dir, + NULL, &xive_irq_debug_fops); + xive_eq_dir = debugfs_create_dir("eqs", xive_dir); + for_each_possible_cpu(cpu) { + snprintf(name, sizeof(name), "cpu%ld", cpu); + debugfs_create_file(name, 0400, xive_eq_dir, (void *)cpu, + &xive_eq_debug_fops); + } + debugfs_create_bool("store-eoi", 0600, xive_dir, &xive_store_eoi); + + if (xive_ops->debug_create) + xive_ops->debug_create(xive_dir); +} +#else +static inline void xive_core_debugfs_create(void) { } +#endif /* CONFIG_DEBUG_FS */ int xive_core_debug_init(void) { - if (xive_enabled()) - debugfs_create_file("xive", 0400, arch_debugfs_dir, - NULL, &xive_core_debug_fops); + if (xive_enabled() && IS_ENABLED(CONFIG_DEBUG_FS)) + xive_core_debugfs_create(); + return 0; } diff --git a/arch/powerpc/sysdev/xive/native.c b/arch/powerpc/sysdev/xive/native.c index 1aec282cd650..f940428ad13f 100644 --- a/arch/powerpc/sysdev/xive/native.c +++ b/arch/powerpc/sysdev/xive/native.c @@ -41,7 +41,7 @@ static u32 xive_queue_shift; static u32 xive_pool_vps = XIVE_INVALID_VP; static struct kmem_cache *xive_provision_cache; static bool xive_has_single_esc; -static bool xive_has_save_restore; +bool xive_has_save_restore; int xive_native_populate_irq_data(u32 hw_irq, struct xive_irq_data *data) { @@ -63,6 +63,8 @@ int xive_native_populate_irq_data(u32 hw_irq, struct xive_irq_data *data) opal_flags = be64_to_cpu(flags); if (opal_flags & OPAL_XIVE_IRQ_STORE_EOI) data->flags |= XIVE_IRQ_FLAG_STORE_EOI; + if (opal_flags & OPAL_XIVE_IRQ_STORE_EOI2) + data->flags |= XIVE_IRQ_FLAG_STORE_EOI; if (opal_flags & OPAL_XIVE_IRQ_LSI) data->flags |= XIVE_IRQ_FLAG_LSI; data->eoi_page = be64_to_cpu(eoi_page); @@ -459,6 +461,14 @@ void xive_native_sync_queue(u32 hw_irq) } EXPORT_SYMBOL_GPL(xive_native_sync_queue); +#ifdef CONFIG_DEBUG_FS +static int xive_native_debug_create(struct dentry *xive_dir) +{ + debugfs_create_bool("save-restore", 0600, xive_dir, &xive_has_save_restore); + return 0; +} +#endif + static const struct xive_ops xive_native_ops = { .populate_irq_data = xive_native_populate_irq_data, .configure_irq = xive_native_configure_irq, @@ -476,10 +486,13 @@ static const struct xive_ops xive_native_ops = { .get_ipi = xive_native_get_ipi, .put_ipi = xive_native_put_ipi, #endif /* CONFIG_SMP */ +#ifdef CONFIG_DEBUG_FS + .debug_create = xive_native_debug_create, +#endif /* CONFIG_DEBUG_FS */ .name = "native", }; -static bool xive_parse_provisioning(struct device_node *np) +static bool __init xive_parse_provisioning(struct device_node *np) { int rc; @@ -519,7 +532,7 @@ static bool xive_parse_provisioning(struct device_node *np) return true; } -static void xive_native_setup_pools(void) +static void __init xive_native_setup_pools(void) { /* Allocate a pool big enough */ pr_debug("XIVE: Allocating VP block for pool size %u\n", nr_cpu_ids); diff --git a/arch/powerpc/sysdev/xive/spapr.c b/arch/powerpc/sysdev/xive/spapr.c index f143b6f111ac..928f95004501 100644 --- a/arch/powerpc/sysdev/xive/spapr.c +++ b/arch/powerpc/sysdev/xive/spapr.c @@ -44,7 +44,7 @@ struct xive_irq_bitmap { static LIST_HEAD(xive_irq_bitmaps); -static int xive_irq_bitmap_add(int base, int count) +static int __init xive_irq_bitmap_add(int base, int count) { struct xive_irq_bitmap *xibm; @@ -173,7 +173,7 @@ static long plpar_int_get_source_info(unsigned long flags, } while (plpar_busy_delay(rc)); if (rc) { - pr_err("H_INT_GET_SOURCE_INFO lisn=%ld failed %ld\n", lisn, rc); + pr_err("H_INT_GET_SOURCE_INFO lisn=0x%lx failed %ld\n", lisn, rc); return rc; } @@ -182,8 +182,8 @@ static long plpar_int_get_source_info(unsigned long flags, *trig_page = retbuf[2]; *esb_shift = retbuf[3]; - pr_devel("H_INT_GET_SOURCE_INFO flags=%lx eoi=%lx trig=%lx shift=%lx\n", - retbuf[0], retbuf[1], retbuf[2], retbuf[3]); + pr_debug("H_INT_GET_SOURCE_INFO lisn=0x%lx flags=0x%lx eoi=0x%lx trig=0x%lx shift=0x%lx\n", + lisn, retbuf[0], retbuf[1], retbuf[2], retbuf[3]); return 0; } @@ -200,8 +200,8 @@ static long plpar_int_set_source_config(unsigned long flags, long rc; - pr_devel("H_INT_SET_SOURCE_CONFIG flags=%lx lisn=%lx target=%lx prio=%lx sw_irq=%lx\n", - flags, lisn, target, prio, sw_irq); + pr_debug("H_INT_SET_SOURCE_CONFIG flags=0x%lx lisn=0x%lx target=%ld prio=%ld sw_irq=%ld\n", + flags, lisn, target, prio, sw_irq); do { @@ -210,7 +210,7 @@ static long plpar_int_set_source_config(unsigned long flags, } while (plpar_busy_delay(rc)); if (rc) { - pr_err("H_INT_SET_SOURCE_CONFIG lisn=%ld target=%lx prio=%lx failed %ld\n", + pr_err("H_INT_SET_SOURCE_CONFIG lisn=0x%lx target=%ld prio=%ld failed %ld\n", lisn, target, prio, rc); return rc; } @@ -227,7 +227,7 @@ static long plpar_int_get_source_config(unsigned long flags, unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; long rc; - pr_devel("H_INT_GET_SOURCE_CONFIG flags=%lx lisn=%lx\n", flags, lisn); + pr_debug("H_INT_GET_SOURCE_CONFIG flags=0x%lx lisn=0x%lx\n", flags, lisn); do { rc = plpar_hcall(H_INT_GET_SOURCE_CONFIG, retbuf, flags, lisn, @@ -235,7 +235,7 @@ static long plpar_int_get_source_config(unsigned long flags, } while (plpar_busy_delay(rc)); if (rc) { - pr_err("H_INT_GET_SOURCE_CONFIG lisn=%ld failed %ld\n", + pr_err("H_INT_GET_SOURCE_CONFIG lisn=0x%lx failed %ld\n", lisn, rc); return rc; } @@ -244,8 +244,8 @@ static long plpar_int_get_source_config(unsigned long flags, *prio = retbuf[1]; *sw_irq = retbuf[2]; - pr_devel("H_INT_GET_SOURCE_CONFIG target=%lx prio=%lx sw_irq=%lx\n", - retbuf[0], retbuf[1], retbuf[2]); + pr_debug("H_INT_GET_SOURCE_CONFIG target=%ld prio=%ld sw_irq=%ld\n", + retbuf[0], retbuf[1], retbuf[2]); return 0; } @@ -273,8 +273,8 @@ static long plpar_int_get_queue_info(unsigned long flags, *esn_page = retbuf[0]; *esn_size = retbuf[1]; - pr_devel("H_INT_GET_QUEUE_INFO page=%lx size=%lx\n", - retbuf[0], retbuf[1]); + pr_debug("H_INT_GET_QUEUE_INFO cpu=%ld prio=%ld page=0x%lx size=0x%lx\n", + target, priority, retbuf[0], retbuf[1]); return 0; } @@ -289,8 +289,8 @@ static long plpar_int_set_queue_config(unsigned long flags, { long rc; - pr_devel("H_INT_SET_QUEUE_CONFIG flags=%lx target=%lx priority=%lx qpage=%lx qsize=%lx\n", - flags, target, priority, qpage, qsize); + pr_debug("H_INT_SET_QUEUE_CONFIG flags=0x%lx target=%ld priority=0x%lx qpage=0x%lx qsize=0x%lx\n", + flags, target, priority, qpage, qsize); do { rc = plpar_hcall_norets(H_INT_SET_QUEUE_CONFIG, flags, target, @@ -298,7 +298,7 @@ static long plpar_int_set_queue_config(unsigned long flags, } while (plpar_busy_delay(rc)); if (rc) { - pr_err("H_INT_SET_QUEUE_CONFIG cpu=%ld prio=%ld qpage=%lx returned %ld\n", + pr_err("H_INT_SET_QUEUE_CONFIG cpu=%ld prio=%ld qpage=0x%lx returned %ld\n", target, priority, qpage, rc); return rc; } @@ -315,7 +315,7 @@ static long plpar_int_sync(unsigned long flags, unsigned long lisn) } while (plpar_busy_delay(rc)); if (rc) { - pr_err("H_INT_SYNC lisn=%ld returned %ld\n", lisn, rc); + pr_err("H_INT_SYNC lisn=0x%lx returned %ld\n", lisn, rc); return rc; } @@ -333,8 +333,8 @@ static long plpar_int_esb(unsigned long flags, unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; long rc; - pr_devel("H_INT_ESB flags=%lx lisn=%lx offset=%lx in=%lx\n", - flags, lisn, offset, in_data); + pr_debug("H_INT_ESB flags=0x%lx lisn=0x%lx offset=0x%lx in=0x%lx\n", + flags, lisn, offset, in_data); do { rc = plpar_hcall(H_INT_ESB, retbuf, flags, lisn, offset, @@ -342,7 +342,7 @@ static long plpar_int_esb(unsigned long flags, } while (plpar_busy_delay(rc)); if (rc) { - pr_err("H_INT_ESB lisn=%ld offset=%ld returned %ld\n", + pr_err("H_INT_ESB lisn=0x%lx offset=0x%lx returned %ld\n", lisn, offset, rc); return rc; } @@ -653,6 +653,9 @@ static int xive_spapr_debug_show(struct seq_file *m, void *private) struct xive_irq_bitmap *xibm; char *buf = kmalloc(PAGE_SIZE, GFP_KERNEL); + if (!buf) + return -ENOMEM; + list_for_each_entry(xibm, &xive_irq_bitmaps, list) { memset(buf, 0, PAGE_SIZE); bitmap_print_to_pagebuf(true, buf, xibm->bitmap, xibm->count); @@ -687,7 +690,7 @@ static const struct xive_ops xive_spapr_ops = { /* * get max priority from "/ibm,plat-res-int-priorities" */ -static bool xive_get_max_prio(u8 *max_prio) +static bool __init xive_get_max_prio(u8 *max_prio) { struct device_node *rootdn; const __be32 *reg; @@ -741,7 +744,7 @@ static bool xive_get_max_prio(u8 *max_prio) return true; } -static const u8 *get_vec5_feature(unsigned int index) +static const u8 *__init get_vec5_feature(unsigned int index) { unsigned long root, chosen; int size; diff --git a/arch/powerpc/sysdev/xive/xive-internal.h b/arch/powerpc/sysdev/xive/xive-internal.h index 504e7edce358..fe6d95d54af9 100644 --- a/arch/powerpc/sysdev/xive/xive-internal.h +++ b/arch/powerpc/sysdev/xive/xive-internal.h @@ -58,6 +58,7 @@ struct xive_ops { void (*put_ipi)(unsigned int cpu, struct xive_cpu *xc); #endif int (*debug_show)(struct seq_file *m, void *private); + int (*debug_create)(struct dentry *xive_dir); const char *name; }; @@ -72,5 +73,6 @@ static inline u32 xive_alloc_order(u32 queue_shift) } extern bool xive_cmdline_disabled; +extern bool xive_has_save_restore; #endif /* __XIVE_INTERNAL_H */ diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index 8b28ff9d98d1..fd72753e8ad5 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -125,7 +125,7 @@ static unsigned bpinstr = 0x7fe00008; /* trap */ static int cmds(struct pt_regs *); static int mread(unsigned long, void *, int); static int mwrite(unsigned long, void *, int); -static int mread_instr(unsigned long, struct ppc_inst *); +static int mread_instr(unsigned long, ppc_inst_t *); static int handle_fault(struct pt_regs *); static void byterev(unsigned char *, int); static void memex(void); @@ -908,7 +908,7 @@ static struct bpt *new_breakpoint(unsigned long a) static void insert_bpts(void) { int i; - struct ppc_inst instr, instr2; + ppc_inst_t instr, instr2; struct bpt *bp, *bp2; bp = bpts; @@ -988,7 +988,7 @@ static void remove_bpts(void) { int i; struct bpt *bp; - struct ppc_inst instr; + ppc_inst_t instr; bp = bpts; for (i = 0; i < NBPTS; ++i, ++bp) { @@ -1159,7 +1159,7 @@ cmds(struct pt_regs *excp) case 'P': show_tasks(); break; -#ifdef CONFIG_PPC_BOOK3S +#if defined(CONFIG_PPC_BOOK3S_32) || defined(CONFIG_PPC_64S_HASH_MMU) case 'u': dump_segments(); break; @@ -1204,7 +1204,7 @@ static int do_step(struct pt_regs *regs) */ static int do_step(struct pt_regs *regs) { - struct ppc_inst instr; + ppc_inst_t instr; int stepped; force_enable_xmon(); @@ -1459,7 +1459,7 @@ csum(void) */ static long check_bp_loc(unsigned long addr) { - struct ppc_inst instr; + ppc_inst_t instr; addr &= ~3; if (!is_kernel_addr(addr)) { @@ -2107,8 +2107,14 @@ static void dump_300_sprs(void) if (!cpu_has_feature(CPU_FTR_ARCH_300)) return; - printf("pidr = %.16lx tidr = %.16lx\n", - mfspr(SPRN_PID), mfspr(SPRN_TIDR)); + if (cpu_has_feature(CPU_FTR_P9_TIDR)) { + printf("pidr = %.16lx tidr = %.16lx\n", + mfspr(SPRN_PID), mfspr(SPRN_TIDR)); + } else { + printf("pidr = %.16lx\n", + mfspr(SPRN_PID)); + } + printf("psscr = %.16lx\n", hv ? mfspr(SPRN_PSSCR) : mfspr(SPRN_PSSCR_PR)); @@ -2300,7 +2306,7 @@ mwrite(unsigned long adrs, void *buf, int size) } static int -mread_instr(unsigned long adrs, struct ppc_inst *instr) +mread_instr(unsigned long adrs, ppc_inst_t *instr) { volatile int n; @@ -2608,7 +2614,7 @@ static void dump_tracing(void) static void dump_one_paca(int cpu) { struct paca_struct *p; -#ifdef CONFIG_PPC_BOOK3S_64 +#ifdef CONFIG_PPC_64S_HASH_MMU int i = 0; #endif @@ -2650,6 +2656,7 @@ static void dump_one_paca(int cpu) DUMP(p, cpu_start, "%#-*x"); DUMP(p, kexec_state, "%#-*x"); #ifdef CONFIG_PPC_BOOK3S_64 +#ifdef CONFIG_PPC_64S_HASH_MMU if (!early_radix_enabled()) { for (i = 0; i < SLB_NUM_BOLTED; i++) { u64 esid, vsid; @@ -2677,6 +2684,7 @@ static void dump_one_paca(int cpu) 22, "slb_cache", i, p->slb_cache[i]); } } +#endif DUMP(p, rfi_flush_fallback_area, "%-*px"); #endif @@ -2809,12 +2817,12 @@ static void dump_all_xives(void) { int cpu; - if (num_possible_cpus() == 0) { + if (num_online_cpus() == 0) { printf("No possible cpus, use 'dx #' to dump individual cpus\n"); return; } - for_each_possible_cpu(cpu) + for_each_online_cpu(cpu) dump_one_xive(cpu); } @@ -3020,7 +3028,7 @@ generic_inst_dump(unsigned long adr, long count, int praddr, { int nr, dotted; unsigned long first_adr; - struct ppc_inst inst, last_inst = ppc_inst(0); + ppc_inst_t inst, last_inst = ppc_inst(0); dotted = 0; for (first_adr = adr; count > 0; --count, adr += ppc_inst_len(inst)) { @@ -3740,7 +3748,7 @@ static void xmon_print_symbol(unsigned long address, const char *mid, printf("%s", after); } -#ifdef CONFIG_PPC_BOOK3S_64 +#ifdef CONFIG_PPC_64S_HASH_MMU void dump_segments(void) { int i; @@ -4128,7 +4136,7 @@ struct spu_info { static struct spu_info spu_info[XMON_NUM_SPUS]; -void xmon_register_spus(struct list_head *list) +void __init xmon_register_spus(struct list_head *list) { struct spu *spu; diff --git a/arch/powerpc/xmon/xmon_bpts.h b/arch/powerpc/xmon/xmon_bpts.h index 57e6fb03de48..377068f52edb 100644 --- a/arch/powerpc/xmon/xmon_bpts.h +++ b/arch/powerpc/xmon/xmon_bpts.h @@ -5,8 +5,8 @@ #define NBPTS 256 #ifndef __ASSEMBLY__ #include <asm/inst.h> -#define BPT_SIZE (sizeof(struct ppc_inst) * 2) -#define BPT_WORDS (BPT_SIZE / sizeof(struct ppc_inst)) +#define BPT_SIZE (sizeof(ppc_inst_t) * 2) +#define BPT_WORDS (BPT_SIZE / sizeof(ppc_inst_t)) extern unsigned int bpt_table[NBPTS * BPT_WORDS]; #endif /* __ASSEMBLY__ */ diff --git a/drivers/macintosh/mediabay.c b/drivers/macintosh/mediabay.c index eab7e83c11c4..b17660c022eb 100644 --- a/drivers/macintosh/mediabay.c +++ b/drivers/macintosh/mediabay.c @@ -703,7 +703,7 @@ static const struct mb_ops keylargo_mb_ops = { * Therefore we do it all by polling the media bay once each tick. */ -static struct of_device_id media_bay_match[] = +static const struct of_device_id media_bay_match[] = { { .name = "media-bay", diff --git a/drivers/misc/cxl/Kconfig b/drivers/misc/cxl/Kconfig index 51aecafdcbdf..5efc4151bf58 100644 --- a/drivers/misc/cxl/Kconfig +++ b/drivers/misc/cxl/Kconfig @@ -6,6 +6,7 @@ config CXL_BASE bool select PPC_COPRO_BASE + select PPC_64S_HASH_MMU config CXL tristate "Support for IBM Coherent Accelerators (CXL)" diff --git a/drivers/misc/lkdtm/Makefile b/drivers/misc/lkdtm/Makefile index aa12097668d3..83a7baf5df82 100644 --- a/drivers/misc/lkdtm/Makefile +++ b/drivers/misc/lkdtm/Makefile @@ -11,7 +11,7 @@ lkdtm-$(CONFIG_LKDTM) += usercopy.o lkdtm-$(CONFIG_LKDTM) += stackleak.o lkdtm-$(CONFIG_LKDTM) += cfi.o lkdtm-$(CONFIG_LKDTM) += fortify.o -lkdtm-$(CONFIG_PPC_BOOK3S_64) += powerpc.o +lkdtm-$(CONFIG_PPC_64S_HASH_MMU) += powerpc.o KASAN_SANITIZE_rodata.o := n KASAN_SANITIZE_stackleak.o := n diff --git a/drivers/misc/lkdtm/core.c b/drivers/misc/lkdtm/core.c index 609d9ee2acc0..82fb276f7e09 100644 --- a/drivers/misc/lkdtm/core.c +++ b/drivers/misc/lkdtm/core.c @@ -182,7 +182,7 @@ static const struct crashtype crashtypes[] = { CRASHTYPE(FORTIFIED_SUBOBJECT), CRASHTYPE(FORTIFIED_STRSCPY), CRASHTYPE(DOUBLE_FAULT), -#ifdef CONFIG_PPC_BOOK3S_64 +#ifdef CONFIG_PPC_64S_HASH_MMU CRASHTYPE(PPC_SLB_MULTIHIT), #endif }; diff --git a/drivers/misc/ocxl/file.c b/drivers/misc/ocxl/file.c index e70525eedaae..d881f5e40ad9 100644 --- a/drivers/misc/ocxl/file.c +++ b/drivers/misc/ocxl/file.c @@ -74,7 +74,6 @@ static long afu_ioctl_attach(struct ocxl_context *ctx, { struct ocxl_ioctl_attach arg; u64 amr = 0; - int rc; pr_debug("%s for context %d\n", __func__, ctx->pasid); @@ -86,8 +85,7 @@ static long afu_ioctl_attach(struct ocxl_context *ctx, return -EINVAL; amr = arg.amr & mfspr(SPRN_UAMOR); - rc = ocxl_context_attach(ctx, amr, current->mm); - return rc; + return ocxl_context_attach(ctx, amr, current->mm); } static long afu_ioctl_get_metadata(struct ocxl_context *ctx, diff --git a/include/linux/cuda.h b/include/linux/cuda.h index 45bfe9d61271..daf3e6f98444 100644 --- a/include/linux/cuda.h +++ b/include/linux/cuda.h @@ -12,7 +12,7 @@ #include <uapi/linux/cuda.h> -extern int find_via_cuda(void); +extern int __init find_via_cuda(void); extern int cuda_request(struct adb_request *req, void (*done)(struct adb_request *), int nbytes, ...); extern void cuda_poll(void); diff --git a/include/linux/pmu.h b/include/linux/pmu.h index 52453a24a24f..c677442d007c 100644 --- a/include/linux/pmu.h +++ b/include/linux/pmu.h @@ -13,7 +13,7 @@ #include <uapi/linux/pmu.h> -extern int find_via_pmu(void); +extern int __init find_via_pmu(void); extern int pmu_request(struct adb_request *req, void (*done)(struct adb_request *), int nbytes, ...); diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index bd8860eeb291..1b65042ab1db 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -1332,7 +1332,10 @@ union perf_mem_data_src { /* hop level */ #define PERF_MEM_HOPS_0 0x01 /* remote core, same node */ -/* 2-7 available */ +#define PERF_MEM_HOPS_1 0x02 /* remote node, same socket */ +#define PERF_MEM_HOPS_2 0x03 /* remote socket, same board */ +#define PERF_MEM_HOPS_3 0x04 /* remote board */ +/* 5-7 available */ #define PERF_MEM_HOPS_SHIFT 43 #define PERF_MEM_S(a, s) \ diff --git a/tools/testing/selftests/powerpc/security/mitigation-patching.sh b/tools/testing/selftests/powerpc/security/mitigation-patching.sh index b0b20e0b4e30..f43aa4b77fba 100755 --- a/tools/testing/selftests/powerpc/security/mitigation-patching.sh +++ b/tools/testing/selftests/powerpc/security/mitigation-patching.sh @@ -44,7 +44,10 @@ mitigations="barrier_nospec stf_barrier count_cache_flush rfi_flush entry_flush for m in $mitigations do - do_one "$m" & + if [[ -f /sys/kernel/debug/powerpc/$m ]] + then + do_one "$m" & + fi done echo "Spawned threads enabling/disabling mitigations ..." diff --git a/tools/testing/selftests/powerpc/security/spectre_v2.c b/tools/testing/selftests/powerpc/security/spectre_v2.c index adc2b7294e5f..83647b8277e7 100644 --- a/tools/testing/selftests/powerpc/security/spectre_v2.c +++ b/tools/testing/selftests/powerpc/security/spectre_v2.c @@ -193,7 +193,7 @@ int spectre_v2_test(void) * We are not vulnerable and reporting otherwise, so * missing such a mismatch is safe. */ - if (state == VULNERABLE) + if (miss_percent > 95) return 4; return 1; diff --git a/tools/testing/selftests/powerpc/signal/.gitignore b/tools/testing/selftests/powerpc/signal/.gitignore index ce3375cd8e73..9d0915777fed 100644 --- a/tools/testing/selftests/powerpc/signal/.gitignore +++ b/tools/testing/selftests/powerpc/signal/.gitignore @@ -4,3 +4,5 @@ signal_tm sigfuz sigreturn_vdso sig_sc_double_restart +sigreturn_kernel +sigreturn_unaligned diff --git a/tools/testing/selftests/powerpc/signal/Makefile b/tools/testing/selftests/powerpc/signal/Makefile index d6ae54663aed..f679d260afc8 100644 --- a/tools/testing/selftests/powerpc/signal/Makefile +++ b/tools/testing/selftests/powerpc/signal/Makefile @@ -1,5 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 TEST_GEN_PROGS := signal signal_tm sigfuz sigreturn_vdso sig_sc_double_restart +TEST_GEN_PROGS += sigreturn_kernel +TEST_GEN_PROGS += sigreturn_unaligned CFLAGS += -maltivec $(OUTPUT)/signal_tm: CFLAGS += -mhtm diff --git a/tools/testing/selftests/powerpc/signal/sigreturn_kernel.c b/tools/testing/selftests/powerpc/signal/sigreturn_kernel.c new file mode 100644 index 000000000000..0a1b6e591eee --- /dev/null +++ b/tools/testing/selftests/powerpc/signal/sigreturn_kernel.c @@ -0,0 +1,132 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Test that we can't sigreturn to kernel addresses, or to kernel mode. + */ + +#define _GNU_SOURCE + +#include <stdio.h> +#include <signal.h> +#include <stdlib.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <unistd.h> + +#include "utils.h" + +#define MSR_PR (1ul << 14) + +static volatile unsigned long long sigreturn_addr; +static volatile unsigned long long sigreturn_msr_mask; + +static void sigusr1_handler(int signo, siginfo_t *si, void *uc_ptr) +{ + ucontext_t *uc = (ucontext_t *)uc_ptr; + + if (sigreturn_addr) + UCONTEXT_NIA(uc) = sigreturn_addr; + + if (sigreturn_msr_mask) + UCONTEXT_MSR(uc) &= sigreturn_msr_mask; +} + +static pid_t fork_child(void) +{ + pid_t pid; + + pid = fork(); + if (pid == 0) { + raise(SIGUSR1); + exit(0); + } + + return pid; +} + +static int expect_segv(pid_t pid) +{ + int child_ret; + + waitpid(pid, &child_ret, 0); + FAIL_IF(WIFEXITED(child_ret)); + FAIL_IF(!WIFSIGNALED(child_ret)); + FAIL_IF(WTERMSIG(child_ret) != 11); + + return 0; +} + +int test_sigreturn_kernel(void) +{ + struct sigaction act; + int child_ret, i; + pid_t pid; + + act.sa_sigaction = sigusr1_handler; + act.sa_flags = SA_SIGINFO; + sigemptyset(&act.sa_mask); + + FAIL_IF(sigaction(SIGUSR1, &act, NULL)); + + for (i = 0; i < 2; i++) { + // Return to kernel + sigreturn_addr = 0xcull << 60; + pid = fork_child(); + expect_segv(pid); + + // Return to kernel virtual + sigreturn_addr = 0xc008ull << 48; + pid = fork_child(); + expect_segv(pid); + + // Return out of range + sigreturn_addr = 0xc010ull << 48; + pid = fork_child(); + expect_segv(pid); + + // Return to no-man's land, just below PAGE_OFFSET + sigreturn_addr = (0xcull << 60) - (64 * 1024); + pid = fork_child(); + expect_segv(pid); + + // Return to no-man's land, above TASK_SIZE_4PB + sigreturn_addr = 0x1ull << 52; + pid = fork_child(); + expect_segv(pid); + + // Return to 0xd space + sigreturn_addr = 0xdull << 60; + pid = fork_child(); + expect_segv(pid); + + // Return to 0xe space + sigreturn_addr = 0xeull << 60; + pid = fork_child(); + expect_segv(pid); + + // Return to 0xf space + sigreturn_addr = 0xfull << 60; + pid = fork_child(); + expect_segv(pid); + + // Attempt to set PR=0 for 2nd loop (should be blocked by kernel) + sigreturn_msr_mask = ~MSR_PR; + } + + printf("All children killed as expected\n"); + + // Don't change address, just MSR, should return to user as normal + sigreturn_addr = 0; + sigreturn_msr_mask = ~MSR_PR; + pid = fork_child(); + waitpid(pid, &child_ret, 0); + FAIL_IF(!WIFEXITED(child_ret)); + FAIL_IF(WIFSIGNALED(child_ret)); + FAIL_IF(WEXITSTATUS(child_ret) != 0); + + return 0; +} + +int main(void) +{ + return test_harness(test_sigreturn_kernel, "sigreturn_kernel"); +} diff --git a/tools/testing/selftests/powerpc/signal/sigreturn_unaligned.c b/tools/testing/selftests/powerpc/signal/sigreturn_unaligned.c new file mode 100644 index 000000000000..6e58ee4f0fdf --- /dev/null +++ b/tools/testing/selftests/powerpc/signal/sigreturn_unaligned.c @@ -0,0 +1,43 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Test sigreturn to an unaligned address, ie. low 2 bits set. + * Nothing bad should happen. + * This was able to trigger warnings with CONFIG_PPC_RFI_SRR_DEBUG=y. + */ + +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <ucontext.h> +#include <unistd.h> + +#include "utils.h" + + +static void sigusr1_handler(int signo, siginfo_t *info, void *ptr) +{ + ucontext_t *uc = ptr; + + UCONTEXT_NIA(uc) |= 3; +} + +static int test_sigreturn_unaligned(void) +{ + struct sigaction action; + + memset(&action, 0, sizeof(action)); + action.sa_sigaction = sigusr1_handler; + action.sa_flags = SA_SIGINFO; + + FAIL_IF(sigaction(SIGUSR1, &action, NULL) == -1); + + raise(SIGUSR1); + + return 0; +} + +int main(void) +{ + return test_harness(test_sigreturn_unaligned, "sigreturn_unaligned"); +} |