From fda0440d82d38e603cd713e997fdfedb97e0ef0c Mon Sep 17 00:00:00 2001 From: Rui Teng Date: Thu, 25 Aug 2016 14:31:10 +0800 Subject: powerpc: Remove suspect CONFIG_PPC_BOOK3E #ifdefs in nohash/64/pgtable.h There are three #ifdef CONFIG_PPC_BOOK3E sections in nohash/64/pgtable.h. And there should be no configurations possible which use nohash/64/pgtable.h but don't also enable CONFIG_PPC_BOOK3E. Suggested-by: Michael Ellerman Signed-off-by: Rui Teng Reviewed-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/nohash/64/pgtable.h | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h b/arch/powerpc/include/asm/nohash/64/pgtable.h index 653a1838469d..e9e540a804fc 100644 --- a/arch/powerpc/include/asm/nohash/64/pgtable.h +++ b/arch/powerpc/include/asm/nohash/64/pgtable.h @@ -26,15 +26,11 @@ #else #define PMD_CACHE_INDEX PMD_INDEX_SIZE #endif + /* * Define the address range of the kernel non-linear virtual area */ - -#ifdef CONFIG_PPC_BOOK3E #define KERN_VIRT_START ASM_CONST(0x8000000000000000) -#else -#define KERN_VIRT_START ASM_CONST(0xD000000000000000) -#endif #define KERN_VIRT_SIZE ASM_CONST(0x0000100000000000) /* @@ -43,11 +39,7 @@ * (we keep a quarter for the virtual memmap) */ #define VMALLOC_START KERN_VIRT_START -#ifdef CONFIG_PPC_BOOK3E #define VMALLOC_SIZE (KERN_VIRT_SIZE >> 2) -#else -#define VMALLOC_SIZE (KERN_VIRT_SIZE >> 1) -#endif #define VMALLOC_END (VMALLOC_START + VMALLOC_SIZE) /* @@ -85,12 +77,8 @@ * Defines the address of the vmemap area, in its own region on * hash table CPUs and after the vmalloc space on Book3E */ -#ifdef CONFIG_PPC_BOOK3E #define VMEMMAP_BASE VMALLOC_END #define VMEMMAP_END KERN_IO_START -#else -#define VMEMMAP_BASE (VMEMMAP_REGION_ID << REGION_SHIFT) -#endif #define vmemmap ((struct page *)VMEMMAP_BASE) -- cgit v1.3.1 From f4329f2ecb149282fdfdd8830a936a56b1497a05 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Thu, 13 Oct 2016 14:43:52 +1100 Subject: powerpc/64s: Reduce exception alignment Exception handlers are aligned to 128 bytes (L1 cache) on 64s, which is overkill. It can reduce the icache footprint of any individual exception path. However taken as a whole, the expansion in icache footprint seems likely to be counter-productive and cause more total misses. Create IFETCH_ALIGN_SHIFT/BYTES, which should give optimal ifetch alignment with much more reasonable alignment. This saves 1792 bytes from head_64.o text with an allmodconfig build. Other subarchitectures should define appropriate IFETCH_ALIGN_SHIFT values if this becomes more widely used. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/cache.h | 3 +++ arch/powerpc/include/asm/head-64.h | 8 ++++---- arch/powerpc/kernel/exceptions-64s.S | 2 +- 3 files changed, 8 insertions(+), 5 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/cache.h b/arch/powerpc/include/asm/cache.h index ffbafbf76b19..7657aa897a38 100644 --- a/arch/powerpc/include/asm/cache.h +++ b/arch/powerpc/include/asm/cache.h @@ -20,12 +20,15 @@ #endif #else /* CONFIG_PPC64 */ #define L1_CACHE_SHIFT 7 +#define IFETCH_ALIGN_SHIFT 4 /* POWER8,9 */ #endif #define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT) #define SMP_CACHE_BYTES L1_CACHE_BYTES +#define IFETCH_ALIGN_BYTES (1 << IFETCH_ALIGN_SHIFT) + #if defined(__powerpc64__) && !defined(__ASSEMBLY__) struct ppc64_caches { u32 dsize; /* L1 d-cache size */ diff --git a/arch/powerpc/include/asm/head-64.h b/arch/powerpc/include/asm/head-64.h index ab90c2fa1ea6..fca7033839a9 100644 --- a/arch/powerpc/include/asm/head-64.h +++ b/arch/powerpc/include/asm/head-64.h @@ -95,12 +95,12 @@ end_##sname: #define __FIXED_SECTION_ENTRY_BEGIN(sname, name, __align) \ USE_FIXED_SECTION(sname); \ - .align __align; \ + .balign __align; \ .global name; \ name: #define FIXED_SECTION_ENTRY_BEGIN(sname, name) \ - __FIXED_SECTION_ENTRY_BEGIN(sname, name, 0) + __FIXED_SECTION_ENTRY_BEGIN(sname, name, IFETCH_ALIGN_BYTES) #define FIXED_SECTION_ENTRY_BEGIN_LOCATION(sname, name, start) \ USE_FIXED_SECTION(sname); \ @@ -203,9 +203,9 @@ name: #define EXC_VIRT_END(name, start, end) \ FIXED_SECTION_ENTRY_END_LOCATION(virt_vectors, exc_virt_##start##_##name, end) -#define EXC_COMMON_BEGIN(name) \ +#define EXC_COMMON_BEGIN(name) \ USE_TEXT_SECTION(); \ - .align 7; \ + .balign IFETCH_ALIGN_BYTES; \ .global name; \ DEFINE_FIXED_SYMBOL(name); \ name: diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 08ba447a4b3d..50f0c5ab7e02 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -1403,7 +1403,7 @@ USE_TEXT_SECTION() /* * Hash table stuff */ - .align 7 + .balign IFETCH_ALIGN_BYTES do_hash_page: #ifdef CONFIG_PPC_STD_MMU_64 andis. r0,r4,0xa410 /* weird error? */ -- cgit v1.3.1 From e3f2c6c39371c0e67c7fd6ebcb8b74ca27ce95d7 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Fri, 14 Oct 2016 15:58:28 +1100 Subject: powerpc/asm: Allow including ppc_asm.h in asm files There's no reason to #error if we include ppc_asm.h in asm files, the ifdef already prevents any problems. Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/ppc_asm.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h index c73750b0d9fa..28ab87e5b739 100644 --- a/arch/powerpc/include/asm/ppc_asm.h +++ b/arch/powerpc/include/asm/ppc_asm.h @@ -10,9 +10,7 @@ #include #include -#ifndef __ASSEMBLY__ -#error __FILE__ should only be used in assembler files -#else +#ifdef __ASSEMBLY__ #define SZL (BITS_PER_LONG/8) @@ -779,5 +777,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_601) .long 0xa6037b7d; /* mtsrr1 r11 */ \ .long 0x2400004c /* rfid */ #endif /* !CONFIG_PPC_BOOK3E */ + #endif /* __ASSEMBLY__ */ + #endif /* _ASM_POWERPC_PPC_ASM_H */ -- cgit v1.3.1 From 24bfa6a9e0d4fe414dfc4ad06c93e10c4c37194e Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Thu, 13 Oct 2016 16:42:53 +1100 Subject: powerpc: EX_TABLE macro for exception tables This macro is taken from s390, and allows more flexibility in changing exception table format. mpe: Put it in ppc_asm.h and only define one version using stringinfy_in_c(). Add some empty definitions and headers to keep the selftests happy. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/futex.h | 14 +- arch/powerpc/include/asm/io.h | 19 +- arch/powerpc/include/asm/ppc_asm.h | 10 + arch/powerpc/include/asm/uaccess.h | 25 +- arch/powerpc/include/asm/word-at-a-time.h | 6 +- arch/powerpc/lib/checksum_32.S | 47 ++-- arch/powerpc/lib/checksum_64.S | 20 +- arch/powerpc/lib/copy_32.S | 55 ++--- arch/powerpc/lib/copyuser_64.S | 271 ++++++++++----------- arch/powerpc/lib/copyuser_power7.S | 20 +- arch/powerpc/lib/ldstfp.S | 24 +- arch/powerpc/lib/sstep.c | 15 +- arch/powerpc/lib/string.S | 11 +- arch/powerpc/lib/string_64.S | 16 +- arch/powerpc/sysdev/fsl_rio.c | 6 +- arch/powerpc/sysdev/tsi108_pci.c | 6 +- .../selftests/powerpc/copyloops/asm/ppc_asm.h | 2 + .../selftests/powerpc/primitives/asm/firmware.h | 0 .../selftests/powerpc/primitives/asm/ppc_asm.h | 1 + .../selftests/powerpc/primitives/asm/processor.h | 0 .../selftests/powerpc/primitives/linux/stringify.h | 0 21 files changed, 241 insertions(+), 327 deletions(-) create mode 100644 tools/testing/selftests/powerpc/primitives/asm/firmware.h create mode 120000 tools/testing/selftests/powerpc/primitives/asm/ppc_asm.h create mode 100644 tools/testing/selftests/powerpc/primitives/asm/processor.h create mode 100644 tools/testing/selftests/powerpc/primitives/linux/stringify.h (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/futex.h b/arch/powerpc/include/asm/futex.h index 2a9cf845473b..eaada6c92344 100644 --- a/arch/powerpc/include/asm/futex.h +++ b/arch/powerpc/include/asm/futex.h @@ -23,10 +23,8 @@ "4: li %1,%3\n" \ "b 3b\n" \ ".previous\n" \ - ".section __ex_table,\"a\"\n" \ - ".align 3\n" \ - PPC_LONG "1b,4b,2b,4b\n" \ - ".previous" \ + EX_TABLE(1b, 4b) \ + EX_TABLE(2b, 4b) \ : "=&r" (oldval), "=&r" (ret) \ : "b" (uaddr), "i" (-EFAULT), "r" (oparg) \ : "cr0", "memory") @@ -104,11 +102,9 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, "3: .section .fixup,\"ax\"\n\ 4: li %0,%6\n\ b 3b\n\ - .previous\n\ - .section __ex_table,\"a\"\n\ - .align 3\n\ - " PPC_LONG "1b,4b,2b,4b\n\ - .previous" \ + .previous\n" + EX_TABLE(1b, 4b) + EX_TABLE(2b, 4b) : "+r" (ret), "=&r" (prev), "+m" (*uaddr) : "r" (uaddr), "r" (oldval), "r" (newval), "i" (-EFAULT) : "cc", "memory"); diff --git a/arch/powerpc/include/asm/io.h b/arch/powerpc/include/asm/io.h index f6fda8482f60..5ed292431b5b 100644 --- a/arch/powerpc/include/asm/io.h +++ b/arch/powerpc/include/asm/io.h @@ -33,6 +33,7 @@ extern struct pci_dev *isa_bridge_pcidev; #include #include #include +#include #include @@ -458,13 +459,10 @@ static inline unsigned int name(unsigned int port) \ "5: li %0,-1\n" \ " b 4b\n" \ ".previous\n" \ - ".section __ex_table,\"a\"\n" \ - " .align 2\n" \ - " .long 0b,5b\n" \ - " .long 1b,5b\n" \ - " .long 2b,5b\n" \ - " .long 3b,5b\n" \ - ".previous" \ + EX_TABLE(0b, 5b) \ + EX_TABLE(1b, 5b) \ + EX_TABLE(2b, 5b) \ + EX_TABLE(3b, 5b) \ : "=&r" (x) \ : "r" (port + _IO_BASE) \ : "memory"); \ @@ -479,11 +477,8 @@ static inline void name(unsigned int val, unsigned int port) \ "0:" op " %0,0,%1\n" \ "1: sync\n" \ "2:\n" \ - ".section __ex_table,\"a\"\n" \ - " .align 2\n" \ - " .long 0b,2b\n" \ - " .long 1b,2b\n" \ - ".previous" \ + EX_TABLE(0b, 2b) \ + EX_TABLE(1b, 2b) \ : : "r" (val), "r" (port + _IO_BASE) \ : "memory"); \ } diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h index 28ab87e5b739..6af8852d1f7f 100644 --- a/arch/powerpc/include/asm/ppc_asm.h +++ b/arch/powerpc/include/asm/ppc_asm.h @@ -780,4 +780,14 @@ END_FTR_SECTION_IFCLR(CPU_FTR_601) #endif /* __ASSEMBLY__ */ +/* + * Helper macro for exception table entries + */ +#define EX_TABLE(_fault, _target) \ + stringify_in_c(.section __ex_table,"a";)\ + PPC_LONG_ALIGN stringify_in_c(;) \ + PPC_LONG stringify_in_c(_fault;) \ + PPC_LONG stringify_in_c(_target;) \ + stringify_in_c(.previous) + #endif /* _ASM_POWERPC_PPC_ASM_H */ diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h index c266227fdd5b..e0b724619c4a 100644 --- a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h @@ -7,6 +7,7 @@ #include #include #include +#include #include #include @@ -132,10 +133,7 @@ extern long __put_user_bad(void); "3: li %0,%3\n" \ " b 2b\n" \ ".previous\n" \ - ".section __ex_table,\"a\"\n" \ - PPC_LONG_ALIGN "\n" \ - PPC_LONG "1b,3b\n" \ - ".previous" \ + EX_TABLE(1b, 3b) \ : "=r" (err) \ : "r" (x), "b" (addr), "i" (-EFAULT), "0" (err)) @@ -152,11 +150,8 @@ extern long __put_user_bad(void); "4: li %0,%3\n" \ " b 3b\n" \ ".previous\n" \ - ".section __ex_table,\"a\"\n" \ - PPC_LONG_ALIGN "\n" \ - PPC_LONG "1b,4b\n" \ - PPC_LONG "2b,4b\n" \ - ".previous" \ + EX_TABLE(1b, 4b) \ + EX_TABLE(2b, 4b) \ : "=r" (err) \ : "r" (x), "b" (addr), "i" (-EFAULT), "0" (err)) #endif /* __powerpc64__ */ @@ -215,10 +210,7 @@ extern long __get_user_bad(void); " li %1,0\n" \ " b 2b\n" \ ".previous\n" \ - ".section __ex_table,\"a\"\n" \ - PPC_LONG_ALIGN "\n" \ - PPC_LONG "1b,3b\n" \ - ".previous" \ + EX_TABLE(1b, 3b) \ : "=r" (err), "=r" (x) \ : "b" (addr), "i" (-EFAULT), "0" (err)) @@ -237,11 +229,8 @@ extern long __get_user_bad(void); " li %1+1,0\n" \ " b 3b\n" \ ".previous\n" \ - ".section __ex_table,\"a\"\n" \ - PPC_LONG_ALIGN "\n" \ - PPC_LONG "1b,4b\n" \ - PPC_LONG "2b,4b\n" \ - ".previous" \ + EX_TABLE(1b, 4b) \ + EX_TABLE(2b, 4b) \ : "=r" (err), "=&r" (x) \ : "b" (addr), "i" (-EFAULT), "0" (err)) #endif /* __powerpc64__ */ diff --git a/arch/powerpc/include/asm/word-at-a-time.h b/arch/powerpc/include/asm/word-at-a-time.h index 4afe66aa1400..f3f4710d4ff5 100644 --- a/arch/powerpc/include/asm/word-at-a-time.h +++ b/arch/powerpc/include/asm/word-at-a-time.h @@ -7,6 +7,7 @@ #include #include +#include #ifdef __BIG_ENDIAN__ @@ -193,10 +194,7 @@ static inline unsigned long load_unaligned_zeropad(const void *addr) #endif "b 2b\n" ".previous\n" - ".section __ex_table,\"a\"\n\t" - PPC_LONG_ALIGN "\n\t" - PPC_LONG "1b,3b\n" - ".previous" + EX_TABLE(1b, 3b) : [tmp] "=&b" (tmp), [offset] "=&r" (offset), [ret] "=&r" (ret) : [addr] "b" (addr), "m" (*(unsigned long *)addr)); diff --git a/arch/powerpc/lib/checksum_32.S b/arch/powerpc/lib/checksum_32.S index ea29a5d67743..9a671c774b22 100644 --- a/arch/powerpc/lib/checksum_32.S +++ b/arch/powerpc/lib/checksum_32.S @@ -103,17 +103,14 @@ EXPORT_SYMBOL(__csum_partial) adde r12,r12,r10 #define CSUM_COPY_16_BYTES_EXCODE(n) \ -.section __ex_table,"a"; \ - .align 2; \ - .long 8 ## n ## 0b,src_error; \ - .long 8 ## n ## 1b,src_error; \ - .long 8 ## n ## 2b,src_error; \ - .long 8 ## n ## 3b,src_error; \ - .long 8 ## n ## 4b,dst_error; \ - .long 8 ## n ## 5b,dst_error; \ - .long 8 ## n ## 6b,dst_error; \ - .long 8 ## n ## 7b,dst_error; \ - .text + EX_TABLE(8 ## n ## 0b, src_error); \ + EX_TABLE(8 ## n ## 1b, src_error); \ + EX_TABLE(8 ## n ## 2b, src_error); \ + EX_TABLE(8 ## n ## 3b, src_error); \ + EX_TABLE(8 ## n ## 4b, dst_error); \ + EX_TABLE(8 ## n ## 5b, dst_error); \ + EX_TABLE(8 ## n ## 6b, dst_error); \ + EX_TABLE(8 ## n ## 7b, dst_error); .text .stabs "arch/powerpc/lib/",N_SO,0,0,0f @@ -263,14 +260,11 @@ dst_error: stw r0,0(r8) blr - .section __ex_table,"a" - .align 2 - .long 70b,src_error - .long 71b,dst_error - .long 72b,src_error - .long 73b,dst_error - .long 54b,dst_error - .text + EX_TABLE(70b, src_error); + EX_TABLE(71b, dst_error); + EX_TABLE(72b, src_error); + EX_TABLE(73b, dst_error); + EX_TABLE(54b, dst_error); /* * this stuff handles faults in the cacheline loop and branches to either @@ -291,12 +285,11 @@ dst_error: #endif #endif - .section __ex_table,"a" - .align 2 - .long 30b,src_error - .long 31b,dst_error - .long 40b,src_error - .long 41b,dst_error - .long 50b,src_error - .long 51b,dst_error + EX_TABLE(30b, src_error); + EX_TABLE(31b, dst_error); + EX_TABLE(40b, src_error); + EX_TABLE(41b, dst_error); + EX_TABLE(50b, src_error); + EX_TABLE(51b, dst_error); + EXPORT_SYMBOL(csum_partial_copy_generic) diff --git a/arch/powerpc/lib/checksum_64.S b/arch/powerpc/lib/checksum_64.S index fd9176671f9f..d0d311e108ff 100644 --- a/arch/powerpc/lib/checksum_64.S +++ b/arch/powerpc/lib/checksum_64.S @@ -182,34 +182,22 @@ EXPORT_SYMBOL(__csum_partial) .macro srcnr 100: - .section __ex_table,"a" - .align 3 - .llong 100b,.Lsrc_error_nr - .previous + EX_TABLE(100b,.Lsrc_error_nr) .endm .macro source 150: - .section __ex_table,"a" - .align 3 - .llong 150b,.Lsrc_error - .previous + EX_TABLE(150b,.Lsrc_error) .endm .macro dstnr 200: - .section __ex_table,"a" - .align 3 - .llong 200b,.Ldest_error_nr - .previous + EX_TABLE(200b,.Ldest_error_nr) .endm .macro dest 250: - .section __ex_table,"a" - .align 3 - .llong 250b,.Ldest_error - .previous + EX_TABLE(250b,.Ldest_error) .endm /* diff --git a/arch/powerpc/lib/copy_32.S b/arch/powerpc/lib/copy_32.S index 40cce33b08d6..ff0d894d7ff9 100644 --- a/arch/powerpc/lib/copy_32.S +++ b/arch/powerpc/lib/copy_32.S @@ -49,17 +49,14 @@ 9 ## n ## 1: \ addi r5,r5,-(16 * n); \ b 105f; \ -.section __ex_table,"a"; \ - .align 2; \ - .long 8 ## n ## 0b,9 ## n ## 0b; \ - .long 8 ## n ## 1b,9 ## n ## 0b; \ - .long 8 ## n ## 2b,9 ## n ## 0b; \ - .long 8 ## n ## 3b,9 ## n ## 0b; \ - .long 8 ## n ## 4b,9 ## n ## 1b; \ - .long 8 ## n ## 5b,9 ## n ## 1b; \ - .long 8 ## n ## 6b,9 ## n ## 1b; \ - .long 8 ## n ## 7b,9 ## n ## 1b; \ - .text + EX_TABLE(8 ## n ## 0b,9 ## n ## 0b); \ + EX_TABLE(8 ## n ## 1b,9 ## n ## 0b); \ + EX_TABLE(8 ## n ## 2b,9 ## n ## 0b); \ + EX_TABLE(8 ## n ## 3b,9 ## n ## 0b); \ + EX_TABLE(8 ## n ## 4b,9 ## n ## 1b); \ + EX_TABLE(8 ## n ## 5b,9 ## n ## 1b); \ + EX_TABLE(8 ## n ## 6b,9 ## n ## 1b); \ + EX_TABLE(8 ## n ## 7b,9 ## n ## 1b) .text .stabs "arch/powerpc/lib/",N_SO,0,0,0f @@ -323,13 +320,10 @@ _GLOBAL(__copy_tofrom_user) 73: stwu r9,4(r6) bdnz 72b - .section __ex_table,"a" - .align 2 - .long 70b,100f - .long 71b,101f - .long 72b,102f - .long 73b,103f - .text + EX_TABLE(70b,100f) + EX_TABLE(71b,101f) + EX_TABLE(72b,102f) + EX_TABLE(73b,103f) 58: srwi. r0,r5,LG_CACHELINE_BYTES /* # complete cachelines */ clrlwi r5,r5,32-LG_CACHELINE_BYTES @@ -364,10 +358,7 @@ _GLOBAL(__copy_tofrom_user) 53: dcbt r3,r4 54: dcbz r11,r6 - .section __ex_table,"a" - .align 2 - .long 54b,105f - .text + EX_TABLE(54b,105f) /* the main body of the cacheline loop */ COPY_16_BYTES_WITHEX(0) #if L1_CACHE_BYTES >= 32 @@ -500,15 +491,13 @@ _GLOBAL(__copy_tofrom_user) bdnz 114b 120: blr - .section __ex_table,"a" - .align 2 - .long 30b,108b - .long 31b,109b - .long 40b,110b - .long 41b,111b - .long 130b,132b - .long 131b,120b - .long 112b,120b - .long 114b,120b - .text + EX_TABLE(30b,108b) + EX_TABLE(31b,109b) + EX_TABLE(40b,110b) + EX_TABLE(41b,111b) + EX_TABLE(130b,132b) + EX_TABLE(131b,120b) + EX_TABLE(112b,120b) + EX_TABLE(114b,120b) + EXPORT_SYMBOL(__copy_tofrom_user) diff --git a/arch/powerpc/lib/copyuser_64.S b/arch/powerpc/lib/copyuser_64.S index 60386b2c99bb..aee6e24e81ab 100644 --- a/arch/powerpc/lib/copyuser_64.S +++ b/arch/powerpc/lib/copyuser_64.S @@ -394,70 +394,66 @@ END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD) 192: blr /* #bytes not copied in r3 */ - .section __ex_table,"a" - .align 3 - .llong 20b,120b - .llong 220b,320b - .llong 21b,121b - .llong 221b,321b - .llong 70b,170b - .llong 270b,370b - .llong 22b,122b - .llong 222b,322b - .llong 71b,171b - .llong 271b,371b - .llong 72b,172b - .llong 272b,372b - .llong 244b,344b - .llong 245b,345b - .llong 23b,123b - .llong 73b,173b - .llong 44b,144b - .llong 74b,174b - .llong 45b,145b - .llong 75b,175b - .llong 24b,124b - .llong 25b,125b - .llong 26b,126b - .llong 27b,127b - .llong 28b,128b - .llong 29b,129b - .llong 30b,130b - .llong 31b,131b - .llong 32b,132b - .llong 76b,176b - .llong 33b,133b - .llong 77b,177b - .llong 78b,178b - .llong 79b,179b - .llong 80b,180b - .llong 34b,134b - .llong 94b,194b - .llong 95b,195b - .llong 96b,196b - .llong 35b,135b - .llong 81b,181b - .llong 36b,136b - .llong 82b,182b - .llong 37b,137b - .llong 83b,183b - .llong 38b,138b - .llong 39b,139b - .llong 84b,184b - .llong 85b,185b - .llong 40b,140b - .llong 86b,186b - .llong 41b,141b - .llong 87b,187b - .llong 42b,142b - .llong 88b,188b - .llong 43b,143b - .llong 89b,189b - .llong 90b,190b - .llong 91b,191b - .llong 92b,192b - - .text + EX_TABLE(20b,120b) + EX_TABLE(220b,320b) + EX_TABLE(21b,121b) + EX_TABLE(221b,321b) + EX_TABLE(70b,170b) + EX_TABLE(270b,370b) + EX_TABLE(22b,122b) + EX_TABLE(222b,322b) + EX_TABLE(71b,171b) + EX_TABLE(271b,371b) + EX_TABLE(72b,172b) + EX_TABLE(272b,372b) + EX_TABLE(244b,344b) + EX_TABLE(245b,345b) + EX_TABLE(23b,123b) + EX_TABLE(73b,173b) + EX_TABLE(44b,144b) + EX_TABLE(74b,174b) + EX_TABLE(45b,145b) + EX_TABLE(75b,175b) + EX_TABLE(24b,124b) + EX_TABLE(25b,125b) + EX_TABLE(26b,126b) + EX_TABLE(27b,127b) + EX_TABLE(28b,128b) + EX_TABLE(29b,129b) + EX_TABLE(30b,130b) + EX_TABLE(31b,131b) + EX_TABLE(32b,132b) + EX_TABLE(76b,176b) + EX_TABLE(33b,133b) + EX_TABLE(77b,177b) + EX_TABLE(78b,178b) + EX_TABLE(79b,179b) + EX_TABLE(80b,180b) + EX_TABLE(34b,134b) + EX_TABLE(94b,194b) + EX_TABLE(95b,195b) + EX_TABLE(96b,196b) + EX_TABLE(35b,135b) + EX_TABLE(81b,181b) + EX_TABLE(36b,136b) + EX_TABLE(82b,182b) + EX_TABLE(37b,137b) + EX_TABLE(83b,183b) + EX_TABLE(38b,138b) + EX_TABLE(39b,139b) + EX_TABLE(84b,184b) + EX_TABLE(85b,185b) + EX_TABLE(40b,140b) + EX_TABLE(86b,186b) + EX_TABLE(41b,141b) + EX_TABLE(87b,187b) + EX_TABLE(42b,142b) + EX_TABLE(88b,188b) + EX_TABLE(43b,143b) + EX_TABLE(89b,189b) + EX_TABLE(90b,190b) + EX_TABLE(91b,191b) + EX_TABLE(92b,192b) /* * Routine to copy a whole page of data, optimized for POWER4. @@ -598,78 +594,77 @@ END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD) li r5,4096 b .Ldst_aligned - .section __ex_table,"a" - .align 3 - .llong 20b,100b - .llong 21b,100b - .llong 22b,100b - .llong 23b,100b - .llong 24b,100b - .llong 25b,100b - .llong 26b,100b - .llong 27b,100b - .llong 28b,100b - .llong 29b,100b - .llong 30b,100b - .llong 31b,100b - .llong 32b,100b - .llong 33b,100b - .llong 34b,100b - .llong 35b,100b - .llong 36b,100b - .llong 37b,100b - .llong 38b,100b - .llong 39b,100b - .llong 40b,100b - .llong 41b,100b - .llong 42b,100b - .llong 43b,100b - .llong 44b,100b - .llong 45b,100b - .llong 46b,100b - .llong 47b,100b - .llong 48b,100b - .llong 49b,100b - .llong 50b,100b - .llong 51b,100b - .llong 52b,100b - .llong 53b,100b - .llong 54b,100b - .llong 55b,100b - .llong 56b,100b - .llong 57b,100b - .llong 58b,100b - .llong 59b,100b - .llong 60b,100b - .llong 61b,100b - .llong 62b,100b - .llong 63b,100b - .llong 64b,100b - .llong 65b,100b - .llong 66b,100b - .llong 67b,100b - .llong 68b,100b - .llong 69b,100b - .llong 70b,100b - .llong 71b,100b - .llong 72b,100b - .llong 73b,100b - .llong 74b,100b - .llong 75b,100b - .llong 76b,100b - .llong 77b,100b - .llong 78b,100b - .llong 79b,100b - .llong 80b,100b - .llong 81b,100b - .llong 82b,100b - .llong 83b,100b - .llong 84b,100b - .llong 85b,100b - .llong 86b,100b - .llong 87b,100b - .llong 88b,100b - .llong 89b,100b - .llong 90b,100b - .llong 91b,100b + EX_TABLE(20b,100b) + EX_TABLE(21b,100b) + EX_TABLE(22b,100b) + EX_TABLE(23b,100b) + EX_TABLE(24b,100b) + EX_TABLE(25b,100b) + EX_TABLE(26b,100b) + EX_TABLE(27b,100b) + EX_TABLE(28b,100b) + EX_TABLE(29b,100b) + EX_TABLE(30b,100b) + EX_TABLE(31b,100b) + EX_TABLE(32b,100b) + EX_TABLE(33b,100b) + EX_TABLE(34b,100b) + EX_TABLE(35b,100b) + EX_TABLE(36b,100b) + EX_TABLE(37b,100b) + EX_TABLE(38b,100b) + EX_TABLE(39b,100b) + EX_TABLE(40b,100b) + EX_TABLE(41b,100b) + EX_TABLE(42b,100b) + EX_TABLE(43b,100b) + EX_TABLE(44b,100b) + EX_TABLE(45b,100b) + EX_TABLE(46b,100b) + EX_TABLE(47b,100b) + EX_TABLE(48b,100b) + EX_TABLE(49b,100b) + EX_TABLE(50b,100b) + EX_TABLE(51b,100b) + EX_TABLE(52b,100b) + EX_TABLE(53b,100b) + EX_TABLE(54b,100b) + EX_TABLE(55b,100b) + EX_TABLE(56b,100b) + EX_TABLE(57b,100b) + EX_TABLE(58b,100b) + EX_TABLE(59b,100b) + EX_TABLE(60b,100b) + EX_TABLE(61b,100b) + EX_TABLE(62b,100b) + EX_TABLE(63b,100b) + EX_TABLE(64b,100b) + EX_TABLE(65b,100b) + EX_TABLE(66b,100b) + EX_TABLE(67b,100b) + EX_TABLE(68b,100b) + EX_TABLE(69b,100b) + EX_TABLE(70b,100b) + EX_TABLE(71b,100b) + EX_TABLE(72b,100b) + EX_TABLE(73b,100b) + EX_TABLE(74b,100b) + EX_TABLE(75b,100b) + EX_TABLE(76b,100b) + EX_TABLE(77b,100b) + EX_TABLE(78b,100b) + EX_TABLE(79b,100b) + EX_TABLE(80b,100b) + EX_TABLE(81b,100b) + EX_TABLE(82b,100b) + EX_TABLE(83b,100b) + EX_TABLE(84b,100b) + EX_TABLE(85b,100b) + EX_TABLE(86b,100b) + EX_TABLE(87b,100b) + EX_TABLE(88b,100b) + EX_TABLE(89b,100b) + EX_TABLE(90b,100b) + EX_TABLE(91b,100b) + EXPORT_SYMBOL(__copy_tofrom_user) diff --git a/arch/powerpc/lib/copyuser_power7.S b/arch/powerpc/lib/copyuser_power7.S index da0c568d18c4..a24b4039352c 100644 --- a/arch/powerpc/lib/copyuser_power7.S +++ b/arch/powerpc/lib/copyuser_power7.S @@ -29,35 +29,23 @@ .macro err1 100: - .section __ex_table,"a" - .align 3 - .llong 100b,.Ldo_err1 - .previous + EX_TABLE(100b,.Ldo_err1) .endm .macro err2 200: - .section __ex_table,"a" - .align 3 - .llong 200b,.Ldo_err2 - .previous + EX_TABLE(200b,.Ldo_err2) .endm #ifdef CONFIG_ALTIVEC .macro err3 300: - .section __ex_table,"a" - .align 3 - .llong 300b,.Ldo_err3 - .previous + EX_TABLE(300b,.Ldo_err3) .endm .macro err4 400: - .section __ex_table,"a" - .align 3 - .llong 400b,.Ldo_err4 - .previous + EX_TABLE(400b,.Ldo_err4) .endm diff --git a/arch/powerpc/lib/ldstfp.S b/arch/powerpc/lib/ldstfp.S index 5d0cdbfbe3f2..a58777c1b2cb 100644 --- a/arch/powerpc/lib/ldstfp.S +++ b/arch/powerpc/lib/ldstfp.S @@ -21,18 +21,12 @@ #define STKFRM (PPC_MIN_STKFRM + 16) - .macro extab instr,handler - .section __ex_table,"a" - PPC_LONG \instr,\handler - .previous - .endm - .macro inst32 op reg = 0 .rept 32 20: \op reg,0,r4 b 3f - extab 20b,99f + EX_TABLE(20b,99f) reg = reg + 1 .endr .endm @@ -100,7 +94,7 @@ _GLOBAL(do_lfs) mr r3,r9 addi r1,r1,STKFRM blr - extab 2b,3b + EX_TABLE(2b,3b) /* Load FP reg N from double at *p. N is in r3, p in r4. */ _GLOBAL(do_lfd) @@ -127,7 +121,7 @@ _GLOBAL(do_lfd) mr r3,r9 addi r1,r1,STKFRM blr - extab 2b,3b + EX_TABLE(2b,3b) /* Store FP reg N to float at *p. N is in r3, p in r4. */ _GLOBAL(do_stfs) @@ -154,7 +148,7 @@ _GLOBAL(do_stfs) mr r3,r9 addi r1,r1,STKFRM blr - extab 2b,3b + EX_TABLE(2b,3b) /* Store FP reg N to double at *p. N is in r3, p in r4. */ _GLOBAL(do_stfd) @@ -181,7 +175,7 @@ _GLOBAL(do_stfd) mr r3,r9 addi r1,r1,STKFRM blr - extab 2b,3b + EX_TABLE(2b,3b) #ifdef CONFIG_ALTIVEC /* Get the contents of vrN into v0; N is in r3. */ @@ -248,7 +242,7 @@ _GLOBAL(do_lvx) mr r3,r9 addi r1,r1,STKFRM blr - extab 2b,3b + EX_TABLE(2b,3b) /* Store vector reg N to *p. N is in r3, p in r4. */ _GLOBAL(do_stvx) @@ -276,7 +270,7 @@ _GLOBAL(do_stvx) mr r3,r9 addi r1,r1,STKFRM blr - extab 2b,3b + EX_TABLE(2b,3b) #endif /* CONFIG_ALTIVEC */ #ifdef CONFIG_VSX @@ -344,7 +338,7 @@ _GLOBAL(do_lxvd2x) mr r3,r9 addi r1,r1,STKFRM blr - extab 2b,3b + EX_TABLE(2b,3b) /* Store VSX reg N to vector doubleword *p. N is in r3, p in r4. */ _GLOBAL(do_stxvd2x) @@ -372,7 +366,7 @@ _GLOBAL(do_stxvd2x) mr r3,r9 addi r1,r1,STKFRM blr - extab 2b,3b + EX_TABLE(2b,3b) #endif /* CONFIG_VSX */ diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index 3362299b1859..b64287c6793f 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -493,10 +493,7 @@ static int __kprobes do_vsx_store(int rn, int (*func)(int, unsigned long), "3: li %0,%4\n" \ " b 2b\n" \ ".previous\n" \ - ".section __ex_table,\"a\"\n" \ - PPC_LONG_ALIGN "\n" \ - PPC_LONG "1b,3b\n" \ - ".previous" \ + EX_TABLE(1b, 3b) \ : "=r" (err), "=r" (cr) \ : "r" (x), "r" (addr), "i" (-EFAULT), "0" (err)) @@ -508,10 +505,7 @@ static int __kprobes do_vsx_store(int rn, int (*func)(int, unsigned long), "3: li %0,%3\n" \ " b 2b\n" \ ".previous\n" \ - ".section __ex_table,\"a\"\n" \ - PPC_LONG_ALIGN "\n" \ - PPC_LONG "1b,3b\n" \ - ".previous" \ + EX_TABLE(1b, 3b) \ : "=r" (err), "=r" (x) \ : "r" (addr), "i" (-EFAULT), "0" (err)) @@ -523,10 +517,7 @@ static int __kprobes do_vsx_store(int rn, int (*func)(int, unsigned long), "3: li %0,%3\n" \ " b 2b\n" \ ".previous\n" \ - ".section __ex_table,\"a\"\n" \ - PPC_LONG_ALIGN "\n" \ - PPC_LONG "1b,3b\n" \ - ".previous" \ + EX_TABLE(1b, 3b) \ : "=r" (err) \ : "r" (addr), "i" (-EFAULT), "0" (err)) diff --git a/arch/powerpc/lib/string.S b/arch/powerpc/lib/string.S index d13e07603519..a787776822d8 100644 --- a/arch/powerpc/lib/string.S +++ b/arch/powerpc/lib/string.S @@ -13,8 +13,6 @@ #include #include - .section __ex_table,"a" - PPC_LONG_ALIGN .text /* This clears out any unused part of the destination buffer, @@ -125,10 +123,9 @@ _GLOBAL(__clear_user) 92: mfctr r3 blr - .section __ex_table,"a" - PPC_LONG 11b,90b - PPC_LONG 1b,91b - PPC_LONG 8b,92b - .text + EX_TABLE(11b, 90b) + EX_TABLE(1b, 91b) + EX_TABLE(8b, 92b) + EXPORT_SYMBOL(__clear_user) #endif diff --git a/arch/powerpc/lib/string_64.S b/arch/powerpc/lib/string_64.S index 57ace356c949..c100f4d5d5d0 100644 --- a/arch/powerpc/lib/string_64.S +++ b/arch/powerpc/lib/string_64.S @@ -19,6 +19,7 @@ */ #include +#include #include #include @@ -41,26 +42,17 @@ PPC64_CACHES: .macro err1 100: - .section __ex_table,"a" - .align 3 - .llong 100b,.Ldo_err1 - .previous + EX_TABLE(100b,.Ldo_err1) .endm .macro err2 200: - .section __ex_table,"a" - .align 3 - .llong 200b,.Ldo_err2 - .previous + EX_TABLE(200b,.Ldo_err2) .endm .macro err3 300: - .section __ex_table,"a" - .align 3 - .llong 300b,.Ldo_err3 - .previous + EX_TABLE(300b,.Ldo_err3) .endm .Ldo_err1: diff --git a/arch/powerpc/sysdev/fsl_rio.c b/arch/powerpc/sysdev/fsl_rio.c index 3cc7cace194a..87fee0c8eb21 100644 --- a/arch/powerpc/sysdev/fsl_rio.c +++ b/arch/powerpc/sysdev/fsl_rio.c @@ -80,10 +80,8 @@ "3: li %1,-1\n" \ " li %0,%3\n" \ " b 2b\n" \ - ".section __ex_table,\"a\"\n" \ - PPC_LONG_ALIGN "\n" \ - PPC_LONG "1b,3b\n" \ - ".text" \ + ".previous\n" \ + EX_TABLE(1b, 3b) \ : "=r" (err), "=r" (x) \ : "b" (addr), "i" (-EFAULT), "0" (err)) diff --git a/arch/powerpc/sysdev/tsi108_pci.c b/arch/powerpc/sysdev/tsi108_pci.c index 57c971b7839c..53a16aa4d384 100644 --- a/arch/powerpc/sysdev/tsi108_pci.c +++ b/arch/powerpc/sysdev/tsi108_pci.c @@ -137,10 +137,8 @@ void tsi108_clear_pci_error(u32 pci_cfg_base) ".section .fixup,\"ax\"\n" \ "3: li %0,-1\n" \ " b 2b\n" \ - ".section __ex_table,\"a\"\n" \ - " .align 2\n" \ - " .long 1b,3b\n" \ - ".text" \ + ".previous\n" \ + EX_TABLE(1b, 3b) \ : "=r"(x) : "r"(addr)) int diff --git a/tools/testing/selftests/powerpc/copyloops/asm/ppc_asm.h b/tools/testing/selftests/powerpc/copyloops/asm/ppc_asm.h index 50ae7d2091ce..80d34a9ffff4 100644 --- a/tools/testing/selftests/powerpc/copyloops/asm/ppc_asm.h +++ b/tools/testing/selftests/powerpc/copyloops/asm/ppc_asm.h @@ -25,6 +25,8 @@ #define PPC_MTOCRF(A, B) mtocrf A, B +#define EX_TABLE(x, y) + FUNC_START(enter_vmx_usercopy) li r3,1 blr diff --git a/tools/testing/selftests/powerpc/primitives/asm/firmware.h b/tools/testing/selftests/powerpc/primitives/asm/firmware.h new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tools/testing/selftests/powerpc/primitives/asm/ppc_asm.h b/tools/testing/selftests/powerpc/primitives/asm/ppc_asm.h new file mode 120000 index 000000000000..66c8193224e9 --- /dev/null +++ b/tools/testing/selftests/powerpc/primitives/asm/ppc_asm.h @@ -0,0 +1 @@ +../../../../../../arch/powerpc/include/asm/ppc_asm.h \ No newline at end of file diff --git a/tools/testing/selftests/powerpc/primitives/asm/processor.h b/tools/testing/selftests/powerpc/primitives/asm/processor.h new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tools/testing/selftests/powerpc/primitives/linux/stringify.h b/tools/testing/selftests/powerpc/primitives/linux/stringify.h new file mode 100644 index 000000000000..e69de29bb2d1 -- cgit v1.3.1 From 61a92f703120daf7ed25e046275aa8a2d3085ad4 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Fri, 14 Oct 2016 16:47:31 +1100 Subject: powerpc: Add support for relative exception tables This halves the exception table size on 64-bit builds, and it allows build-time sorting of exception tables to work on relocated kernels. Signed-off-by: Nicholas Piggin [mpe: Minor asm fixups and bits to keep the selftests working] Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/ppc_asm.h | 6 ++--- arch/powerpc/include/asm/uaccess.h | 27 ++++++++++++++-------- arch/powerpc/kernel/kprobes.c | 2 +- arch/powerpc/kernel/traps.c | 2 +- arch/powerpc/mm/fault.c | 2 +- arch/powerpc/platforms/embedded6xx/holly.c | 2 +- arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c | 2 +- arch/powerpc/sysdev/fsl_rio.c | 2 +- .../powerpc/primitives/load_unaligned_zeropad.c | 12 ++++++---- 9 files changed, 34 insertions(+), 23 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h index 6af8852d1f7f..bf9de5575ca9 100644 --- a/arch/powerpc/include/asm/ppc_asm.h +++ b/arch/powerpc/include/asm/ppc_asm.h @@ -785,9 +785,9 @@ END_FTR_SECTION_IFCLR(CPU_FTR_601) */ #define EX_TABLE(_fault, _target) \ stringify_in_c(.section __ex_table,"a";)\ - PPC_LONG_ALIGN stringify_in_c(;) \ - PPC_LONG stringify_in_c(_fault;) \ - PPC_LONG stringify_in_c(_target;) \ + stringify_in_c(.balign 4;) \ + stringify_in_c(.long (_fault) - . ;) \ + stringify_in_c(.long (_target) - . ;) \ stringify_in_c(.previous) #endif /* _ASM_POWERPC_PPC_ASM_H */ diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h index e0b724619c4a..a15d84d59356 100644 --- a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h @@ -64,23 +64,30 @@ __access_ok((__force unsigned long)(addr), (size), get_fs())) /* - * The exception table consists of pairs of addresses: the first is the - * address of an instruction that is allowed to fault, and the second is + * The exception table consists of pairs of relative addresses: the first is + * the address of an instruction that is allowed to fault, and the second is * the address at which the program should continue. No registers are - * modified, so it is entirely up to the continuation code to figure out - * what to do. + * modified, so it is entirely up to the continuation code to figure out what + * to do. * - * All the routines below use bits of fixup code that are out of line - * with the main instruction path. This means when everything is well, - * we don't even have to jump over them. Further, they do not intrude - * on our cache or tlb entries. + * All the routines below use bits of fixup code that are out of line with the + * main instruction path. This means when everything is well, we don't even + * have to jump over them. Further, they do not intrude on our cache or tlb + * entries. */ +#define ARCH_HAS_RELATIVE_EXTABLE + struct exception_table_entry { - unsigned long insn; - unsigned long fixup; + int insn; + int fixup; }; +static inline unsigned long extable_fixup(const struct exception_table_entry *x) +{ + return (unsigned long)&x->fixup + x->fixup; +} + /* * These are the main single-value transfer routines. They automatically * use the right size if we just have the right pointer type. diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c index e785cc9e1ecd..9479d8e360cf 100644 --- a/arch/powerpc/kernel/kprobes.c +++ b/arch/powerpc/kernel/kprobes.c @@ -449,7 +449,7 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr) * zero, try to fix up. */ if ((entry = search_exception_tables(regs->nip)) != NULL) { - regs->nip = entry->fixup; + regs->nip = extable_fixup(entry); return 1; } diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 023a462725b5..32c468b8b548 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -365,7 +365,7 @@ static inline int check_io_access(struct pt_regs *regs) (*nip & 0x100)? "OUT to": "IN from", regs->gpr[rb] - _IO_BASE, nip); regs->msr |= MSR_RI; - regs->nip = entry->fixup; + regs->nip = extable_fixup(entry); return 1; } } diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index d0b137d96df1..73932f4a386e 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -512,7 +512,7 @@ void bad_page_fault(struct pt_regs *regs, unsigned long address, int sig) /* Are we prepared to handle this fault? */ if ((entry = search_exception_tables(regs->nip)) != NULL) { - regs->nip = entry->fixup; + regs->nip = extable_fixup(entry); return; } diff --git a/arch/powerpc/platforms/embedded6xx/holly.c b/arch/powerpc/platforms/embedded6xx/holly.c index dfd310031549..0409714e8070 100644 --- a/arch/powerpc/platforms/embedded6xx/holly.c +++ b/arch/powerpc/platforms/embedded6xx/holly.c @@ -263,7 +263,7 @@ static int ppc750_machine_check_exception(struct pt_regs *regs) if ((entry = search_exception_tables(regs->nip)) != NULL) { tsi108_clear_pci_cfg_error(); regs->msr |= MSR_RI; - regs->nip = entry->fixup; + regs->nip = extable_fixup(entry); return 1; } return 0; diff --git a/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c b/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c index f97bab8e37a2..9de100e22bf3 100644 --- a/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c +++ b/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c @@ -174,7 +174,7 @@ static int mpc7448_machine_check_exception(struct pt_regs *regs) if ((entry = search_exception_tables(regs->nip)) != NULL) { tsi108_clear_pci_cfg_error(); regs->msr |= MSR_RI; - regs->nip = entry->fixup; + regs->nip = extable_fixup(entry); return 1; } return 0; diff --git a/arch/powerpc/sysdev/fsl_rio.c b/arch/powerpc/sysdev/fsl_rio.c index 87fee0c8eb21..1c41c51f22cb 100644 --- a/arch/powerpc/sysdev/fsl_rio.c +++ b/arch/powerpc/sysdev/fsl_rio.c @@ -111,7 +111,7 @@ int fsl_rio_mcheck_exception(struct pt_regs *regs) out_be32((u32 *)(rio_regs_win + RIO_LTLEDCSR), 0); regs->msr |= MSR_RI; - regs->nip = entry->fixup; + regs->nip = extable_fixup(entry); return 1; } } diff --git a/tools/testing/selftests/powerpc/primitives/load_unaligned_zeropad.c b/tools/testing/selftests/powerpc/primitives/load_unaligned_zeropad.c index cd7af4e1b65a..ed3239bbfae2 100644 --- a/tools/testing/selftests/powerpc/primitives/load_unaligned_zeropad.c +++ b/tools/testing/selftests/powerpc/primitives/load_unaligned_zeropad.c @@ -73,19 +73,23 @@ extern char __stop___ex_table[]; #error implement UCONTEXT_NIA #endif +struct extbl_entry { + int insn; + int fixup; +}; static void segv_handler(int signr, siginfo_t *info, void *ptr) { ucontext_t *uc = (ucontext_t *)ptr; unsigned long addr = (unsigned long)info->si_addr; unsigned long *ip = &UCONTEXT_NIA(uc); - unsigned long *ex_p = (unsigned long *)__start___ex_table; + struct extbl_entry *entry = (struct extbl_entry *)__start___ex_table; - while (ex_p < (unsigned long *)__stop___ex_table) { + while (entry < (struct extbl_entry *)__stop___ex_table) { unsigned long insn, fixup; - insn = *ex_p++; - fixup = *ex_p++; + insn = (unsigned long)&entry->insn + entry->insn; + fixup = (unsigned long)&entry->fixup + entry->fixup; if (insn == *ip) { *ip = fixup; -- cgit v1.3.1 From 5b9ff027859868efd63cdbbff5d30182d4cca50a Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Thu, 13 Oct 2016 16:42:55 +1100 Subject: powerpc: Build-time sort the exception table Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman --- arch/powerpc/Kconfig | 1 + arch/powerpc/include/asm/module.h | 4 ---- scripts/sortextable.c | 2 ++ 3 files changed, 3 insertions(+), 4 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 65fba4c34cd7..8567c6809240 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -80,6 +80,7 @@ config ARCH_HAS_DMA_SET_COHERENT_MASK config PPC bool default y + select BUILDTIME_EXTABLE_SORT select ARCH_MIGHT_HAVE_PC_PARPORT select ARCH_MIGHT_HAVE_PC_SERIO select BINFMT_ELF diff --git a/arch/powerpc/include/asm/module.h b/arch/powerpc/include/asm/module.h index cd4ffd86765f..cc12c61ef315 100644 --- a/arch/powerpc/include/asm/module.h +++ b/arch/powerpc/include/asm/module.h @@ -90,10 +90,6 @@ static inline int module_finalize_ftrace(struct module *mod, const Elf_Shdr *sec } #endif -struct exception_table_entry; -void sort_ex_table(struct exception_table_entry *start, - struct exception_table_entry *finish); - #if defined(CONFIG_MODVERSIONS) && defined(CONFIG_PPC64) #define ARCH_RELOCATES_KCRCTAB #define reloc_start PHYSICAL_START diff --git a/scripts/sortextable.c b/scripts/sortextable.c index f453b7ce99d6..365a907f98b3 100644 --- a/scripts/sortextable.c +++ b/scripts/sortextable.c @@ -316,6 +316,8 @@ do_file(char const *const fname) case EM_S390: case EM_AARCH64: case EM_PARISC: + case EM_PPC: + case EM_PPC64: custom_sort = sort_relative_table; break; case EM_ARCOMPACT: -- cgit v1.3.1 From c0a5149105ab5f76f7c5a8fc2eb3d79fe3de6582 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Thu, 13 Oct 2016 13:07:14 +1100 Subject: powerpc: Make _ASM_NOKPROBE_SYMBOL a noop when KPROBES not defined Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/ppc_asm.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h index bf9de5575ca9..025833b8df9f 100644 --- a/arch/powerpc/include/asm/ppc_asm.h +++ b/arch/powerpc/include/asm/ppc_asm.h @@ -263,10 +263,14 @@ n: * latter is for those that incdentially must be excluded from probing * and allows them to be linked at more optimal location within text. */ +#ifdef CONFIG_KPROBES #define _ASM_NOKPROBE_SYMBOL(entry) \ .pushsection "_kprobe_blacklist","aw"; \ PPC_LONG (entry) ; \ .popsection +#else +#define _ASM_NOKPROBE_SYMBOL(entry) +#endif #define FUNC_START(name) _GLOBAL(name) #define FUNC_END(name) -- cgit v1.3.1 From 5246adec59458b5d325b8e1462ea9ef3ead7f6ae Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Sat, 1 Oct 2016 20:41:56 +1000 Subject: powerpc/pseries: Use H_CLEAR_HPT to clear MMU hash table during kexec An hcall was recently added that does exactly what we need during kexec - it clears the entire MMU hash table, ignoring any VRMA mappings. Try it and fall back to the old method if we get a failure. On a POWER8 box with 5TB of memory, this reduces the time it takes to kexec a new kernel from from 4 minutes to 1 minute. Signed-off-by: Anton Blanchard Tested-by: Mahesh Salgaonkar [mpe: Split into separate functions and tweak function naming] Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/hvcall.h | 3 ++- arch/powerpc/platforms/pseries/lpar.c | 24 ++++++++++++++++++++++-- 2 files changed, 24 insertions(+), 3 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h index 708edebcf147..489748ed21b4 100644 --- a/arch/powerpc/include/asm/hvcall.h +++ b/arch/powerpc/include/asm/hvcall.h @@ -275,7 +275,8 @@ #define H_COP 0x304 #define H_GET_MPP_X 0x314 #define H_SET_MODE 0x31C -#define MAX_HCALL_OPCODE H_SET_MODE +#define H_CLEAR_HPT 0x358 +#define MAX_HCALL_OPCODE H_CLEAR_HPT /* H_VIOCTL functions */ #define H_GET_VIOA_DUMP_SIZE 0x01 diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index aa35245d8d6d..24ad43afbb46 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -221,7 +221,7 @@ static long pSeries_lpar_hpte_remove(unsigned long hpte_group) return -1; } -static void pSeries_lpar_hptab_clear(void) +static void manual_hpte_clear_all(void) { unsigned long size_bytes = 1UL << ppc64_pft_size; unsigned long hpte_count = size_bytes >> 4; @@ -249,6 +249,26 @@ static void pSeries_lpar_hptab_clear(void) &(ptes[j].pteh), &(ptes[j].ptel)); } } +} + +static int hcall_hpte_clear_all(void) +{ + int rc; + + do { + rc = plpar_hcall_norets(H_CLEAR_HPT); + } while (rc == H_CONTINUE); + + return rc; +} + +static void pseries_hpte_clear_all(void) +{ + int rc; + + rc = hcall_hpte_clear_all(); + if (rc != H_SUCCESS) + manual_hpte_clear_all(); #ifdef __LITTLE_ENDIAN__ /* @@ -598,7 +618,7 @@ void __init hpte_init_pseries(void) mmu_hash_ops.hpte_remove = pSeries_lpar_hpte_remove; mmu_hash_ops.hpte_removebolted = pSeries_lpar_hpte_removebolted; mmu_hash_ops.flush_hash_range = pSeries_lpar_flush_hash_range; - mmu_hash_ops.hpte_clear_all = pSeries_lpar_hptab_clear; + mmu_hash_ops.hpte_clear_all = pseries_hpte_clear_all; mmu_hash_ops.hugepage_invalidate = pSeries_lpar_hugepage_invalidate; } -- cgit v1.3.1 From 29a969b764817c1dce819c2bc8c00a147529a5ef Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Mon, 31 Oct 2016 13:19:39 +1100 Subject: powerpc: Revert Load Monitor Register Support Load monitored is no longer supported on POWER9 so let's remove the code. This reverts commit bd3ea317fddf ("powerpc: Load Monitor Register Support"). Signed-off-by: Michael Neuling Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/processor.h | 2 -- arch/powerpc/include/asm/reg.h | 5 ----- arch/powerpc/kernel/process.c | 18 ------------------ arch/powerpc/kernel/traps.c | 9 --------- 4 files changed, 34 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index c07c31b0e89e..5c0a665af1c3 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -312,8 +312,6 @@ struct thread_struct { unsigned long mmcr2; unsigned mmcr0; unsigned used_ebb; - unsigned long lmrr; - unsigned long lmser; #endif }; diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index 9cd4e8cbc78c..c491cfebfc05 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -292,8 +292,6 @@ #define SPRN_HRMOR 0x139 /* Real mode offset register */ #define SPRN_HSRR0 0x13A /* Hypervisor Save/Restore 0 */ #define SPRN_HSRR1 0x13B /* Hypervisor Save/Restore 1 */ -#define SPRN_LMRR 0x32D /* Load Monitor Region Register */ -#define SPRN_LMSER 0x32E /* Load Monitor Section Enable Register */ #define SPRN_IC 0x350 /* Virtual Instruction Count */ #define SPRN_VTB 0x351 /* Virtual Time Base */ #define SPRN_LDBAR 0x352 /* LD Base Address Register */ @@ -304,7 +302,6 @@ #define SPRN_PMCR 0x374 /* Power Management Control Register */ /* HFSCR and FSCR bit numbers are the same */ -#define FSCR_LM_LG 11 /* Enable Load Monitor Registers */ #define FSCR_TAR_LG 8 /* Enable Target Address Register */ #define FSCR_EBB_LG 7 /* Enable Event Based Branching */ #define FSCR_TM_LG 5 /* Enable Transactional Memory */ @@ -314,12 +311,10 @@ #define FSCR_VECVSX_LG 1 /* Enable VMX/VSX */ #define FSCR_FP_LG 0 /* Enable Floating Point */ #define SPRN_FSCR 0x099 /* Facility Status & Control Register */ -#define FSCR_LM __MASK(FSCR_LM_LG) #define FSCR_TAR __MASK(FSCR_TAR_LG) #define FSCR_EBB __MASK(FSCR_EBB_LG) #define FSCR_DSCR __MASK(FSCR_DSCR_LG) #define SPRN_HFSCR 0xbe /* HV=1 Facility Status & Control Register */ -#define HFSCR_LM __MASK(FSCR_LM_LG) #define HFSCR_TAR __MASK(FSCR_TAR_LG) #define HFSCR_EBB __MASK(FSCR_EBB_LG) #define HFSCR_TM __MASK(FSCR_TM_LG) diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index ce6dc61b15b2..5b62e8c36210 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1051,14 +1051,6 @@ static inline void save_sprs(struct thread_struct *t) */ t->tar = mfspr(SPRN_TAR); } - - if (cpu_has_feature(CPU_FTR_ARCH_300)) { - /* Conditionally save Load Monitor registers, if enabled */ - if (t->fscr & FSCR_LM) { - t->lmrr = mfspr(SPRN_LMRR); - t->lmser = mfspr(SPRN_LMSER); - } - } #endif } @@ -1094,16 +1086,6 @@ static inline void restore_sprs(struct thread_struct *old_thread, if (old_thread->tar != new_thread->tar) mtspr(SPRN_TAR, new_thread->tar); } - - if (cpu_has_feature(CPU_FTR_ARCH_300)) { - /* Conditionally restore Load Monitor registers, if enabled */ - if (new_thread->fscr & FSCR_LM) { - if (old_thread->lmrr != new_thread->lmrr) - mtspr(SPRN_LMRR, new_thread->lmrr); - if (old_thread->lmser != new_thread->lmser) - mtspr(SPRN_LMSER, new_thread->lmser); - } - } #endif } diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 32c468b8b548..91d278c9ab28 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -1430,7 +1430,6 @@ void facility_unavailable_exception(struct pt_regs *regs) [FSCR_TM_LG] = "TM", [FSCR_EBB_LG] = "EBB", [FSCR_TAR_LG] = "TAR", - [FSCR_LM_LG] = "LM", }; char *facility = "unknown"; u64 value; @@ -1488,14 +1487,6 @@ void facility_unavailable_exception(struct pt_regs *regs) emulate_single_step(regs); } return; - } else if ((status == FSCR_LM_LG) && cpu_has_feature(CPU_FTR_ARCH_300)) { - /* - * This process has touched LM, so turn it on forever - * for this process - */ - current->thread.fscr |= FSCR_LM; - mtspr(SPRN_FSCR, current->thread.fscr); - return; } if (status == FSCR_TM_LG) { -- cgit v1.3.1 From 8f272a5dd6826f14e47110eccd37b6782bf74901 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Mon, 14 Nov 2016 16:28:10 +1100 Subject: powerpc/pseries: Move CMO code from plapr_wrappers.h to platforms/pseries Currently there's some CMO (Cooperative Memory Overcommit) code, in plpar_wrappers.h. Some of it is #ifdef CONFIG_PSERIES and some of it isn't. The end result being if a file includes plpar_wrappers.h it won't build with CONFIG_PSERIES=n. Fix it by moving the CMO code into platforms/pseries. The two hcall wrappers can just be moved into their only caller, cmm.c, and the accessors can go in pseries.h. Note we need the accessors because cmm.c can be built as a module, so there needs to be a split between the built-in code vs the module, and that's achieved by using those accessors. Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/hvcall.h | 21 ------------------- arch/powerpc/include/asm/plpar_wrappers.h | 32 ----------------------------- arch/powerpc/platforms/pseries/cmm.c | 34 +++++++++++++++++++++++++++++++ arch/powerpc/platforms/pseries/lparcfg.c | 1 + arch/powerpc/platforms/pseries/pseries.h | 19 +++++++++++++++++ 5 files changed, 54 insertions(+), 53 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h index 489748ed21b4..8d5f8352afd7 100644 --- a/arch/powerpc/include/asm/hvcall.h +++ b/arch/powerpc/include/asm/hvcall.h @@ -413,27 +413,6 @@ static inline unsigned int get_longbusy_msecs(int longbusy_rc) } } -#ifdef CONFIG_PPC_PSERIES -extern int CMO_PrPSP; -extern int CMO_SecPSP; -extern unsigned long CMO_PageSize; - -static inline int cmo_get_primary_psp(void) -{ - return CMO_PrPSP; -} - -static inline int cmo_get_secondary_psp(void) -{ - return CMO_SecPSP; -} - -static inline unsigned long cmo_get_page_size(void) -{ - return CMO_PageSize; -} -#endif /* CONFIG_PPC_PSERIES */ - #endif /* __ASSEMBLY__ */ #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_HVCALL_H */ diff --git a/arch/powerpc/include/asm/plpar_wrappers.h b/arch/powerpc/include/asm/plpar_wrappers.h index 1b394247afc2..034a588b122c 100644 --- a/arch/powerpc/include/asm/plpar_wrappers.h +++ b/arch/powerpc/include/asm/plpar_wrappers.h @@ -93,38 +93,6 @@ static inline long register_dtl(unsigned long cpu, unsigned long vpa) return vpa_call(H_VPA_REG_DTL, cpu, vpa); } -static inline long plpar_page_set_loaned(unsigned long vpa) -{ - unsigned long cmo_page_sz = cmo_get_page_size(); - long rc = 0; - int i; - - for (i = 0; !rc && i < PAGE_SIZE; i += cmo_page_sz) - rc = plpar_hcall_norets(H_PAGE_INIT, H_PAGE_SET_LOANED, vpa + i, 0); - - for (i -= cmo_page_sz; rc && i != 0; i -= cmo_page_sz) - plpar_hcall_norets(H_PAGE_INIT, H_PAGE_SET_ACTIVE, - vpa + i - cmo_page_sz, 0); - - return rc; -} - -static inline long plpar_page_set_active(unsigned long vpa) -{ - unsigned long cmo_page_sz = cmo_get_page_size(); - long rc = 0; - int i; - - for (i = 0; !rc && i < PAGE_SIZE; i += cmo_page_sz) - rc = plpar_hcall_norets(H_PAGE_INIT, H_PAGE_SET_ACTIVE, vpa + i, 0); - - for (i -= cmo_page_sz; rc && i != 0; i -= cmo_page_sz) - plpar_hcall_norets(H_PAGE_INIT, H_PAGE_SET_LOANED, - vpa + i - cmo_page_sz, 0); - - return rc; -} - extern void vpa_init(int cpu); static inline long plpar_pte_enter(unsigned long flags, diff --git a/arch/powerpc/platforms/pseries/cmm.c b/arch/powerpc/platforms/pseries/cmm.c index 66e7227469b8..972328829387 100644 --- a/arch/powerpc/platforms/pseries/cmm.c +++ b/arch/powerpc/platforms/pseries/cmm.c @@ -41,6 +41,8 @@ #include #include +#include "pseries.h" + #define CMM_DRIVER_VERSION "1.0.0" #define CMM_DEFAULT_DELAY 1 #define CMM_HOTPLUG_DELAY 5 @@ -109,6 +111,38 @@ static int hotplug_occurred; /* protected by the hotplug mutex */ static struct task_struct *cmm_thread_ptr; +static long plpar_page_set_loaned(unsigned long vpa) +{ + unsigned long cmo_page_sz = cmo_get_page_size(); + long rc = 0; + int i; + + for (i = 0; !rc && i < PAGE_SIZE; i += cmo_page_sz) + rc = plpar_hcall_norets(H_PAGE_INIT, H_PAGE_SET_LOANED, vpa + i, 0); + + for (i -= cmo_page_sz; rc && i != 0; i -= cmo_page_sz) + plpar_hcall_norets(H_PAGE_INIT, H_PAGE_SET_ACTIVE, + vpa + i - cmo_page_sz, 0); + + return rc; +} + +static long plpar_page_set_active(unsigned long vpa) +{ + unsigned long cmo_page_sz = cmo_get_page_size(); + long rc = 0; + int i; + + for (i = 0; !rc && i < PAGE_SIZE; i += cmo_page_sz) + rc = plpar_hcall_norets(H_PAGE_INIT, H_PAGE_SET_ACTIVE, vpa + i, 0); + + for (i -= cmo_page_sz; rc && i != 0; i -= cmo_page_sz) + plpar_hcall_norets(H_PAGE_INIT, H_PAGE_SET_LOANED, + vpa + i - cmo_page_sz, 0); + + return rc; +} + /** * cmm_alloc_pages - Allocate pages and mark them as loaned * @nr: number of pages to allocate diff --git a/arch/powerpc/platforms/pseries/lparcfg.c b/arch/powerpc/platforms/pseries/lparcfg.c index afa05a2cb702..e6397976060e 100644 --- a/arch/powerpc/platforms/pseries/lparcfg.c +++ b/arch/powerpc/platforms/pseries/lparcfg.c @@ -37,6 +37,7 @@ #include #include +#include "pseries.h" /* * This isn't a module but we expose that to userspace diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h index b1be7b713fe6..1361a9db534b 100644 --- a/arch/powerpc/platforms/pseries/pseries.h +++ b/arch/powerpc/platforms/pseries/pseries.h @@ -79,4 +79,23 @@ extern struct pci_controller_ops pseries_pci_controller_ops; unsigned long pseries_memory_block_size(void); +extern int CMO_PrPSP; +extern int CMO_SecPSP; +extern unsigned long CMO_PageSize; + +static inline int cmo_get_primary_psp(void) +{ + return CMO_PrPSP; +} + +static inline int cmo_get_secondary_psp(void) +{ + return CMO_SecPSP; +} + +static inline unsigned long cmo_get_page_size(void) +{ + return CMO_PageSize; +} + #endif /* _PSERIES_PSERIES_H */ -- cgit v1.3.1 From c05f69a3dc702312bbecae4c7ddfc999f48ee720 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 21 Sep 2016 16:49:27 +0200 Subject: powerpc/64: get rid of MIN_HUGEPTE_SHIFT MIN_HUGEPTE_SHIFT hasn't been used since commit d1837cba5d5d5 ("powerpc/mm: Cleanup initialization of hugepages on powerpc") Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/book3s/64/hash-4k.h | 3 --- arch/powerpc/include/asm/book3s/64/hash-64k.h | 3 --- arch/powerpc/include/asm/nohash/64/pgtable-4k.h | 3 --- arch/powerpc/include/asm/nohash/64/pgtable-64k.h | 3 --- 4 files changed, 12 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/book3s/64/hash-4k.h b/arch/powerpc/include/asm/book3s/64/hash-4k.h index 1af837c561ba..1c64bc6330bc 100644 --- a/arch/powerpc/include/asm/book3s/64/hash-4k.h +++ b/arch/powerpc/include/asm/book3s/64/hash-4k.h @@ -16,9 +16,6 @@ #define H_PUD_TABLE_SIZE (sizeof(pud_t) << H_PUD_INDEX_SIZE) #define H_PGD_TABLE_SIZE (sizeof(pgd_t) << H_PGD_INDEX_SIZE) -/* With 4k base page size, hugepage PTEs go at the PMD level */ -#define MIN_HUGEPTE_SHIFT PMD_SHIFT - /* PTE flags to conserve for HPTE identification */ #define _PAGE_HPTEFLAGS (H_PAGE_BUSY | H_PAGE_HASHPTE | \ H_PAGE_F_SECOND | H_PAGE_F_GIX) diff --git a/arch/powerpc/include/asm/book3s/64/hash-64k.h b/arch/powerpc/include/asm/book3s/64/hash-64k.h index 5aae4f530c21..f3dd21efa2ea 100644 --- a/arch/powerpc/include/asm/book3s/64/hash-64k.h +++ b/arch/powerpc/include/asm/book3s/64/hash-64k.h @@ -6,9 +6,6 @@ #define H_PUD_INDEX_SIZE 5 #define H_PGD_INDEX_SIZE 12 -/* With 4k base page size, hugepage PTEs go at the PMD level */ -#define MIN_HUGEPTE_SHIFT PAGE_SHIFT - #define H_PAGE_COMBO 0x00001000 /* this is a combo 4k page */ #define H_PAGE_4K_PFN 0x00002000 /* PFN is for a single 4k page */ /* diff --git a/arch/powerpc/include/asm/nohash/64/pgtable-4k.h b/arch/powerpc/include/asm/nohash/64/pgtable-4k.h index fc7d51753f81..d0db98793dd8 100644 --- a/arch/powerpc/include/asm/nohash/64/pgtable-4k.h +++ b/arch/powerpc/include/asm/nohash/64/pgtable-4k.h @@ -27,9 +27,6 @@ #define PMD_SIZE (1UL << PMD_SHIFT) #define PMD_MASK (~(PMD_SIZE-1)) -/* With 4k base page size, hugepage PTEs go at the PMD level */ -#define MIN_HUGEPTE_SHIFT PMD_SHIFT - /* PUD_SHIFT determines what a third-level page table entry can map */ #define PUD_SHIFT (PMD_SHIFT + PMD_INDEX_SIZE) #define PUD_SIZE (1UL << PUD_SHIFT) diff --git a/arch/powerpc/include/asm/nohash/64/pgtable-64k.h b/arch/powerpc/include/asm/nohash/64/pgtable-64k.h index 908324574f77..55b28ef3409a 100644 --- a/arch/powerpc/include/asm/nohash/64/pgtable-64k.h +++ b/arch/powerpc/include/asm/nohash/64/pgtable-64k.h @@ -31,9 +31,6 @@ #define PTRS_PER_PMD (1 << PMD_INDEX_SIZE) #define PTRS_PER_PGD (1 << PGD_INDEX_SIZE) -/* With 4k base page size, hugepage PTEs go at the PMD level */ -#define MIN_HUGEPTE_SHIFT PAGE_SHIFT - /* PMD_SHIFT determines what a second-level page table entry can map */ #define PMD_SHIFT (PAGE_SHIFT + PTE_INDEX_SIZE) #define PMD_SIZE (1UL << PMD_SHIFT) -- cgit v1.3.1 From 6654c9368a6ff75a36230d8eb94676da1d01f5ae Mon Sep 17 00:00:00 2001 From: Russell Currey Date: Thu, 17 Nov 2016 16:07:47 +1100 Subject: powerpc/eeh: Refactor EEH PE reset functions eeh_pe_reset and eeh_reset_pe are two different functions in the same file which do mostly the same thing. Not only is this confusing, but potentially causes disrepancies in functionality, notably eeh_reset_pe as it does not check return values for failure. Refactor this into the following: - eeh_pe_reset(): stays as is, performs a single operation, exported - eeh_pe_reset_full(): new, full reset process that calls eeh_pe_reset() - eeh_reset_pe(): removed and replaced by eeh_pe_reset_full() - eeh_reset_pe_once(): removed Signed-off-by: Russell Currey Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/ppc-pci.h | 2 +- arch/powerpc/kernel/eeh.c | 78 +++++++++++++++++--------------------- arch/powerpc/kernel/eeh_driver.c | 4 +- 3 files changed, 38 insertions(+), 46 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/ppc-pci.h b/arch/powerpc/include/asm/ppc-pci.h index 0f73de069f19..726288048652 100644 --- a/arch/powerpc/include/asm/ppc-pci.h +++ b/arch/powerpc/include/asm/ppc-pci.h @@ -53,7 +53,7 @@ void eeh_addr_cache_rmv_dev(struct pci_dev *dev); struct eeh_dev *eeh_addr_cache_get_dev(unsigned long addr); void eeh_slot_error_detail(struct eeh_pe *pe, int severity); int eeh_pci_enable(struct eeh_pe *pe, int function); -int eeh_reset_pe(struct eeh_pe *); +int eeh_pe_reset_full(struct eeh_pe *pe); void eeh_save_bars(struct eeh_dev *edev); int rtas_write_config(struct pci_dn *, int where, int size, u32 val); int rtas_read_config(struct pci_dn *, int where, int size, u32 *val); diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index 927d16269ad6..8180bfd7ab93 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -808,76 +808,67 @@ static void *eeh_set_dev_freset(void *data, void *flag) } /** - * eeh_reset_pe_once - Assert the pci #RST line for 1/4 second + * eeh_pe_reset_full - Complete a full reset process on the indicated PE * @pe: EEH PE * - * Assert the PCI #RST line for 1/4 second. + * This function executes a full reset procedure on a PE, including setting + * the appropriate flags, performing a fundamental or hot reset, and then + * deactivating the reset status. It is designed to be used within the EEH + * subsystem, as opposed to eeh_pe_reset which is exported to drivers and + * only performs a single operation at a time. + * + * This function will attempt to reset a PE three times before failing. */ -static void eeh_reset_pe_once(struct eeh_pe *pe) +int eeh_pe_reset_full(struct eeh_pe *pe) { + int active_flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE); + int reset_state = (EEH_PE_RESET | EEH_PE_CFG_BLOCKED); + int type = EEH_RESET_HOT; unsigned int freset = 0; + int i, state, ret; - /* Determine type of EEH reset required for - * Partitionable Endpoint, a hot-reset (1) - * or a fundamental reset (3). - * A fundamental reset required by any device under - * Partitionable Endpoint trumps hot-reset. + /* + * Determine the type of reset to perform - hot or fundamental. + * Hot reset is the default operation, unless any device under the + * PE requires a fundamental reset. */ eeh_pe_dev_traverse(pe, eeh_set_dev_freset, &freset); if (freset) - eeh_ops->reset(pe, EEH_RESET_FUNDAMENTAL); - else - eeh_ops->reset(pe, EEH_RESET_HOT); - - eeh_ops->reset(pe, EEH_RESET_DEACTIVATE); -} - -/** - * eeh_reset_pe - Reset the indicated PE - * @pe: EEH PE - * - * This routine should be called to reset indicated device, including - * PE. A PE might include multiple PCI devices and sometimes PCI bridges - * might be involved as well. - */ -int eeh_reset_pe(struct eeh_pe *pe) -{ - int flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE); - int i, state, ret; + type = EEH_RESET_FUNDAMENTAL; - /* Mark as reset and block config space */ - eeh_pe_state_mark(pe, EEH_PE_RESET | EEH_PE_CFG_BLOCKED); + /* Mark the PE as in reset state and block config space accesses */ + eeh_pe_state_mark(pe, reset_state); - /* Take three shots at resetting the bus */ + /* Make three attempts at resetting the bus */ for (i = 0; i < 3; i++) { - eeh_reset_pe_once(pe); + ret = eeh_pe_reset(pe, type); + if (ret) + break; - /* - * EEH_PE_ISOLATED is expected to be removed after - * BAR restore. - */ + ret = eeh_pe_reset(pe, EEH_RESET_DEACTIVATE); + if (ret) + break; + + /* Wait until the PE is in a functioning state */ state = eeh_ops->wait_state(pe, PCI_BUS_RESET_WAIT_MSEC); - if ((state & flags) == flags) { - ret = 0; - goto out; - } + if ((state & active_flags) == active_flags) + break; if (state < 0) { pr_warn("%s: Unrecoverable slot failure on PHB#%x-PE#%x", __func__, pe->phb->global_number, pe->addr); ret = -ENOTRECOVERABLE; - goto out; + break; } - /* We might run out of credits */ + /* Set error in case this is our last attempt */ ret = -EIO; pr_warn("%s: Failure %d resetting PHB#%x-PE#%x\n (%d)\n", __func__, state, pe->phb->global_number, pe->addr, (i + 1)); } -out: - eeh_pe_state_clear(pe, EEH_PE_RESET | EEH_PE_CFG_BLOCKED); + eeh_pe_state_clear(pe, reset_state); return ret; } @@ -1601,6 +1592,7 @@ static int eeh_pe_reenable_devices(struct eeh_pe *pe) return eeh_unfreeze_pe(pe, true); } + /** * eeh_pe_reset - Issue PE reset according to specified type * @pe: EEH PE diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index ac984d2038ab..555a47bd5d1a 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -588,7 +588,7 @@ int eeh_pe_reset_and_recover(struct eeh_pe *pe) eeh_pe_dev_traverse(pe, eeh_dev_save_state, NULL); /* Issue reset */ - ret = eeh_reset_pe(pe); + ret = eeh_pe_reset_full(pe); if (ret) { eeh_pe_state_clear(pe, EEH_PE_RECOVERING); return ret; @@ -659,7 +659,7 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus, * config accesses. So we prefer to block them. However, controlled * PCI config accesses initiated from EEH itself are allowed. */ - rc = eeh_reset_pe(pe); + rc = eeh_pe_reset_full(pe); if (rc) return rc; -- cgit v1.3.1 From 82de5797a26087c651a06de327e6aa9ea69d5d7f Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Mon, 21 Nov 2016 22:36:40 +0530 Subject: powerpc: Remove extraneous header from asm-prototypes.h Commit 03465f899bda ("powerpc: Use kprobe blacklist for exception handlers") removed __kprobes annotation from some of the prototypes, but left the kprobes header include directive unchanged. Remove it as it is no longer needed. Signed-off-by: Naveen N. Rao Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/asm-prototypes.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/asm-prototypes.h b/arch/powerpc/include/asm/asm-prototypes.h index d1492736d852..dfef1174663e 100644 --- a/arch/powerpc/include/asm/asm-prototypes.h +++ b/arch/powerpc/include/asm/asm-prototypes.h @@ -13,8 +13,6 @@ */ #include -#include - #include /* SMP */ -- cgit v1.3.1 From 6cc89bad60a673a24386f1ada83de8a068a78909 Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Mon, 21 Nov 2016 22:36:41 +0530 Subject: powerpc/kprobes: Invoke handlers directly Invoke the kprobe handlers directly rather than through notify_die(), to reduce path taken for handling kprobes. Similar to commit 6f6343f53d13 ("kprobes/x86: Call exception handlers directly from do_int3/do_debug"). While at it, rename post_kprobe_handler() to kprobe_post_handler() for more uniform naming. Reported-by: Masami Hiramatsu Signed-off-by: Naveen N. Rao Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/kprobes.h | 7 +++++++ arch/powerpc/kernel/kprobes.c | 29 +++++++---------------------- arch/powerpc/kernel/traps.c | 13 +++++++++++++ 3 files changed, 27 insertions(+), 22 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/kprobes.h b/arch/powerpc/include/asm/kprobes.h index 2c9759bdb63b..97b8c1f83453 100644 --- a/arch/powerpc/include/asm/kprobes.h +++ b/arch/powerpc/include/asm/kprobes.h @@ -32,6 +32,7 @@ #include #include +#ifdef CONFIG_KPROBES #define __ARCH_WANT_KPROBES_INSN_SLOT struct pt_regs; @@ -127,5 +128,11 @@ struct kprobe_ctlblk { extern int kprobe_exceptions_notify(struct notifier_block *self, unsigned long val, void *data); extern int kprobe_fault_handler(struct pt_regs *regs, int trapnr); +extern int kprobe_handler(struct pt_regs *regs); +extern int kprobe_post_handler(struct pt_regs *regs); +#else +static inline int kprobe_handler(struct pt_regs *regs) { return 0; } +static inline int kprobe_post_handler(struct pt_regs *regs) { return 0; } +#endif /* CONFIG_KPROBES */ #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_KPROBES_H */ diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c index 9479d8e360cf..ad108b842669 100644 --- a/arch/powerpc/kernel/kprobes.c +++ b/arch/powerpc/kernel/kprobes.c @@ -140,13 +140,16 @@ void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, regs->link = (unsigned long)kretprobe_trampoline; } -static int __kprobes kprobe_handler(struct pt_regs *regs) +int __kprobes kprobe_handler(struct pt_regs *regs) { struct kprobe *p; int ret = 0; unsigned int *addr = (unsigned int *)regs->nip; struct kprobe_ctlblk *kcb; + if (user_mode(regs)) + return 0; + /* * We don't want to be preempted for the entire * duration of kprobe processing @@ -359,12 +362,12 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p, * single-stepped a copy of the instruction. The address of this * copy is p->ainsn.insn. */ -static int __kprobes post_kprobe_handler(struct pt_regs *regs) +int __kprobes kprobe_post_handler(struct pt_regs *regs) { struct kprobe *cur = kprobe_running(); struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); - if (!cur) + if (!cur || user_mode(regs)) return 0; /* make sure we got here for instruction we have a kprobe on */ @@ -470,25 +473,7 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr) int __kprobes kprobe_exceptions_notify(struct notifier_block *self, unsigned long val, void *data) { - struct die_args *args = (struct die_args *)data; - int ret = NOTIFY_DONE; - - if (args->regs && user_mode(args->regs)) - return ret; - - switch (val) { - case DIE_BPT: - if (kprobe_handler(args->regs)) - ret = NOTIFY_STOP; - break; - case DIE_SSTEP: - if (post_kprobe_handler(args->regs)) - ret = NOTIFY_STOP; - break; - default: - break; - } - return ret; + return NOTIFY_DONE; } unsigned long arch_deref_entry_point(void *entry) diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 3eb20d16c67c..b922c6f4a2fe 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -64,6 +64,7 @@ #include #include #include +#include #if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC) int (*__debugger)(struct pt_regs *regs) __read_mostly; @@ -826,6 +827,9 @@ void single_step_exception(struct pt_regs *regs) clear_single_step(regs); + if (kprobe_post_handler(regs)) + return; + if (notify_die(DIE_SSTEP, "single_step", regs, 5, 5, SIGTRAP) == NOTIFY_STOP) goto bail; @@ -1179,6 +1183,9 @@ void program_check_exception(struct pt_regs *regs) if (debugger_bpt(regs)) goto bail; + if (kprobe_handler(regs)) + goto bail; + /* trap exception */ if (notify_die(DIE_BPT, "breakpoint", regs, 5, 5, SIGTRAP) == NOTIFY_STOP) @@ -1747,6 +1754,9 @@ void DebugException(struct pt_regs *regs, unsigned long debug_status) return; } + if (kprobe_post_handler(regs)) + return; + if (notify_die(DIE_SSTEP, "block_step", regs, 5, 5, SIGTRAP) == NOTIFY_STOP) { return; @@ -1761,6 +1771,9 @@ void DebugException(struct pt_regs *regs, unsigned long debug_status) /* Clear the instruction completion event */ mtspr(SPRN_DBSR, DBSR_IC); + if (kprobe_post_handler(regs)) + return; + if (notify_die(DIE_SSTEP, "single_step", regs, 5, 5, SIGTRAP) == NOTIFY_STOP) { return; -- cgit v1.3.1 From d0563a1297e234ed37f6b51c2e9321accebd1839 Mon Sep 17 00:00:00 2001 From: Pan Xinhui Date: Wed, 27 Apr 2016 17:16:45 +0800 Subject: powerpc: Implement {cmp}xchg for u8 and u16 Implement xchg{u8,u16}{local,relaxed}, and cmpxchg{u8,u16}{,local,acquire,relaxed}. It works on all ppc. remove volatile of first parameter in __cmpxchg_local and __cmpxchg Suggested-by: Peter Zijlstra (Intel) Signed-off-by: Pan Xinhui Acked-by: Boqun Feng Acked-by: Peter Zijlstra (Intel) Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/cmpxchg.h | 109 ++++++++++++++++++++++++++++++++++++- 1 file changed, 106 insertions(+), 3 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/cmpxchg.h b/arch/powerpc/include/asm/cmpxchg.h index 44efe739b6b9..c12f110261b2 100644 --- a/arch/powerpc/include/asm/cmpxchg.h +++ b/arch/powerpc/include/asm/cmpxchg.h @@ -7,6 +7,71 @@ #include #include +#ifdef __BIG_ENDIAN +#define BITOFF_CAL(size, off) ((sizeof(u32) - size - off) * BITS_PER_BYTE) +#else +#define BITOFF_CAL(size, off) (off * BITS_PER_BYTE) +#endif + +#define XCHG_GEN(type, sfx, cl) \ +static inline u32 __xchg_##type##sfx(void *p, u32 val) \ +{ \ + unsigned int prev, prev_mask, tmp, bitoff, off; \ + \ + off = (unsigned long)p % sizeof(u32); \ + bitoff = BITOFF_CAL(sizeof(type), off); \ + p -= off; \ + val <<= bitoff; \ + prev_mask = (u32)(type)-1 << bitoff; \ + \ + __asm__ __volatile__( \ +"1: lwarx %0,0,%3\n" \ +" andc %1,%0,%5\n" \ +" or %1,%1,%4\n" \ + PPC405_ERR77(0,%3) \ +" stwcx. %1,0,%3\n" \ +" bne- 1b\n" \ + : "=&r" (prev), "=&r" (tmp), "+m" (*(u32*)p) \ + : "r" (p), "r" (val), "r" (prev_mask) \ + : "cc", cl); \ + \ + return prev >> bitoff; \ +} + +#define CMPXCHG_GEN(type, sfx, br, br2, cl) \ +static inline \ +u32 __cmpxchg_##type##sfx(void *p, u32 old, u32 new) \ +{ \ + unsigned int prev, prev_mask, tmp, bitoff, off; \ + \ + off = (unsigned long)p % sizeof(u32); \ + bitoff = BITOFF_CAL(sizeof(type), off); \ + p -= off; \ + old <<= bitoff; \ + new <<= bitoff; \ + prev_mask = (u32)(type)-1 << bitoff; \ + \ + __asm__ __volatile__( \ + br \ +"1: lwarx %0,0,%3\n" \ +" and %1,%0,%6\n" \ +" cmpw 0,%1,%4\n" \ +" bne- 2f\n" \ +" andc %1,%0,%6\n" \ +" or %1,%1,%5\n" \ + PPC405_ERR77(0,%3) \ +" stwcx. %1,0,%3\n" \ +" bne- 1b\n" \ + br2 \ + "\n" \ +"2:" \ + : "=&r" (prev), "=&r" (tmp), "+m" (*(u32*)p) \ + : "r" (p), "r" (old), "r" (new), "r" (prev_mask) \ + : "cc", cl); \ + \ + return prev >> bitoff; \ +} + /* * Atomic exchange * @@ -14,6 +79,11 @@ * the previous value stored there. */ +XCHG_GEN(u8, _local, "memory"); +XCHG_GEN(u8, _relaxed, "cc"); +XCHG_GEN(u16, _local, "memory"); +XCHG_GEN(u16, _relaxed, "cc"); + static __always_inline unsigned long __xchg_u32_local(volatile void *p, unsigned long val) { @@ -85,9 +155,13 @@ __xchg_u64_relaxed(u64 *p, unsigned long val) #endif static __always_inline unsigned long -__xchg_local(volatile void *ptr, unsigned long x, unsigned int size) +__xchg_local(void *ptr, unsigned long x, unsigned int size) { switch (size) { + case 1: + return __xchg_u8_local(ptr, x); + case 2: + return __xchg_u16_local(ptr, x); case 4: return __xchg_u32_local(ptr, x); #ifdef CONFIG_PPC64 @@ -103,6 +177,10 @@ static __always_inline unsigned long __xchg_relaxed(void *ptr, unsigned long x, unsigned int size) { switch (size) { + case 1: + return __xchg_u8_relaxed(ptr, x); + case 2: + return __xchg_u16_relaxed(ptr, x); case 4: return __xchg_u32_relaxed(ptr, x); #ifdef CONFIG_PPC64 @@ -131,6 +209,15 @@ __xchg_relaxed(void *ptr, unsigned long x, unsigned int size) * and return the old value of *p. */ +CMPXCHG_GEN(u8, , PPC_ATOMIC_ENTRY_BARRIER, PPC_ATOMIC_EXIT_BARRIER, "memory"); +CMPXCHG_GEN(u8, _local, , , "memory"); +CMPXCHG_GEN(u8, _acquire, , PPC_ACQUIRE_BARRIER, "memory"); +CMPXCHG_GEN(u8, _relaxed, , , "cc"); +CMPXCHG_GEN(u16, , PPC_ATOMIC_ENTRY_BARRIER, PPC_ATOMIC_EXIT_BARRIER, "memory"); +CMPXCHG_GEN(u16, _local, , , "memory"); +CMPXCHG_GEN(u16, _acquire, , PPC_ACQUIRE_BARRIER, "memory"); +CMPXCHG_GEN(u16, _relaxed, , , "cc"); + static __always_inline unsigned long __cmpxchg_u32(volatile unsigned int *p, unsigned long old, unsigned long new) { @@ -312,10 +399,14 @@ __cmpxchg_u64_acquire(u64 *p, unsigned long old, unsigned long new) #endif static __always_inline unsigned long -__cmpxchg(volatile void *ptr, unsigned long old, unsigned long new, +__cmpxchg(void *ptr, unsigned long old, unsigned long new, unsigned int size) { switch (size) { + case 1: + return __cmpxchg_u8(ptr, old, new); + case 2: + return __cmpxchg_u16(ptr, old, new); case 4: return __cmpxchg_u32(ptr, old, new); #ifdef CONFIG_PPC64 @@ -328,10 +419,14 @@ __cmpxchg(volatile void *ptr, unsigned long old, unsigned long new, } static __always_inline unsigned long -__cmpxchg_local(volatile void *ptr, unsigned long old, unsigned long new, +__cmpxchg_local(void *ptr, unsigned long old, unsigned long new, unsigned int size) { switch (size) { + case 1: + return __cmpxchg_u8_local(ptr, old, new); + case 2: + return __cmpxchg_u16_local(ptr, old, new); case 4: return __cmpxchg_u32_local(ptr, old, new); #ifdef CONFIG_PPC64 @@ -348,6 +443,10 @@ __cmpxchg_relaxed(void *ptr, unsigned long old, unsigned long new, unsigned int size) { switch (size) { + case 1: + return __cmpxchg_u8_relaxed(ptr, old, new); + case 2: + return __cmpxchg_u16_relaxed(ptr, old, new); case 4: return __cmpxchg_u32_relaxed(ptr, old, new); #ifdef CONFIG_PPC64 @@ -364,6 +463,10 @@ __cmpxchg_acquire(void *ptr, unsigned long old, unsigned long new, unsigned int size) { switch (size) { + case 1: + return __cmpxchg_u8_acquire(ptr, old, new); + case 2: + return __cmpxchg_u16_acquire(ptr, old, new); case 4: return __cmpxchg_u32_acquire(ptr, old, new); #ifdef CONFIG_PPC64 -- cgit v1.3.1 From 6533b7c16ee5712041b4e324100550e02a9a5dda Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 22 Nov 2016 11:49:30 +0100 Subject: powerpc: Initial stack protector (-fstack-protector) support Partialy copied from commit c743f38013aef ("ARM: initial stack protector (-fstack-protector) support") This is the very basic stuff without the changing canary upon task switch yet. Just the Kconfig option and a constant canary value initialized at boot time. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman --- arch/powerpc/Kconfig | 1 + arch/powerpc/include/asm/stackprotector.h | 40 +++++++++++++++++++++++++++++++ arch/powerpc/kernel/Makefile | 4 ++++ arch/powerpc/kernel/process.c | 6 +++++ 4 files changed, 51 insertions(+) create mode 100644 arch/powerpc/include/asm/stackprotector.h (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 5ee3dbeab323..cfc0688a25e9 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -163,6 +163,7 @@ config PPC select HAVE_VIRT_CPU_ACCOUNTING select HAVE_ARCH_HARDENED_USERCOPY select HAVE_KERNEL_GZIP + select HAVE_CC_STACKPROTECTOR config GENERIC_CSUM def_bool CPU_LITTLE_ENDIAN diff --git a/arch/powerpc/include/asm/stackprotector.h b/arch/powerpc/include/asm/stackprotector.h new file mode 100644 index 000000000000..6720190eabec --- /dev/null +++ b/arch/powerpc/include/asm/stackprotector.h @@ -0,0 +1,40 @@ +/* + * GCC stack protector support. + * + * Stack protector works by putting predefined pattern at the start of + * the stack frame and verifying that it hasn't been overwritten when + * returning from the function. The pattern is called stack canary + * and gcc expects it to be defined by a global variable called + * "__stack_chk_guard" on PPC. This unfortunately means that on SMP + * we cannot have a different canary value per task. + */ + +#ifndef _ASM_STACKPROTECTOR_H +#define _ASM_STACKPROTECTOR_H + +#include +#include +#include + +extern unsigned long __stack_chk_guard; + +/* + * Initialize the stackprotector canary value. + * + * NOTE: this must only be called from functions that never return, + * and it must always be inlined. + */ +static __always_inline void boot_init_stack_canary(void) +{ + unsigned long canary; + + /* Try to get a semi random initial value. */ + get_random_bytes(&canary, sizeof(canary)); + canary ^= mftb(); + canary ^= LINUX_VERSION_CODE; + + current->stack_canary = canary; + __stack_chk_guard = current->stack_canary; +} + +#endif /* _ASM_STACKPROTECTOR_H */ diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 26b5a5e02e69..7a9f7164cc2e 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -19,6 +19,10 @@ CFLAGS_init.o += $(DISABLE_LATENT_ENTROPY_PLUGIN) CFLAGS_btext.o += $(DISABLE_LATENT_ENTROPY_PLUGIN) CFLAGS_prom.o += $(DISABLE_LATENT_ENTROPY_PLUGIN) +# -fstack-protector triggers protection checks in this code, +# but it is being used too early to link to meaningful stack_chk logic. +CFLAGS_prom_init.o += $(call cc-option, -fno-stack-protector) + ifdef CONFIG_FUNCTION_TRACER # Do not trace early boot code CFLAGS_REMOVE_cputable.o = -mno-sched-epilog $(CC_FLAGS_FTRACE) diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 5b62e8c36210..9da9a42595ce 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -64,6 +64,12 @@ #include #include +#ifdef CONFIG_CC_STACKPROTECTOR +#include +unsigned long __stack_chk_guard __read_mostly; +EXPORT_SYMBOL(__stack_chk_guard); +#endif + /* Transactional Memory debug */ #ifdef TM_DEBUG_SW #define TM_DEBUG(x...) printk(KERN_INFO x) -- cgit v1.3.1 From da58b23cb976ab83a80d358102e139afe94f0c56 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 24 Nov 2016 17:08:11 +1100 Subject: powerpc: Fix __cmpxchg() to take a volatile ptr again MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In commit d0563a1297e2 ("powerpc: Implement {cmp}xchg for u8 and u16") we removed the volatile from __cmpxchg(). This is leading to warnings such as: drivers/gpu/drm/drm_lock.c: In function ‘drm_lock_take’: arch/powerpc/include/asm/cmpxchg.h:484:37: warning: passing argument 1 of ‘__cmpxchg’ discards ‘volatile’ qualifier from pointer target (__typeof__(*(ptr))) __cmpxchg((ptr), (unsigned long)_o_, \ There doesn't seem to be consensus across architectures whether the argument is volatile or not, so at least for now put the volatile back. Fixes: d0563a1297e2 ("powerpc: Implement {cmp}xchg for u8 and u16") Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/cmpxchg.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/cmpxchg.h b/arch/powerpc/include/asm/cmpxchg.h index c12f110261b2..fc46b664c49e 100644 --- a/arch/powerpc/include/asm/cmpxchg.h +++ b/arch/powerpc/include/asm/cmpxchg.h @@ -14,7 +14,7 @@ #endif #define XCHG_GEN(type, sfx, cl) \ -static inline u32 __xchg_##type##sfx(void *p, u32 val) \ +static inline u32 __xchg_##type##sfx(volatile void *p, u32 val) \ { \ unsigned int prev, prev_mask, tmp, bitoff, off; \ \ @@ -40,7 +40,7 @@ static inline u32 __xchg_##type##sfx(void *p, u32 val) \ #define CMPXCHG_GEN(type, sfx, br, br2, cl) \ static inline \ -u32 __cmpxchg_##type##sfx(void *p, u32 old, u32 new) \ +u32 __cmpxchg_##type##sfx(volatile void *p, u32 old, u32 new) \ { \ unsigned int prev, prev_mask, tmp, bitoff, off; \ \ @@ -399,7 +399,7 @@ __cmpxchg_u64_acquire(u64 *p, unsigned long old, unsigned long new) #endif static __always_inline unsigned long -__cmpxchg(void *ptr, unsigned long old, unsigned long new, +__cmpxchg(volatile void *ptr, unsigned long old, unsigned long new, unsigned int size) { switch (size) { -- cgit v1.3.1 From bee8b3b56d1dfc4075254a61340ee3898a732e63 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Mon, 28 Nov 2016 11:46:58 +0530 Subject: powerpc/mm: Rename hugetlb-radix.h to hugetlb.h We will start moving some book3s specific hugetlb functions there. Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/book3s/64/hugetlb-radix.h | 29 ---------------------- arch/powerpc/include/asm/book3s/64/hugetlb.h | 29 ++++++++++++++++++++++ arch/powerpc/include/asm/hugetlb.h | 2 +- 3 files changed, 30 insertions(+), 30 deletions(-) delete mode 100644 arch/powerpc/include/asm/book3s/64/hugetlb-radix.h create mode 100644 arch/powerpc/include/asm/book3s/64/hugetlb.h (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/book3s/64/hugetlb-radix.h b/arch/powerpc/include/asm/book3s/64/hugetlb-radix.h deleted file mode 100644 index c45189aa7476..000000000000 --- a/arch/powerpc/include/asm/book3s/64/hugetlb-radix.h +++ /dev/null @@ -1,29 +0,0 @@ -#ifndef _ASM_POWERPC_BOOK3S_64_HUGETLB_RADIX_H -#define _ASM_POWERPC_BOOK3S_64_HUGETLB_RADIX_H -/* - * For radix we want generic code to handle hugetlb. But then if we want - * both hash and radix to be enabled together we need to workaround the - * limitations. - */ -void radix__flush_hugetlb_page(struct vm_area_struct *vma, unsigned long vmaddr); -void radix__local_flush_hugetlb_page(struct vm_area_struct *vma, unsigned long vmaddr); -extern unsigned long -radix__hugetlb_get_unmapped_area(struct file *file, unsigned long addr, - unsigned long len, unsigned long pgoff, - unsigned long flags); - -static inline int hstate_get_psize(struct hstate *hstate) -{ - unsigned long shift; - - shift = huge_page_shift(hstate); - if (shift == mmu_psize_defs[MMU_PAGE_2M].shift) - return MMU_PAGE_2M; - else if (shift == mmu_psize_defs[MMU_PAGE_1G].shift) - return MMU_PAGE_1G; - else { - WARN(1, "Wrong huge page shift\n"); - return mmu_virtual_psize; - } -} -#endif diff --git a/arch/powerpc/include/asm/book3s/64/hugetlb.h b/arch/powerpc/include/asm/book3s/64/hugetlb.h new file mode 100644 index 000000000000..499268045306 --- /dev/null +++ b/arch/powerpc/include/asm/book3s/64/hugetlb.h @@ -0,0 +1,29 @@ +#ifndef _ASM_POWERPC_BOOK3S_64_HUGETLB_H +#define _ASM_POWERPC_BOOK3S_64_HUGETLB_H +/* + * For radix we want generic code to handle hugetlb. But then if we want + * both hash and radix to be enabled together we need to workaround the + * limitations. + */ +void radix__flush_hugetlb_page(struct vm_area_struct *vma, unsigned long vmaddr); +void radix__local_flush_hugetlb_page(struct vm_area_struct *vma, unsigned long vmaddr); +extern unsigned long +radix__hugetlb_get_unmapped_area(struct file *file, unsigned long addr, + unsigned long len, unsigned long pgoff, + unsigned long flags); + +static inline int hstate_get_psize(struct hstate *hstate) +{ + unsigned long shift; + + shift = huge_page_shift(hstate); + if (shift == mmu_psize_defs[MMU_PAGE_2M].shift) + return MMU_PAGE_2M; + else if (shift == mmu_psize_defs[MMU_PAGE_1G].shift) + return MMU_PAGE_1G; + else { + WARN(1, "Wrong huge page shift\n"); + return mmu_virtual_psize; + } +} +#endif diff --git a/arch/powerpc/include/asm/hugetlb.h b/arch/powerpc/include/asm/hugetlb.h index c5517f463ec7..c03e0a3dd4d8 100644 --- a/arch/powerpc/include/asm/hugetlb.h +++ b/arch/powerpc/include/asm/hugetlb.h @@ -9,7 +9,7 @@ extern struct kmem_cache *hugepte_cache; #ifdef CONFIG_PPC_BOOK3S_64 -#include +#include /* * This should work for other subarchs too. But right now we use the * new format only for 64bit book3s -- cgit v1.3.1 From ccf17c8b5c8465750a6c514be9ef6f5b156b6c67 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Mon, 28 Nov 2016 11:46:59 +0530 Subject: powerpc/mm/hugetlb: Handle hugepage size supported by hash config W.r.t hash page table config, we support 16MB and 16GB as the hugepage size. Update the hstate_get_psize to handle 16M and 16G. Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/book3s/64/hugetlb.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/book3s/64/hugetlb.h b/arch/powerpc/include/asm/book3s/64/hugetlb.h index 499268045306..d9c283f95e05 100644 --- a/arch/powerpc/include/asm/book3s/64/hugetlb.h +++ b/arch/powerpc/include/asm/book3s/64/hugetlb.h @@ -21,6 +21,10 @@ static inline int hstate_get_psize(struct hstate *hstate) return MMU_PAGE_2M; else if (shift == mmu_psize_defs[MMU_PAGE_1G].shift) return MMU_PAGE_1G; + else if (shift == mmu_psize_defs[MMU_PAGE_16M].shift) + return MMU_PAGE_16M; + else if (shift == mmu_psize_defs[MMU_PAGE_16G].shift) + return MMU_PAGE_16G; else { WARN(1, "Wrong huge page shift\n"); return mmu_virtual_psize; -- cgit v1.3.1 From 049d567af209b093eefa9f26eae6e15226db3520 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Mon, 28 Nov 2016 11:47:00 +0530 Subject: powerpc/mm: Introduce _PAGE_LARGE software pte bits This patch adds a new software defined pte bit. We use the reserved fields of ISA 3.0 pte definition since we will only be using this on DD1 code paths. We can possibly look at removing this code later. The software bit will be used to differentiate between 64K/4K and 2M ptes. This helps in finding the page size mapping by a pte so that we can do efficient tlb flush. We don't support 1G hugetlb pages yet. So we add a DEBUG WARN_ON to catch wrong usage. Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/book3s/64/hugetlb.h | 20 ++++++++++++++++++++ arch/powerpc/include/asm/book3s/64/pgtable.h | 10 ++++++++++ arch/powerpc/include/asm/book3s/64/radix.h | 2 ++ 3 files changed, 32 insertions(+) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/book3s/64/hugetlb.h b/arch/powerpc/include/asm/book3s/64/hugetlb.h index d9c283f95e05..c62f14d0bec1 100644 --- a/arch/powerpc/include/asm/book3s/64/hugetlb.h +++ b/arch/powerpc/include/asm/book3s/64/hugetlb.h @@ -30,4 +30,24 @@ static inline int hstate_get_psize(struct hstate *hstate) return mmu_virtual_psize; } } + +#define arch_make_huge_pte arch_make_huge_pte +static inline pte_t arch_make_huge_pte(pte_t entry, struct vm_area_struct *vma, + struct page *page, int writable) +{ + unsigned long page_shift; + + if (!cpu_has_feature(CPU_FTR_POWER9_DD1)) + return entry; + + page_shift = huge_page_shift(hstate_vma(vma)); + /* + * We don't support 1G hugetlb pages yet. + */ + VM_WARN_ON(page_shift == mmu_psize_defs[MMU_PAGE_1G].shift); + if (page_shift == mmu_psize_defs[MMU_PAGE_2M].shift) + return __pte(pte_val(entry) | _PAGE_LARGE); + else + return entry; +} #endif diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index 9fd77f8794a0..ac66d05a7c01 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -26,6 +26,11 @@ #define _RPAGE_SW1 0x00800 #define _RPAGE_SW2 0x00400 #define _RPAGE_SW3 0x00200 +#define _RPAGE_RSV1 0x1000000000000000UL +#define _RPAGE_RSV2 0x0800000000000000UL +#define _RPAGE_RSV3 0x0400000000000000UL +#define _RPAGE_RSV4 0x0200000000000000UL + #ifdef CONFIG_MEM_SOFT_DIRTY #define _PAGE_SOFT_DIRTY _RPAGE_SW3 /* software: software dirty tracking */ #else @@ -33,6 +38,11 @@ #endif #define _PAGE_SPECIAL _RPAGE_SW2 /* software: special page */ +/* + * For P9 DD1 only, we need to track whether the pte's huge. + */ +#define _PAGE_LARGE _RPAGE_RSV1 + #define _PAGE_PTE (1ul << 62) /* distinguishes PTEs from pointers */ #define _PAGE_PRESENT (1ul << 63) /* pte contains a translation */ diff --git a/arch/powerpc/include/asm/book3s/64/radix.h b/arch/powerpc/include/asm/book3s/64/radix.h index 2a46dea8e1b1..d2c5c064e266 100644 --- a/arch/powerpc/include/asm/book3s/64/radix.h +++ b/arch/powerpc/include/asm/book3s/64/radix.h @@ -243,6 +243,8 @@ static inline int radix__pmd_trans_huge(pmd_t pmd) static inline pmd_t radix__pmd_mkhuge(pmd_t pmd) { + if (cpu_has_feature(CPU_FTR_POWER9_DD1)) + return __pmd(pmd_val(pmd) | _PAGE_PTE | _PAGE_LARGE); return __pmd(pmd_val(pmd) | _PAGE_PTE); } static inline void radix__pmdp_huge_split_prepare(struct vm_area_struct *vma, -- cgit v1.3.1 From 6d3a0379ebdc8e35662343f5359ac4589b79aec2 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Mon, 28 Nov 2016 11:47:01 +0530 Subject: powerpc/mm: Add radix__tlb_flush_pte_p9_dd1() Now that we have page size details encoded in pte using software pte bits, use that to find the page size needed for tlb flush. This function should only be used on P9 DD1, so give it a horrible name to make that clear. Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/book3s/64/tlbflush-radix.h | 2 ++ arch/powerpc/mm/tlb-radix.c | 18 ++++++++++++++++++ 2 files changed, 20 insertions(+) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h index a9e19cb2f7c5..cc7fbde4f53c 100644 --- a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h +++ b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h @@ -42,4 +42,6 @@ extern void radix__flush_tlb_lpid_va(unsigned long lpid, unsigned long gpa, unsigned long page_size); extern void radix__flush_tlb_lpid(unsigned long lpid); extern void radix__flush_tlb_all(void); +extern void radix__flush_tlb_pte_p9_dd1(unsigned long old_pte, struct mm_struct *mm, + unsigned long address); #endif diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/tlb-radix.c index bda8c43be78a..2822a8277f0b 100644 --- a/arch/powerpc/mm/tlb-radix.c +++ b/arch/powerpc/mm/tlb-radix.c @@ -424,3 +424,21 @@ void radix__flush_tlb_all(void) : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(0) : "memory"); asm volatile("eieio; tlbsync; ptesync": : :"memory"); } + +void radix__flush_tlb_pte_p9_dd1(unsigned long old_pte, struct mm_struct *mm, + unsigned long address) +{ + /* + * We track page size in pte only for DD1, So we can + * call this only on DD1. + */ + if (!cpu_has_feature(CPU_FTR_POWER9_DD1)) { + VM_WARN_ON(1); + return; + } + + if (old_pte & _PAGE_LARGE) + radix__flush_tlb_page_psize(mm, address, MMU_PAGE_2M); + else + radix__flush_tlb_page_psize(mm, address, mmu_virtual_psize); +} -- cgit v1.3.1 From b3603e174fc81598e8b060d5e9aafe19ee6e65cf Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Mon, 28 Nov 2016 11:47:02 +0530 Subject: powerpc/mm: update radix__ptep_set_access_flag to not do full mm tlb flush When we are updating a pte, we just need to flush the tlb mapping that pte. Right now we do a full mm flush because we don't track the page size. Now that we have page size details in pte use that to do the optimized flush Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/book3s/32/pgtable.h | 3 ++- arch/powerpc/include/asm/book3s/64/pgtable.h | 5 +++-- arch/powerpc/include/asm/book3s/64/radix.h | 11 +++-------- arch/powerpc/include/asm/nohash/32/pgtable.h | 3 ++- arch/powerpc/include/asm/nohash/64/pgtable.h | 3 ++- arch/powerpc/mm/pgtable-book3s64.c | 3 ++- arch/powerpc/mm/pgtable.c | 2 +- 7 files changed, 15 insertions(+), 15 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h index 6b8b2d57fdc8..dc58980f3ad9 100644 --- a/arch/powerpc/include/asm/book3s/32/pgtable.h +++ b/arch/powerpc/include/asm/book3s/32/pgtable.h @@ -224,7 +224,8 @@ static inline void huge_ptep_set_wrprotect(struct mm_struct *mm, static inline void __ptep_set_access_flags(struct mm_struct *mm, - pte_t *ptep, pte_t entry) + pte_t *ptep, pte_t entry, + unsigned long address) { unsigned long set = pte_val(entry) & (_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_RW | _PAGE_EXEC); diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index ac66d05a7c01..bbea0040320a 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -578,10 +578,11 @@ static inline bool check_pte_access(unsigned long access, unsigned long ptev) */ static inline void __ptep_set_access_flags(struct mm_struct *mm, - pte_t *ptep, pte_t entry) + pte_t *ptep, pte_t entry, + unsigned long address) { if (radix_enabled()) - return radix__ptep_set_access_flags(mm, ptep, entry); + return radix__ptep_set_access_flags(mm, ptep, entry, address); return hash__ptep_set_access_flags(ptep, entry); } diff --git a/arch/powerpc/include/asm/book3s/64/radix.h b/arch/powerpc/include/asm/book3s/64/radix.h index d2c5c064e266..36f636911ade 100644 --- a/arch/powerpc/include/asm/book3s/64/radix.h +++ b/arch/powerpc/include/asm/book3s/64/radix.h @@ -167,7 +167,8 @@ static inline unsigned long radix__pte_update(struct mm_struct *mm, * function doesn't need to invalidate tlb. */ static inline void radix__ptep_set_access_flags(struct mm_struct *mm, - pte_t *ptep, pte_t entry) + pte_t *ptep, pte_t entry, + unsigned long address) { unsigned long set = pte_val(entry) & (_PAGE_DIRTY | _PAGE_ACCESSED | @@ -183,13 +184,7 @@ static inline void radix__ptep_set_access_flags(struct mm_struct *mm, * new value of pte */ new_pte = old_pte | set; - - /* - * For now let's do heavy pid flush - * radix__flush_tlb_page_psize(mm, addr, mmu_virtual_psize); - */ - radix__flush_tlb_mm(mm); - + radix__flush_tlb_pte_p9_dd1(old_pte, mm, address); __radix_pte_update(ptep, 0, new_pte); } else __radix_pte_update(ptep, 0, set); diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h index c219ef7be53b..65073fbc6707 100644 --- a/arch/powerpc/include/asm/nohash/32/pgtable.h +++ b/arch/powerpc/include/asm/nohash/32/pgtable.h @@ -268,7 +268,8 @@ static inline void huge_ptep_set_wrprotect(struct mm_struct *mm, static inline void __ptep_set_access_flags(struct mm_struct *mm, - pte_t *ptep, pte_t entry) + pte_t *ptep, pte_t entry, + unsigned long address) { unsigned long set = pte_val(entry) & (_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_RW | _PAGE_EXEC); diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h b/arch/powerpc/include/asm/nohash/64/pgtable.h index e9e540a804fc..53a41b06a7b9 100644 --- a/arch/powerpc/include/asm/nohash/64/pgtable.h +++ b/arch/powerpc/include/asm/nohash/64/pgtable.h @@ -289,7 +289,8 @@ static inline void pte_clear(struct mm_struct *mm, unsigned long addr, * function doesn't need to flush the hash entry */ static inline void __ptep_set_access_flags(struct mm_struct *mm, - pte_t *ptep, pte_t entry) + pte_t *ptep, pte_t entry, + unsigned long address) { unsigned long bits = pte_val(entry) & (_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_RW | _PAGE_EXEC); diff --git a/arch/powerpc/mm/pgtable-book3s64.c b/arch/powerpc/mm/pgtable-book3s64.c index f4f437cbabf1..ebf9782bacf9 100644 --- a/arch/powerpc/mm/pgtable-book3s64.c +++ b/arch/powerpc/mm/pgtable-book3s64.c @@ -35,7 +35,8 @@ int pmdp_set_access_flags(struct vm_area_struct *vma, unsigned long address, #endif changed = !pmd_same(*(pmdp), entry); if (changed) { - __ptep_set_access_flags(vma->vm_mm, pmdp_ptep(pmdp), pmd_pte(entry)); + __ptep_set_access_flags(vma->vm_mm, pmdp_ptep(pmdp), + pmd_pte(entry), address); flush_pmd_tlb_range(vma, address, address + HPAGE_PMD_SIZE); } return changed; diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c index 911fdfb63ec1..cb39c8bd2436 100644 --- a/arch/powerpc/mm/pgtable.c +++ b/arch/powerpc/mm/pgtable.c @@ -224,7 +224,7 @@ int ptep_set_access_flags(struct vm_area_struct *vma, unsigned long address, if (changed) { if (!is_vm_hugetlb_page(vma)) assert_pte_locked(vma->vm_mm, address); - __ptep_set_access_flags(vma->vm_mm, ptep, entry); + __ptep_set_access_flags(vma->vm_mm, ptep, entry, address); flush_tlb_page(vma, address); } return changed; -- cgit v1.3.1 From e58d1cf24309b3b58c7cff7ea1f873e498fdaa39 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Mon, 28 Nov 2016 11:47:03 +0530 Subject: powerpc/mm: update radix__pte_update to not do full mm tlb flush When we are updating a pte, we just need to flush the tlb mapping that pte. Right now we do a full mm flush because we don't track page size. Now that we have page size details in pte use that to do the optimized flush Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/book3s/64/radix.h | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/book3s/64/radix.h b/arch/powerpc/include/asm/book3s/64/radix.h index 36f636911ade..f4066cf31b3e 100644 --- a/arch/powerpc/include/asm/book3s/64/radix.h +++ b/arch/powerpc/include/asm/book3s/64/radix.h @@ -145,13 +145,7 @@ static inline unsigned long radix__pte_update(struct mm_struct *mm, * new value of pte */ new_pte = (old_pte | set) & ~clr; - - /* - * For now let's do heavy pid flush - * radix__flush_tlb_page_psize(mm, addr, mmu_virtual_psize); - */ - radix__flush_tlb_mm(mm); - + radix__flush_tlb_pte_p9_dd1(old_pte, mm, addr); __radix_pte_update(ptep, 0, new_pte); } else old_pte = __radix_pte_update(ptep, clr, set); -- cgit v1.3.1 From d522ae1e49a4f0bcbd0efa0a3afb2b8d52d1fbd6 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Mon, 28 Nov 2016 11:47:04 +0530 Subject: powerpc/mm: Batch tlb flush when invalidating pte entries This will improve the task exit case, by batching tlb invalidates. Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/book3s/64/radix.h | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/book3s/64/radix.h b/arch/powerpc/include/asm/book3s/64/radix.h index f4066cf31b3e..b4d1302387a3 100644 --- a/arch/powerpc/include/asm/book3s/64/radix.h +++ b/arch/powerpc/include/asm/book3s/64/radix.h @@ -140,13 +140,20 @@ static inline unsigned long radix__pte_update(struct mm_struct *mm, unsigned long new_pte; old_pte = __radix_pte_update(ptep, ~0, 0); - asm volatile("ptesync" : : : "memory"); /* * new value of pte */ new_pte = (old_pte | set) & ~clr; - radix__flush_tlb_pte_p9_dd1(old_pte, mm, addr); - __radix_pte_update(ptep, 0, new_pte); + /* + * If we are trying to clear the pte, we can skip + * the below sequence and batch the tlb flush. The + * tlb flush batching is done by mmu gather code + */ + if (new_pte) { + asm volatile("ptesync" : : : "memory"); + radix__flush_tlb_pte_p9_dd1(old_pte, mm, addr); + __radix_pte_update(ptep, 0, new_pte); + } } else old_pte = __radix_pte_update(ptep, clr, set); asm volatile("ptesync" : : : "memory"); -- cgit v1.3.1 From da6658859b9c734fee36570f3a7d51764c6c3838 Mon Sep 17 00:00:00 2001 From: Thiago Jung Bauermann Date: Tue, 29 Nov 2016 23:45:50 +1100 Subject: powerpc: Change places using CONFIG_KEXEC to use CONFIG_KEXEC_CORE instead. Commit 2965faa5e03d ("kexec: split kexec_load syscall from kexec core code") introduced CONFIG_KEXEC_CORE so that CONFIG_KEXEC means whether the kexec_load system call should be compiled-in and CONFIG_KEXEC_FILE means whether the kexec_file_load system call should be compiled-in. These options can be set independently from each other. Since until now powerpc only supported kexec_load, CONFIG_KEXEC and CONFIG_KEXEC_CORE were synonyms. That is not the case anymore, so we need to make a distinction. Almost all places where CONFIG_KEXEC was being used should be using CONFIG_KEXEC_CORE instead, since kexec_file_load also needs that code compiled in. Signed-off-by: Thiago Jung Bauermann Signed-off-by: Michael Ellerman --- arch/powerpc/Kconfig | 2 +- arch/powerpc/include/asm/debug.h | 2 +- arch/powerpc/include/asm/kexec.h | 6 +++--- arch/powerpc/include/asm/machdep.h | 4 ++-- arch/powerpc/include/asm/smp.h | 2 +- arch/powerpc/kernel/Makefile | 4 ++-- arch/powerpc/kernel/head_64.S | 2 +- arch/powerpc/kernel/misc_32.S | 2 +- arch/powerpc/kernel/misc_64.S | 6 +++--- arch/powerpc/kernel/prom.c | 2 +- arch/powerpc/kernel/setup_64.c | 4 ++-- arch/powerpc/kernel/smp.c | 6 +++--- arch/powerpc/kernel/traps.c | 2 +- arch/powerpc/platforms/85xx/corenet_generic.c | 2 +- arch/powerpc/platforms/85xx/smp.c | 8 ++++---- arch/powerpc/platforms/cell/spu_base.c | 2 +- arch/powerpc/platforms/powernv/setup.c | 6 +++--- arch/powerpc/platforms/ps3/setup.c | 4 ++-- arch/powerpc/platforms/pseries/Makefile | 2 +- arch/powerpc/platforms/pseries/setup.c | 4 ++-- 20 files changed, 36 insertions(+), 36 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index cfc0688a25e9..a063c7235fa9 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -508,7 +508,7 @@ config CRASH_DUMP config FA_DUMP bool "Firmware-assisted dump" - depends on PPC64 && PPC_RTAS && CRASH_DUMP && KEXEC + depends on PPC64 && PPC_RTAS && CRASH_DUMP && KEXEC_CORE help A robust mechanism to get reliable kernel crash dump with assistance from firmware. This approach does not use kexec, diff --git a/arch/powerpc/include/asm/debug.h b/arch/powerpc/include/asm/debug.h index a954e4975049..86308f177f2d 100644 --- a/arch/powerpc/include/asm/debug.h +++ b/arch/powerpc/include/asm/debug.h @@ -10,7 +10,7 @@ struct pt_regs; extern struct dentry *powerpc_debugfs_root; -#if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC) +#if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC_CORE) extern int (*__debugger)(struct pt_regs *regs); extern int (*__debugger_ipi)(struct pt_regs *regs); diff --git a/arch/powerpc/include/asm/kexec.h b/arch/powerpc/include/asm/kexec.h index a46f5f45570c..eca2f975bf44 100644 --- a/arch/powerpc/include/asm/kexec.h +++ b/arch/powerpc/include/asm/kexec.h @@ -53,7 +53,7 @@ typedef void (*crash_shutdown_t)(void); -#ifdef CONFIG_KEXEC +#ifdef CONFIG_KEXEC_CORE /* * This function is responsible for capturing register states if coming @@ -91,7 +91,7 @@ static inline bool kdump_in_progress(void) return crashing_cpu >= 0; } -#else /* !CONFIG_KEXEC */ +#else /* !CONFIG_KEXEC_CORE */ static inline void crash_kexec_secondary(struct pt_regs *regs) { } static inline int overlaps_crashkernel(unsigned long start, unsigned long size) @@ -116,7 +116,7 @@ static inline bool kdump_in_progress(void) return false; } -#endif /* CONFIG_KEXEC */ +#endif /* CONFIG_KEXEC_CORE */ #endif /* ! __ASSEMBLY__ */ #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_KEXEC_H */ diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h index e02cbc6a6c70..5011b69107a7 100644 --- a/arch/powerpc/include/asm/machdep.h +++ b/arch/powerpc/include/asm/machdep.h @@ -183,7 +183,7 @@ struct machdep_calls { */ void (*machine_shutdown)(void); -#ifdef CONFIG_KEXEC +#ifdef CONFIG_KEXEC_CORE void (*kexec_cpu_down)(int crash_shutdown, int secondary); /* Called to do what every setup is needed on image and the @@ -198,7 +198,7 @@ struct machdep_calls { * no return. */ void (*machine_kexec)(struct kimage *image); -#endif /* CONFIG_KEXEC */ +#endif /* CONFIG_KEXEC_CORE */ #ifdef CONFIG_SUSPEND /* These are called to disable and enable, respectively, IRQs when diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h index 0d02c11dc331..32db16d2e7ad 100644 --- a/arch/powerpc/include/asm/smp.h +++ b/arch/powerpc/include/asm/smp.h @@ -176,7 +176,7 @@ static inline void set_hard_smp_processor_id(int cpu, int phys) #endif /* !CONFIG_SMP */ #endif /* !CONFIG_PPC64 */ -#if defined(CONFIG_PPC64) && (defined(CONFIG_SMP) || defined(CONFIG_KEXEC)) +#if defined(CONFIG_PPC64) && (defined(CONFIG_SMP) || defined(CONFIG_KEXEC_CORE)) extern void smp_release_cpus(void); #else static inline void smp_release_cpus(void) { }; diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 7a9f7164cc2e..68a6a3c8322e 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -109,7 +109,7 @@ pci64-$(CONFIG_PPC64) += pci_dn.o pci-hotplug.o isa-bridge.o obj-$(CONFIG_PCI) += pci_$(BITS).o $(pci64-y) \ pci-common.o pci_of_scan.o obj-$(CONFIG_PCI_MSI) += msi.o -obj-$(CONFIG_KEXEC) += machine_kexec.o crash.o \ +obj-$(CONFIG_KEXEC_CORE) += machine_kexec.o crash.o \ machine_kexec_$(BITS).o obj-$(CONFIG_AUDIT) += audit.o obj64-$(CONFIG_AUDIT) += compat_audit.o @@ -130,7 +130,7 @@ obj64-$(CONFIG_PPC_TRANSACTIONAL_MEM) += tm.o obj-$(CONFIG_PPC64) += $(obj64-y) obj-$(CONFIG_PPC32) += $(obj32-y) -ifneq ($(CONFIG_XMON)$(CONFIG_KEXEC),) +ifneq ($(CONFIG_XMON)$(CONFIG_KEXEC_CORE),) obj-y += ppc_save_regs.o endif diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index bdb4612491a9..1dc5eae2ced3 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -160,7 +160,7 @@ __secondary_hold: cmpdi 0,r12,0 beq 100b -#if defined(CONFIG_SMP) || defined(CONFIG_KEXEC) +#if defined(CONFIG_SMP) || defined(CONFIG_KEXEC_CORE) #ifdef CONFIG_PPC_BOOK3E tovirt(r12,r12) #endif diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S index 93cf7a5846a6..1863324c6a3c 100644 --- a/arch/powerpc/kernel/misc_32.S +++ b/arch/powerpc/kernel/misc_32.S @@ -614,7 +614,7 @@ _GLOBAL(start_secondary_resume) _GLOBAL(__main) blr -#ifdef CONFIG_KEXEC +#ifdef CONFIG_KEXEC_CORE /* * Must be relocatable PIC code callable as a C function. */ diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S index 4f178671f230..32be2a844947 100644 --- a/arch/powerpc/kernel/misc_64.S +++ b/arch/powerpc/kernel/misc_64.S @@ -478,7 +478,7 @@ _GLOBAL(kexec_wait) addi r5,r5,kexec_flag-1b 99: HMT_LOW -#ifdef CONFIG_KEXEC /* use no memory without kexec */ +#ifdef CONFIG_KEXEC_CORE /* use no memory without kexec */ lwz r4,0(r5) cmpwi 0,r4,0 beq 99b @@ -503,7 +503,7 @@ kexec_flag: .long 0 -#ifdef CONFIG_KEXEC +#ifdef CONFIG_KEXEC_CORE #ifdef CONFIG_PPC_BOOK3E /* * BOOK3E has no real MMU mode, so we have to setup the initial TLB @@ -716,4 +716,4 @@ _GLOBAL(kexec_sequence) mtlr 4 li r5,0 blr /* image->start(physid, image->start, 0); */ -#endif /* CONFIG_KEXEC */ +#endif /* CONFIG_KEXEC_CORE */ diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index a7b87b6b4ef4..f5d399e46193 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -428,7 +428,7 @@ static int __init early_init_dt_scan_chosen_ppc(unsigned long node, tce_alloc_end = *lprop; #endif -#ifdef CONFIG_KEXEC +#ifdef CONFIG_KEXEC_CORE lprop = of_get_flat_dt_prop(node, "linux,crashkernel-base", NULL); if (lprop) crashk_res.start = *lprop; diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 7ac8e6eaab5b..c3e129080c31 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -346,7 +346,7 @@ void early_setup_secondary(void) #endif /* CONFIG_SMP */ -#if defined(CONFIG_SMP) || defined(CONFIG_KEXEC) +#if defined(CONFIG_SMP) || defined(CONFIG_KEXEC_CORE) static bool use_spinloop(void) { if (!IS_ENABLED(CONFIG_PPC_BOOK3E)) @@ -391,7 +391,7 @@ void smp_release_cpus(void) DBG(" <- smp_release_cpus()\n"); } -#endif /* CONFIG_SMP || CONFIG_KEXEC */ +#endif /* CONFIG_SMP || CONFIG_KEXEC_CORE */ /* * Initialize some remaining members of the ppc64_caches and systemcfg diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 9c6f3fd58059..893bd7f79be6 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -193,7 +193,7 @@ int smp_request_message_ipi(int virq, int msg) if (msg < 0 || msg > PPC_MSG_DEBUGGER_BREAK) { return -EINVAL; } -#if !defined(CONFIG_DEBUGGER) && !defined(CONFIG_KEXEC) +#if !defined(CONFIG_DEBUGGER) && !defined(CONFIG_KEXEC_CORE) if (msg == PPC_MSG_DEBUGGER_BREAK) { return 1; } @@ -325,7 +325,7 @@ void tick_broadcast(const struct cpumask *mask) } #endif -#if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC) +#if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC_CORE) void smp_send_debugger_break(void) { int cpu; @@ -340,7 +340,7 @@ void smp_send_debugger_break(void) } #endif -#ifdef CONFIG_KEXEC +#ifdef CONFIG_KEXEC_CORE void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *)) { crash_ipi_function_ptr = crash_ipi_callback; diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index b922c6f4a2fe..5faaed2f2144 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -66,7 +66,7 @@ #include #include -#if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC) +#if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC_CORE) int (*__debugger)(struct pt_regs *regs) __read_mostly; int (*__debugger_ipi)(struct pt_regs *regs) __read_mostly; int (*__debugger_bpt)(struct pt_regs *regs) __read_mostly; diff --git a/arch/powerpc/platforms/85xx/corenet_generic.c b/arch/powerpc/platforms/85xx/corenet_generic.c index 1179115a4b5c..3803b0addf65 100644 --- a/arch/powerpc/platforms/85xx/corenet_generic.c +++ b/arch/powerpc/platforms/85xx/corenet_generic.c @@ -220,7 +220,7 @@ define_machine(corenet_generic) { * * Likewise, problems have been seen with kexec when coreint is enabled. */ -#if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_KEXEC) +#if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_KEXEC_CORE) .get_irq = mpic_get_irq, #else .get_irq = mpic_get_coreint_irq, diff --git a/arch/powerpc/platforms/85xx/smp.c b/arch/powerpc/platforms/85xx/smp.c index fe9f19e5e935..a83a6d26090d 100644 --- a/arch/powerpc/platforms/85xx/smp.c +++ b/arch/powerpc/platforms/85xx/smp.c @@ -349,13 +349,13 @@ struct smp_ops_t smp_85xx_ops = { .cpu_disable = generic_cpu_disable, .cpu_die = generic_cpu_die, #endif -#if defined(CONFIG_KEXEC) && !defined(CONFIG_PPC64) +#if defined(CONFIG_KEXEC_CORE) && !defined(CONFIG_PPC64) .give_timebase = smp_generic_give_timebase, .take_timebase = smp_generic_take_timebase, #endif }; -#ifdef CONFIG_KEXEC +#ifdef CONFIG_KEXEC_CORE #ifdef CONFIG_PPC32 atomic_t kexec_down_cpus = ATOMIC_INIT(0); @@ -458,7 +458,7 @@ static void mpc85xx_smp_machine_kexec(struct kimage *image) default_machine_kexec(image); } -#endif /* CONFIG_KEXEC */ +#endif /* CONFIG_KEXEC_CORE */ static void smp_85xx_basic_setup(int cpu_nr) { @@ -512,7 +512,7 @@ void __init mpc85xx_smp_init(void) #endif smp_ops = &smp_85xx_ops; -#ifdef CONFIG_KEXEC +#ifdef CONFIG_KEXEC_CORE ppc_md.kexec_cpu_down = mpc85xx_smp_kexec_cpu_down; ppc_md.machine_kexec = mpc85xx_smp_machine_kexec; #endif diff --git a/arch/powerpc/platforms/cell/spu_base.c b/arch/powerpc/platforms/cell/spu_base.c index e84d8fbc2e21..96c2b8a40630 100644 --- a/arch/powerpc/platforms/cell/spu_base.c +++ b/arch/powerpc/platforms/cell/spu_base.c @@ -676,7 +676,7 @@ static ssize_t spu_stat_show(struct device *dev, static DEVICE_ATTR(stat, 0444, spu_stat_show, NULL); -#ifdef CONFIG_KEXEC +#ifdef CONFIG_KEXEC_CORE struct crash_spu_info { struct spu *spu; diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c index efe8b6bb168b..d50c7d99baaf 100644 --- a/arch/powerpc/platforms/powernv/setup.c +++ b/arch/powerpc/platforms/powernv/setup.c @@ -174,7 +174,7 @@ static void pnv_shutdown(void) opal_shutdown(); } -#ifdef CONFIG_KEXEC +#ifdef CONFIG_KEXEC_CORE static void pnv_kexec_wait_secondaries_down(void) { int my_cpu, i, notified = -1; @@ -245,7 +245,7 @@ static void pnv_kexec_cpu_down(int crash_shutdown, int secondary) opal_reinit_cpus(OPAL_REINIT_CPUS_HILE_BE); } } -#endif /* CONFIG_KEXEC */ +#endif /* CONFIG_KEXEC_CORE */ #ifdef CONFIG_MEMORY_HOTPLUG_SPARSE static unsigned long pnv_memory_block_size(void) @@ -311,7 +311,7 @@ define_machine(powernv) { .machine_shutdown = pnv_shutdown, .power_save = NULL, .calibrate_decr = generic_calibrate_decr, -#ifdef CONFIG_KEXEC +#ifdef CONFIG_KEXEC_CORE .kexec_cpu_down = pnv_kexec_cpu_down, #endif #ifdef CONFIG_MEMORY_HOTPLUG_SPARSE diff --git a/arch/powerpc/platforms/ps3/setup.c b/arch/powerpc/platforms/ps3/setup.c index 3a487e7f4a5e..6244bc849469 100644 --- a/arch/powerpc/platforms/ps3/setup.c +++ b/arch/powerpc/platforms/ps3/setup.c @@ -250,7 +250,7 @@ static int __init ps3_probe(void) return 1; } -#if defined(CONFIG_KEXEC) +#if defined(CONFIG_KEXEC_CORE) static void ps3_kexec_cpu_down(int crash_shutdown, int secondary) { int cpu = smp_processor_id(); @@ -276,7 +276,7 @@ define_machine(ps3) { .progress = ps3_progress, .restart = ps3_restart, .halt = ps3_halt, -#if defined(CONFIG_KEXEC) +#if defined(CONFIG_KEXEC_CORE) .kexec_cpu_down = ps3_kexec_cpu_down, #endif }; diff --git a/arch/powerpc/platforms/pseries/Makefile b/arch/powerpc/platforms/pseries/Makefile index 942fe116a8ba..8f4ba089e802 100644 --- a/arch/powerpc/platforms/pseries/Makefile +++ b/arch/powerpc/platforms/pseries/Makefile @@ -8,7 +8,7 @@ obj-y := lpar.o hvCall.o nvram.o reconfig.o \ pci.o pci_dlpar.o eeh_pseries.o msi.o obj-$(CONFIG_SMP) += smp.o obj-$(CONFIG_SCANLOG) += scanlog.o -obj-$(CONFIG_KEXEC) += kexec.o +obj-$(CONFIG_KEXEC_CORE) += kexec.o obj-$(CONFIG_PSERIES_ENERGY) += pseries_energy.o obj-$(CONFIG_HOTPLUG_CPU) += hotplug-cpu.o diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 97aa3f332f24..7736352f7279 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -367,7 +367,7 @@ void pseries_disable_reloc_on_exc(void) } EXPORT_SYMBOL(pseries_disable_reloc_on_exc); -#ifdef CONFIG_KEXEC +#ifdef CONFIG_KEXEC_CORE static void pSeries_machine_kexec(struct kimage *image) { if (firmware_has_feature(FW_FEATURE_SET_MODE)) @@ -725,7 +725,7 @@ define_machine(pseries) { .progress = rtas_progress, .system_reset_exception = pSeries_system_reset_exception, .machine_check_exception = pSeries_machine_check_exception, -#ifdef CONFIG_KEXEC +#ifdef CONFIG_KEXEC_CORE .machine_kexec = pSeries_machine_kexec, .kexec_cpu_down = pseries_kexec_cpu_down, #endif -- cgit v1.3.1 From a0458284f0625ade5eff2118bab89b2d4bbacc3e Mon Sep 17 00:00:00 2001 From: Thiago Jung Bauermann Date: Tue, 29 Nov 2016 23:45:51 +1100 Subject: powerpc: Add support code for kexec_file_load() This patch adds the support code needed for implementing kexec_file_load() on powerpc. This consists of functions to load the ELF kernel, either big or little endian, and setup the purgatory enviroment which switches from the first kernel to the second kernel. None of this code is built yet, as it depends on CONFIG_KEXEC_FILE which we have not yet defined. Although we could define CONFIG_KEXEC_FILE in this patch, we'd then have a window in history where the kconfig symbol is present but the syscall is not, which would be awkward. Signed-off-by: Josh Sklar Signed-off-by: Thiago Jung Bauermann Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/kexec.h | 10 + arch/powerpc/kernel/Makefile | 1 + arch/powerpc/kernel/kexec_elf_64.c | 663 ++++++++++++++++++++++++++++ arch/powerpc/kernel/machine_kexec_file_64.c | 338 ++++++++++++++ 4 files changed, 1012 insertions(+) create mode 100644 arch/powerpc/kernel/kexec_elf_64.c create mode 100644 arch/powerpc/kernel/machine_kexec_file_64.c (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/kexec.h b/arch/powerpc/include/asm/kexec.h index eca2f975bf44..6c3b71502fbc 100644 --- a/arch/powerpc/include/asm/kexec.h +++ b/arch/powerpc/include/asm/kexec.h @@ -91,6 +91,16 @@ static inline bool kdump_in_progress(void) return crashing_cpu >= 0; } +#ifdef CONFIG_KEXEC_FILE +extern struct kexec_file_ops kexec_elf64_ops; + +int setup_purgatory(struct kimage *image, const void *slave_code, + const void *fdt, unsigned long kernel_load_addr, + unsigned long fdt_load_addr); +int setup_new_fdt(void *fdt, unsigned long initrd_load_addr, + unsigned long initrd_len, const char *cmdline); +#endif /* CONFIG_KEXEC_FILE */ + #else /* !CONFIG_KEXEC_CORE */ static inline void crash_kexec_secondary(struct pt_regs *regs) { } diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 68a6a3c8322e..a3a6047fd395 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -111,6 +111,7 @@ obj-$(CONFIG_PCI) += pci_$(BITS).o $(pci64-y) \ obj-$(CONFIG_PCI_MSI) += msi.o obj-$(CONFIG_KEXEC_CORE) += machine_kexec.o crash.o \ machine_kexec_$(BITS).o +obj-$(CONFIG_KEXEC_FILE) += machine_kexec_file_$(BITS).o kexec_elf_$(BITS).o obj-$(CONFIG_AUDIT) += audit.o obj64-$(CONFIG_AUDIT) += compat_audit.o diff --git a/arch/powerpc/kernel/kexec_elf_64.c b/arch/powerpc/kernel/kexec_elf_64.c new file mode 100644 index 000000000000..6acffd34a70f --- /dev/null +++ b/arch/powerpc/kernel/kexec_elf_64.c @@ -0,0 +1,663 @@ +/* + * Load ELF vmlinux file for the kexec_file_load syscall. + * + * Copyright (C) 2004 Adam Litke (agl@us.ibm.com) + * Copyright (C) 2004 IBM Corp. + * Copyright (C) 2005 R Sharada (sharada@in.ibm.com) + * Copyright (C) 2006 Mohan Kumar M (mohan@in.ibm.com) + * Copyright (C) 2016 IBM Corporation + * + * Based on kexec-tools' kexec-elf-exec.c and kexec-elf-ppc64.c. + * Heavily modified for the kernel by + * Thiago Jung Bauermann . + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation (version 2 of the License). + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#define pr_fmt(fmt) "kexec_elf: " fmt + +#include +#include +#include +#include +#include +#include +#include + +#define PURGATORY_STACK_SIZE (16 * 1024) + +#define elf_addr_to_cpu elf64_to_cpu + +#ifndef Elf_Rel +#define Elf_Rel Elf64_Rel +#endif /* Elf_Rel */ + +struct elf_info { + /* + * Where the ELF binary contents are kept. + * Memory managed by the user of the struct. + */ + const char *buffer; + + const struct elfhdr *ehdr; + const struct elf_phdr *proghdrs; + struct elf_shdr *sechdrs; +}; + +static inline bool elf_is_elf_file(const struct elfhdr *ehdr) +{ + return memcmp(ehdr->e_ident, ELFMAG, SELFMAG) == 0; +} + +static uint64_t elf64_to_cpu(const struct elfhdr *ehdr, uint64_t value) +{ + if (ehdr->e_ident[EI_DATA] == ELFDATA2LSB) + value = le64_to_cpu(value); + else if (ehdr->e_ident[EI_DATA] == ELFDATA2MSB) + value = be64_to_cpu(value); + + return value; +} + +static uint16_t elf16_to_cpu(const struct elfhdr *ehdr, uint16_t value) +{ + if (ehdr->e_ident[EI_DATA] == ELFDATA2LSB) + value = le16_to_cpu(value); + else if (ehdr->e_ident[EI_DATA] == ELFDATA2MSB) + value = be16_to_cpu(value); + + return value; +} + +static uint32_t elf32_to_cpu(const struct elfhdr *ehdr, uint32_t value) +{ + if (ehdr->e_ident[EI_DATA] == ELFDATA2LSB) + value = le32_to_cpu(value); + else if (ehdr->e_ident[EI_DATA] == ELFDATA2MSB) + value = be32_to_cpu(value); + + return value; +} + +/** + * elf_is_ehdr_sane - check that it is safe to use the ELF header + * @buf_len: size of the buffer in which the ELF file is loaded. + */ +static bool elf_is_ehdr_sane(const struct elfhdr *ehdr, size_t buf_len) +{ + if (ehdr->e_phnum > 0 && ehdr->e_phentsize != sizeof(struct elf_phdr)) { + pr_debug("Bad program header size.\n"); + return false; + } else if (ehdr->e_shnum > 0 && + ehdr->e_shentsize != sizeof(struct elf_shdr)) { + pr_debug("Bad section header size.\n"); + return false; + } else if (ehdr->e_ident[EI_VERSION] != EV_CURRENT || + ehdr->e_version != EV_CURRENT) { + pr_debug("Unknown ELF version.\n"); + return false; + } + + if (ehdr->e_phoff > 0 && ehdr->e_phnum > 0) { + size_t phdr_size; + + /* + * e_phnum is at most 65535 so calculating the size of the + * program header cannot overflow. + */ + phdr_size = sizeof(struct elf_phdr) * ehdr->e_phnum; + + /* Sanity check the program header table location. */ + if (ehdr->e_phoff + phdr_size < ehdr->e_phoff) { + pr_debug("Program headers at invalid location.\n"); + return false; + } else if (ehdr->e_phoff + phdr_size > buf_len) { + pr_debug("Program headers truncated.\n"); + return false; + } + } + + if (ehdr->e_shoff > 0 && ehdr->e_shnum > 0) { + size_t shdr_size; + + /* + * e_shnum is at most 65536 so calculating + * the size of the section header cannot overflow. + */ + shdr_size = sizeof(struct elf_shdr) * ehdr->e_shnum; + + /* Sanity check the section header table location. */ + if (ehdr->e_shoff + shdr_size < ehdr->e_shoff) { + pr_debug("Section headers at invalid location.\n"); + return false; + } else if (ehdr->e_shoff + shdr_size > buf_len) { + pr_debug("Section headers truncated.\n"); + return false; + } + } + + return true; +} + +static int elf_read_ehdr(const char *buf, size_t len, struct elfhdr *ehdr) +{ + struct elfhdr *buf_ehdr; + + if (len < sizeof(*buf_ehdr)) { + pr_debug("Buffer is too small to hold ELF header.\n"); + return -ENOEXEC; + } + + memset(ehdr, 0, sizeof(*ehdr)); + memcpy(ehdr->e_ident, buf, sizeof(ehdr->e_ident)); + if (!elf_is_elf_file(ehdr)) { + pr_debug("No ELF header magic.\n"); + return -ENOEXEC; + } + + if (ehdr->e_ident[EI_CLASS] != ELF_CLASS) { + pr_debug("Not a supported ELF class.\n"); + return -ENOEXEC; + } else if (ehdr->e_ident[EI_DATA] != ELFDATA2LSB && + ehdr->e_ident[EI_DATA] != ELFDATA2MSB) { + pr_debug("Not a supported ELF data format.\n"); + return -ENOEXEC; + } + + buf_ehdr = (struct elfhdr *) buf; + if (elf16_to_cpu(ehdr, buf_ehdr->e_ehsize) != sizeof(*buf_ehdr)) { + pr_debug("Bad ELF header size.\n"); + return -ENOEXEC; + } + + ehdr->e_type = elf16_to_cpu(ehdr, buf_ehdr->e_type); + ehdr->e_machine = elf16_to_cpu(ehdr, buf_ehdr->e_machine); + ehdr->e_version = elf32_to_cpu(ehdr, buf_ehdr->e_version); + ehdr->e_entry = elf_addr_to_cpu(ehdr, buf_ehdr->e_entry); + ehdr->e_phoff = elf_addr_to_cpu(ehdr, buf_ehdr->e_phoff); + ehdr->e_shoff = elf_addr_to_cpu(ehdr, buf_ehdr->e_shoff); + ehdr->e_flags = elf32_to_cpu(ehdr, buf_ehdr->e_flags); + ehdr->e_phentsize = elf16_to_cpu(ehdr, buf_ehdr->e_phentsize); + ehdr->e_phnum = elf16_to_cpu(ehdr, buf_ehdr->e_phnum); + ehdr->e_shentsize = elf16_to_cpu(ehdr, buf_ehdr->e_shentsize); + ehdr->e_shnum = elf16_to_cpu(ehdr, buf_ehdr->e_shnum); + ehdr->e_shstrndx = elf16_to_cpu(ehdr, buf_ehdr->e_shstrndx); + + return elf_is_ehdr_sane(ehdr, len) ? 0 : -ENOEXEC; +} + +/** + * elf_is_phdr_sane - check that it is safe to use the program header + * @buf_len: size of the buffer in which the ELF file is loaded. + */ +static bool elf_is_phdr_sane(const struct elf_phdr *phdr, size_t buf_len) +{ + + if (phdr->p_offset + phdr->p_filesz < phdr->p_offset) { + pr_debug("ELF segment location wraps around.\n"); + return false; + } else if (phdr->p_offset + phdr->p_filesz > buf_len) { + pr_debug("ELF segment not in file.\n"); + return false; + } else if (phdr->p_paddr + phdr->p_memsz < phdr->p_paddr) { + pr_debug("ELF segment address wraps around.\n"); + return false; + } + + return true; +} + +static int elf_read_phdr(const char *buf, size_t len, struct elf_info *elf_info, + int idx) +{ + /* Override the const in proghdrs, we are the ones doing the loading. */ + struct elf_phdr *phdr = (struct elf_phdr *) &elf_info->proghdrs[idx]; + const char *pbuf; + struct elf_phdr *buf_phdr; + + pbuf = buf + elf_info->ehdr->e_phoff + (idx * sizeof(*buf_phdr)); + buf_phdr = (struct elf_phdr *) pbuf; + + phdr->p_type = elf32_to_cpu(elf_info->ehdr, buf_phdr->p_type); + phdr->p_offset = elf_addr_to_cpu(elf_info->ehdr, buf_phdr->p_offset); + phdr->p_paddr = elf_addr_to_cpu(elf_info->ehdr, buf_phdr->p_paddr); + phdr->p_vaddr = elf_addr_to_cpu(elf_info->ehdr, buf_phdr->p_vaddr); + phdr->p_flags = elf32_to_cpu(elf_info->ehdr, buf_phdr->p_flags); + + /* + * The following fields have a type equivalent to Elf_Addr + * both in 32 bit and 64 bit ELF. + */ + phdr->p_filesz = elf_addr_to_cpu(elf_info->ehdr, buf_phdr->p_filesz); + phdr->p_memsz = elf_addr_to_cpu(elf_info->ehdr, buf_phdr->p_memsz); + phdr->p_align = elf_addr_to_cpu(elf_info->ehdr, buf_phdr->p_align); + + return elf_is_phdr_sane(phdr, len) ? 0 : -ENOEXEC; +} + +/** + * elf_read_phdrs - read the program headers from the buffer + * + * This function assumes that the program header table was checked for sanity. + * Use elf_is_ehdr_sane() if it wasn't. + */ +static int elf_read_phdrs(const char *buf, size_t len, + struct elf_info *elf_info) +{ + size_t phdr_size, i; + const struct elfhdr *ehdr = elf_info->ehdr; + + /* + * e_phnum is at most 65535 so calculating the size of the + * program header cannot overflow. + */ + phdr_size = sizeof(struct elf_phdr) * ehdr->e_phnum; + + elf_info->proghdrs = kzalloc(phdr_size, GFP_KERNEL); + if (!elf_info->proghdrs) + return -ENOMEM; + + for (i = 0; i < ehdr->e_phnum; i++) { + int ret; + + ret = elf_read_phdr(buf, len, elf_info, i); + if (ret) { + kfree(elf_info->proghdrs); + elf_info->proghdrs = NULL; + return ret; + } + } + + return 0; +} + +/** + * elf_is_shdr_sane - check that it is safe to use the section header + * @buf_len: size of the buffer in which the ELF file is loaded. + */ +static bool elf_is_shdr_sane(const struct elf_shdr *shdr, size_t buf_len) +{ + bool size_ok; + + /* SHT_NULL headers have undefined values, so we can't check them. */ + if (shdr->sh_type == SHT_NULL) + return true; + + /* Now verify sh_entsize */ + switch (shdr->sh_type) { + case SHT_SYMTAB: + size_ok = shdr->sh_entsize == sizeof(Elf_Sym); + break; + case SHT_RELA: + size_ok = shdr->sh_entsize == sizeof(Elf_Rela); + break; + case SHT_DYNAMIC: + size_ok = shdr->sh_entsize == sizeof(Elf_Dyn); + break; + case SHT_REL: + size_ok = shdr->sh_entsize == sizeof(Elf_Rel); + break; + case SHT_NOTE: + case SHT_PROGBITS: + case SHT_HASH: + case SHT_NOBITS: + default: + /* + * This is a section whose entsize requirements + * I don't care about. If I don't know about + * the section I can't care about it's entsize + * requirements. + */ + size_ok = true; + break; + } + + if (!size_ok) { + pr_debug("ELF section with wrong entry size.\n"); + return false; + } else if (shdr->sh_addr + shdr->sh_size < shdr->sh_addr) { + pr_debug("ELF section address wraps around.\n"); + return false; + } + + if (shdr->sh_type != SHT_NOBITS) { + if (shdr->sh_offset + shdr->sh_size < shdr->sh_offset) { + pr_debug("ELF section location wraps around.\n"); + return false; + } else if (shdr->sh_offset + shdr->sh_size > buf_len) { + pr_debug("ELF section not in file.\n"); + return false; + } + } + + return true; +} + +static int elf_read_shdr(const char *buf, size_t len, struct elf_info *elf_info, + int idx) +{ + struct elf_shdr *shdr = &elf_info->sechdrs[idx]; + const struct elfhdr *ehdr = elf_info->ehdr; + const char *sbuf; + struct elf_shdr *buf_shdr; + + sbuf = buf + ehdr->e_shoff + idx * sizeof(*buf_shdr); + buf_shdr = (struct elf_shdr *) sbuf; + + shdr->sh_name = elf32_to_cpu(ehdr, buf_shdr->sh_name); + shdr->sh_type = elf32_to_cpu(ehdr, buf_shdr->sh_type); + shdr->sh_addr = elf_addr_to_cpu(ehdr, buf_shdr->sh_addr); + shdr->sh_offset = elf_addr_to_cpu(ehdr, buf_shdr->sh_offset); + shdr->sh_link = elf32_to_cpu(ehdr, buf_shdr->sh_link); + shdr->sh_info = elf32_to_cpu(ehdr, buf_shdr->sh_info); + + /* + * The following fields have a type equivalent to Elf_Addr + * both in 32 bit and 64 bit ELF. + */ + shdr->sh_flags = elf_addr_to_cpu(ehdr, buf_shdr->sh_flags); + shdr->sh_size = elf_addr_to_cpu(ehdr, buf_shdr->sh_size); + shdr->sh_addralign = elf_addr_to_cpu(ehdr, buf_shdr->sh_addralign); + shdr->sh_entsize = elf_addr_to_cpu(ehdr, buf_shdr->sh_entsize); + + return elf_is_shdr_sane(shdr, len) ? 0 : -ENOEXEC; +} + +/** + * elf_read_shdrs - read the section headers from the buffer + * + * This function assumes that the section header table was checked for sanity. + * Use elf_is_ehdr_sane() if it wasn't. + */ +static int elf_read_shdrs(const char *buf, size_t len, + struct elf_info *elf_info) +{ + size_t shdr_size, i; + + /* + * e_shnum is at most 65536 so calculating + * the size of the section header cannot overflow. + */ + shdr_size = sizeof(struct elf_shdr) * elf_info->ehdr->e_shnum; + + elf_info->sechdrs = kzalloc(shdr_size, GFP_KERNEL); + if (!elf_info->sechdrs) + return -ENOMEM; + + for (i = 0; i < elf_info->ehdr->e_shnum; i++) { + int ret; + + ret = elf_read_shdr(buf, len, elf_info, i); + if (ret) { + kfree(elf_info->sechdrs); + elf_info->sechdrs = NULL; + return ret; + } + } + + return 0; +} + +/** + * elf_read_from_buffer - read ELF file and sets up ELF header and ELF info + * @buf: Buffer to read ELF file from. + * @len: Size of @buf. + * @ehdr: Pointer to existing struct which will be populated. + * @elf_info: Pointer to existing struct which will be populated. + * + * This function allows reading ELF files with different byte order than + * the kernel, byte-swapping the fields as needed. + * + * Return: + * On success returns 0, and the caller should call elf_free_info(elf_info) to + * free the memory allocated for the section and program headers. + */ +int elf_read_from_buffer(const char *buf, size_t len, struct elfhdr *ehdr, + struct elf_info *elf_info) +{ + int ret; + + ret = elf_read_ehdr(buf, len, ehdr); + if (ret) + return ret; + + elf_info->buffer = buf; + elf_info->ehdr = ehdr; + if (ehdr->e_phoff > 0 && ehdr->e_phnum > 0) { + ret = elf_read_phdrs(buf, len, elf_info); + if (ret) + return ret; + } + if (ehdr->e_shoff > 0 && ehdr->e_shnum > 0) { + ret = elf_read_shdrs(buf, len, elf_info); + if (ret) { + kfree(elf_info->proghdrs); + return ret; + } + } + + return 0; +} + +/** + * elf_free_info - free memory allocated by elf_read_from_buffer + */ +void elf_free_info(struct elf_info *elf_info) +{ + kfree(elf_info->proghdrs); + kfree(elf_info->sechdrs); + memset(elf_info, 0, sizeof(*elf_info)); +} +/** + * build_elf_exec_info - read ELF executable and check that we can use it + */ +static int build_elf_exec_info(const char *buf, size_t len, struct elfhdr *ehdr, + struct elf_info *elf_info) +{ + int i; + int ret; + + ret = elf_read_from_buffer(buf, len, ehdr, elf_info); + if (ret) + return ret; + + /* Big endian vmlinux has type ET_DYN. */ + if (ehdr->e_type != ET_EXEC && ehdr->e_type != ET_DYN) { + pr_err("Not an ELF executable.\n"); + goto error; + } else if (!elf_info->proghdrs) { + pr_err("No ELF program header.\n"); + goto error; + } + + for (i = 0; i < ehdr->e_phnum; i++) { + /* + * Kexec does not support loading interpreters. + * In addition this check keeps us from attempting + * to kexec ordinay executables. + */ + if (elf_info->proghdrs[i].p_type == PT_INTERP) { + pr_err("Requires an ELF interpreter.\n"); + goto error; + } + } + + return 0; +error: + elf_free_info(elf_info); + return -ENOEXEC; +} + +static int elf64_probe(const char *buf, unsigned long len) +{ + struct elfhdr ehdr; + struct elf_info elf_info; + int ret; + + ret = build_elf_exec_info(buf, len, &ehdr, &elf_info); + if (ret) + return ret; + + elf_free_info(&elf_info); + + return elf_check_arch(&ehdr) ? 0 : -ENOEXEC; +} + +/** + * elf_exec_load - load ELF executable image + * @lowest_load_addr: On return, will be the address where the first PT_LOAD + * section will be loaded in memory. + * + * Return: + * 0 on success, negative value on failure. + */ +static int elf_exec_load(struct kimage *image, struct elfhdr *ehdr, + struct elf_info *elf_info, + unsigned long *lowest_load_addr) +{ + unsigned long base = 0, lowest_addr = UINT_MAX; + int ret; + size_t i; + struct kexec_buf kbuf = { .image = image, .buf_max = ppc64_rma_size, + .top_down = false }; + + /* Read in the PT_LOAD segments. */ + for (i = 0; i < ehdr->e_phnum; i++) { + unsigned long load_addr; + size_t size; + const struct elf_phdr *phdr; + + phdr = &elf_info->proghdrs[i]; + if (phdr->p_type != PT_LOAD) + continue; + + size = phdr->p_filesz; + if (size > phdr->p_memsz) + size = phdr->p_memsz; + + kbuf.buffer = (void *) elf_info->buffer + phdr->p_offset; + kbuf.bufsz = size; + kbuf.memsz = phdr->p_memsz; + kbuf.buf_align = phdr->p_align; + kbuf.buf_min = phdr->p_paddr + base; + ret = kexec_add_buffer(&kbuf); + if (ret) + goto out; + load_addr = kbuf.mem; + + if (load_addr < lowest_addr) + lowest_addr = load_addr; + } + + /* Update entry point to reflect new load address. */ + ehdr->e_entry += base; + + *lowest_load_addr = lowest_addr; + ret = 0; + out: + return ret; +} + +static void *elf64_load(struct kimage *image, char *kernel_buf, + unsigned long kernel_len, char *initrd, + unsigned long initrd_len, char *cmdline, + unsigned long cmdline_len) +{ + int ret; + unsigned int fdt_size; + unsigned long kernel_load_addr, purgatory_load_addr; + unsigned long initrd_load_addr = 0, fdt_load_addr; + void *fdt; + const void *slave_code; + struct elfhdr ehdr; + struct elf_info elf_info; + struct kexec_buf kbuf = { .image = image, .buf_min = 0, + .buf_max = ppc64_rma_size }; + + ret = build_elf_exec_info(kernel_buf, kernel_len, &ehdr, &elf_info); + if (ret) + goto out; + + ret = elf_exec_load(image, &ehdr, &elf_info, &kernel_load_addr); + if (ret) + goto out; + + pr_debug("Loaded the kernel at 0x%lx\n", kernel_load_addr); + + ret = kexec_load_purgatory(image, 0, ppc64_rma_size, true, + &purgatory_load_addr); + if (ret) { + pr_err("Loading purgatory failed.\n"); + goto out; + } + + pr_debug("Loaded purgatory at 0x%lx\n", purgatory_load_addr); + + if (initrd != NULL) { + kbuf.buffer = initrd; + kbuf.bufsz = kbuf.memsz = initrd_len; + kbuf.buf_align = PAGE_SIZE; + kbuf.top_down = false; + ret = kexec_add_buffer(&kbuf); + if (ret) + goto out; + initrd_load_addr = kbuf.mem; + + pr_debug("Loaded initrd at 0x%lx\n", initrd_load_addr); + } + + fdt_size = fdt_totalsize(initial_boot_params) * 2; + fdt = kmalloc(fdt_size, GFP_KERNEL); + if (!fdt) { + pr_err("Not enough memory for the device tree.\n"); + ret = -ENOMEM; + goto out; + } + ret = fdt_open_into(initial_boot_params, fdt, fdt_size); + if (ret < 0) { + pr_err("Error setting up the new device tree.\n"); + ret = -EINVAL; + goto out; + } + + ret = setup_new_fdt(fdt, initrd_load_addr, initrd_len, cmdline); + if (ret) + goto out; + + fdt_pack(fdt); + + kbuf.buffer = fdt; + kbuf.bufsz = kbuf.memsz = fdt_size; + kbuf.buf_align = PAGE_SIZE; + kbuf.top_down = true; + ret = kexec_add_buffer(&kbuf); + if (ret) + goto out; + fdt_load_addr = kbuf.mem; + + pr_debug("Loaded device tree at 0x%lx\n", fdt_load_addr); + + slave_code = elf_info.buffer + elf_info.proghdrs[0].p_offset; + ret = setup_purgatory(image, slave_code, fdt, kernel_load_addr, + fdt_load_addr); + if (ret) + pr_err("Error setting up the purgatory.\n"); + +out: + elf_free_info(&elf_info); + + /* Make kimage_file_post_load_cleanup free the fdt buffer for us. */ + return ret ? ERR_PTR(ret) : fdt; +} + +struct kexec_file_ops kexec_elf64_ops = { + .probe = elf64_probe, + .load = elf64_load, +}; diff --git a/arch/powerpc/kernel/machine_kexec_file_64.c b/arch/powerpc/kernel/machine_kexec_file_64.c new file mode 100644 index 000000000000..7abc8a75ee48 --- /dev/null +++ b/arch/powerpc/kernel/machine_kexec_file_64.c @@ -0,0 +1,338 @@ +/* + * ppc64 code to implement the kexec_file_load syscall + * + * Copyright (C) 2004 Adam Litke (agl@us.ibm.com) + * Copyright (C) 2004 IBM Corp. + * Copyright (C) 2004,2005 Milton D Miller II, IBM Corporation + * Copyright (C) 2005 R Sharada (sharada@in.ibm.com) + * Copyright (C) 2006 Mohan Kumar M (mohan@in.ibm.com) + * Copyright (C) 2016 IBM Corporation + * + * Based on kexec-tools' kexec-elf-ppc64.c, fs2dt.c. + * Heavily modified for the kernel by + * Thiago Jung Bauermann . + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation (version 2 of the License). + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include +#include +#include +#include +#include + +#define SLAVE_CODE_SIZE 256 + +static struct kexec_file_ops *kexec_file_loaders[] = { + &kexec_elf64_ops, +}; + +int arch_kexec_kernel_image_probe(struct kimage *image, void *buf, + unsigned long buf_len) +{ + int i, ret = -ENOEXEC; + struct kexec_file_ops *fops; + + /* We don't support crash kernels yet. */ + if (image->type == KEXEC_TYPE_CRASH) + return -ENOTSUPP; + + for (i = 0; i < ARRAY_SIZE(kexec_file_loaders); i++) { + fops = kexec_file_loaders[i]; + if (!fops || !fops->probe) + continue; + + ret = fops->probe(buf, buf_len); + if (!ret) { + image->fops = fops; + return ret; + } + } + + return ret; +} + +void *arch_kexec_kernel_image_load(struct kimage *image) +{ + if (!image->fops || !image->fops->load) + return ERR_PTR(-ENOEXEC); + + return image->fops->load(image, image->kernel_buf, + image->kernel_buf_len, image->initrd_buf, + image->initrd_buf_len, image->cmdline_buf, + image->cmdline_buf_len); +} + +int arch_kimage_file_post_load_cleanup(struct kimage *image) +{ + if (!image->fops || !image->fops->cleanup) + return 0; + + return image->fops->cleanup(image->image_loader_data); +} + +/** + * arch_kexec_walk_mem - call func(data) for each unreserved memory block + * @kbuf: Context info for the search. Also passed to @func. + * @func: Function to call for each memory block. + * + * This function is used by kexec_add_buffer and kexec_locate_mem_hole + * to find unreserved memory to load kexec segments into. + * + * Return: The memory walk will stop when func returns a non-zero value + * and that value will be returned. If all free regions are visited without + * func returning non-zero, then zero will be returned. + */ +int arch_kexec_walk_mem(struct kexec_buf *kbuf, int (*func)(u64, u64, void *)) +{ + int ret = 0; + u64 i; + phys_addr_t mstart, mend; + + if (kbuf->top_down) { + for_each_free_mem_range_reverse(i, NUMA_NO_NODE, 0, + &mstart, &mend, NULL) { + /* + * In memblock, end points to the first byte after the + * range while in kexec, end points to the last byte + * in the range. + */ + ret = func(mstart, mend - 1, kbuf); + if (ret) + break; + } + } else { + for_each_free_mem_range(i, NUMA_NO_NODE, 0, &mstart, &mend, + NULL) { + /* + * In memblock, end points to the first byte after the + * range while in kexec, end points to the last byte + * in the range. + */ + ret = func(mstart, mend - 1, kbuf); + if (ret) + break; + } + } + + return ret; +} + +/** + * setup_purgatory - initialize the purgatory's global variables + * @image: kexec image. + * @slave_code: Slave code for the purgatory. + * @fdt: Flattened device tree for the next kernel. + * @kernel_load_addr: Address where the kernel is loaded. + * @fdt_load_addr: Address where the flattened device tree is loaded. + * + * Return: 0 on success, or negative errno on error. + */ +int setup_purgatory(struct kimage *image, const void *slave_code, + const void *fdt, unsigned long kernel_load_addr, + unsigned long fdt_load_addr) +{ + unsigned int *slave_code_buf, master_entry; + int ret; + + slave_code_buf = kmalloc(SLAVE_CODE_SIZE, GFP_KERNEL); + if (!slave_code_buf) + return -ENOMEM; + + /* Get the slave code from the new kernel and put it in purgatory. */ + ret = kexec_purgatory_get_set_symbol(image, "purgatory_start", + slave_code_buf, SLAVE_CODE_SIZE, + true); + if (ret) { + kfree(slave_code_buf); + return ret; + } + + master_entry = slave_code_buf[0]; + memcpy(slave_code_buf, slave_code, SLAVE_CODE_SIZE); + slave_code_buf[0] = master_entry; + ret = kexec_purgatory_get_set_symbol(image, "purgatory_start", + slave_code_buf, SLAVE_CODE_SIZE, + false); + kfree(slave_code_buf); + + ret = kexec_purgatory_get_set_symbol(image, "kernel", &kernel_load_addr, + sizeof(kernel_load_addr), false); + if (ret) + return ret; + ret = kexec_purgatory_get_set_symbol(image, "dt_offset", &fdt_load_addr, + sizeof(fdt_load_addr), false); + if (ret) + return ret; + + return 0; +} + +/** + * delete_fdt_mem_rsv - delete memory reservation with given address and size + * + * Return: 0 on success, or negative errno on error. + */ +static int delete_fdt_mem_rsv(void *fdt, unsigned long start, unsigned long size) +{ + int i, ret, num_rsvs = fdt_num_mem_rsv(fdt); + + for (i = 0; i < num_rsvs; i++) { + uint64_t rsv_start, rsv_size; + + ret = fdt_get_mem_rsv(fdt, i, &rsv_start, &rsv_size); + if (ret) { + pr_err("Malformed device tree.\n"); + return -EINVAL; + } + + if (rsv_start == start && rsv_size == size) { + ret = fdt_del_mem_rsv(fdt, i); + if (ret) { + pr_err("Error deleting device tree reservation.\n"); + return -EINVAL; + } + + return 0; + } + } + + return -ENOENT; +} + +/* + * setup_new_fdt - modify /chosen and memory reservation for the next kernel + * @fdt: Flattened device tree for the next kernel. + * @initrd_load_addr: Address where the next initrd will be loaded. + * @initrd_len: Size of the next initrd, or 0 if there will be none. + * @cmdline: Command line for the next kernel, or NULL if there will + * be none. + * + * Return: 0 on success, or negative errno on error. + */ +int setup_new_fdt(void *fdt, unsigned long initrd_load_addr, + unsigned long initrd_len, const char *cmdline) +{ + int ret, chosen_node; + const void *prop; + + /* Remove memory reservation for the current device tree. */ + ret = delete_fdt_mem_rsv(fdt, __pa(initial_boot_params), + fdt_totalsize(initial_boot_params)); + if (ret == 0) + pr_debug("Removed old device tree reservation.\n"); + else if (ret != -ENOENT) + return ret; + + chosen_node = fdt_path_offset(fdt, "/chosen"); + if (chosen_node == -FDT_ERR_NOTFOUND) { + chosen_node = fdt_add_subnode(fdt, fdt_path_offset(fdt, "/"), + "chosen"); + if (chosen_node < 0) { + pr_err("Error creating /chosen.\n"); + return -EINVAL; + } + } else if (chosen_node < 0) { + pr_err("Malformed device tree: error reading /chosen.\n"); + return -EINVAL; + } + + /* Did we boot using an initrd? */ + prop = fdt_getprop(fdt, chosen_node, "linux,initrd-start", NULL); + if (prop) { + uint64_t tmp_start, tmp_end, tmp_size; + + tmp_start = fdt64_to_cpu(*((const fdt64_t *) prop)); + + prop = fdt_getprop(fdt, chosen_node, "linux,initrd-end", NULL); + if (!prop) { + pr_err("Malformed device tree.\n"); + return -EINVAL; + } + tmp_end = fdt64_to_cpu(*((const fdt64_t *) prop)); + + /* + * kexec reserves exact initrd size, while firmware may + * reserve a multiple of PAGE_SIZE, so check for both. + */ + tmp_size = tmp_end - tmp_start; + ret = delete_fdt_mem_rsv(fdt, tmp_start, tmp_size); + if (ret == -ENOENT) + ret = delete_fdt_mem_rsv(fdt, tmp_start, + round_up(tmp_size, PAGE_SIZE)); + if (ret == 0) + pr_debug("Removed old initrd reservation.\n"); + else if (ret != -ENOENT) + return ret; + + /* If there's no new initrd, delete the old initrd's info. */ + if (initrd_len == 0) { + ret = fdt_delprop(fdt, chosen_node, + "linux,initrd-start"); + if (ret) { + pr_err("Error deleting linux,initrd-start.\n"); + return -EINVAL; + } + + ret = fdt_delprop(fdt, chosen_node, "linux,initrd-end"); + if (ret) { + pr_err("Error deleting linux,initrd-end.\n"); + return -EINVAL; + } + } + } + + if (initrd_len) { + ret = fdt_setprop_u64(fdt, chosen_node, + "linux,initrd-start", + initrd_load_addr); + if (ret < 0) { + pr_err("Error setting up the new device tree.\n"); + return -EINVAL; + } + + /* initrd-end is the first address after the initrd image. */ + ret = fdt_setprop_u64(fdt, chosen_node, "linux,initrd-end", + initrd_load_addr + initrd_len); + if (ret < 0) { + pr_err("Error setting up the new device tree.\n"); + return -EINVAL; + } + + ret = fdt_add_mem_rsv(fdt, initrd_load_addr, initrd_len); + if (ret) { + pr_err("Error reserving initrd memory: %s\n", + fdt_strerror(ret)); + return -EINVAL; + } + } + + if (cmdline != NULL) { + ret = fdt_setprop_string(fdt, chosen_node, "bootargs", cmdline); + if (ret < 0) { + pr_err("Error setting up the new device tree.\n"); + return -EINVAL; + } + } else { + ret = fdt_delprop(fdt, chosen_node, "bootargs"); + if (ret && ret != -FDT_ERR_NOTFOUND) { + pr_err("Error deleting bootargs.\n"); + return -EINVAL; + } + } + + ret = fdt_setprop(fdt, chosen_node, "linux,booted-from-kexec", NULL, 0); + if (ret) { + pr_err("Error setting up the new device tree.\n"); + return -EINVAL; + } + + return 0; +} -- cgit v1.3.1 From 80f60e509a03ff9ff2bdbf9cd1e935c6360b8bd9 Mon Sep 17 00:00:00 2001 From: Thiago Jung Bauermann Date: Tue, 29 Nov 2016 23:45:53 +1100 Subject: powerpc/kexec: Enable kexec_file_load() syscall Define the Kconfig symbol so that the kexec_file_load() code can be built, and wire up the syscall so that it can be called. Signed-off-by: Thiago Jung Bauermann Signed-off-by: Michael Ellerman --- arch/powerpc/Kconfig | 13 +++++++++++++ arch/powerpc/include/asm/systbl.h | 1 + arch/powerpc/include/asm/unistd.h | 2 +- arch/powerpc/include/uapi/asm/unistd.h | 1 + 4 files changed, 16 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index a063c7235fa9..48923c349ccb 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -465,6 +465,19 @@ config KEXEC interface is strongly in flux, so no good recommendation can be made. +config KEXEC_FILE + bool "kexec file based system call" + select KEXEC_CORE + select BUILD_BIN2C + depends on PPC64 + depends on CRYPTO=y + depends on CRYPTO_SHA256=y + help + This is a new version of the kexec system call. This call is + file based and takes in file descriptors as system call arguments + for kernel and initramfs as opposed to a list of segments as is the + case for the older kexec call. + config RELOCATABLE bool "Build a relocatable kernel" depends on (PPC64 && !COMPILE_TEST) || (FLATMEM && (44x || FSL_BOOKE)) diff --git a/arch/powerpc/include/asm/systbl.h b/arch/powerpc/include/asm/systbl.h index 2fc5d4db503c..4b369d83fe9c 100644 --- a/arch/powerpc/include/asm/systbl.h +++ b/arch/powerpc/include/asm/systbl.h @@ -386,3 +386,4 @@ SYSCALL(mlock2) SYSCALL(copy_file_range) COMPAT_SYS_SPU(preadv2) COMPAT_SYS_SPU(pwritev2) +SYSCALL(kexec_file_load) diff --git a/arch/powerpc/include/asm/unistd.h b/arch/powerpc/include/asm/unistd.h index e8cdfec8d512..eb1acee91a20 100644 --- a/arch/powerpc/include/asm/unistd.h +++ b/arch/powerpc/include/asm/unistd.h @@ -12,7 +12,7 @@ #include -#define NR_syscalls 382 +#define NR_syscalls 383 #define __NR__exit __NR_exit diff --git a/arch/powerpc/include/uapi/asm/unistd.h b/arch/powerpc/include/uapi/asm/unistd.h index e9f5f41aa55a..2f26335a3c42 100644 --- a/arch/powerpc/include/uapi/asm/unistd.h +++ b/arch/powerpc/include/uapi/asm/unistd.h @@ -392,5 +392,6 @@ #define __NR_copy_file_range 379 #define __NR_preadv2 380 #define __NR_pwritev2 381 +#define __NR_kexec_file_load 382 #endif /* _UAPI_ASM_POWERPC_UNISTD_H_ */ -- cgit v1.3.1 From 53ce299615e587e900548eb6b384c3081b71bbfa Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 8 Nov 2016 17:08:06 +1100 Subject: powerpc/pseries: add definitions for new H_SIGNAL_SYS_RESET hcall This has not made its way to a PAPR release yet, but we have an hcall number assigned. H_SIGNAL_SYS_RESET = 0x380 Syntax: hcall(uint64 H_SIGNAL_SYS_RESET, int64 target); Generate a system reset NMI on the threads indicated by target. Values for target: -1 = target all online threads including the caller -2 = target all online threads except for the caller All other negative values: reserved Positive values: The thread to be targeted, obtained from the value of the "ibm,ppc-interrupt-server#s" property of the CPU in the OF device tree. Semantics: - Invalid target: return H_Parameter. - Otherwise: Generate a system reset NMI on target thread(s), return H_Success. This will be used by crash/debug code to get stuck CPUs into a known state. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/hvcall.h | 8 +++++++- arch/powerpc/include/asm/plpar_wrappers.h | 5 +++++ 2 files changed, 12 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h index 8d5f8352afd7..77ff1ba99d1f 100644 --- a/arch/powerpc/include/asm/hvcall.h +++ b/arch/powerpc/include/asm/hvcall.h @@ -276,7 +276,8 @@ #define H_GET_MPP_X 0x314 #define H_SET_MODE 0x31C #define H_CLEAR_HPT 0x358 -#define MAX_HCALL_OPCODE H_CLEAR_HPT +#define H_SIGNAL_SYS_RESET 0x380 +#define MAX_HCALL_OPCODE H_SIGNAL_SYS_RESET /* H_VIOCTL functions */ #define H_GET_VIOA_DUMP_SIZE 0x01 @@ -307,6 +308,11 @@ #define H_SET_MODE_RESOURCE_ADDR_TRANS_MODE 3 #define H_SET_MODE_RESOURCE_LE 4 +/* Values for argument to H_SIGNAL_SYS_RESET */ +#define H_SIGNAL_SYS_RESET_ALL -1 +#define H_SIGNAL_SYS_RESET_ALL_OTHERS -2 +/* >= 0 values are CPU number */ + #ifndef __ASSEMBLY__ /** diff --git a/arch/powerpc/include/asm/plpar_wrappers.h b/arch/powerpc/include/asm/plpar_wrappers.h index 034a588b122c..0bcc75e295e3 100644 --- a/arch/powerpc/include/asm/plpar_wrappers.h +++ b/arch/powerpc/include/asm/plpar_wrappers.h @@ -308,4 +308,9 @@ static inline long plapr_set_watchpoint0(unsigned long dawr0, unsigned long dawr return plpar_set_mode(0, H_SET_MODE_RESOURCE_SET_DAWR, dawr0, dawrx0); } +static inline long plapr_signal_sys_reset(long cpu) +{ + return plpar_hcall_norets(H_SIGNAL_SYS_RESET, cpu); +} + #endif /* _ASM_POWERPC_PLPAR_WRAPPERS_H */ -- cgit v1.3.1 From 76ffb5785047b3924da20969eb3f658b363c20f0 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Fri, 18 Nov 2016 23:15:42 +1100 Subject: powerpc/prom: Switch to using structs for ibm_architecture_vec Now that we've defined structures to describe each of the client architecture vectors, we can use those to construct the value we pass to firmware. This avoids the tricks we previously played with the W() macro, allows us to properly endian annotate fields, and should help to avoid bugs introduced by failing to have the correct number of zero pad bytes between fields. It also means we can avoid hard coding IBM_ARCH_VEC_NRCORES_OFFSET in order to update the max_cpus value and instead just set it. Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/prom.h | 6 - arch/powerpc/kernel/prom_init.c | 274 ++++++++++++++++++++++------------------ 2 files changed, 154 insertions(+), 126 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/prom.h b/arch/powerpc/include/asm/prom.h index 7f436ba1b56f..5e57705b4759 100644 --- a/arch/powerpc/include/asm/prom.h +++ b/arch/powerpc/include/asm/prom.h @@ -159,11 +159,5 @@ struct of_drconf_cell { /* Option Vector 6: IBM PAPR hints */ #define OV6_LINUX 0x02 /* Linux is our OS */ -/* - * The architecture vector has an array of PVR mask/value pairs, - * followed by # option vectors - 1, followed by the option vectors. - */ -extern unsigned char ibm_architecture_vec[]; - #endif /* __KERNEL__ */ #endif /* _POWERPC_PROM_H */ diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index e41c0b6637ab..ec47a939cbdd 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -635,13 +635,7 @@ static void __init early_cmdline_parse(void) * * See prom.h for the definition of the bits specified in the * architecture vector. - * - * Because the description vector contains a mix of byte and word - * values, we declare it as an unsigned char array, and use this - * macro to put word values in. */ -#define W(x) ((x) >> 24) & 0xff, ((x) >> 16) & 0xff, \ - ((x) >> 8) & 0xff, (x) & 0xff /* Firmware expects the value to be n - 1, where n is the # of vectors */ #define NUM_VECTORS(n) ((n) - 1) @@ -652,94 +646,6 @@ static void __init early_cmdline_parse(void) */ #define VECTOR_LENGTH(n) (1 + (n) - 2) -unsigned char ibm_architecture_vec[] = { - W(0xfffe0000), W(0x003a0000), /* POWER5/POWER5+ */ - W(0xffff0000), W(0x003e0000), /* POWER6 */ - W(0xffff0000), W(0x003f0000), /* POWER7 */ - W(0xffff0000), W(0x004b0000), /* POWER8E */ - W(0xffff0000), W(0x004c0000), /* POWER8NVL */ - W(0xffff0000), W(0x004d0000), /* POWER8 */ - W(0xffffffff), W(0x0f000004), /* all 2.07-compliant */ - W(0xffffffff), W(0x0f000003), /* all 2.06-compliant */ - W(0xffffffff), W(0x0f000002), /* all 2.05-compliant */ - W(0xfffffffe), W(0x0f000001), /* all 2.04-compliant and earlier */ - NUM_VECTORS(6), /* 6 option vectors */ - - /* option vector 1: processor architectures supported */ - VECTOR_LENGTH(2), /* length */ - 0, /* don't ignore, don't halt */ - OV1_PPC_2_00 | OV1_PPC_2_01 | OV1_PPC_2_02 | OV1_PPC_2_03 | - OV1_PPC_2_04 | OV1_PPC_2_05 | OV1_PPC_2_06 | OV1_PPC_2_07, - - /* option vector 2: Open Firmware options supported */ - VECTOR_LENGTH(33), /* length */ - OV2_REAL_MODE, - 0, 0, - W(0xffffffff), /* real_base */ - W(0xffffffff), /* real_size */ - W(0xffffffff), /* virt_base */ - W(0xffffffff), /* virt_size */ - W(0xffffffff), /* load_base */ - W(256), /* 256MB min RMA */ - W(0xffffffff), /* full client load */ - 0, /* min RMA percentage of total RAM */ - 48, /* max log_2(hash table size) */ - - /* option vector 3: processor options supported */ - VECTOR_LENGTH(2), /* length */ - 0, /* don't ignore, don't halt */ - OV3_FP | OV3_VMX | OV3_DFP, - - /* option vector 4: IBM PAPR implementation */ - VECTOR_LENGTH(2), /* length */ - 0, /* don't halt */ - OV4_MIN_ENT_CAP, /* minimum VP entitled capacity */ - - /* option vector 5: PAPR/OF options */ - VECTOR_LENGTH(21), /* length */ - 0, /* don't ignore, don't halt */ - OV5_FEAT(OV5_LPAR) | OV5_FEAT(OV5_SPLPAR) | OV5_FEAT(OV5_LARGE_PAGES) | - OV5_FEAT(OV5_DRCONF_MEMORY) | OV5_FEAT(OV5_DONATE_DEDICATE_CPU) | -#ifdef CONFIG_PCI_MSI - /* PCIe/MSI support. Without MSI full PCIe is not supported */ - OV5_FEAT(OV5_MSI), -#else - 0, -#endif - 0, -#ifdef CONFIG_PPC_SMLPAR - OV5_FEAT(OV5_CMO) | OV5_FEAT(OV5_XCMO), -#else - 0, -#endif - OV5_FEAT(OV5_TYPE1_AFFINITY) | OV5_FEAT(OV5_PRRN), - 0, - 0, - 0, - /* WARNING: The offset of the "number of cores" field below - * must match by the macro below. Update the definition if - * the structure layout changes. - */ -#define IBM_ARCH_VEC_NRCORES_OFFSET 133 - W(NR_CPUS), /* number of cores supported */ - 0, - 0, - 0, - 0, - OV5_FEAT(OV5_PFO_HW_RNG) | OV5_FEAT(OV5_PFO_HW_ENCR) | - OV5_FEAT(OV5_PFO_HW_842), /* Byte 17 */ - 0, /* Byte 18 */ - 0, /* Byte 19 */ - 0, /* Byte 20 */ - OV5_FEAT(OV5_SUB_PROCESSORS), /* Byte 21 */ - - /* option vector 6: IBM PAPR hints */ - VECTOR_LENGTH(3), /* length */ - 0, - 0, - OV6_LINUX, -}; - struct option_vector1 { u8 byte1; u8 arch_versions; @@ -793,6 +699,154 @@ struct option_vector6 { u8 os_name; } __packed; +struct ibm_arch_vec { + struct { u32 mask, val; } pvrs[10]; + + u8 num_vectors; + + u8 vec1_len; + struct option_vector1 vec1; + + u8 vec2_len; + struct option_vector2 vec2; + + u8 vec3_len; + struct option_vector3 vec3; + + u8 vec4_len; + struct option_vector4 vec4; + + u8 vec5_len; + struct option_vector5 vec5; + + u8 vec6_len; + struct option_vector6 vec6; +} __packed; + +struct ibm_arch_vec __cacheline_aligned ibm_architecture_vec = { + .pvrs = { + { + .mask = cpu_to_be32(0xfffe0000), /* POWER5/POWER5+ */ + .val = cpu_to_be32(0x003a0000), + }, + { + .mask = cpu_to_be32(0xffff0000), /* POWER6 */ + .val = cpu_to_be32(0x003e0000), + }, + { + .mask = cpu_to_be32(0xffff0000), /* POWER7 */ + .val = cpu_to_be32(0x003f0000), + }, + { + .mask = cpu_to_be32(0xffff0000), /* POWER8E */ + .val = cpu_to_be32(0x004b0000), + }, + { + .mask = cpu_to_be32(0xffff0000), /* POWER8NVL */ + .val = cpu_to_be32(0x004c0000), + }, + { + .mask = cpu_to_be32(0xffff0000), /* POWER8 */ + .val = cpu_to_be32(0x004d0000), + }, + { + .mask = cpu_to_be32(0xffffffff), /* all 2.07-compliant */ + .val = cpu_to_be32(0x0f000004), + }, + { + .mask = cpu_to_be32(0xffffffff), /* all 2.06-compliant */ + .val = cpu_to_be32(0x0f000003), + }, + { + .mask = cpu_to_be32(0xffffffff), /* all 2.05-compliant */ + .val = cpu_to_be32(0x0f000002), + }, + { + .mask = cpu_to_be32(0xfffffffe), /* all 2.04-compliant and earlier */ + .val = cpu_to_be32(0x0f000001), + }, + }, + + .num_vectors = NUM_VECTORS(6), + + .vec1_len = VECTOR_LENGTH(sizeof(struct option_vector1)), + .vec1 = { + .byte1 = 0, + .arch_versions = OV1_PPC_2_00 | OV1_PPC_2_01 | OV1_PPC_2_02 | OV1_PPC_2_03 | + OV1_PPC_2_04 | OV1_PPC_2_05 | OV1_PPC_2_06 | OV1_PPC_2_07, + }, + + .vec2_len = VECTOR_LENGTH(sizeof(struct option_vector2)), + /* option vector 2: Open Firmware options supported */ + .vec2 = { + .byte1 = OV2_REAL_MODE, + .reserved = 0, + .real_base = cpu_to_be32(0xffffffff), + .real_size = cpu_to_be32(0xffffffff), + .virt_base = cpu_to_be32(0xffffffff), + .virt_size = cpu_to_be32(0xffffffff), + .load_base = cpu_to_be32(0xffffffff), + .min_rma = cpu_to_be32(256), /* 256MB min RMA */ + .min_load = cpu_to_be32(0xffffffff), /* full client load */ + .min_rma_percent = 0, /* min RMA percentage of total RAM */ + .max_pft_size = 48, /* max log_2(hash table size) */ + }, + + .vec3_len = VECTOR_LENGTH(sizeof(struct option_vector3)), + /* option vector 3: processor options supported */ + .vec3 = { + .byte1 = 0, /* don't ignore, don't halt */ + .byte2 = OV3_FP | OV3_VMX | OV3_DFP, + }, + + .vec4_len = VECTOR_LENGTH(sizeof(struct option_vector4)), + /* option vector 4: IBM PAPR implementation */ + .vec4 = { + .byte1 = 0, /* don't halt */ + .min_vp_cap = OV4_MIN_ENT_CAP, /* minimum VP entitled capacity */ + }, + + .vec5_len = VECTOR_LENGTH(sizeof(struct option_vector5)), + /* option vector 5: PAPR/OF options */ + .vec5 = { + .byte1 = 0, /* don't ignore, don't halt */ + .byte2 = OV5_FEAT(OV5_LPAR) | OV5_FEAT(OV5_SPLPAR) | OV5_FEAT(OV5_LARGE_PAGES) | + OV5_FEAT(OV5_DRCONF_MEMORY) | OV5_FEAT(OV5_DONATE_DEDICATE_CPU) | +#ifdef CONFIG_PCI_MSI + /* PCIe/MSI support. Without MSI full PCIe is not supported */ + OV5_FEAT(OV5_MSI), +#else + 0, +#endif + .byte3 = 0, + .cmo = +#ifdef CONFIG_PPC_SMLPAR + OV5_FEAT(OV5_CMO) | OV5_FEAT(OV5_XCMO), +#else + 0, +#endif + .associativity = OV5_FEAT(OV5_TYPE1_AFFINITY) | OV5_FEAT(OV5_PRRN), + .bin_opts = 0, + .micro_checkpoint = 0, + .reserved0 = 0, + .max_cpus = cpu_to_be32(NR_CPUS), /* number of cores supported */ + .papr_level = 0, + .reserved1 = 0, + .platform_facilities = OV5_FEAT(OV5_PFO_HW_RNG) | OV5_FEAT(OV5_PFO_HW_ENCR) | OV5_FEAT(OV5_PFO_HW_842), + .reserved2 = 0, + .reserved3 = 0, + .subprocessors = 1, + }, + + /* option vector 6: IBM PAPR hints */ + .vec6_len = VECTOR_LENGTH(sizeof(struct option_vector6)), + .vec6 = { + .reserved = 0, + .secondary_pteg = 0, + .os_name = OV6_LINUX, + }, +}; + /* Old method - ELF header with PT_NOTE sections only works on BE */ #ifdef __BIG_ENDIAN__ static struct fake_elf { @@ -926,7 +980,6 @@ static void __init prom_send_capabilities(void) ihandle root; prom_arg_t ret; u32 cores; - unsigned char *ptcores; root = call_prom("open", 1, 1, ADDR("/")); if (root != 0) { @@ -937,37 +990,18 @@ static void __init prom_send_capabilities(void) * divide NR_CPUS. */ - /* The core value may start at an odd address. If such a word - * access is made at a cache line boundary, this leads to an - * exception which may not be handled at this time. - * Forcing a per byte access to avoid exception. - */ - ptcores = &ibm_architecture_vec[IBM_ARCH_VEC_NRCORES_OFFSET]; - cores = 0; - cores |= ptcores[0] << 24; - cores |= ptcores[1] << 16; - cores |= ptcores[2] << 8; - cores |= ptcores[3]; - if (cores != NR_CPUS) { - prom_printf("WARNING ! " - "ibm_architecture_vec structure inconsistent: %lu!\n", - cores); - } else { - cores = DIV_ROUND_UP(NR_CPUS, prom_count_smt_threads()); - prom_printf("Max number of cores passed to firmware: %lu (NR_CPUS = %lu)\n", - cores, NR_CPUS); - ptcores[0] = (cores >> 24) & 0xff; - ptcores[1] = (cores >> 16) & 0xff; - ptcores[2] = (cores >> 8) & 0xff; - ptcores[3] = cores & 0xff; - } + cores = DIV_ROUND_UP(NR_CPUS, prom_count_smt_threads()); + prom_printf("Max number of cores passed to firmware: %lu (NR_CPUS = %lu)\n", + cores, NR_CPUS); + + ibm_architecture_vec.vec5.max_cpus = cpu_to_be32(cores); /* try calling the ibm,client-architecture-support method */ prom_printf("Calling ibm,client-architecture-support..."); if (call_prom_ret("call-method", 3, 2, &ret, ADDR("ibm,client-architecture-support"), root, - ADDR(ibm_architecture_vec)) == 0) { + ADDR(&ibm_architecture_vec)) == 0) { /* the call exists... */ if (ret) prom_printf("\nWARNING: ibm,client-architecture" -- cgit v1.3.1 From 88f54a3581eb9deaa3bd1aade40aef266d782385 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Wed, 30 Nov 2016 17:51:59 +1100 Subject: powerpc/iommu: Pass mm_struct to init/cleanup helpers We are going to get rid of @current references in mmu_context_boos3s64.c and cache mm_struct in the VFIO container. Since mm_context_t does not have reference counting, we will be using mm_struct which does have the reference counter. This changes mm_iommu_init/mm_iommu_cleanup to receive mm_struct rather than mm_context_t (which is embedded into mm). This should not cause any behavioral change. Signed-off-by: Alexey Kardashevskiy Reviewed-by: David Gibson Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/mmu_context.h | 4 ++-- arch/powerpc/kernel/setup-common.c | 2 +- arch/powerpc/mm/mmu_context_book3s64.c | 4 ++-- arch/powerpc/mm/mmu_context_iommu.c | 9 +++++---- 4 files changed, 10 insertions(+), 9 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h index 5c451140660a..424844bc2a57 100644 --- a/arch/powerpc/include/asm/mmu_context.h +++ b/arch/powerpc/include/asm/mmu_context.h @@ -23,8 +23,8 @@ extern bool mm_iommu_preregistered(void); extern long mm_iommu_get(unsigned long ua, unsigned long entries, struct mm_iommu_table_group_mem_t **pmem); extern long mm_iommu_put(struct mm_iommu_table_group_mem_t *mem); -extern void mm_iommu_init(mm_context_t *ctx); -extern void mm_iommu_cleanup(mm_context_t *ctx); +extern void mm_iommu_init(struct mm_struct *mm); +extern void mm_iommu_cleanup(struct mm_struct *mm); extern struct mm_iommu_table_group_mem_t *mm_iommu_lookup(unsigned long ua, unsigned long size); extern struct mm_iommu_table_group_mem_t *mm_iommu_find(unsigned long ua, diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 270ee30abdcf..f516ac508ae3 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -915,7 +915,7 @@ void __init setup_arch(char **cmdline_p) init_mm.context.pte_frag = NULL; #endif #ifdef CONFIG_SPAPR_TCE_IOMMU - mm_iommu_init(&init_mm.context); + mm_iommu_init(&init_mm); #endif irqstack_early_init(); exc_lvl_early_init(); diff --git a/arch/powerpc/mm/mmu_context_book3s64.c b/arch/powerpc/mm/mmu_context_book3s64.c index b114f8b93ec9..ad8273590975 100644 --- a/arch/powerpc/mm/mmu_context_book3s64.c +++ b/arch/powerpc/mm/mmu_context_book3s64.c @@ -115,7 +115,7 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm) mm->context.pte_frag = NULL; #endif #ifdef CONFIG_SPAPR_TCE_IOMMU - mm_iommu_init(&mm->context); + mm_iommu_init(mm); #endif return 0; } @@ -160,7 +160,7 @@ static inline void destroy_pagetable_page(struct mm_struct *mm) void destroy_context(struct mm_struct *mm) { #ifdef CONFIG_SPAPR_TCE_IOMMU - mm_iommu_cleanup(&mm->context); + mm_iommu_cleanup(mm); #endif #ifdef CONFIG_PPC_ICSWX diff --git a/arch/powerpc/mm/mmu_context_iommu.c b/arch/powerpc/mm/mmu_context_iommu.c index e0f1c33601dd..ad2e575fd418 100644 --- a/arch/powerpc/mm/mmu_context_iommu.c +++ b/arch/powerpc/mm/mmu_context_iommu.c @@ -373,16 +373,17 @@ void mm_iommu_mapped_dec(struct mm_iommu_table_group_mem_t *mem) } EXPORT_SYMBOL_GPL(mm_iommu_mapped_dec); -void mm_iommu_init(mm_context_t *ctx) +void mm_iommu_init(struct mm_struct *mm) { - INIT_LIST_HEAD_RCU(&ctx->iommu_group_mem_list); + INIT_LIST_HEAD_RCU(&mm->context.iommu_group_mem_list); } -void mm_iommu_cleanup(mm_context_t *ctx) +void mm_iommu_cleanup(struct mm_struct *mm) { struct mm_iommu_table_group_mem_t *mem, *tmp; - list_for_each_entry_safe(mem, tmp, &ctx->iommu_group_mem_list, next) { + list_for_each_entry_safe(mem, tmp, &mm->context.iommu_group_mem_list, + next) { list_del_rcu(&mem->next); mm_iommu_do_free(mem); } -- cgit v1.3.1 From d7baee6901b34c4895eb78efdbf13a49079d7404 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Wed, 30 Nov 2016 17:52:00 +1100 Subject: powerpc/iommu: Stop using @current in mm_iommu_xxx This changes mm_iommu_xxx helpers to take mm_struct as a parameter instead of getting it from @current which in some situations may not have a valid reference to mm. This changes helpers to receive @mm and moves all references to @current to the caller, including checks for !current and !current->mm; checks in mm_iommu_preregistered() are removed as there is no caller yet. This moves the mm_iommu_adjust_locked_vm() call to the caller as it receives mm_iommu_table_group_mem_t but it needs mm. This should cause no behavioral change. Signed-off-by: Alexey Kardashevskiy Reviewed-by: David Gibson Acked-by: Alex Williamson Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/mmu_context.h | 16 ++++++------ arch/powerpc/mm/mmu_context_iommu.c | 46 +++++++++++++--------------------- drivers/vfio/vfio_iommu_spapr_tce.c | 14 ++++++++--- 3 files changed, 36 insertions(+), 40 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h index 424844bc2a57..b9e3f0aca261 100644 --- a/arch/powerpc/include/asm/mmu_context.h +++ b/arch/powerpc/include/asm/mmu_context.h @@ -19,16 +19,18 @@ extern void destroy_context(struct mm_struct *mm); struct mm_iommu_table_group_mem_t; extern int isolate_lru_page(struct page *page); /* from internal.h */ -extern bool mm_iommu_preregistered(void); -extern long mm_iommu_get(unsigned long ua, unsigned long entries, +extern bool mm_iommu_preregistered(struct mm_struct *mm); +extern long mm_iommu_get(struct mm_struct *mm, + unsigned long ua, unsigned long entries, struct mm_iommu_table_group_mem_t **pmem); -extern long mm_iommu_put(struct mm_iommu_table_group_mem_t *mem); +extern long mm_iommu_put(struct mm_struct *mm, + struct mm_iommu_table_group_mem_t *mem); extern void mm_iommu_init(struct mm_struct *mm); extern void mm_iommu_cleanup(struct mm_struct *mm); -extern struct mm_iommu_table_group_mem_t *mm_iommu_lookup(unsigned long ua, - unsigned long size); -extern struct mm_iommu_table_group_mem_t *mm_iommu_find(unsigned long ua, - unsigned long entries); +extern struct mm_iommu_table_group_mem_t *mm_iommu_lookup(struct mm_struct *mm, + unsigned long ua, unsigned long size); +extern struct mm_iommu_table_group_mem_t *mm_iommu_find(struct mm_struct *mm, + unsigned long ua, unsigned long entries); extern long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem, unsigned long ua, unsigned long *hpa); extern long mm_iommu_mapped_inc(struct mm_iommu_table_group_mem_t *mem); diff --git a/arch/powerpc/mm/mmu_context_iommu.c b/arch/powerpc/mm/mmu_context_iommu.c index ad2e575fd418..4c6db09e77ad 100644 --- a/arch/powerpc/mm/mmu_context_iommu.c +++ b/arch/powerpc/mm/mmu_context_iommu.c @@ -56,7 +56,7 @@ static long mm_iommu_adjust_locked_vm(struct mm_struct *mm, } pr_debug("[%d] RLIMIT_MEMLOCK HASH64 %c%ld %ld/%ld\n", - current->pid, + current ? current->pid : 0, incr ? '+' : '-', npages << PAGE_SHIFT, mm->locked_vm << PAGE_SHIFT, @@ -66,12 +66,9 @@ static long mm_iommu_adjust_locked_vm(struct mm_struct *mm, return ret; } -bool mm_iommu_preregistered(void) +bool mm_iommu_preregistered(struct mm_struct *mm) { - if (!current || !current->mm) - return false; - - return !list_empty(¤t->mm->context.iommu_group_mem_list); + return !list_empty(&mm->context.iommu_group_mem_list); } EXPORT_SYMBOL_GPL(mm_iommu_preregistered); @@ -124,19 +121,16 @@ static int mm_iommu_move_page_from_cma(struct page *page) return 0; } -long mm_iommu_get(unsigned long ua, unsigned long entries, +long mm_iommu_get(struct mm_struct *mm, unsigned long ua, unsigned long entries, struct mm_iommu_table_group_mem_t **pmem) { struct mm_iommu_table_group_mem_t *mem; long i, j, ret = 0, locked_entries = 0; struct page *page = NULL; - if (!current || !current->mm) - return -ESRCH; /* process exited */ - mutex_lock(&mem_list_mutex); - list_for_each_entry_rcu(mem, ¤t->mm->context.iommu_group_mem_list, + list_for_each_entry_rcu(mem, &mm->context.iommu_group_mem_list, next) { if ((mem->ua == ua) && (mem->entries == entries)) { ++mem->used; @@ -154,7 +148,7 @@ long mm_iommu_get(unsigned long ua, unsigned long entries, } - ret = mm_iommu_adjust_locked_vm(current->mm, entries, true); + ret = mm_iommu_adjust_locked_vm(mm, entries, true); if (ret) goto unlock_exit; @@ -215,11 +209,11 @@ populate: mem->entries = entries; *pmem = mem; - list_add_rcu(&mem->next, ¤t->mm->context.iommu_group_mem_list); + list_add_rcu(&mem->next, &mm->context.iommu_group_mem_list); unlock_exit: if (locked_entries && ret) - mm_iommu_adjust_locked_vm(current->mm, locked_entries, false); + mm_iommu_adjust_locked_vm(mm, locked_entries, false); mutex_unlock(&mem_list_mutex); @@ -264,17 +258,13 @@ static void mm_iommu_free(struct rcu_head *head) static void mm_iommu_release(struct mm_iommu_table_group_mem_t *mem) { list_del_rcu(&mem->next); - mm_iommu_adjust_locked_vm(current->mm, mem->entries, false); call_rcu(&mem->rcu, mm_iommu_free); } -long mm_iommu_put(struct mm_iommu_table_group_mem_t *mem) +long mm_iommu_put(struct mm_struct *mm, struct mm_iommu_table_group_mem_t *mem) { long ret = 0; - if (!current || !current->mm) - return -ESRCH; /* process exited */ - mutex_lock(&mem_list_mutex); if (mem->used == 0) { @@ -297,6 +287,8 @@ long mm_iommu_put(struct mm_iommu_table_group_mem_t *mem) /* @mapped became 0 so now mappings are disabled, release the region */ mm_iommu_release(mem); + mm_iommu_adjust_locked_vm(mm, mem->entries, false); + unlock_exit: mutex_unlock(&mem_list_mutex); @@ -304,14 +296,12 @@ unlock_exit: } EXPORT_SYMBOL_GPL(mm_iommu_put); -struct mm_iommu_table_group_mem_t *mm_iommu_lookup(unsigned long ua, - unsigned long size) +struct mm_iommu_table_group_mem_t *mm_iommu_lookup(struct mm_struct *mm, + unsigned long ua, unsigned long size) { struct mm_iommu_table_group_mem_t *mem, *ret = NULL; - list_for_each_entry_rcu(mem, - ¤t->mm->context.iommu_group_mem_list, - next) { + list_for_each_entry_rcu(mem, &mm->context.iommu_group_mem_list, next) { if ((mem->ua <= ua) && (ua + size <= mem->ua + (mem->entries << PAGE_SHIFT))) { @@ -324,14 +314,12 @@ struct mm_iommu_table_group_mem_t *mm_iommu_lookup(unsigned long ua, } EXPORT_SYMBOL_GPL(mm_iommu_lookup); -struct mm_iommu_table_group_mem_t *mm_iommu_find(unsigned long ua, - unsigned long entries) +struct mm_iommu_table_group_mem_t *mm_iommu_find(struct mm_struct *mm, + unsigned long ua, unsigned long entries) { struct mm_iommu_table_group_mem_t *mem, *ret = NULL; - list_for_each_entry_rcu(mem, - ¤t->mm->context.iommu_group_mem_list, - next) { + list_for_each_entry_rcu(mem, &mm->context.iommu_group_mem_list, next) { if ((mem->ua == ua) && (mem->entries == entries)) { ret = mem; break; diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c index 80378ddadc5c..d0c38b201267 100644 --- a/drivers/vfio/vfio_iommu_spapr_tce.c +++ b/drivers/vfio/vfio_iommu_spapr_tce.c @@ -107,14 +107,17 @@ static long tce_iommu_unregister_pages(struct tce_container *container, { struct mm_iommu_table_group_mem_t *mem; + if (!current || !current->mm) + return -ESRCH; /* process exited */ + if ((vaddr & ~PAGE_MASK) || (size & ~PAGE_MASK)) return -EINVAL; - mem = mm_iommu_find(vaddr, size >> PAGE_SHIFT); + mem = mm_iommu_find(current->mm, vaddr, size >> PAGE_SHIFT); if (!mem) return -ENOENT; - return mm_iommu_put(mem); + return mm_iommu_put(current->mm, mem); } static long tce_iommu_register_pages(struct tce_container *container, @@ -124,11 +127,14 @@ static long tce_iommu_register_pages(struct tce_container *container, struct mm_iommu_table_group_mem_t *mem = NULL; unsigned long entries = size >> PAGE_SHIFT; + if (!current || !current->mm) + return -ESRCH; /* process exited */ + if ((vaddr & ~PAGE_MASK) || (size & ~PAGE_MASK) || ((vaddr + size) < vaddr)) return -EINVAL; - ret = mm_iommu_get(vaddr, entries, &mem); + ret = mm_iommu_get(current->mm, vaddr, entries, &mem); if (ret) return ret; @@ -375,7 +381,7 @@ static int tce_iommu_prereg_ua_to_hpa(unsigned long tce, unsigned long size, long ret = 0; struct mm_iommu_table_group_mem_t *mem; - mem = mm_iommu_lookup(tce, size); + mem = mm_iommu_lookup(current->mm, tce, size); if (!mem) return -EINVAL; -- cgit v1.3.1 From 9b081e10805cd8e356f30ded1cb2008d67af26c9 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 7 Dec 2016 08:47:24 +0100 Subject: powerpc: port 64 bits pgtable_cache to 32 bits Today powerpc64 uses a set of pgtable_caches while powerpc32 uses standard pages when using 4k pages and a single pgtable_cache if using other size pages. In preparation of implementing huge pages on the 8xx, this patch replaces the specific powerpc32 handling by the 64 bits approach. This is done by: * moving 64 bits pgtable_cache_add() and pgtable_cache_init() in a new file called init-common.c * modifying pgtable_cache_init() to also handle the case without PMD * removing the 32 bits version of pgtable_cache_add() and pgtable_cache_init() * copying related header contents from 64 bits into both the book3s/32 and nohash/32 header files On the 8xx, the following cache sizes will be used: * 4k pages mode: - PGT_CACHE(10) for PGD - PGT_CACHE(3) for 512k hugepage tables * 16k pages mode: - PGT_CACHE(6) for PGD - PGT_CACHE(7) for 512k hugepage tables - PGT_CACHE(3) for 8M hugepage tables Signed-off-by: Christophe Leroy Reviewed-by: Aneesh Kumar K.V Signed-off-by: Scott Wood --- arch/powerpc/include/asm/book3s/32/pgalloc.h | 44 +++++++++-- arch/powerpc/include/asm/book3s/32/pgtable.h | 40 +++++----- arch/powerpc/include/asm/book3s/64/pgtable.h | 3 - arch/powerpc/include/asm/nohash/32/pgalloc.h | 44 +++++++++-- arch/powerpc/include/asm/nohash/32/pgtable.h | 42 +++++------ arch/powerpc/include/asm/nohash/64/pgtable.h | 2 - arch/powerpc/include/asm/pgtable.h | 2 + arch/powerpc/mm/Makefile | 3 +- arch/powerpc/mm/init-common.c | 107 +++++++++++++++++++++++++++ arch/powerpc/mm/init_64.c | 77 ------------------- arch/powerpc/mm/pgtable_32.c | 37 --------- 11 files changed, 227 insertions(+), 174 deletions(-) create mode 100644 arch/powerpc/mm/init-common.c (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/book3s/32/pgalloc.h b/arch/powerpc/include/asm/book3s/32/pgalloc.h index 8e21bb492dca..d310546e5d9d 100644 --- a/arch/powerpc/include/asm/book3s/32/pgalloc.h +++ b/arch/powerpc/include/asm/book3s/32/pgalloc.h @@ -2,14 +2,42 @@ #define _ASM_POWERPC_BOOK3S_32_PGALLOC_H #include +#include -/* For 32-bit, all levels of page tables are just drawn from get_free_page() */ -#define MAX_PGTABLE_INDEX_SIZE 0 +/* + * Functions that deal with pagetables that could be at any level of + * the table need to be passed an "index_size" so they know how to + * handle allocation. For PTE pages (which are linked to a struct + * page for now, and drawn from the main get_free_pages() pool), the + * allocation size will be (2^index_size * sizeof(pointer)) and + * allocations are drawn from the kmem_cache in PGT_CACHE(index_size). + * + * The maximum index size needs to be big enough to allow any + * pagetable sizes we need, but small enough to fit in the low bits of + * any page table pointer. In other words all pagetables, even tiny + * ones, must be aligned to allow at least enough low 0 bits to + * contain this value. This value is also used as a mask, so it must + * be one less than a power of two. + */ +#define MAX_PGTABLE_INDEX_SIZE 0xf extern void __bad_pte(pmd_t *pmd); -extern pgd_t *pgd_alloc(struct mm_struct *mm); -extern void pgd_free(struct mm_struct *mm, pgd_t *pgd); +extern struct kmem_cache *pgtable_cache[]; +#define PGT_CACHE(shift) ({ \ + BUG_ON(!(shift)); \ + pgtable_cache[(shift) - 1]; \ + }) + +static inline pgd_t *pgd_alloc(struct mm_struct *mm) +{ + return kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE), GFP_KERNEL); +} + +static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) +{ + kmem_cache_free(PGT_CACHE(PGD_INDEX_SIZE), pgd); +} /* * We don't have any real pmd's, and this code never triggers because @@ -68,8 +96,12 @@ static inline void pte_free(struct mm_struct *mm, pgtable_t ptepage) static inline void pgtable_free(void *table, unsigned index_size) { - BUG_ON(index_size); /* 32-bit doesn't use this */ - free_page((unsigned long)table); + if (!index_size) { + free_page((unsigned long)table); + } else { + BUG_ON(index_size > MAX_PGTABLE_INDEX_SIZE); + kmem_cache_free(PGT_CACHE(index_size), table); + } } #define check_pgt_cache() do { } while (0) diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h index 6b8b2d57fdc8..388b0522f748 100644 --- a/arch/powerpc/include/asm/book3s/32/pgtable.h +++ b/arch/powerpc/include/asm/book3s/32/pgtable.h @@ -8,6 +8,23 @@ /* And here we include common definitions */ #include +#define PTE_INDEX_SIZE PTE_SHIFT +#define PMD_INDEX_SIZE 0 +#define PUD_INDEX_SIZE 0 +#define PGD_INDEX_SIZE (32 - PGDIR_SHIFT) + +#define PMD_CACHE_INDEX PMD_INDEX_SIZE + +#ifndef __ASSEMBLY__ +#define PTE_TABLE_SIZE (sizeof(pte_t) << PTE_INDEX_SIZE) +#define PMD_TABLE_SIZE 0 +#define PUD_TABLE_SIZE 0 +#define PGD_TABLE_SIZE (sizeof(pgd_t) << PGD_INDEX_SIZE) +#endif /* __ASSEMBLY__ */ + +#define PTRS_PER_PTE (1 << PTE_INDEX_SIZE) +#define PTRS_PER_PGD (1 << PGD_INDEX_SIZE) + /* * The normal case is that PTEs are 32-bits and we have a 1-page * 1024-entry pgdir pointing to 1-page 1024-entry PTE pages. -- paulus @@ -19,14 +36,10 @@ * -Matt */ /* PGDIR_SHIFT determines what a top-level page table entry can map */ -#define PGDIR_SHIFT (PAGE_SHIFT + PTE_SHIFT) +#define PGDIR_SHIFT (PAGE_SHIFT + PTE_INDEX_SIZE) #define PGDIR_SIZE (1UL << PGDIR_SHIFT) #define PGDIR_MASK (~(PGDIR_SIZE-1)) -#define PTRS_PER_PTE (1 << PTE_SHIFT) -#define PTRS_PER_PMD 1 -#define PTRS_PER_PGD (1 << (32 - PGDIR_SHIFT)) - #define USER_PTRS_PER_PGD (TASK_SIZE / PGDIR_SIZE) /* * This is the bottom of the PKMAP area with HIGHMEM or an arbitrary @@ -82,12 +95,8 @@ extern unsigned long ioremap_bot; -/* - * entries per page directory level: our page-table tree is two-level, so - * we don't really have any PMD directory. - */ -#define PTE_TABLE_SIZE (sizeof(pte_t) << PTE_SHIFT) -#define PGD_TABLE_SIZE (sizeof(pgd_t) << (32 - PGDIR_SHIFT)) +/* Bits to mask out from a PGD to get to the PUD page */ +#define PGD_MASKED_BITS 0 #define pte_ERROR(e) \ pr_err("%s:%d: bad pte %llx.\n", __FILE__, __LINE__, \ @@ -283,15 +292,6 @@ static inline void __ptep_set_access_flags(struct mm_struct *mm, #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) >> 3 }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val << 3 }) -#ifndef CONFIG_PPC_4K_PAGES -void pgtable_cache_init(void); -#else -/* - * No page table caches to initialise - */ -#define pgtable_cache_init() do { } while (0) -#endif - extern int get_pteptr(struct mm_struct *mm, unsigned long addr, pte_t **ptep, pmd_t **pmdp); diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index 9fd77f8794a0..0a46a5f2a739 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -789,9 +789,6 @@ extern struct page *pgd_page(pgd_t pgd); #define pgd_ERROR(e) \ pr_err("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e)) -void pgtable_cache_add(unsigned shift, void (*ctor)(void *)); -void pgtable_cache_init(void); - static inline int map_kernel_page(unsigned long ea, unsigned long pa, unsigned long flags) { diff --git a/arch/powerpc/include/asm/nohash/32/pgalloc.h b/arch/powerpc/include/asm/nohash/32/pgalloc.h index 76d6b9e0c8a9..633139291a48 100644 --- a/arch/powerpc/include/asm/nohash/32/pgalloc.h +++ b/arch/powerpc/include/asm/nohash/32/pgalloc.h @@ -2,14 +2,42 @@ #define _ASM_POWERPC_PGALLOC_32_H #include +#include -/* For 32-bit, all levels of page tables are just drawn from get_free_page() */ -#define MAX_PGTABLE_INDEX_SIZE 0 +/* + * Functions that deal with pagetables that could be at any level of + * the table need to be passed an "index_size" so they know how to + * handle allocation. For PTE pages (which are linked to a struct + * page for now, and drawn from the main get_free_pages() pool), the + * allocation size will be (2^index_size * sizeof(pointer)) and + * allocations are drawn from the kmem_cache in PGT_CACHE(index_size). + * + * The maximum index size needs to be big enough to allow any + * pagetable sizes we need, but small enough to fit in the low bits of + * any page table pointer. In other words all pagetables, even tiny + * ones, must be aligned to allow at least enough low 0 bits to + * contain this value. This value is also used as a mask, so it must + * be one less than a power of two. + */ +#define MAX_PGTABLE_INDEX_SIZE 0xf extern void __bad_pte(pmd_t *pmd); -extern pgd_t *pgd_alloc(struct mm_struct *mm); -extern void pgd_free(struct mm_struct *mm, pgd_t *pgd); +extern struct kmem_cache *pgtable_cache[]; +#define PGT_CACHE(shift) ({ \ + BUG_ON(!(shift)); \ + pgtable_cache[(shift) - 1]; \ + }) + +static inline pgd_t *pgd_alloc(struct mm_struct *mm) +{ + return kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE), GFP_KERNEL); +} + +static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) +{ + kmem_cache_free(PGT_CACHE(PGD_INDEX_SIZE), pgd); +} /* * We don't have any real pmd's, and this code never triggers because @@ -68,8 +96,12 @@ static inline void pte_free(struct mm_struct *mm, pgtable_t ptepage) static inline void pgtable_free(void *table, unsigned index_size) { - BUG_ON(index_size); /* 32-bit doesn't use this */ - free_page((unsigned long)table); + if (!index_size) { + free_page((unsigned long)table); + } else { + BUG_ON(index_size > MAX_PGTABLE_INDEX_SIZE); + kmem_cache_free(PGT_CACHE(index_size), table); + } } #define check_pgt_cache() do { } while (0) diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h index c219ef7be53b..7bd916e91295 100644 --- a/arch/powerpc/include/asm/nohash/32/pgtable.h +++ b/arch/powerpc/include/asm/nohash/32/pgtable.h @@ -16,6 +16,23 @@ extern int icache_44x_need_flush; #endif /* __ASSEMBLY__ */ +#define PTE_INDEX_SIZE PTE_SHIFT +#define PMD_INDEX_SIZE 0 +#define PUD_INDEX_SIZE 0 +#define PGD_INDEX_SIZE (32 - PGDIR_SHIFT) + +#define PMD_CACHE_INDEX PMD_INDEX_SIZE + +#ifndef __ASSEMBLY__ +#define PTE_TABLE_SIZE (sizeof(pte_t) << PTE_INDEX_SIZE) +#define PMD_TABLE_SIZE 0 +#define PUD_TABLE_SIZE 0 +#define PGD_TABLE_SIZE (sizeof(pgd_t) << PGD_INDEX_SIZE) +#endif /* __ASSEMBLY__ */ + +#define PTRS_PER_PTE (1 << PTE_INDEX_SIZE) +#define PTRS_PER_PGD (1 << PGD_INDEX_SIZE) + /* * The normal case is that PTEs are 32-bits and we have a 1-page * 1024-entry pgdir pointing to 1-page 1024-entry PTE pages. -- paulus @@ -27,22 +44,12 @@ extern int icache_44x_need_flush; * -Matt */ /* PGDIR_SHIFT determines what a top-level page table entry can map */ -#define PGDIR_SHIFT (PAGE_SHIFT + PTE_SHIFT) +#define PGDIR_SHIFT (PAGE_SHIFT + PTE_INDEX_SIZE) #define PGDIR_SIZE (1UL << PGDIR_SHIFT) #define PGDIR_MASK (~(PGDIR_SIZE-1)) -/* - * entries per page directory level: our page-table tree is two-level, so - * we don't really have any PMD directory. - */ -#ifndef __ASSEMBLY__ -#define PTE_TABLE_SIZE (sizeof(pte_t) << PTE_SHIFT) -#define PGD_TABLE_SIZE (sizeof(pgd_t) << (32 - PGDIR_SHIFT)) -#endif /* __ASSEMBLY__ */ - -#define PTRS_PER_PTE (1 << PTE_SHIFT) -#define PTRS_PER_PMD 1 -#define PTRS_PER_PGD (1 << (32 - PGDIR_SHIFT)) +/* Bits to mask out from a PGD to get to the PUD page */ +#define PGD_MASKED_BITS 0 #define USER_PTRS_PER_PGD (TASK_SIZE / PGDIR_SIZE) #define FIRST_USER_ADDRESS 0UL @@ -328,15 +335,6 @@ static inline void __ptep_set_access_flags(struct mm_struct *mm, #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) >> 3 }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val << 3 }) -#ifndef CONFIG_PPC_4K_PAGES -void pgtable_cache_init(void); -#else -/* - * No page table caches to initialise - */ -#define pgtable_cache_init() do { } while (0) -#endif - extern int get_pteptr(struct mm_struct *mm, unsigned long addr, pte_t **ptep, pmd_t **pmdp); diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h b/arch/powerpc/include/asm/nohash/64/pgtable.h index e9e540a804fc..6c4a14292a9e 100644 --- a/arch/powerpc/include/asm/nohash/64/pgtable.h +++ b/arch/powerpc/include/asm/nohash/64/pgtable.h @@ -346,8 +346,6 @@ static inline void __ptep_set_access_flags(struct mm_struct *mm, #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val((pte)) }) #define __swp_entry_to_pte(x) __pte((x).val) -void pgtable_cache_add(unsigned shift, void (*ctor)(void *)); -void pgtable_cache_init(void); extern int map_kernel_page(unsigned long ea, unsigned long pa, unsigned long flags); extern int __meminit vmemmap_create_mapping(unsigned long start, diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h index 9bd87f269d6d..dd01212935ac 100644 --- a/arch/powerpc/include/asm/pgtable.h +++ b/arch/powerpc/include/asm/pgtable.h @@ -78,6 +78,8 @@ static inline pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned long vmalloc_to_phys(void *vmalloc_addr); +void pgtable_cache_add(unsigned shift, void (*ctor)(void *)); +void pgtable_cache_init(void); #endif /* __ASSEMBLY__ */ #endif /* _ASM_POWERPC_PGTABLE_H */ diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile index f4ffe1f68ce9..3f4d338985fc 100644 --- a/arch/powerpc/mm/Makefile +++ b/arch/powerpc/mm/Makefile @@ -7,7 +7,8 @@ subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC) obj-y := fault.o mem.o pgtable.o mmap.o \ - init_$(BITS).o pgtable_$(BITS).o + init_$(BITS).o pgtable_$(BITS).o \ + init-common.o obj-$(CONFIG_PPC_MMU_NOHASH) += mmu_context_nohash.o tlb_nohash.o \ tlb_nohash_low.o obj-$(CONFIG_PPC_BOOK3E) += tlb_low_$(BITS)e.o diff --git a/arch/powerpc/mm/init-common.c b/arch/powerpc/mm/init-common.c new file mode 100644 index 000000000000..a175cd82ae8c --- /dev/null +++ b/arch/powerpc/mm/init-common.c @@ -0,0 +1,107 @@ +/* + * PowerPC version + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * + * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au) + * and Cort Dougan (PReP) (cort@cs.nmt.edu) + * Copyright (C) 1996 Paul Mackerras + * + * Derived from "arch/i386/mm/init.c" + * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds + * + * Dave Engebretsen + * Rework for PPC64 port. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#undef DEBUG + +#include +#include +#include + +static void pgd_ctor(void *addr) +{ + memset(addr, 0, PGD_TABLE_SIZE); +} + +static void pud_ctor(void *addr) +{ + memset(addr, 0, PUD_TABLE_SIZE); +} + +static void pmd_ctor(void *addr) +{ + memset(addr, 0, PMD_TABLE_SIZE); +} + +struct kmem_cache *pgtable_cache[MAX_PGTABLE_INDEX_SIZE]; + +/* + * Create a kmem_cache() for pagetables. This is not used for PTE + * pages - they're linked to struct page, come from the normal free + * pages pool and have a different entry size (see real_pte_t) to + * everything else. Caches created by this function are used for all + * the higher level pagetables, and for hugepage pagetables. + */ +void pgtable_cache_add(unsigned shift, void (*ctor)(void *)) +{ + char *name; + unsigned long table_size = sizeof(void *) << shift; + unsigned long align = table_size; + + /* When batching pgtable pointers for RCU freeing, we store + * the index size in the low bits. Table alignment must be + * big enough to fit it. + * + * Likewise, hugeapge pagetable pointers contain a (different) + * shift value in the low bits. All tables must be aligned so + * as to leave enough 0 bits in the address to contain it. */ + unsigned long minalign = max(MAX_PGTABLE_INDEX_SIZE + 1, + HUGEPD_SHIFT_MASK + 1); + struct kmem_cache *new; + + /* It would be nice if this was a BUILD_BUG_ON(), but at the + * moment, gcc doesn't seem to recognize is_power_of_2 as a + * constant expression, so so much for that. */ + BUG_ON(!is_power_of_2(minalign)); + BUG_ON((shift < 1) || (shift > MAX_PGTABLE_INDEX_SIZE)); + + if (PGT_CACHE(shift)) + return; /* Already have a cache of this size */ + + align = max_t(unsigned long, align, minalign); + name = kasprintf(GFP_KERNEL, "pgtable-2^%d", shift); + new = kmem_cache_create(name, table_size, align, 0, ctor); + kfree(name); + pgtable_cache[shift - 1] = new; + pr_debug("Allocated pgtable cache for order %d\n", shift); +} + + +void pgtable_cache_init(void) +{ + pgtable_cache_add(PGD_INDEX_SIZE, pgd_ctor); + + if (PMD_INDEX_SIZE && !PGT_CACHE(PMD_INDEX_SIZE)) + pgtable_cache_add(PMD_CACHE_INDEX, pmd_ctor); + /* + * In all current configs, when the PUD index exists it's the + * same size as either the pgd or pmd index except with THP enabled + * on book3s 64 + */ + if (PUD_INDEX_SIZE && !PGT_CACHE(PUD_INDEX_SIZE)) + pgtable_cache_add(PUD_INDEX_SIZE, pud_ctor); + + if (!PGT_CACHE(PGD_INDEX_SIZE)) + panic("Couldn't allocate pgd cache"); + if (PMD_INDEX_SIZE && !PGT_CACHE(PMD_INDEX_SIZE)) + panic("Couldn't allocate pmd pgtable caches"); + if (PUD_INDEX_SIZE && !PGT_CACHE(PUD_INDEX_SIZE)) + panic("Couldn't allocate pud pgtable caches"); +} diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index 16ada1eb7e26..a000c3585390 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c @@ -80,83 +80,6 @@ EXPORT_SYMBOL_GPL(memstart_addr); phys_addr_t kernstart_addr; EXPORT_SYMBOL_GPL(kernstart_addr); -static void pgd_ctor(void *addr) -{ - memset(addr, 0, PGD_TABLE_SIZE); -} - -static void pud_ctor(void *addr) -{ - memset(addr, 0, PUD_TABLE_SIZE); -} - -static void pmd_ctor(void *addr) -{ - memset(addr, 0, PMD_TABLE_SIZE); -} - -struct kmem_cache *pgtable_cache[MAX_PGTABLE_INDEX_SIZE]; - -/* - * Create a kmem_cache() for pagetables. This is not used for PTE - * pages - they're linked to struct page, come from the normal free - * pages pool and have a different entry size (see real_pte_t) to - * everything else. Caches created by this function are used for all - * the higher level pagetables, and for hugepage pagetables. - */ -void pgtable_cache_add(unsigned shift, void (*ctor)(void *)) -{ - char *name; - unsigned long table_size = sizeof(void *) << shift; - unsigned long align = table_size; - - /* When batching pgtable pointers for RCU freeing, we store - * the index size in the low bits. Table alignment must be - * big enough to fit it. - * - * Likewise, hugeapge pagetable pointers contain a (different) - * shift value in the low bits. All tables must be aligned so - * as to leave enough 0 bits in the address to contain it. */ - unsigned long minalign = max(MAX_PGTABLE_INDEX_SIZE + 1, - HUGEPD_SHIFT_MASK + 1); - struct kmem_cache *new; - - /* It would be nice if this was a BUILD_BUG_ON(), but at the - * moment, gcc doesn't seem to recognize is_power_of_2 as a - * constant expression, so so much for that. */ - BUG_ON(!is_power_of_2(minalign)); - BUG_ON((shift < 1) || (shift > MAX_PGTABLE_INDEX_SIZE)); - - if (PGT_CACHE(shift)) - return; /* Already have a cache of this size */ - - align = max_t(unsigned long, align, minalign); - name = kasprintf(GFP_KERNEL, "pgtable-2^%d", shift); - new = kmem_cache_create(name, table_size, align, 0, ctor); - kfree(name); - pgtable_cache[shift - 1] = new; - pr_debug("Allocated pgtable cache for order %d\n", shift); -} - - -void pgtable_cache_init(void) -{ - pgtable_cache_add(PGD_INDEX_SIZE, pgd_ctor); - pgtable_cache_add(PMD_CACHE_INDEX, pmd_ctor); - /* - * In all current configs, when the PUD index exists it's the - * same size as either the pgd or pmd index except with THP enabled - * on book3s 64 - */ - if (PUD_INDEX_SIZE && !PGT_CACHE(PUD_INDEX_SIZE)) - pgtable_cache_add(PUD_INDEX_SIZE, pud_ctor); - - if (!PGT_CACHE(PGD_INDEX_SIZE) || !PGT_CACHE(PMD_CACHE_INDEX)) - panic("Couldn't allocate pgtable caches"); - if (PUD_INDEX_SIZE && !PGT_CACHE(PUD_INDEX_SIZE)) - panic("Couldn't allocate pud pgtable caches"); -} - #ifdef CONFIG_SPARSEMEM_VMEMMAP /* * Given an address within the vmemmap, determine the pfn of the page that diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c index 0ae0572bc239..a65c0b4c0669 100644 --- a/arch/powerpc/mm/pgtable_32.c +++ b/arch/powerpc/mm/pgtable_32.c @@ -42,43 +42,6 @@ EXPORT_SYMBOL(ioremap_bot); /* aka VMALLOC_END */ extern char etext[], _stext[], _sinittext[], _einittext[]; -#define PGDIR_ORDER (32 + PGD_T_LOG2 - PGDIR_SHIFT) - -#ifndef CONFIG_PPC_4K_PAGES -static struct kmem_cache *pgtable_cache; - -void pgtable_cache_init(void) -{ - pgtable_cache = kmem_cache_create("PGDIR cache", 1 << PGDIR_ORDER, - 1 << PGDIR_ORDER, 0, NULL); - if (pgtable_cache == NULL) - panic("Couldn't allocate pgtable caches"); -} -#endif - -pgd_t *pgd_alloc(struct mm_struct *mm) -{ - pgd_t *ret; - - /* pgdir take page or two with 4K pages and a page fraction otherwise */ -#ifndef CONFIG_PPC_4K_PAGES - ret = kmem_cache_alloc(pgtable_cache, GFP_KERNEL | __GFP_ZERO); -#else - ret = (pgd_t *)__get_free_pages(GFP_KERNEL|__GFP_ZERO, - PGDIR_ORDER - PAGE_SHIFT); -#endif - return ret; -} - -void pgd_free(struct mm_struct *mm, pgd_t *pgd) -{ -#ifndef CONFIG_PPC_4K_PAGES - kmem_cache_free(pgtable_cache, (void *)pgd); -#else - free_pages((unsigned long)pgd, PGDIR_ORDER - PAGE_SHIFT); -#endif -} - __ref pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) { pte_t *pte; -- cgit v1.3.1 From 4b91428699477532ab1255c2dd5819713e9e8985 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 7 Dec 2016 08:47:28 +0100 Subject: powerpc/8xx: Implement support of hugepages 8xx uses a two level page table with two different linux page size support (4k and 16k). 8xx also support two different hugepage sizes 512k and 8M. In order to support them on linux we define two different page table layout. The size of pages is in the PGD entry, using PS field (bits 28-29): 00 : Small pages (4k or 16k) 01 : 512k pages 10 : reserved 11 : 8M pages For 512K hugepage size a pgd entry have the below format [0101] . The hugepte table allocated will contain 8 entries pointing to 512K huge pte in 4k pages mode and 64 entries in 16k pages mode. For 8M in 16k mode, a pgd entry have the below format [1101] . The hugepte table allocated will contain 8 entries pointing to 8M huge pte. For 8M in 4k mode, multiple pgd entries point to the same hugepte address and pgd entry will have the below format [1101]. The hugepte table allocated will only have one entry. For the time being, we do not support CPU15 ERRATA when HUGETLB is selected Signed-off-by: Christophe Leroy Reviewed-by: Aneesh Kumar K.V (v3, for the generic bits) Signed-off-by: Scott Wood --- arch/powerpc/include/asm/hugetlb.h | 19 ++++- arch/powerpc/include/asm/mmu-8xx.h | 35 ++++++++ arch/powerpc/include/asm/mmu.h | 23 +++--- arch/powerpc/include/asm/nohash/32/pte-8xx.h | 1 + arch/powerpc/include/asm/nohash/pgtable.h | 4 + arch/powerpc/include/asm/reg_8xx.h | 2 +- arch/powerpc/kernel/head_8xx.S | 119 +++++++++++++++++++++++++-- arch/powerpc/mm/hugetlbpage.c | 29 ++++--- arch/powerpc/mm/tlb_nohash.c | 21 ++++- arch/powerpc/platforms/8xx/Kconfig | 1 + arch/powerpc/platforms/Kconfig.cputype | 1 + 11 files changed, 225 insertions(+), 30 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/hugetlb.h b/arch/powerpc/include/asm/hugetlb.h index c5517f463ec7..3facdd41709c 100644 --- a/arch/powerpc/include/asm/hugetlb.h +++ b/arch/powerpc/include/asm/hugetlb.h @@ -51,12 +51,20 @@ static inline void __local_flush_hugetlb_page(struct vm_area_struct *vma, static inline pte_t *hugepd_page(hugepd_t hpd) { BUG_ON(!hugepd_ok(hpd)); +#ifdef CONFIG_PPC_8xx + return (pte_t *)__va(hpd.pd & ~(_PMD_PAGE_MASK | _PMD_PRESENT_MASK)); +#else return (pte_t *)((hpd.pd & ~HUGEPD_SHIFT_MASK) | PD_HUGE); +#endif } static inline unsigned int hugepd_shift(hugepd_t hpd) { +#ifdef CONFIG_PPC_8xx + return ((hpd.pd & _PMD_PAGE_MASK) >> 1) + 17; +#else return hpd.pd & HUGEPD_SHIFT_MASK; +#endif } #endif /* CONFIG_PPC_BOOK3S_64 */ @@ -99,7 +107,15 @@ static inline int is_hugepage_only_range(struct mm_struct *mm, void book3e_hugetlb_preload(struct vm_area_struct *vma, unsigned long ea, pte_t pte); +#ifdef CONFIG_PPC_8xx +static inline void flush_hugetlb_page(struct vm_area_struct *vma, + unsigned long vmaddr) +{ + flush_tlb_page(vma, vmaddr); +} +#else void flush_hugetlb_page(struct vm_area_struct *vma, unsigned long vmaddr); +#endif void hugetlb_free_pgd_range(struct mmu_gather *tlb, unsigned long addr, unsigned long end, unsigned long floor, @@ -205,7 +221,8 @@ static inline pte_t *hugepte_offset(hugepd_t hpd, unsigned long addr, * are reserved early in the boot process by memblock instead of via * the .dts as on IBM platforms. */ -#if defined(CONFIG_HUGETLB_PAGE) && defined(CONFIG_PPC_FSL_BOOK3E) +#if defined(CONFIG_HUGETLB_PAGE) && (defined(CONFIG_PPC_FSL_BOOK3E) || \ + defined(CONFIG_PPC_8xx)) extern void __init reserve_hugetlb_gpages(void); #else static inline void reserve_hugetlb_gpages(void) diff --git a/arch/powerpc/include/asm/mmu-8xx.h b/arch/powerpc/include/asm/mmu-8xx.h index 3e0e4927811c..798b5bf91427 100644 --- a/arch/powerpc/include/asm/mmu-8xx.h +++ b/arch/powerpc/include/asm/mmu-8xx.h @@ -172,6 +172,41 @@ typedef struct { #define PHYS_IMMR_BASE (mfspr(SPRN_IMMR) & 0xfff80000) #define VIRT_IMMR_BASE (__fix_to_virt(FIX_IMMR_BASE)) + +/* Page size definitions, common between 32 and 64-bit + * + * shift : is the "PAGE_SHIFT" value for that page size + * penc : is the pte encoding mask + * + */ +struct mmu_psize_def { + unsigned int shift; /* number of bits */ + unsigned int enc; /* PTE encoding */ + unsigned int ind; /* Corresponding indirect page size shift */ + unsigned int flags; +#define MMU_PAGE_SIZE_DIRECT 0x1 /* Supported as a direct size */ +#define MMU_PAGE_SIZE_INDIRECT 0x2 /* Supported as an indirect size */ +}; + +extern struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT]; + +static inline int shift_to_mmu_psize(unsigned int shift) +{ + int psize; + + for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) + if (mmu_psize_defs[psize].shift == shift) + return psize; + return -1; +} + +static inline unsigned int mmu_psize_to_shift(unsigned int mmu_psize) +{ + if (mmu_psize_defs[mmu_psize].shift) + return mmu_psize_defs[mmu_psize].shift; + BUG(); +} + #endif /* !__ASSEMBLY__ */ #if defined(CONFIG_PPC_4K_PAGES) diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h index e88368354e49..b119bdd6ed27 100644 --- a/arch/powerpc/include/asm/mmu.h +++ b/arch/powerpc/include/asm/mmu.h @@ -264,19 +264,20 @@ static inline bool early_radix_enabled(void) #define MMU_PAGE_64K 2 #define MMU_PAGE_64K_AP 3 /* "Admixed pages" (hash64 only) */ #define MMU_PAGE_256K 4 -#define MMU_PAGE_1M 5 -#define MMU_PAGE_2M 6 -#define MMU_PAGE_4M 7 -#define MMU_PAGE_8M 8 -#define MMU_PAGE_16M 9 -#define MMU_PAGE_64M 10 -#define MMU_PAGE_256M 11 -#define MMU_PAGE_1G 12 -#define MMU_PAGE_16G 13 -#define MMU_PAGE_64G 14 +#define MMU_PAGE_512K 5 +#define MMU_PAGE_1M 6 +#define MMU_PAGE_2M 7 +#define MMU_PAGE_4M 8 +#define MMU_PAGE_8M 9 +#define MMU_PAGE_16M 10 +#define MMU_PAGE_64M 11 +#define MMU_PAGE_256M 12 +#define MMU_PAGE_1G 13 +#define MMU_PAGE_16G 14 +#define MMU_PAGE_64G 15 /* N.B. we need to change the type of hpte_page_sizes if this gets to be > 16 */ -#define MMU_PAGE_COUNT 15 +#define MMU_PAGE_COUNT 16 #ifdef CONFIG_PPC_BOOK3S_64 #include diff --git a/arch/powerpc/include/asm/nohash/32/pte-8xx.h b/arch/powerpc/include/asm/nohash/32/pte-8xx.h index 3742b1919661..b4df2734c078 100644 --- a/arch/powerpc/include/asm/nohash/32/pte-8xx.h +++ b/arch/powerpc/include/asm/nohash/32/pte-8xx.h @@ -49,6 +49,7 @@ #define _PMD_BAD 0x0ff0 #define _PMD_PAGE_MASK 0x000c #define _PMD_PAGE_8M 0x000c +#define _PMD_PAGE_512K 0x0004 /* Until my rework is finished, 8xx still needs atomic PTE updates */ #define PTE_ATOMIC_UPDATES 1 diff --git a/arch/powerpc/include/asm/nohash/pgtable.h b/arch/powerpc/include/asm/nohash/pgtable.h index 1263c22d60d8..172849727054 100644 --- a/arch/powerpc/include/asm/nohash/pgtable.h +++ b/arch/powerpc/include/asm/nohash/pgtable.h @@ -226,7 +226,11 @@ extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, #ifdef CONFIG_HUGETLB_PAGE static inline int hugepd_ok(hugepd_t hpd) { +#ifdef CONFIG_PPC_8xx + return ((hpd.pd & 0x4) != 0); +#else return (hpd.pd > 0); +#endif } static inline int pmd_huge(pmd_t pmd) diff --git a/arch/powerpc/include/asm/reg_8xx.h b/arch/powerpc/include/asm/reg_8xx.h index 0197e12f7d48..1f1636124a04 100644 --- a/arch/powerpc/include/asm/reg_8xx.h +++ b/arch/powerpc/include/asm/reg_8xx.h @@ -4,7 +4,7 @@ #ifndef _ASM_POWERPC_REG_8xx_H #define _ASM_POWERPC_REG_8xx_H -#include +#include /* Cache control on the MPC8xx is provided through some additional * special purpose registers. diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index fb133a163263..1a9c99d3e5d8 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -73,6 +73,9 @@ #define RPN_PATTERN 0x00f0 #endif +#define PAGE_SHIFT_512K 19 +#define PAGE_SHIFT_8M 23 + __HEAD _ENTRY(_stext); _ENTRY(_start); @@ -322,7 +325,7 @@ SystemCall: #endif InstructionTLBMiss: -#if defined(CONFIG_8xx_CPU6) || defined(CONFIG_MODULES) || defined (CONFIG_DEBUG_PAGEALLOC) +#if defined(CONFIG_8xx_CPU6) || defined(CONFIG_MODULES) || defined (CONFIG_DEBUG_PAGEALLOC) || defined (CONFIG_HUGETLB_PAGE) mtspr SPRN_SPRG_SCRATCH2, r3 #endif EXCEPTION_PROLOG_0 @@ -332,10 +335,12 @@ InstructionTLBMiss: */ mfspr r10, SPRN_SRR0 /* Get effective address of fault */ INVALIDATE_ADJACENT_PAGES_CPU15(r11, r10) -#if defined(CONFIG_MODULES) || defined (CONFIG_DEBUG_PAGEALLOC) /* Only modules will cause ITLB Misses as we always * pin the first 8MB of kernel memory */ +#if defined(CONFIG_MODULES) || defined (CONFIG_DEBUG_PAGEALLOC) || defined (CONFIG_HUGETLB_PAGE) mfcr r3 +#endif +#if defined(CONFIG_MODULES) || defined (CONFIG_DEBUG_PAGEALLOC) IS_KERNEL(r11, r10) #endif mfspr r11, SPRN_M_TW /* Get level 1 table */ @@ -343,7 +348,6 @@ InstructionTLBMiss: BRANCH_UNLESS_KERNEL(3f) lis r11, (swapper_pg_dir-PAGE_OFFSET)@ha 3: - mtcr r3 #endif /* Insert level 1 index */ rlwimi r11, r10, 32 - ((PAGE_SHIFT - 2) << 1), (PAGE_SHIFT - 2) << 1, 29 @@ -351,14 +355,25 @@ InstructionTLBMiss: /* Extract level 2 index */ rlwinm r10, r10, 32 - (PAGE_SHIFT - 2), 32 - PAGE_SHIFT, 29 +#ifdef CONFIG_HUGETLB_PAGE + mtcr r11 + bt- 28, 10f /* bit 28 = Large page (8M) */ + bt- 29, 20f /* bit 29 = Large page (8M or 512k) */ +#endif rlwimi r10, r11, 0, 0, 32 - PAGE_SHIFT - 1 /* Add level 2 base */ lwz r10, 0(r10) /* Get the pte */ - +4: +#if defined(CONFIG_MODULES) || defined (CONFIG_DEBUG_PAGEALLOC) || defined (CONFIG_HUGETLB_PAGE) + mtcr r3 +#endif /* Insert the APG into the TWC from the Linux PTE. */ rlwimi r11, r10, 0, 25, 26 /* Load the MI_TWC with the attributes for this "segment." */ MTSPR_CPU6(SPRN_MI_TWC, r11, r3) /* Set segment attributes */ +#if defined (CONFIG_HUGETLB_PAGE) && defined (CONFIG_PPC_4K_PAGES) + rlwimi r10, r11, 1, MI_SPS16K +#endif #ifdef CONFIG_SWAP rlwinm r11, r10, 32-5, _PAGE_PRESENT and r11, r11, r10 @@ -371,16 +386,45 @@ InstructionTLBMiss: * set. All other Linux PTE bits control the behavior * of the MMU. */ +#if defined (CONFIG_HUGETLB_PAGE) && defined (CONFIG_PPC_4K_PAGES) + rlwimi r10, r11, 0, 0x0ff0 /* Set 24-27, clear 20-23 */ +#else rlwimi r10, r11, 0, 0x0ff8 /* Set 24-27, clear 20-23,28 */ +#endif MTSPR_CPU6(SPRN_MI_RPN, r10, r3) /* Update TLB entry */ /* Restore registers */ -#if defined(CONFIG_8xx_CPU6) || defined(CONFIG_MODULES) || defined (CONFIG_DEBUG_PAGEALLOC) +#if defined(CONFIG_8xx_CPU6) || defined(CONFIG_MODULES) || defined (CONFIG_DEBUG_PAGEALLOC) || defined (CONFIG_HUGETLB_PAGE) mfspr r3, SPRN_SPRG_SCRATCH2 #endif EXCEPTION_EPILOG_0 rfi +#ifdef CONFIG_HUGETLB_PAGE +10: /* 8M pages */ +#ifdef CONFIG_PPC_16K_PAGES + /* Extract level 2 index */ + rlwinm r10, r10, 32 - (PAGE_SHIFT_8M - PAGE_SHIFT), 32 + PAGE_SHIFT_8M - (PAGE_SHIFT << 1), 29 + /* Add level 2 base */ + rlwimi r10, r11, 0, 0, 32 + PAGE_SHIFT_8M - (PAGE_SHIFT << 1) - 1 +#else + /* Level 2 base */ + rlwinm r10, r11, 0, ~HUGEPD_SHIFT_MASK +#endif + lwz r10, 0(r10) /* Get the pte */ + rlwinm r11, r11, 0, 0xf + b 4b + +20: /* 512k pages */ + /* Extract level 2 index */ + rlwinm r10, r10, 32 - (PAGE_SHIFT_512K - PAGE_SHIFT), 32 + PAGE_SHIFT_512K - (PAGE_SHIFT << 1), 29 + /* Add level 2 base */ + rlwimi r10, r11, 0, 0, 32 + PAGE_SHIFT_512K - (PAGE_SHIFT << 1) - 1 + lwz r10, 0(r10) /* Get the pte */ + rlwinm r11, r11, 0, 0xf + b 4b +#endif + . = 0x1200 DataStoreTLBMiss: mtspr SPRN_SPRG_SCRATCH2, r3 @@ -407,7 +451,6 @@ _ENTRY(DTLBMiss_jmp) #endif blt cr7, DTLBMissLinear 3: - mtcr r3 mfspr r10, SPRN_MD_EPN /* Insert level 1 index */ @@ -418,8 +461,15 @@ _ENTRY(DTLBMiss_jmp) */ /* Extract level 2 index */ rlwinm r10, r10, 32 - (PAGE_SHIFT - 2), 32 - PAGE_SHIFT, 29 +#ifdef CONFIG_HUGETLB_PAGE + mtcr r11 + bt- 28, 10f /* bit 28 = Large page (8M) */ + bt- 29, 20f /* bit 29 = Large page (8M or 512k) */ +#endif rlwimi r10, r11, 0, 0, 32 - PAGE_SHIFT - 1 /* Add level 2 base */ lwz r10, 0(r10) /* Get the pte */ +4: + mtcr r3 /* Insert the Guarded flag and APG into the TWC from the Linux PTE. * It is bit 26-27 of both the Linux PTE and the TWC (at least @@ -434,6 +484,11 @@ _ENTRY(DTLBMiss_jmp) rlwimi r11, r10, 32-5, 30, 30 MTSPR_CPU6(SPRN_MD_TWC, r11, r3) + /* In 4k pages mode, SPS (bit 28) in RPN must match PS[1] (bit 29) + * In 16k pages mode, SPS is always 1 */ +#if defined (CONFIG_HUGETLB_PAGE) && defined (CONFIG_PPC_4K_PAGES) + rlwimi r10, r11, 1, MD_SPS16K +#endif /* Both _PAGE_ACCESSED and _PAGE_PRESENT has to be set. * We also need to know if the insn is a load/store, so: * Clear _PAGE_PRESENT and load that which will @@ -455,7 +510,11 @@ _ENTRY(DTLBMiss_jmp) * of the MMU. */ li r11, RPN_PATTERN +#if defined (CONFIG_HUGETLB_PAGE) && defined (CONFIG_PPC_4K_PAGES) + rlwimi r10, r11, 0, 24, 27 /* Set 24-27 */ +#else rlwimi r10, r11, 0, 24, 28 /* Set 24-27, clear 28 */ +#endif rlwimi r10, r11, 0, 20, 20 /* clear 20 */ MTSPR_CPU6(SPRN_MD_RPN, r10, r3) /* Update TLB entry */ @@ -465,6 +524,30 @@ _ENTRY(DTLBMiss_jmp) EXCEPTION_EPILOG_0 rfi +#ifdef CONFIG_HUGETLB_PAGE +10: /* 8M pages */ + /* Extract level 2 index */ +#ifdef CONFIG_PPC_16K_PAGES + rlwinm r10, r10, 32 - (PAGE_SHIFT_8M - PAGE_SHIFT), 32 + PAGE_SHIFT_8M - (PAGE_SHIFT << 1), 29 + /* Add level 2 base */ + rlwimi r10, r11, 0, 0, 32 + PAGE_SHIFT_8M - (PAGE_SHIFT << 1) - 1 +#else + /* Level 2 base */ + rlwinm r10, r11, 0, ~HUGEPD_SHIFT_MASK +#endif + lwz r10, 0(r10) /* Get the pte */ + rlwinm r11, r11, 0, 0xf + b 4b + +20: /* 512k pages */ + /* Extract level 2 index */ + rlwinm r10, r10, 32 - (PAGE_SHIFT_512K - PAGE_SHIFT), 32 + PAGE_SHIFT_512K - (PAGE_SHIFT << 1), 29 + /* Add level 2 base */ + rlwimi r10, r11, 0, 0, 32 + PAGE_SHIFT_512K - (PAGE_SHIFT << 1) - 1 + lwz r10, 0(r10) /* Get the pte */ + rlwinm r11, r11, 0, 0xf + b 4b +#endif /* This is an instruction TLB error on the MPC8xx. This could be due * to many reasons, such as executing guarded memory or illegal instruction @@ -586,6 +669,9 @@ _ENTRY(FixupDAR_cmp) /* Insert level 1 index */ 3: rlwimi r11, r10, 32 - ((PAGE_SHIFT - 2) << 1), (PAGE_SHIFT - 2) << 1, 29 lwz r11, (swapper_pg_dir-PAGE_OFFSET)@l(r11) /* Get the level 1 entry */ + mtcr r11 + bt 28,200f /* bit 28 = Large page (8M) */ + bt 29,202f /* bit 29 = Large page (8M or 512K) */ rlwinm r11, r11,0,0,19 /* Extract page descriptor page address */ /* Insert level 2 index */ rlwimi r11, r10, 32 - (PAGE_SHIFT - 2), 32 - PAGE_SHIFT, 29 @@ -611,6 +697,27 @@ _ENTRY(FixupDAR_cmp) 141: mfspr r10,SPRN_SPRG_SCRATCH2 b DARFixed /* Nope, go back to normal TLB processing */ + /* concat physical page address(r11) and page offset(r10) */ +200: +#ifdef CONFIG_PPC_16K_PAGES + rlwinm r11, r11, 0, 0, 32 + PAGE_SHIFT_8M - (PAGE_SHIFT << 1) - 1 + rlwimi r11, r10, 32 - (PAGE_SHIFT_8M - 2), 32 + PAGE_SHIFT_8M - (PAGE_SHIFT << 1), 29 +#else + rlwinm r11, r10, 0, ~HUGEPD_SHIFT_MASK +#endif + lwz r11, 0(r11) /* Get the pte */ + /* concat physical page address(r11) and page offset(r10) */ + rlwimi r11, r10, 0, 32 - PAGE_SHIFT_8M, 31 + b 201b + +202: + rlwinm r11, r11, 0, 0, 32 + PAGE_SHIFT_512K - (PAGE_SHIFT << 1) - 1 + rlwimi r11, r10, 32 - (PAGE_SHIFT_512K - 2), 32 + PAGE_SHIFT_512K - (PAGE_SHIFT << 1), 29 + lwz r11, 0(r11) /* Get the pte */ + /* concat physical page address(r11) and page offset(r10) */ + rlwimi r11, r10, 0, 32 - PAGE_SHIFT_512K, 31 + b 201b + 144: mfspr r10, SPRN_DSISR rlwinm r10, r10,0,7,5 /* Clear store bit for buggy dcbst insn */ mtspr SPRN_DSISR, r10 diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index 53245aa00e22..289df38fb7e0 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -26,6 +26,8 @@ #ifdef CONFIG_HUGETLB_PAGE #define PAGE_SHIFT_64K 16 +#define PAGE_SHIFT_512K 19 +#define PAGE_SHIFT_8M 23 #define PAGE_SHIFT_16M 24 #define PAGE_SHIFT_16G 34 @@ -38,7 +40,7 @@ unsigned int HPAGE_SHIFT; * implementations may have more than one gpage size, so we need multiple * arrays */ -#ifdef CONFIG_PPC_FSL_BOOK3E +#if defined(CONFIG_PPC_FSL_BOOK3E) || defined(CONFIG_PPC_8xx) #define MAX_NUMBER_GPAGES 128 struct psize_gpages { u64 gpage_list[MAX_NUMBER_GPAGES]; @@ -105,6 +107,11 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp, #ifdef CONFIG_PPC_BOOK3S_64 hpdp->pd = __pa(new) | (shift_to_mmu_psize(pshift) << 2); +#elif defined(CONFIG_PPC_8xx) + hpdp->pd = __pa(new) | + (pshift == PAGE_SHIFT_8M ? _PMD_PAGE_8M : + _PMD_PAGE_512K) | + _PMD_PRESENT; #else /* We use the old format for PPC_FSL_BOOK3E */ hpdp->pd = ((unsigned long)new & ~PD_HUGE) | pshift; @@ -124,7 +131,7 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp, * These macros define how to determine which level of the page table holds * the hpdp. */ -#ifdef CONFIG_PPC_FSL_BOOK3E +#if defined(CONFIG_PPC_FSL_BOOK3E) || defined(CONFIG_PPC_8xx) #define HUGEPD_PGD_SHIFT PGDIR_SHIFT #define HUGEPD_PUD_SHIFT PUD_SHIFT #else @@ -200,7 +207,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz return hugepte_offset(*hpdp, addr, pdshift); } -#ifdef CONFIG_PPC_FSL_BOOK3E +#if defined(CONFIG_PPC_FSL_BOOK3E) || defined(CONFIG_PPC_8xx) /* Build list of addresses of gigantic pages. This function is used in early * boot before the buddy allocator is setup. */ @@ -366,7 +373,7 @@ int alloc_bootmem_huge_page(struct hstate *hstate) } #endif -#ifdef CONFIG_PPC_FSL_BOOK3E +#if defined(CONFIG_PPC_FSL_BOOK3E) || defined(CONFIG_PPC_8xx) #define HUGEPD_FREELIST_SIZE \ ((PAGE_SIZE - sizeof(struct hugepd_freelist)) / sizeof(pte_t)) @@ -735,10 +742,10 @@ static int __init add_huge_page_size(unsigned long long size) * that it fits within pagetable and slice limits. */ if (size <= PAGE_SIZE) return -EINVAL; -#ifdef CONFIG_PPC_FSL_BOOK3E +#if defined(CONFIG_PPC_FSL_BOOK3E) if (!is_power_of_4(size)) return -EINVAL; -#else +#elif !defined(CONFIG_PPC_8xx) if (!is_power_of_2(size) || (shift > SLICE_HIGH_SHIFT)) return -EINVAL; #endif @@ -777,7 +784,7 @@ static int __init hugetlbpage_init(void) { int psize; -#if !defined(CONFIG_PPC_FSL_BOOK3E) +#if !defined(CONFIG_PPC_FSL_BOOK3E) && !defined(CONFIG_PPC_8xx) if (!radix_enabled() && !mmu_has_feature(MMU_FTR_16M_PAGE)) return -ENODEV; #endif @@ -809,7 +816,7 @@ static int __init hugetlbpage_init(void) panic("hugetlbpage_init(): could not create " "pgtable cache for %d bit pagesize\n", shift); } -#ifdef CONFIG_PPC_FSL_BOOK3E +#if defined(CONFIG_PPC_FSL_BOOK3E) || defined(CONFIG_PPC_8xx) else if (!hugepte_cache) { /* * Create a kmem cache for hugeptes. The bottom bits in @@ -828,10 +835,12 @@ static int __init hugetlbpage_init(void) #endif } -#ifdef CONFIG_PPC_FSL_BOOK3E - /* Default hpage size = 4M */ +#if defined(CONFIG_PPC_FSL_BOOK3E) || defined(CONFIG_PPC_8xx) + /* Default hpage size = 4M on FSL_BOOK3E and 512k on 8xx */ if (mmu_psize_defs[MMU_PAGE_4M].shift) HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_4M].shift; + else if (mmu_psize_defs[MMU_PAGE_512K].shift) + HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_512K].shift; #else /* Set default large page size. Currently, we pick 16M or 1M * depending on what is available diff --git a/arch/powerpc/mm/tlb_nohash.c b/arch/powerpc/mm/tlb_nohash.c index 050badc0ebd3..ba28fcb98597 100644 --- a/arch/powerpc/mm/tlb_nohash.c +++ b/arch/powerpc/mm/tlb_nohash.c @@ -53,7 +53,7 @@ * other sizes not listed here. The .ind field is only used on MMUs that have * indirect page table entries. */ -#ifdef CONFIG_PPC_BOOK3E_MMU +#if defined(CONFIG_PPC_BOOK3E_MMU) || defined(CONFIG_PPC_8xx) #ifdef CONFIG_PPC_FSL_BOOK3E struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT] = { [MMU_PAGE_4K] = { @@ -85,6 +85,25 @@ struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT] = { .enc = BOOK3E_PAGESZ_1GB, }, }; +#elif defined(CONFIG_PPC_8xx) +struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT] = { + /* we only manage 4k and 16k pages as normal pages */ +#ifdef CONFIG_PPC_4K_PAGES + [MMU_PAGE_4K] = { + .shift = 12, + }, +#else + [MMU_PAGE_16K] = { + .shift = 14, + }, +#endif + [MMU_PAGE_512K] = { + .shift = 19, + }, + [MMU_PAGE_8M] = { + .shift = 23, + }, +}; #else struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT] = { [MMU_PAGE_4K] = { diff --git a/arch/powerpc/platforms/8xx/Kconfig b/arch/powerpc/platforms/8xx/Kconfig index 564d99bb2a26..80cbcb0ad9b1 100644 --- a/arch/powerpc/platforms/8xx/Kconfig +++ b/arch/powerpc/platforms/8xx/Kconfig @@ -130,6 +130,7 @@ config 8xx_CPU6 config 8xx_CPU15 bool "CPU15 Silicon Errata" + depends on !HUGETLB_PAGE default y help This enables a workaround for erratum CPU15 on MPC8xx chips. diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index ca2da30ad2ab..6e89e5a8d4fb 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -34,6 +34,7 @@ config PPC_8xx select FSL_SOC select 8xx select PPC_LIB_RHEAP + select SYS_SUPPORTS_HUGETLBFS config 40x bool "AMCC 40x" -- cgit v1.3.1