From e75d07bd8303588c33e6f1f180a9081fb58c872e Mon Sep 17 00:00:00 2001
From: Christophe Leroy
Date: Wed, 9 Mar 2022 10:29:50 +0100
Subject: powerpc: Remove find_current_mm_pte()

Last usage of find_current_mm_pte() was removed by
commit 15759cb054ef ("powerpc/perf/callchain: Use __get_user_pages_fast
in read_user_stack_slow")

Remove it.

Signed-off-by: Christophe Leroy
Signed-off-by: Michael Ellerman
Link: https://lore.kernel.org/r/ec79f462a3bfa8365b7df505e574d5d85246bc68.1646818177.git.christophe.leroy@csgroup.eu
---
 arch/powerpc/mm/book3s64/pgtable.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch/powerpc/mm')

diff --git a/arch/powerpc/mm/book3s64/pgtable.c b/arch/powerpc/mm/book3s64/pgtable.c
index f6151a589298..85c84e89e3ea 100644
--- a/arch/powerpc/mm/book3s64/pgtable.c
+++ b/arch/powerpc/mm/book3s64/pgtable.c
@@ -100,14 +100,14 @@ static void do_serialize(void *arg)
 }
 
 /*
- * Serialize against find_current_mm_pte which does lock-less
+ * Serialize against __find_linux_pte() which does lock-less
  * lookup in page tables with local interrupts disabled. For huge pages
  * it casts pmd_t to pte_t. Since format of pte_t is different from
  * pmd_t we want to prevent transit from pmd pointing to page table
  * to pmd pointing to huge page (and back) while interrupts are disabled.
  * We clear pmd to possibly replace it with page table pointer in
  * different code paths. So make sure we wait for the parallel
- * find_current_mm_pte to finish.
+ * __find_linux_pte() to finish.
  */
 void serialize_against_pte_lookup(struct mm_struct *mm)
 {
--
cgit v1.2.3-70-g09d2


From d5090716be6791ada9ee142163a4934c1c147aaa Mon Sep 17 00:00:00 2001
From: Thomas Weißschuh
Date: Sat, 26 Nov 2022 06:10:00 +0100
Subject: powerpc/book3e: remove #include <generated/utsrelease.h>
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Commit 7ad4bd887d27 ("powerpc/book3e: get rid of #include ") removed
the usage of the define UTS_RELEASE but forgot to drop the include.

utsrelease.h is potentially generated on each build. By removing the
unused include we can get rid of some spurious recompilations.

Fixes: 7ad4bd887d27 ("powerpc/book3e: get rid of #include ")
Signed-off-by: Thomas Weißschuh
Reviewed-by: Masahiro Yamada
Reviewed-by: Christophe Leroy
[mpe: Fix typo in change log and add more explanation]
Signed-off-by: Michael Ellerman
Link: https://lore.kernel.org/r/20221126051002.123199-2-linux@weissschuh.net
---
 arch/powerpc/mm/nohash/kaslr_booke.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'arch/powerpc/mm')

diff --git a/arch/powerpc/mm/nohash/kaslr_booke.c b/arch/powerpc/mm/nohash/kaslr_booke.c
index 0d04f9d5da8d..2fb3edafe9ab 100644
--- a/arch/powerpc/mm/nohash/kaslr_booke.c
+++ b/arch/powerpc/mm/nohash/kaslr_booke.c
@@ -19,7 +19,6 @@
 #include
 #include
 #include
-#include <generated/utsrelease.h>
 
 struct regions {
 	unsigned long pa_start;
--
cgit v1.2.3-70-g09d2


From 274d842fa1efd9449e62222c8896e0be11621f1f Mon Sep 17 00:00:00 2001
From: Benjamin Gray
Date: Wed, 9 Nov 2022 15:51:10 +1100
Subject: powerpc/tlb: Add local flush for page given mm_struct and psize

Adds a local TLB flush operation that works given an mm_struct, VA to
flush, and page size representation. Most implementations mirror the
surrounding code. The book3s/32/tlbflush.h implementation is left as a
BUILD_BUG because it is more complicated and not required for anything
as yet.

This removes the need to create a vm_area_struct, which the temporary
patching mm work does not need.
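
As a rough illustration of the intended use (an illustrative sketch, not
part of this patch; the function and variable names below are made up),
a caller that only has a bare mm_struct can flush a single page without
faking up a vm_area_struct:

	#include <linux/mm_types.h>
	#include <asm/mmu.h>
	#include <asm/tlbflush.h>

	/* Hypothetical caller: flush one page of a temporary patching mm. */
	static void example_flush_patch_page(struct mm_struct *patch_mm,
					     unsigned long patch_addr)
	{
		local_flush_tlb_page_psize(patch_mm, patch_addr, mmu_virtual_psize);
	}

On book3s/64 this reaches the radix helper when the radix MMU is enabled,
while the nohash variants mirror the existing local_flush_tlb_page()
paths, as the hunks below show.
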
Signed-off-by: Benjamin Gray
Reviewed-by: Christophe Leroy
Signed-off-by: Michael Ellerman
Link: https://lore.kernel.org/r/20221109045112.187069-8-bgray@linux.ibm.com
---
 arch/powerpc/include/asm/book3s/32/tlbflush.h | 9 +++++++++
 arch/powerpc/include/asm/book3s/64/tlbflush.h | 7 +++++++
 arch/powerpc/include/asm/nohash/tlbflush.h    | 7 +++++++
 arch/powerpc/mm/nohash/tlb.c                  | 8 ++++++++
 4 files changed, 31 insertions(+)

(limited to 'arch/powerpc/mm')

diff --git a/arch/powerpc/include/asm/book3s/32/tlbflush.h b/arch/powerpc/include/asm/book3s/32/tlbflush.h
index ba1743c52b56..4be572908124 100644
--- a/arch/powerpc/include/asm/book3s/32/tlbflush.h
+++ b/arch/powerpc/include/asm/book3s/32/tlbflush.h
@@ -2,6 +2,8 @@
 #ifndef _ASM_POWERPC_BOOK3S_32_TLBFLUSH_H
 #define _ASM_POWERPC_BOOK3S_32_TLBFLUSH_H
 
+#include
+
 #define MMU_NO_CONTEXT      (0)
 /*
  * TLB flushing for "classic" hash-MMU 32-bit CPUs, 6xx, 7xx, 7xxx
@@ -74,6 +76,13 @@ static inline void local_flush_tlb_page(struct vm_area_struct *vma,
 {
 	flush_tlb_page(vma, vmaddr);
 }
+
+static inline void local_flush_tlb_page_psize(struct mm_struct *mm,
+					      unsigned long vmaddr, int psize)
+{
+	BUILD_BUG();
+}
+
 static inline void local_flush_tlb_mm(struct mm_struct *mm)
 {
 	flush_tlb_mm(mm);
diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush.h b/arch/powerpc/include/asm/book3s/64/tlbflush.h
index c56a0aee8124..dd39313242b4 100644
--- a/arch/powerpc/include/asm/book3s/64/tlbflush.h
+++ b/arch/powerpc/include/asm/book3s/64/tlbflush.h
@@ -86,6 +86,13 @@ static inline void local_flush_tlb_page(struct vm_area_struct *vma,
 		radix__local_flush_tlb_page(vma, vmaddr);
 }
 
+static inline void local_flush_tlb_page_psize(struct mm_struct *mm,
+					      unsigned long vmaddr, int psize)
+{
+	if (radix_enabled())
+		radix__local_flush_tlb_page_psize(mm, vmaddr, psize);
+}
+
 static inline void tlb_flush(struct mmu_gather *tlb)
 {
 	if (radix_enabled())
diff --git a/arch/powerpc/include/asm/nohash/tlbflush.h b/arch/powerpc/include/asm/nohash/tlbflush.h
index bdaf34ad41ea..9a2cf83ea4f1 100644
--- a/arch/powerpc/include/asm/nohash/tlbflush.h
+++ b/arch/powerpc/include/asm/nohash/tlbflush.h
@@ -45,6 +45,12 @@ static inline void local_flush_tlb_page(struct vm_area_struct *vma, unsigned lon
 	asm volatile ("tlbie %0; sync" : : "r" (vmaddr) : "memory");
 }
 
+static inline void local_flush_tlb_page_psize(struct mm_struct *mm,
+					      unsigned long vmaddr, int psize)
+{
+	asm volatile ("tlbie %0; sync" : : "r" (vmaddr) : "memory");
+}
+
 static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end)
 {
 	start &= PAGE_MASK;
@@ -58,6 +64,7 @@ static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end
 extern void flush_tlb_kernel_range(unsigned long start, unsigned long end);
 extern void local_flush_tlb_mm(struct mm_struct *mm);
 extern void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr);
+void local_flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr, int psize);
 
 extern void __local_flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr,
 				   int tsize, int ind);
diff --git a/arch/powerpc/mm/nohash/tlb.c b/arch/powerpc/mm/nohash/tlb.c
index 2c15c86c7015..a903b308acc5 100644
--- a/arch/powerpc/mm/nohash/tlb.c
+++ b/arch/powerpc/mm/nohash/tlb.c
@@ -184,6 +184,14 @@ void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
 			       mmu_get_tsize(mmu_virtual_psize), 0);
 }
 EXPORT_SYMBOL(local_flush_tlb_page);
+
+void local_flush_tlb_page_psize(struct mm_struct *mm,
+				unsigned long vmaddr, int psize)
+{
+	__local_flush_tlb_page(mm, vmaddr, mmu_get_tsize(psize), 0);
+}
+EXPORT_SYMBOL(local_flush_tlb_page_psize);
+
 #endif
 
 /*
--
cgit v1.2.3-70-g09d2


From 6b34a099faa123488b13caf704562f4dbe483fc4 Mon Sep 17 00:00:00 2001
From: Nicholas Piggin
Date: Mon, 24 Oct 2022 13:01:50 +1000
Subject: powerpc/64s/hash: add stress_hpt kernel boot option to increase hash faults

This option increases the number of hash misses by limiting the number
of kernel HPT entries, by keeping a per-CPU record of the last kernel
HPTEs installed, and removing that from the hash table on the next
hash insertion. A timer round-robins CPUs removing remaining kernel
HPTEs and clearing the TLB (in the case of bare metal) to increase
and slightly randomise kernel fault activity.

Signed-off-by: Nicholas Piggin
[mpe: Add comment about NR_CPUS usage, fixup whitespace]
Signed-off-by: Michael Ellerman
Link: https://lore.kernel.org/r/20221024030150.852517-1-npiggin@gmail.com
---
 Documentation/admin-guide/kernel-parameters.txt |   5 +
 arch/powerpc/mm/book3s64/hash_4k.c              |   5 +
 arch/powerpc/mm/book3s64/hash_64k.c             |  10 ++
 arch/powerpc/mm/book3s64/hash_utils.c           | 130 +++++++++++++++++++++++-
 arch/powerpc/mm/book3s64/internal.h             |  11 ++
 5 files changed, 160 insertions(+), 1 deletion(-)

(limited to 'arch/powerpc/mm')

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index a465d5242774..9f3d256529d0 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -1042,6 +1042,11 @@
 			them frequently to increase the rate of SLB faults
 			on kernel addresses.
 
+	stress_hpt	[PPC]
+			Limits the number of kernel HPT entries in the hash
+			page table to increase the rate of hash page table
+			faults on kernel addresses.
+
 	disable=	[IPV6]
 			See Documentation/networking/ipv6.rst.
 
diff --git a/arch/powerpc/mm/book3s64/hash_4k.c b/arch/powerpc/mm/book3s64/hash_4k.c
index 7de1a8a0c62a..02acbfd05b46 100644
--- a/arch/powerpc/mm/book3s64/hash_4k.c
+++ b/arch/powerpc/mm/book3s64/hash_4k.c
@@ -16,6 +16,8 @@
 #include
 #include
 
+#include "internal.h"
+
 int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
 		   pte_t *ptep, unsigned long trap, unsigned long flags,
 		   int ssize, int subpg_prot)
@@ -118,6 +120,9 @@ repeat:
 		}
 		new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | H_PAGE_HASHPTE;
 		new_pte |= pte_set_hidx(ptep, rpte, 0, slot, PTRS_PER_PTE);
+
+		if (stress_hpt())
+			hpt_do_stress(ea, hpte_group);
 	}
 	*ptep = __pte(new_pte & ~H_PAGE_BUSY);
 	return 0;
diff --git a/arch/powerpc/mm/book3s64/hash_64k.c b/arch/powerpc/mm/book3s64/hash_64k.c
index 998c6817ed47..954af420f358 100644
--- a/arch/powerpc/mm/book3s64/hash_64k.c
+++ b/arch/powerpc/mm/book3s64/hash_64k.c
@@ -16,6 +16,8 @@
 #include
 #include
 
+#include "internal.h"
+
 /*
  * Return true, if the entry has a slot value which
  * the software considers as invalid.
@@ -216,6 +218,9 @@ repeat:
 	new_pte |= pte_set_hidx(ptep, rpte, subpg_index, slot, PTRS_PER_PTE);
 	new_pte |= H_PAGE_HASHPTE;
 
+	if (stress_hpt())
+		hpt_do_stress(ea, hpte_group);
+
 	*ptep = __pte(new_pte & ~H_PAGE_BUSY);
 	return 0;
 }
@@ -327,7 +332,12 @@ repeat:
 
 		new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | H_PAGE_HASHPTE;
 		new_pte |= pte_set_hidx(ptep, rpte, 0, slot, PTRS_PER_PTE);
+
+		if (stress_hpt())
+			hpt_do_stress(ea, hpte_group);
 	}
+
 	*ptep = __pte(new_pte & ~H_PAGE_BUSY);
+
 	return 0;
 }
diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c
index 6df4c6d38b66..80a148c57de8 100644
--- a/arch/powerpc/mm/book3s64/hash_utils.c
+++ b/arch/powerpc/mm/book3s64/hash_utils.c
@@ -471,7 +471,7 @@ int htab_remove_mapping(unsigned long vstart, unsigned long vend,
 	return ret;
 }
 
-static bool disable_1tb_segments = false;
+static bool disable_1tb_segments __ro_after_init;
 
 static int __init parse_disable_1tb_segments(char *p)
 {
@@ -480,6 +480,40 @@ static int __init parse_disable_1tb_segments(char *p)
 }
 early_param("disable_1tb_segments", parse_disable_1tb_segments);
 
+bool stress_hpt_enabled __initdata;
+
+static int __init parse_stress_hpt(char *p)
+{
+	stress_hpt_enabled = true;
+	return 0;
+}
+early_param("stress_hpt", parse_stress_hpt);
+
+__ro_after_init DEFINE_STATIC_KEY_FALSE(stress_hpt_key);
+
+/*
+ * per-CPU array allocated if we enable stress_hpt.
+ */
+#define STRESS_MAX_GROUPS 16
+struct stress_hpt_struct {
+	unsigned long last_group[STRESS_MAX_GROUPS];
+};
+
+static inline int stress_nr_groups(void)
+{
+	/*
+	 * LPAR H_REMOVE flushes TLB, so need some number > 1 of entries
+	 * to allow practical forward progress. Bare metal returns 1, which
+	 * seems to help uncover more bugs.
+	 */
+	if (firmware_has_feature(FW_FEATURE_LPAR))
+		return STRESS_MAX_GROUPS;
+	else
+		return 1;
+}
+
+static struct stress_hpt_struct *stress_hpt_struct;
+
 static int __init htab_dt_scan_seg_sizes(unsigned long node,
 					 const char *uname, int depth,
 					 void *data)
@@ -976,6 +1010,23 @@ static void __init hash_init_partition_table(phys_addr_t hash_table,
 	pr_info("Partition table %p\n", partition_tb);
 }
 
+void hpt_clear_stress(void);
+static struct timer_list stress_hpt_timer;
+void stress_hpt_timer_fn(struct timer_list *timer)
+{
+	int next_cpu;
+
+	hpt_clear_stress();
+	if (!firmware_has_feature(FW_FEATURE_LPAR))
+		tlbiel_all();
+
+	next_cpu = cpumask_next(raw_smp_processor_id(), cpu_online_mask);
+	if (next_cpu >= nr_cpu_ids)
+		next_cpu = cpumask_first(cpu_online_mask);
+	stress_hpt_timer.expires = jiffies + msecs_to_jiffies(10);
+	add_timer_on(&stress_hpt_timer, next_cpu);
+}
+
 static void __init htab_initialize(void)
 {
 	unsigned long table;
@@ -995,6 +1046,20 @@ static void __init htab_initialize(void)
 	if (stress_slb_enabled)
 		static_branch_enable(&stress_slb_key);
 
+	if (stress_hpt_enabled) {
+		unsigned long tmp;
+		static_branch_enable(&stress_hpt_key);
+		// Too early to use nr_cpu_ids, so use NR_CPUS
+		tmp = memblock_phys_alloc_range(sizeof(struct stress_hpt_struct) * NR_CPUS,
+						0, 0, MEMBLOCK_ALLOC_ANYWHERE);
+		memset((void *)tmp, 0xff, sizeof(struct stress_hpt_struct) * NR_CPUS);
+		stress_hpt_struct = __va(tmp);
+
+		timer_setup(&stress_hpt_timer, stress_hpt_timer_fn, 0);
+		stress_hpt_timer.expires = jiffies + msecs_to_jiffies(10);
+		add_timer(&stress_hpt_timer);
+	}
+
 	/*
 	 * Calculate the required size of the htab. We want the number of
 	 * PTEGs to equal one half the number of real pages.
@@ -1980,6 +2045,69 @@ repeat:
 	return slot;
 }
 
+void hpt_clear_stress(void)
+{
+	int cpu = raw_smp_processor_id();
+	int g;
+
+	for (g = 0; g < stress_nr_groups(); g++) {
+		unsigned long last_group;
+		last_group = stress_hpt_struct[cpu].last_group[g];
+
+		if (last_group != -1UL) {
+			int i;
+			for (i = 0; i < HPTES_PER_GROUP; i++) {
+				if (mmu_hash_ops.hpte_remove(last_group) == -1)
+					break;
+			}
+			stress_hpt_struct[cpu].last_group[g] = -1;
+		}
+	}
+}
+
+void hpt_do_stress(unsigned long ea, unsigned long hpte_group)
+{
+	unsigned long last_group;
+	int cpu = raw_smp_processor_id();
+
+	last_group = stress_hpt_struct[cpu].last_group[stress_nr_groups() - 1];
+	if (hpte_group == last_group)
+		return;
+
+	if (last_group != -1UL) {
+		int i;
+		/*
+		 * Concurrent CPUs might be inserting into this group, so
+		 * give up after a number of iterations, to prevent a live
+		 * lock.
+		 */
+		for (i = 0; i < HPTES_PER_GROUP; i++) {
+			if (mmu_hash_ops.hpte_remove(last_group) == -1)
+				break;
+		}
+		stress_hpt_struct[cpu].last_group[stress_nr_groups() - 1] = -1;
+	}
+
+	if (ea >= PAGE_OFFSET) {
+		/*
+		 * We would really like to prefetch to get the TLB loaded, then
+		 * remove the PTE before returning from fault interrupt, to
+		 * increase the hash fault rate.
+		 *
+		 * Unfortunately QEMU TCG does not model the TLB in a way that
+		 * makes this possible, and systemsim (mambo) emulator does not
+		 * bring in TLBs with prefetches (although loads/stores do
+		 * work for non-CI PTEs).
+		 *
+		 * So remember this PTE and clear it on the next hash fault.
+		 */
+		memmove(&stress_hpt_struct[cpu].last_group[1],
+			&stress_hpt_struct[cpu].last_group[0],
+			(stress_nr_groups() - 1) * sizeof(unsigned long));
+		stress_hpt_struct[cpu].last_group[0] = hpte_group;
+	}
+}
+
 #if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KFENCE)
 static DEFINE_RAW_SPINLOCK(linear_map_hash_lock);
 
diff --git a/arch/powerpc/mm/book3s64/internal.h b/arch/powerpc/mm/book3s64/internal.h
index 5045048ce244..a57a25f06a21 100644
--- a/arch/powerpc/mm/book3s64/internal.h
+++ b/arch/powerpc/mm/book3s64/internal.h
@@ -13,6 +13,17 @@ static inline bool stress_slb(void)
 	return static_branch_unlikely(&stress_slb_key);
 }
 
+extern bool stress_hpt_enabled;
+
+DECLARE_STATIC_KEY_FALSE(stress_hpt_key);
+
+static inline bool stress_hpt(void)
+{
+	return static_branch_unlikely(&stress_hpt_key);
+}
+
+void hpt_do_stress(unsigned long ea, unsigned long hpte_group);
+
 void slb_setup_new_exec(void);
 
 void exit_lazy_flush_tlb(struct mm_struct *mm, bool always_flush);
--
cgit v1.2.3-70-g09d2


From 6f3a81b60091031c2c14eb2373d1937b027deb46 Mon Sep 17 00:00:00 2001
From: Christophe Leroy
Date: Fri, 2 Dec 2022 09:31:43 +0100
Subject: powerpc/code-patching: Remove protection against patching init addresses after init

Once init section is freed, attempting to patch init code ends up in
the weed.

Commit 51c3c62b58b3 ("powerpc: Avoid code patching freed init sections")
protected patch_instruction() against that, but it is the responsibility
of the caller to ensure that the patched memory is valid.

All callers have now been verified and fixed so the check can be
removed.

This improves ftrace activation by about 2% on 8xx.
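
To make the new contract concrete (an illustrative sketch, not part of
this patch; the function name is made up), a caller that cannot rule out
an address inside init text could keep an equivalent of the removed check
on its own side, using the same init_section_contains() helper:

	#include <linux/types.h>
	#include <asm/code-patching.h>
	#include <asm/sections.h>

	/*
	 * Hypothetical caller-side guard: never patch init text, since it
	 * may already have been freed by the time this runs.
	 */
	static int example_patch_non_init(u32 *addr, ppc_inst_t instr)
	{
		if (init_section_contains(addr, 4))
			return 0;

		return patch_instruction(addr, instr);
	}

Callers such as ftrace, which patch a large number of sites, benefit
because the per-call check is gone from patch_instruction().
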
Signed-off-by: Christophe Leroy
Signed-off-by: Michael Ellerman
Link: https://lore.kernel.org/r/504310828f473d424e2ed229eff57bf075f52796.1669969781.git.christophe.leroy@csgroup.eu
---
 arch/powerpc/include/asm/code-patching.h |  2 --
 arch/powerpc/lib/code-patching.c         | 13 +------------
 arch/powerpc/mm/mem.c                    |  1 -
 3 files changed, 1 insertion(+), 15 deletions(-)

(limited to 'arch/powerpc/mm')

diff --git a/arch/powerpc/include/asm/code-patching.h b/arch/powerpc/include/asm/code-patching.h
index 1c6316ec4b74..3f881548fb61 100644
--- a/arch/powerpc/include/asm/code-patching.h
+++ b/arch/powerpc/include/asm/code-patching.h
@@ -22,8 +22,6 @@
 #define BRANCH_SET_LINK	0x1
 #define BRANCH_ABSOLUTE	0x2
 
-DECLARE_STATIC_KEY_FALSE(init_mem_is_free);
-
 /*
  * Powerpc branch instruction is :
  *
diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c
index a6a5047f8ba2..73ce4b90bb1b 100644
--- a/arch/powerpc/lib/code-patching.c
+++ b/arch/powerpc/lib/code-patching.c
@@ -349,7 +349,7 @@ static int __do_patch_instruction(u32 *addr, ppc_inst_t instr)
 	return err;
 }
 
-static int do_patch_instruction(u32 *addr, ppc_inst_t instr)
+int patch_instruction(u32 *addr, ppc_inst_t instr)
 {
 	int err;
 	unsigned long flags;
@@ -372,17 +372,6 @@ static int do_patch_instruction(u32 *addr, ppc_inst_t instr)
 
 	return err;
 }
-
-__ro_after_init DEFINE_STATIC_KEY_FALSE(init_mem_is_free);
-
-int patch_instruction(u32 *addr, ppc_inst_t instr)
-{
-	/* Make sure we aren't patching a freed init section */
-	if (static_branch_likely(&init_mem_is_free) && init_section_contains(addr, 4))
-		return 0;
-
-	return do_patch_instruction(addr, instr);
-}
 NOKPROBE_SYMBOL(patch_instruction);
 
 int patch_branch(u32 *addr, unsigned long target, int flags)
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 84d171953ba4..8b121df7b08f 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -344,7 +344,6 @@ void free_initmem(void)
 {
 	ppc_md.progress = ppc_printk_progress;
 	mark_initmem_nx();
-	static_branch_enable(&init_mem_is_free);
 	free_initmem_default(POISON_FREE_INITMEM);
 	ftrace_free_init_tramp();
 }
--
cgit v1.2.3-70-g09d2