author     Linus Torvalds <torvalds@linux-foundation.org>   2020-06-09 09:54:46 -0700
committer  Linus Torvalds <torvalds@linux-foundation.org>   2020-06-09 09:54:46 -0700
commit     a5ad5742f671de906adbf29fbedf0a04705cebad (patch)
tree       88d1a4c18e2025a5a8335dbbc9dea8bebeba5789 /include
parent     013b2deba9a6b80ca02f4fafd7dedf875e9b4450 (diff)
parent     4fa7252338a56fbc90220e6330f136a379175a7a (diff)
Merge branch 'akpm' (patches from Andrew)
Merge even more updates from Andrew Morton:
- a kernel-wide sweep of show_stack()
- pagetable cleanups
- abstract out accesses to mmap_sem - prep for mmap_sem scalability work
- hch's user access work
Subsystems affected by this patch series: debug, mm/pagemap, mm/maccess,
mm/documentation.
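As an illustration of the kernel-wide show_stack() sweep mentioned above, the backtrace helpers now take an explicit printk log level. The sketch below is hypothetical caller code; only the show_stack() and print_ip_sym() signatures are taken from the sched/debug.h and kallsyms.h hunks further down in the diff.

```c
#include <linux/kallsyms.h>
#include <linux/printk.h>
#include <linux/sched/debug.h>

/* Hypothetical helper, shown only to illustrate the new loglvl arguments. */
static void debug_dump_here(unsigned long ip)
{
	print_ip_sym(KERN_DEBUG, ip);		/* was: print_ip_sym(ip) */
	show_stack(NULL, NULL, KERN_DEBUG);	/* current task, current stack */
}
```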
* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (93 commits)
include/linux/cache.h: expand documentation over __read_mostly
maccess: return -ERANGE when probe_kernel_read() fails
x86: use non-set_fs based maccess routines
maccess: allow architectures to provide kernel probing directly
maccess: move user access routines together
maccess: always use strict semantics for probe_kernel_read
maccess: remove strncpy_from_unsafe
tracing/kprobes: handle mixed kernel/userspace probes better
bpf: rework the compat kernel probe handling
bpf:bpf_seq_printf(): handle potentially unsafe format string better
bpf: handle the compat string in bpf_trace_copy_string better
bpf: factor out a bpf_trace_copy_string helper
maccess: unify the probe kernel arch hooks
maccess: remove probe_read_common and probe_write_common
maccess: rename strnlen_unsafe_user to strnlen_user_nofault
maccess: rename strncpy_from_unsafe_strict to strncpy_from_kernel_nofault
maccess: rename strncpy_from_unsafe_user to strncpy_from_user_nofault
maccess: update the top of file comment
maccess: clarify kerneldoc comments
maccess: remove duplicate kerneldoc comments
...
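A minimal sketch of the renamed maccess interfaces, using only the declarations from the include/linux/uaccess.h hunk below; dump_probed_strings(), its buffers, and the simplified error handling are made up for illustration.

```c
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/uaccess.h>

static void dump_probed_strings(const void *kern_addr,
				const char __user *user_addr)
{
	char kbuf[64], ubuf[64];
	unsigned long kval;

	/* probe_kernel_read() now rejects non-kernel addresses (e.g. -ERANGE) */
	if (probe_kernel_read(&kval, kern_addr, sizeof(kval)))
		kval = 0;

	/* was strncpy_from_unsafe_strict(); a negative return means a fault */
	if (strncpy_from_kernel_nofault(kbuf, kern_addr, sizeof(kbuf)) < 0)
		strscpy(kbuf, "<fault>", sizeof(kbuf));

	/* was strncpy_from_unsafe_user() */
	if (strncpy_from_user_nofault(ubuf, user_addr, sizeof(ubuf)) < 0)
		strscpy(ubuf, "<fault>", sizeof(ubuf));

	pr_info("val=%#lx kernel=\"%s\" user=\"%s\"\n", kval, kbuf, ubuf);
}
```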
Diffstat (limited to 'include')
-rw-r--r--   include/asm-generic/io.h                                                |   2
-rw-r--r--   include/asm-generic/pgtable-nopmd.h                                     |   1
-rw-r--r--   include/asm-generic/pgtable-nopud.h                                     |   1
-rw-r--r--   include/linux/cache.h                                                   |  10
-rw-r--r--   include/linux/crash_dump.h                                              |   3
-rw-r--r--   include/linux/dax.h                                                     |   1
-rw-r--r--   include/linux/dma-noncoherent.h                                         |   2
-rw-r--r--   include/linux/fs.h                                                      |   4
-rw-r--r--   include/linux/hmm.h                                                     |   2
-rw-r--r--   include/linux/huge_mm.h                                                 |   2
-rw-r--r--   include/linux/hugetlb.h                                                 |   2
-rw-r--r--   include/linux/io-mapping.h                                              |   2
-rw-r--r--   include/linux/kallsyms.h                                                |   4
-rw-r--r--   include/linux/kasan.h                                                   |   2
-rw-r--r--   include/linux/mempolicy.h                                               |   2
-rw-r--r--   include/linux/mm.h                                                      |  13
-rw-r--r--   include/linux/mm_types.h                                                |   4
-rw-r--r--   include/linux/mmap_lock.h                                               |  90
-rw-r--r--   include/linux/mmu_notifier.h                                            |  13
-rw-r--r--   include/linux/pagemap.h                                                 |   2
-rw-r--r--   include/linux/pgtable.h (renamed from include/asm-generic/pgtable.h)   | 128
-rw-r--r--   include/linux/rmap.h                                                    |   2
-rw-r--r--   include/linux/sched/debug.h                                             |   3
-rw-r--r--   include/linux/sched/mm.h                                                |  10
-rw-r--r--   include/linux/uaccess.h                                                 |  60
-rw-r--r--   include/xen/arm/page.h                                                  |   2
26 files changed, 271 insertions(+), 96 deletions(-)
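The largest single addition in the diffstat is include/linux/mmap_lock.h, whose wrappers replace direct down_read()/up_read() calls on mm->mmap_sem. A minimal sketch of a converted reader follows; count_user_vmas() is an invented example, and only the mmap_* helpers come from the new header shown in the diff below.

```c
#include <linux/mm.h>
#include <linux/mm_types.h>
#include <linux/mmap_lock.h>

static unsigned long count_user_vmas(struct mm_struct *mm)
{
	struct vm_area_struct *vma;
	unsigned long nr = 0;

	mmap_read_lock(mm);		/* was: down_read(&mm->mmap_sem) */
	for (vma = mm->mmap; vma; vma = vma->vm_next)
		nr++;
	mmap_assert_locked(mm);		/* lockdep + rwsem_is_locked() check */
	mmap_read_unlock(mm);		/* was: up_read(&mm->mmap_sem) */

	return nr;
}
```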
diff --git a/include/asm-generic/io.h b/include/asm-generic/io.h index 3a7871130112..8b1e020e9a03 100644 --- a/include/asm-generic/io.h +++ b/include/asm-generic/io.h @@ -972,7 +972,7 @@ static inline void iounmap(void __iomem *addr) } #endif #elif defined(CONFIG_GENERIC_IOREMAP) -#include <asm/pgtable.h> +#include <linux/pgtable.h> void __iomem *ioremap_prot(phys_addr_t addr, size_t size, unsigned long prot); void iounmap(volatile void __iomem *addr); diff --git a/include/asm-generic/pgtable-nopmd.h b/include/asm-generic/pgtable-nopmd.h index 0d9b28cba16d..3e13acd019ae 100644 --- a/include/asm-generic/pgtable-nopmd.h +++ b/include/asm-generic/pgtable-nopmd.h @@ -45,6 +45,7 @@ static inline pmd_t * pmd_offset(pud_t * pud, unsigned long address) { return (pmd_t *)pud; } +#define pmd_offset pmd_offset #define pmd_val(x) (pud_val((x).pud)) #define __pmd(x) ((pmd_t) { __pud(x) } ) diff --git a/include/asm-generic/pgtable-nopud.h b/include/asm-generic/pgtable-nopud.h index ad05c1684bfc..a9d751fbda9e 100644 --- a/include/asm-generic/pgtable-nopud.h +++ b/include/asm-generic/pgtable-nopud.h @@ -43,6 +43,7 @@ static inline pud_t *pud_offset(p4d_t *p4d, unsigned long address) { return (pud_t *)p4d; } +#define pud_offset pud_offset #define pud_val(x) (p4d_val((x).p4d)) #define __pud(x) ((pud_t) { __p4d(x) }) diff --git a/include/linux/cache.h b/include/linux/cache.h index 750621e41d1c..1aa8009f6d06 100644 --- a/include/linux/cache.h +++ b/include/linux/cache.h @@ -15,8 +15,14 @@ /* * __read_mostly is used to keep rarely changing variables out of frequently - * updated cachelines. If an architecture doesn't support it, ignore the - * hint. + * updated cachelines. Its use should be reserved for data that is used + * frequently in hot paths. Performance traces can help decide when to use + * this. You want __read_mostly data to be tightly packed, so that in the + * best case multiple frequently read variables for a hot path will be next + * to each other in order to reduce the number of cachelines needed to + * execute a critical path. We should be mindful and selective of its use. 
+ * ie: if you're going to use it please supply a *good* justification in your + * commit log */ #ifndef __read_mostly #define __read_mostly diff --git a/include/linux/crash_dump.h b/include/linux/crash_dump.h index bc156285d097..a5192b718dbe 100644 --- a/include/linux/crash_dump.h +++ b/include/linux/crash_dump.h @@ -5,9 +5,10 @@ #include <linux/kexec.h> #include <linux/proc_fs.h> #include <linux/elf.h> +#include <linux/pgtable.h> #include <uapi/linux/vmcore.h> -#include <asm/pgtable.h> /* for pgprot_t */ +#include <linux/pgtable.h> /* for pgprot_t */ #ifdef CONFIG_CRASH_DUMP #define ELFCORE_ADDR_MAX (-1ULL) diff --git a/include/linux/dax.h b/include/linux/dax.h index d7af5d243f24..6904d4e0b2e0 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -5,7 +5,6 @@ #include <linux/fs.h> #include <linux/mm.h> #include <linux/radix-tree.h> -#include <asm/pgtable.h> /* Flag for synchronous flush */ #define DAXDEV_F_SYNC (1UL << 0) diff --git a/include/linux/dma-noncoherent.h b/include/linux/dma-noncoherent.h index b59f1b6be3e9..ca09a4e07d2d 100644 --- a/include/linux/dma-noncoherent.h +++ b/include/linux/dma-noncoherent.h @@ -3,7 +3,7 @@ #define _LINUX_DMA_NONCOHERENT_H 1 #include <linux/dma-mapping.h> -#include <asm/pgtable.h> +#include <linux/pgtable.h> #ifdef CONFIG_ARCH_HAS_DMA_COHERENCE_H #include <asm/dma-coherence.h> diff --git a/include/linux/fs.h b/include/linux/fs.h index 01f5d296f9bb..0b026329dbed 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1679,10 +1679,10 @@ static inline int sb_start_write_trylock(struct super_block *sb) * * Since page fault freeze protection behaves as a lock, users have to preserve * ordering of freeze protection and other filesystem locks. It is advised to - * put sb_start_pagefault() close to mmap_sem in lock ordering. Page fault + * put sb_start_pagefault() close to mmap_lock in lock ordering. 
Page fault * handling code implies lock dependency: * - * mmap_sem + * mmap_lock * -> sb_start_pagefault */ static inline void sb_start_pagefault(struct super_block *sb) diff --git a/include/linux/hmm.h b/include/linux/hmm.h index e912b9dc4633..f4a09ed223ac 100644 --- a/include/linux/hmm.h +++ b/include/linux/hmm.h @@ -10,7 +10,7 @@ #define LINUX_HMM_H #include <linux/kconfig.h> -#include <asm/pgtable.h> +#include <linux/pgtable.h> #include <linux/device.h> #include <linux/migrate.h> diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index cfbb0a87c5f0..71f20776b06c 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -248,7 +248,7 @@ static inline int is_swap_pmd(pmd_t pmd) return !pmd_none(pmd) && !pmd_present(pmd); } -/* mmap_sem must be held on entry */ +/* mmap_lock must be held on entry */ static inline spinlock_t *pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma) { diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 0cced410e0bd..50650d0d01b9 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -9,7 +9,7 @@ #include <linux/cgroup.h> #include <linux/list.h> #include <linux/kref.h> -#include <asm/pgtable.h> +#include <linux/pgtable.h> struct ctl_table; struct user_struct; diff --git a/include/linux/io-mapping.h b/include/linux/io-mapping.h index b336622612f3..0beaa3eba155 100644 --- a/include/linux/io-mapping.h +++ b/include/linux/io-mapping.h @@ -10,6 +10,7 @@ #include <linux/slab.h> #include <linux/bug.h> #include <linux/io.h> +#include <linux/pgtable.h> #include <asm/page.h> /* @@ -99,7 +100,6 @@ io_mapping_unmap(void __iomem *vaddr) #else #include <linux/uaccess.h> -#include <asm/pgtable.h> /* Create the io_mapping object*/ static inline struct io_mapping * diff --git a/include/linux/kallsyms.h b/include/linux/kallsyms.h index 657a83b943f0..98338dc6b5d2 100644 --- a/include/linux/kallsyms.h +++ b/include/linux/kallsyms.h @@ -165,9 +165,9 @@ static inline int kallsyms_show_value(void) #endif /*CONFIG_KALLSYMS*/ -static inline void print_ip_sym(unsigned long ip) +static inline void print_ip_sym(const char *loglvl, unsigned long ip) { - printk("[<%px>] %pS\n", (void *) ip, (void *) ip); + printk("%s[<%px>] %pS\n", loglvl, (void *) ip, (void *) ip); } #endif /*_LINUX_KALLSYMS_H*/ diff --git a/include/linux/kasan.h b/include/linux/kasan.h index 31314ca7c635..82522e996c76 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -11,8 +11,8 @@ struct task_struct; #ifdef CONFIG_KASAN +#include <linux/pgtable.h> #include <asm/kasan.h> -#include <asm/pgtable.h> extern unsigned char kasan_early_shadow_page[PAGE_SIZE]; extern pte_t kasan_early_shadow_pte[PTRS_PER_PTE]; diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h index 8165278c348a..ea9c15b60a96 100644 --- a/include/linux/mempolicy.h +++ b/include/linux/mempolicy.h @@ -31,7 +31,7 @@ struct mm_struct; * Locking policy for interlave: * In process context there is no locking because only the process accesses * its own state. All vma manipulation is somewhat protected by a down_read on - * mmap_sem. + * mmap_lock. * * Freeing policy: * Mempolicy objects are reference counted. 
A mempolicy will be freed when diff --git a/include/linux/mm.h b/include/linux/mm.h index 9d6042178ca7..dc7b87310c10 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -15,6 +15,7 @@ #include <linux/atomic.h> #include <linux/debug_locks.h> #include <linux/mm_types.h> +#include <linux/mmap_lock.h> #include <linux/range.h> #include <linux/pfn.h> #include <linux/percpu-refcount.h> @@ -28,6 +29,7 @@ #include <linux/overflow.h> #include <linux/sizes.h> #include <linux/sched.h> +#include <linux/pgtable.h> struct mempolicy; struct anon_vma; @@ -92,7 +94,6 @@ extern int mmap_rnd_compat_bits __read_mostly; #endif #include <asm/page.h> -#include <asm/pgtable.h> #include <asm/processor.h> /* @@ -401,7 +402,7 @@ extern pgprot_t protection_map[16]; * @FAULT_FLAG_WRITE: Fault was a write fault. * @FAULT_FLAG_MKWRITE: Fault was mkwrite of existing PTE. * @FAULT_FLAG_ALLOW_RETRY: Allow to retry the fault if blocked. - * @FAULT_FLAG_RETRY_NOWAIT: Don't drop mmap_sem and wait when retrying. + * @FAULT_FLAG_RETRY_NOWAIT: Don't drop mmap_lock and wait when retrying. * @FAULT_FLAG_KILLABLE: The fault task is in SIGKILL killable region. * @FAULT_FLAG_TRIED: The fault has been tried once. * @FAULT_FLAG_USER: The fault originated in userspace. @@ -451,10 +452,10 @@ extern pgprot_t protection_map[16]; * fault_flag_allow_retry_first - check ALLOW_RETRY the first time * * This is mostly used for places where we want to try to avoid taking - * the mmap_sem for too long a time when waiting for another condition + * the mmap_lock for too long a time when waiting for another condition * to change, in which case we can try to be polite to release the - * mmap_sem in the first round to avoid potential starvation of other - * processes that would also want the mmap_sem. + * mmap_lock in the first round to avoid potential starvation of other + * processes that would also want the mmap_lock. * * Return: true if the page fault allows retry and this is the first * attempt of the fault handling; false otherwise. @@ -581,7 +582,7 @@ struct vm_operations_struct { * (vma,addr) marked as MPOL_SHARED. The shared policy infrastructure * in mm/mempolicy.c will do this automatically. * get_policy() must NOT add a ref if the policy at (vma,addr) is not - * marked as MPOL_SHARED. vma policies are protected by the mmap_sem. + * marked as MPOL_SHARED. vma policies are protected by the mmap_lock. * If no [shared/vma] mempolicy exists at the addr, get_policy() op * must return NULL--i.e., do not "fallback" to task or system default * policy. diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index ef6d3aface8a..64ede5f150dc 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -344,7 +344,7 @@ struct vm_area_struct { * can only be in the i_mmap tree. An anonymous MAP_PRIVATE, stack * or brk vma (with NULL file) can only be in an anon_vma list. */ - struct list_head anon_vma_chain; /* Serialized by mmap_sem & + struct list_head anon_vma_chain; /* Serialized by mmap_lock & * page_table_lock */ struct anon_vma *anon_vma; /* Serialized by page_table_lock */ @@ -440,7 +440,7 @@ struct mm_struct { spinlock_t page_table_lock; /* Protects page tables and some * counters */ - struct rw_semaphore mmap_sem; + struct rw_semaphore mmap_lock; struct list_head mmlist; /* List of maybe swapped mm's. 
These * are globally strung together off diff --git a/include/linux/mmap_lock.h b/include/linux/mmap_lock.h new file mode 100644 index 000000000000..0707671851a8 --- /dev/null +++ b/include/linux/mmap_lock.h @@ -0,0 +1,90 @@ +#ifndef _LINUX_MMAP_LOCK_H +#define _LINUX_MMAP_LOCK_H + +#include <linux/mmdebug.h> + +#define MMAP_LOCK_INITIALIZER(name) \ + .mmap_lock = __RWSEM_INITIALIZER((name).mmap_lock), + +static inline void mmap_init_lock(struct mm_struct *mm) +{ + init_rwsem(&mm->mmap_lock); +} + +static inline void mmap_write_lock(struct mm_struct *mm) +{ + down_write(&mm->mmap_lock); +} + +static inline void mmap_write_lock_nested(struct mm_struct *mm, int subclass) +{ + down_write_nested(&mm->mmap_lock, subclass); +} + +static inline int mmap_write_lock_killable(struct mm_struct *mm) +{ + return down_write_killable(&mm->mmap_lock); +} + +static inline bool mmap_write_trylock(struct mm_struct *mm) +{ + return down_write_trylock(&mm->mmap_lock) != 0; +} + +static inline void mmap_write_unlock(struct mm_struct *mm) +{ + up_write(&mm->mmap_lock); +} + +static inline void mmap_write_downgrade(struct mm_struct *mm) +{ + downgrade_write(&mm->mmap_lock); +} + +static inline void mmap_read_lock(struct mm_struct *mm) +{ + down_read(&mm->mmap_lock); +} + +static inline int mmap_read_lock_killable(struct mm_struct *mm) +{ + return down_read_killable(&mm->mmap_lock); +} + +static inline bool mmap_read_trylock(struct mm_struct *mm) +{ + return down_read_trylock(&mm->mmap_lock) != 0; +} + +static inline void mmap_read_unlock(struct mm_struct *mm) +{ + up_read(&mm->mmap_lock); +} + +static inline bool mmap_read_trylock_non_owner(struct mm_struct *mm) +{ + if (down_read_trylock(&mm->mmap_lock)) { + rwsem_release(&mm->mmap_lock.dep_map, _RET_IP_); + return true; + } + return false; +} + +static inline void mmap_read_unlock_non_owner(struct mm_struct *mm) +{ + up_read_non_owner(&mm->mmap_lock); +} + +static inline void mmap_assert_locked(struct mm_struct *mm) +{ + lockdep_assert_held(&mm->mmap_lock); + VM_BUG_ON_MM(!rwsem_is_locked(&mm->mmap_lock), mm); +} + +static inline void mmap_assert_write_locked(struct mm_struct *mm) +{ + lockdep_assert_held_write(&mm->mmap_lock); + VM_BUG_ON_MM(!rwsem_is_locked(&mm->mmap_lock), mm); +} + +#endif /* _LINUX_MMAP_LOCK_H */ diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h index 736f6918335e..fc68f3570e19 100644 --- a/include/linux/mmu_notifier.h +++ b/include/linux/mmu_notifier.h @@ -5,6 +5,7 @@ #include <linux/list.h> #include <linux/spinlock.h> #include <linux/mm_types.h> +#include <linux/mmap_lock.h> #include <linux/srcu.h> #include <linux/interval_tree.h> @@ -121,7 +122,7 @@ struct mmu_notifier_ops { /* * invalidate_range_start() and invalidate_range_end() must be - * paired and are called only when the mmap_sem and/or the + * paired and are called only when the mmap_lock and/or the * locks protecting the reverse maps are held. If the subsystem * can't guarantee that no additional references are taken to * the pages in the range, it has to implement the @@ -212,13 +213,13 @@ struct mmu_notifier_ops { }; /* - * The notifier chains are protected by mmap_sem and/or the reverse map + * The notifier chains are protected by mmap_lock and/or the reverse map * semaphores. Notifier chains are only changed when all reverse maps and - * the mmap_sem locks are taken. + * the mmap_lock locks are taken. * * Therefore notifier chains can only be traversed when either * - * 1. mmap_sem is held. + * 1. mmap_lock is held. * 2. 
One of the reverse map locks is held (i_mmap_rwsem or anon_vma->rwsem). * 3. No other concurrent thread can access the list (release) */ @@ -277,9 +278,9 @@ mmu_notifier_get(const struct mmu_notifier_ops *ops, struct mm_struct *mm) { struct mmu_notifier *ret; - down_write(&mm->mmap_sem); + mmap_write_lock(mm); ret = mmu_notifier_get_locked(ops, mm); - up_write(&mm->mmap_sem); + mmap_write_unlock(mm); return ret; } void mmu_notifier_put(struct mmu_notifier *subscription); diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 8e085713150c..cf2468da68e9 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -538,7 +538,7 @@ static inline int lock_page_killable(struct page *page) * lock_page_or_retry - Lock the page, unless this would block and the * caller indicated that it can handle a retry. * - * Return value and mmap_sem implications depend on flags; see + * Return value and mmap_lock implications depend on flags; see * __lock_page_or_retry(). */ static inline int lock_page_or_retry(struct page *page, struct mm_struct *mm, diff --git a/include/asm-generic/pgtable.h b/include/linux/pgtable.h index 0a9329656ae6..32b6c52d41b9 100644 --- a/include/asm-generic/pgtable.h +++ b/include/linux/pgtable.h @@ -1,8 +1,9 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_GENERIC_PGTABLE_H -#define _ASM_GENERIC_PGTABLE_H +#ifndef _LINUX_PGTABLE_H +#define _LINUX_PGTABLE_H #include <linux/pfn.h> +#include <asm/pgtable.h> #ifndef __ASSEMBLY__ #ifdef CONFIG_MMU @@ -27,6 +28,121 @@ #define USER_PGTABLES_CEILING 0UL #endif +/* + * A page table page can be thought of an array like this: pXd_t[PTRS_PER_PxD] + * + * The pXx_index() functions return the index of the entry in the page + * table page which would control the given virtual address + * + * As these functions may be used by the same code for different levels of + * the page table folding, they are always available, regardless of + * CONFIG_PGTABLE_LEVELS value. For the folded levels they simply return 0 + * because in such cases PTRS_PER_PxD equals 1. + */ + +static inline unsigned long pte_index(unsigned long address) +{ + return (address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1); +} + +#ifndef pmd_index +static inline unsigned long pmd_index(unsigned long address) +{ + return (address >> PMD_SHIFT) & (PTRS_PER_PMD - 1); +} +#define pmd_index pmd_index +#endif + +#ifndef pud_index +static inline unsigned long pud_index(unsigned long address) +{ + return (address >> PUD_SHIFT) & (PTRS_PER_PUD - 1); +} +#define pud_index pud_index +#endif + +#ifndef pgd_index +/* Must be a compile-time constant, so implement it as a macro */ +#define pgd_index(a) (((a) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1)) +#endif + +#ifndef pte_offset_kernel +static inline pte_t *pte_offset_kernel(pmd_t *pmd, unsigned long address) +{ + return (pte_t *)pmd_page_vaddr(*pmd) + pte_index(address); +} +#define pte_offset_kernel pte_offset_kernel +#endif + +#if defined(CONFIG_HIGHPTE) +#define pte_offset_map(dir, address) \ + ((pte_t *)kmap_atomic(pmd_page(*(dir))) + \ + pte_index((address))) +#define pte_unmap(pte) kunmap_atomic((pte)) +#else +#define pte_offset_map(dir, address) pte_offset_kernel((dir), (address)) +#define pte_unmap(pte) ((void)(pte)) /* NOP */ +#endif + +/* Find an entry in the second-level page table.. 
*/ +#ifndef pmd_offset +static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address) +{ + return (pmd_t *)pud_page_vaddr(*pud) + pmd_index(address); +} +#define pmd_offset pmd_offset +#endif + +#ifndef pud_offset +static inline pud_t *pud_offset(p4d_t *p4d, unsigned long address) +{ + return (pud_t *)p4d_page_vaddr(*p4d) + pud_index(address); +} +#define pud_offset pud_offset +#endif + +static inline pgd_t *pgd_offset_pgd(pgd_t *pgd, unsigned long address) +{ + return (pgd + pgd_index(address)); +}; + +/* + * a shortcut to get a pgd_t in a given mm + */ +#ifndef pgd_offset +#define pgd_offset(mm, address) pgd_offset_pgd((mm)->pgd, (address)) +#endif + +/* + * a shortcut which implies the use of the kernel's pgd, instead + * of a process's + */ +#define pgd_offset_k(address) pgd_offset(&init_mm, (address)) + +/* + * In many cases it is known that a virtual address is mapped at PMD or PTE + * level, so instead of traversing all the page table levels, we can get a + * pointer to the PMD entry in user or kernel page table or translate a virtual + * address to the pointer in the PTE in the kernel page tables with simple + * helpers. + */ +static inline pmd_t *pmd_off(struct mm_struct *mm, unsigned long va) +{ + return pmd_offset(pud_offset(p4d_offset(pgd_offset(mm, va), va), va), va); +} + +static inline pmd_t *pmd_off_k(unsigned long va) +{ + return pmd_offset(pud_offset(p4d_offset(pgd_offset_k(va), va), va), va); +} + +static inline pte_t *virt_to_kpte(unsigned long vaddr) +{ + pmd_t *pmd = pmd_off_k(vaddr); + + return pmd_none(*pmd) ? NULL : pte_offset_kernel(pmd, vaddr); +} + #ifndef __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS extern int ptep_set_access_flags(struct vm_area_struct *vma, unsigned long address, pte_t *ptep, @@ -1018,11 +1134,11 @@ static inline pmd_t pmd_read_atomic(pmd_t *pmdp) #endif /* * This function is meant to be used by sites walking pagetables with - * the mmap_sem hold in read mode to protect against MADV_DONTNEED and + * the mmap_lock held in read mode to protect against MADV_DONTNEED and * transhuge page faults. MADV_DONTNEED can convert a transhuge pmd * into a null pmd and the transhuge page fault can convert a null pmd * into an hugepmd or into a regular pmd (if the hugepage allocation - * fails). While holding the mmap_sem in read mode the pmd becomes + * fails). While holding the mmap_lock in read mode the pmd becomes * stable and stops changing under us only if it's not null and not a * transhuge pmd. When those races occurs and this function makes a * difference vs the standard pmd_none_or_clear_bad, the result is @@ -1032,7 +1148,7 @@ static inline pmd_t pmd_read_atomic(pmd_t *pmdp) * * For 32bit kernels with a 64bit large pmd_t this automatically takes * care of reading the pmd atomically to avoid SMP race conditions - * against pmd_populate() when the mmap_sem is hold for reading by the + * against pmd_populate() when the mmap_lock is hold for reading by the * caller (a special atomic read not done by "gcc" as in the generic * version above, is also needed when THP is disabled because the page * fault can populate the pmd from under us). 
@@ -1319,4 +1435,4 @@ typedef unsigned int pgtbl_mod_mask; #define pmd_leaf(x) 0 #endif -#endif /* _ASM_GENERIC_PGTABLE_H */ +#endif /* _LINUX_PGTABLE_H */ diff --git a/include/linux/rmap.h b/include/linux/rmap.h index 988d176472df..3a6adfa70fb0 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -77,7 +77,7 @@ struct anon_vma { struct anon_vma_chain { struct vm_area_struct *vma; struct anon_vma *anon_vma; - struct list_head same_vma; /* locked by mmap_sem & page_table_lock */ + struct list_head same_vma; /* locked by mmap_lock & page_table_lock */ struct rb_node rb; /* locked by anon_vma->rwsem */ unsigned long rb_subtree_last; #ifdef CONFIG_DEBUG_VM_RB diff --git a/include/linux/sched/debug.h b/include/linux/sched/debug.h index 95fb9e025247..00c45a0e6abe 100644 --- a/include/linux/sched/debug.h +++ b/include/linux/sched/debug.h @@ -30,7 +30,8 @@ extern void show_regs(struct pt_regs *); * task), SP is the stack pointer of the first frame that should be shown in the back * trace (or NULL if the entire call-chain of the task should be shown). */ -extern void show_stack(struct task_struct *task, unsigned long *sp); +extern void show_stack(struct task_struct *task, unsigned long *sp, + const char *loglvl); extern void sched_show_task(struct task_struct *p); diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h index a132d875d351..480a4d1b7dd8 100644 --- a/include/linux/sched/mm.h +++ b/include/linux/sched/mm.h @@ -53,7 +53,7 @@ void mmdrop(struct mm_struct *mm); /* * This has to be called after a get_task_mm()/mmget_not_zero() - * followed by taking the mmap_sem for writing before modifying the + * followed by taking the mmap_lock for writing before modifying the * vmas or anything the coredump pretends not to change from under it. * * It also has to be called when mmgrab() is used in the context of @@ -61,14 +61,14 @@ void mmdrop(struct mm_struct *mm); * the context of the process to run down_write() on that pinned mm. * * NOTE: find_extend_vma() called from GUP context is the only place - * that can modify the "mm" (notably the vm_start/end) under mmap_sem + * that can modify the "mm" (notably the vm_start/end) under mmap_lock * for reading and outside the context of the process, so it is also - * the only case that holds the mmap_sem for reading that must call - * this function. Generally if the mmap_sem is hold for reading + * the only case that holds the mmap_lock for reading that must call + * this function. Generally if the mmap_lock is hold for reading * there's no need of this check after get_task_mm()/mmget_not_zero(). * * This function can be obsoleted and the check can be removed, after - * the coredump code will hold the mmap_sem for writing before + * the coredump code will hold the mmap_lock for writing before * invoking the ->core_dump methods. */ static inline bool mmget_still_valid(struct mm_struct *mm) diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index 9861c89f93be..dac1db05bf7e 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -301,62 +301,20 @@ copy_struct_from_user(void *dst, size_t ksize, const void __user *src, return 0; } -/* - * probe_kernel_read(): safely attempt to read from a location - * @dst: pointer to the buffer that shall take the data - * @src: address to read from - * @size: size of the data chunk - * - * Safely read from address @src to the buffer at @dst. If a kernel fault - * happens, handle that and return -EFAULT. 
- */ -extern long probe_kernel_read(void *dst, const void *src, size_t size); -extern long probe_kernel_read_strict(void *dst, const void *src, size_t size); -extern long __probe_kernel_read(void *dst, const void *src, size_t size); +bool probe_kernel_read_allowed(const void *unsafe_src, size_t size); -/* - * probe_user_read(): safely attempt to read from a location in user space - * @dst: pointer to the buffer that shall take the data - * @src: address to read from - * @size: size of the data chunk - * - * Safely read from address @src to the buffer at @dst. If a kernel fault - * happens, handle that and return -EFAULT. - */ +extern long probe_kernel_read(void *dst, const void *src, size_t size); extern long probe_user_read(void *dst, const void __user *src, size_t size); -extern long __probe_user_read(void *dst, const void __user *src, size_t size); -/* - * probe_kernel_write(): safely attempt to write to a location - * @dst: address to write to - * @src: pointer to the data that shall be written - * @size: size of the data chunk - * - * Safely write to address @dst from the buffer at @src. If a kernel fault - * happens, handle that and return -EFAULT. - */ extern long notrace probe_kernel_write(void *dst, const void *src, size_t size); -extern long notrace __probe_kernel_write(void *dst, const void *src, size_t size); - -/* - * probe_user_write(): safely attempt to write to a location in user space - * @dst: address to write to - * @src: pointer to the data that shall be written - * @size: size of the data chunk - * - * Safely write to address @dst from the buffer at @src. If a kernel fault - * happens, handle that and return -EFAULT. - */ extern long notrace probe_user_write(void __user *dst, const void *src, size_t size); -extern long notrace __probe_user_write(void __user *dst, const void *src, size_t size); - -extern long strncpy_from_unsafe(char *dst, const void *unsafe_addr, long count); -extern long strncpy_from_unsafe_strict(char *dst, const void *unsafe_addr, - long count); -extern long __strncpy_from_unsafe(char *dst, const void *unsafe_addr, long count); -extern long strncpy_from_unsafe_user(char *dst, const void __user *unsafe_addr, - long count); -extern long strnlen_unsafe_user(const void __user *unsafe_addr, long count); + +long strncpy_from_kernel_nofault(char *dst, const void *unsafe_addr, + long count); + +long strncpy_from_user_nofault(char *dst, const void __user *unsafe_addr, + long count); +long strnlen_user_nofault(const void __user *unsafe_addr, long count); /** * probe_kernel_address(): safely attempt to read from a location diff --git a/include/xen/arm/page.h b/include/xen/arm/page.h index f77dcbcba5a6..d7f6af50e200 100644 --- a/include/xen/arm/page.h +++ b/include/xen/arm/page.h @@ -3,11 +3,11 @@ #define _ASM_ARM_XEN_PAGE_H #include <asm/page.h> -#include <asm/pgtable.h> #include <linux/pfn.h> #include <linux/types.h> #include <linux/dma-mapping.h> +#include <linux/pgtable.h> #include <xen/xen.h> #include <xen/interface/grant_table.h> |
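Finally, a sketch of the lookup helpers consolidated into the new include/linux/pgtable.h above: pmd_off_k() and virt_to_kpte() walk the kernel page tables for a virtual address without open-coding each level. report_kernel_mapping() and its printout are illustrative only, and huge-page (pmd-level) mappings are ignored for brevity.

```c
#include <linux/mm.h>
#include <linux/pgtable.h>
#include <linux/printk.h>

static void report_kernel_mapping(unsigned long vaddr)
{
	pmd_t *pmd = pmd_off_k(vaddr);	/* pgd -> p4d -> pud -> pmd for &init_mm */
	pte_t *pte;

	if (pmd_none(*pmd)) {
		pr_info("%#lx: no pmd entry\n", vaddr);
		return;
	}

	/* virt_to_kpte() repeats the walk and returns the pte, or NULL */
	pte = virt_to_kpte(vaddr);
	pr_info("%#lx: pte %spresent\n", vaddr,
		pte && pte_present(*pte) ? "" : "not ");
}
```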