From 464920104bf7adac12722035bfefb3d772eb04d8 Mon Sep 17 00:00:00 2001
From: Pratyush Anand <panand@redhat.com>
Date: Mon, 27 Feb 2017 14:27:31 -0800
Subject: /proc/kcore: update physical address for kcore ram and text

Currently all the p_paddr of PT_LOAD headers are assigned to 0, which is
not true and could be misleading, since 0 is a valid physical address.

User space tools like makedumpfile needs to know physical address for
PT_LOAD segments of direct mapped regions.  Therefore this patch updates
paddr for such regions.  It also sets an invalid paddr (-1) for other
regions, so that user space tool can know whether a physical address
provided in PT_LOAD is correct or not.

I do not know why it was 0, which is a valid physical address.  But
certainly, it might break some user space tools, and those need to be
fixed.  For example, see following code from kexec-tools

kexec/kexec-elf.c:build_mem_phdrs()

                    if ((phdr->p_paddr + phdr->p_memsz) < phdr->p_paddr) {
                            /* The memory address wraps */
                            if (probe_debug) {
                                    fprintf(stderr, "ELF address wrap around\n");
                            }
                            return -1;
                    }

We do not need to perform above check for an invalid physical address.

I think, kexec-tools and makedumpfile will need fixup.  I already have
those fixup which will be sent upstream once this patch makes through.
Pro with this approach is that, it will help to calculate variable like
page_offset, phys_base from PT_LOAD even when they are randomized and
therefore will reduce many variable and version specific values in user
space tools.

Having an ASLR offset information can help to translate an identity
mapped virtual address to a physical address.  But that would be an
additional field in PT_LOAD header structure and an arch dependent
value.

Moreover, sending a valid physical address like 0 does not seem right.
So, IMHO it is better to fix that and send valid physical address when
available (identity mapped).

Link: http://lkml.kernel.org/r/f951340d2917cdd2a329fae9837a83f2059dc3b2.1485318868.git.panand@redhat.com
Signed-off-by: Pratyush Anand <panand@redhat.com>
Cc: Baoquan He <bhe@redhat.com>
Cc: Dave Young <dyoung@redhat.com>
Cc: Dave Anderson <anderson@redhat.com>
Cc: Atsushi Kumagai <kumagai-atsushi@mxc.nes.nec.co.jp>
Cc: Simon Horman <simon.horman@netronome.com>
Cc: Kees Cook <keescook@chromium.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/proc/kcore.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'fs/proc')

diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index 0b80ad87b4d6..ea9f3d1ae830 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -373,7 +373,10 @@ static void elf_kcore_store_hdr(char *bufp, int nphdr, int dataoff)
 		phdr->p_flags	= PF_R|PF_W|PF_X;
 		phdr->p_offset	= kc_vaddr_to_offset(m->addr) + dataoff;
 		phdr->p_vaddr	= (size_t)m->addr;
-		phdr->p_paddr	= 0;
+		if (m->type == KCORE_RAM || m->type == KCORE_TEXT)
+			phdr->p_paddr	= __pa(m->addr);
+		else
+			phdr->p_paddr	= (elf_addr_t)-1;
 		phdr->p_filesz	= phdr->p_memsz	= m->size;
 		phdr->p_align	= PAGE_SIZE;
 	}
-- 
cgit v1.2.3-70-g09d2


From f1f1007644ffc8051a4c11427d58b1967ae7b75a Mon Sep 17 00:00:00 2001
From: Vegard Nossum <vegard.nossum@oracle.com>
Date: Mon, 27 Feb 2017 14:30:07 -0800
Subject: mm: add new mmgrab() helper

Apart from adding the helper function itself, the rest of the kernel is
converted mechanically using:

  git grep -l 'atomic_inc.*mm_count' | xargs sed -i 's/atomic_inc(&\(.*\)->mm_count);/mmgrab\(\1\);/'
  git grep -l 'atomic_inc.*mm_count' | xargs sed -i 's/atomic_inc(&\(.*\)\.mm_count);/mmgrab\(\&\1\);/'

This is needed for a later patch that hooks into the helper, but might
be a worthwhile cleanup on its own.

(Michal Hocko provided most of the kerneldoc comment.)

Link: http://lkml.kernel.org/r/20161218123229.22952-1-vegard.nossum@oracle.com
Signed-off-by: Vegard Nossum <vegard.nossum@oracle.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/alpha/kernel/smp.c                  |  2 +-
 arch/arc/kernel/smp.c                    |  2 +-
 arch/arm/kernel/smp.c                    |  2 +-
 arch/arm64/kernel/smp.c                  |  2 +-
 arch/blackfin/mach-common/smp.c          |  2 +-
 arch/hexagon/kernel/smp.c                |  2 +-
 arch/ia64/kernel/setup.c                 |  2 +-
 arch/m32r/kernel/setup.c                 |  2 +-
 arch/metag/kernel/smp.c                  |  2 +-
 arch/mips/kernel/traps.c                 |  2 +-
 arch/mn10300/kernel/smp.c                |  2 +-
 arch/parisc/kernel/smp.c                 |  2 +-
 arch/powerpc/kernel/smp.c                |  2 +-
 arch/s390/kernel/processor.c             |  2 +-
 arch/score/kernel/traps.c                |  2 +-
 arch/sh/kernel/smp.c                     |  2 +-
 arch/sparc/kernel/leon_smp.c             |  2 +-
 arch/sparc/kernel/smp_64.c               |  2 +-
 arch/sparc/kernel/sun4d_smp.c            |  2 +-
 arch/sparc/kernel/sun4m_smp.c            |  2 +-
 arch/sparc/kernel/traps_32.c             |  2 +-
 arch/sparc/kernel/traps_64.c             |  2 +-
 arch/tile/kernel/smpboot.c               |  2 +-
 arch/x86/kernel/cpu/common.c             |  4 ++--
 arch/xtensa/kernel/smp.c                 |  2 +-
 drivers/gpu/drm/amd/amdkfd/kfd_process.c |  2 +-
 drivers/gpu/drm/i915/i915_gem_userptr.c  |  2 +-
 drivers/infiniband/hw/hfi1/file_ops.c    |  2 +-
 fs/proc/base.c                           |  4 ++--
 fs/userfaultfd.c                         |  2 +-
 include/linux/sched.h                    | 22 ++++++++++++++++++++++
 kernel/exit.c                            |  2 +-
 kernel/futex.c                           |  2 +-
 kernel/sched/core.c                      |  4 ++--
 mm/khugepaged.c                          |  2 +-
 mm/ksm.c                                 |  2 +-
 mm/mmu_context.c                         |  2 +-
 mm/mmu_notifier.c                        |  2 +-
 mm/oom_kill.c                            |  4 ++--
 virt/kvm/kvm_main.c                      |  2 +-
 40 files changed, 65 insertions(+), 43 deletions(-)

(limited to 'fs/proc')

diff --git a/arch/alpha/kernel/smp.c b/arch/alpha/kernel/smp.c
index 46bf263c3153..acb4b146a607 100644
--- a/arch/alpha/kernel/smp.c
+++ b/arch/alpha/kernel/smp.c
@@ -144,7 +144,7 @@ smp_callin(void)
 		alpha_mv.smp_callin();
 
 	/* All kernel threads share the same mm context.  */
-	atomic_inc(&init_mm.mm_count);
+	mmgrab(&init_mm);
 	current->active_mm = &init_mm;
 
 	/* inform the notifiers about the new cpu */
diff --git a/arch/arc/kernel/smp.c b/arch/arc/kernel/smp.c
index 2afbafadb6ab..695624181682 100644
--- a/arch/arc/kernel/smp.c
+++ b/arch/arc/kernel/smp.c
@@ -140,7 +140,7 @@ void start_kernel_secondary(void)
 	setup_processor();
 
 	atomic_inc(&mm->mm_users);
-	atomic_inc(&mm->mm_count);
+	mmgrab(mm);
 	current->active_mm = mm;
 	cpumask_set_cpu(cpu, mm_cpumask(mm));
 
diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
index 7dd14e8395e6..c6514ce0fcbc 100644
--- a/arch/arm/kernel/smp.c
+++ b/arch/arm/kernel/smp.c
@@ -371,7 +371,7 @@ asmlinkage void secondary_start_kernel(void)
 	 * reference and switch to it.
 	 */
 	cpu = smp_processor_id();
-	atomic_inc(&mm->mm_count);
+	mmgrab(mm);
 	current->active_mm = mm;
 	cpumask_set_cpu(cpu, mm_cpumask(mm));
 
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index a8ec5da530af..827d52d78b67 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -222,7 +222,7 @@ asmlinkage void secondary_start_kernel(void)
 	 * All kernel threads share the same mm context; grab a
 	 * reference and switch to it.
 	 */
-	atomic_inc(&mm->mm_count);
+	mmgrab(mm);
 	current->active_mm = mm;
 
 	/*
diff --git a/arch/blackfin/mach-common/smp.c b/arch/blackfin/mach-common/smp.c
index 23c4ef5f8bdc..bc5617ef7128 100644
--- a/arch/blackfin/mach-common/smp.c
+++ b/arch/blackfin/mach-common/smp.c
@@ -308,7 +308,7 @@ void secondary_start_kernel(void)
 
 	/* Attach the new idle task to the global mm. */
 	atomic_inc(&mm->mm_users);
-	atomic_inc(&mm->mm_count);
+	mmgrab(mm);
 	current->active_mm = mm;
 
 	preempt_disable();
diff --git a/arch/hexagon/kernel/smp.c b/arch/hexagon/kernel/smp.c
index 983bae7d2665..c02a6455839e 100644
--- a/arch/hexagon/kernel/smp.c
+++ b/arch/hexagon/kernel/smp.c
@@ -162,7 +162,7 @@ void start_secondary(void)
 	);
 
 	/*  Set the memory struct  */
-	atomic_inc(&init_mm.mm_count);
+	mmgrab(&init_mm);
 	current->active_mm = &init_mm;
 
 	cpu = smp_processor_id();
diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c
index c483ece3eb84..d68322966f33 100644
--- a/arch/ia64/kernel/setup.c
+++ b/arch/ia64/kernel/setup.c
@@ -994,7 +994,7 @@ cpu_init (void)
 	 */
 	ia64_setreg(_IA64_REG_CR_DCR,  (  IA64_DCR_DP | IA64_DCR_DK | IA64_DCR_DX | IA64_DCR_DR
 					| IA64_DCR_DA | IA64_DCR_DD | IA64_DCR_LC));
-	atomic_inc(&init_mm.mm_count);
+	mmgrab(&init_mm);
 	current->active_mm = &init_mm;
 	BUG_ON(current->mm);
 
diff --git a/arch/m32r/kernel/setup.c b/arch/m32r/kernel/setup.c
index 136c69f1fb8a..b18bc0bd6544 100644
--- a/arch/m32r/kernel/setup.c
+++ b/arch/m32r/kernel/setup.c
@@ -403,7 +403,7 @@ void __init cpu_init (void)
 	printk(KERN_INFO "Initializing CPU#%d\n", cpu_id);
 
 	/* Set up and load the per-CPU TSS and LDT */
-	atomic_inc(&init_mm.mm_count);
+	mmgrab(&init_mm);
 	current->active_mm = &init_mm;
 	if (current->mm)
 		BUG();
diff --git a/arch/metag/kernel/smp.c b/arch/metag/kernel/smp.c
index bad13232de51..af9cff547a19 100644
--- a/arch/metag/kernel/smp.c
+++ b/arch/metag/kernel/smp.c
@@ -345,7 +345,7 @@ asmlinkage void secondary_start_kernel(void)
 	 * reference and switch to it.
 	 */
 	atomic_inc(&mm->mm_users);
-	atomic_inc(&mm->mm_count);
+	mmgrab(mm);
 	current->active_mm = mm;
 	cpumask_set_cpu(cpu, mm_cpumask(mm));
 	enter_lazy_tlb(mm, current);
diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c
index cb479be31a50..49c6df20672a 100644
--- a/arch/mips/kernel/traps.c
+++ b/arch/mips/kernel/traps.c
@@ -2232,7 +2232,7 @@ void per_cpu_trap_init(bool is_boot_cpu)
 	if (!cpu_data[cpu].asid_cache)
 		cpu_data[cpu].asid_cache = asid_first_version(cpu);
 
-	atomic_inc(&init_mm.mm_count);
+	mmgrab(&init_mm);
 	current->active_mm = &init_mm;
 	BUG_ON(current->mm);
 	enter_lazy_tlb(&init_mm, current);
diff --git a/arch/mn10300/kernel/smp.c b/arch/mn10300/kernel/smp.c
index 426173c4b0b9..e65b5cc2fa67 100644
--- a/arch/mn10300/kernel/smp.c
+++ b/arch/mn10300/kernel/smp.c
@@ -589,7 +589,7 @@ static void __init smp_cpu_init(void)
 	}
 	printk(KERN_INFO "Initializing CPU#%d\n", cpu_id);
 
-	atomic_inc(&init_mm.mm_count);
+	mmgrab(&init_mm);
 	current->active_mm = &init_mm;
 	BUG_ON(current->mm);
 
diff --git a/arch/parisc/kernel/smp.c b/arch/parisc/kernel/smp.c
index 75dab2871346..67b452b41ff6 100644
--- a/arch/parisc/kernel/smp.c
+++ b/arch/parisc/kernel/smp.c
@@ -279,7 +279,7 @@ smp_cpu_init(int cpunum)
 	set_cpu_online(cpunum, true);
 
 	/* Initialise the idle task for this CPU */
-	atomic_inc(&init_mm.mm_count);
+	mmgrab(&init_mm);
 	current->active_mm = &init_mm;
 	BUG_ON(current->mm);
 	enter_lazy_tlb(&init_mm, current);
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 893bd7f79be6..573fb3a461b5 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -707,7 +707,7 @@ void start_secondary(void *unused)
 	unsigned int cpu = smp_processor_id();
 	int i, base;
 
-	atomic_inc(&init_mm.mm_count);
+	mmgrab(&init_mm);
 	current->active_mm = &init_mm;
 
 	smp_store_cpu_info(cpu);
diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c
index 21004aaac69b..bc2b60dcb178 100644
--- a/arch/s390/kernel/processor.c
+++ b/arch/s390/kernel/processor.c
@@ -73,7 +73,7 @@ void cpu_init(void)
 	get_cpu_id(id);
 	if (machine_has_cpu_mhz)
 		update_cpu_mhz(NULL);
-	atomic_inc(&init_mm.mm_count);
+	mmgrab(&init_mm);
 	current->active_mm = &init_mm;
 	BUG_ON(current->mm);
 	enter_lazy_tlb(&init_mm, current);
diff --git a/arch/score/kernel/traps.c b/arch/score/kernel/traps.c
index 2b22bcf02c27..569ac02f68df 100644
--- a/arch/score/kernel/traps.c
+++ b/arch/score/kernel/traps.c
@@ -336,7 +336,7 @@ void __init trap_init(void)
 	set_except_vector(18, handle_dbe);
 	flush_icache_range(DEBUG_VECTOR_BASE_ADDR, IRQ_VECTOR_BASE_ADDR);
 
-	atomic_inc(&init_mm.mm_count);
+	mmgrab(&init_mm);
 	current->active_mm = &init_mm;
 	cpu_cache_init();
 }
diff --git a/arch/sh/kernel/smp.c b/arch/sh/kernel/smp.c
index 38e7860845db..ee379c699c08 100644
--- a/arch/sh/kernel/smp.c
+++ b/arch/sh/kernel/smp.c
@@ -178,7 +178,7 @@ asmlinkage void start_secondary(void)
 	struct mm_struct *mm = &init_mm;
 
 	enable_mmu();
-	atomic_inc(&mm->mm_count);
+	mmgrab(mm);
 	atomic_inc(&mm->mm_users);
 	current->active_mm = mm;
 #ifdef CONFIG_MMU
diff --git a/arch/sparc/kernel/leon_smp.c b/arch/sparc/kernel/leon_smp.c
index 71e16f2241c2..b99d33797e1d 100644
--- a/arch/sparc/kernel/leon_smp.c
+++ b/arch/sparc/kernel/leon_smp.c
@@ -93,7 +93,7 @@ void leon_cpu_pre_online(void *arg)
 			     : "memory" /* paranoid */);
 
 	/* Attach to the address space of init_task. */
-	atomic_inc(&init_mm.mm_count);
+	mmgrab(&init_mm);
 	current->active_mm = &init_mm;
 
 	while (!cpumask_test_cpu(cpuid, &smp_commenced_mask))
diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c
index 90a02cb64e20..8e3e13924594 100644
--- a/arch/sparc/kernel/smp_64.c
+++ b/arch/sparc/kernel/smp_64.c
@@ -122,7 +122,7 @@ void smp_callin(void)
 	current_thread_info()->new_child = 0;
 
 	/* Attach to the address space of init_task. */
-	atomic_inc(&init_mm.mm_count);
+	mmgrab(&init_mm);
 	current->active_mm = &init_mm;
 
 	/* inform the notifiers about the new cpu */
diff --git a/arch/sparc/kernel/sun4d_smp.c b/arch/sparc/kernel/sun4d_smp.c
index 9d98e5002a09..7b55c50eabe5 100644
--- a/arch/sparc/kernel/sun4d_smp.c
+++ b/arch/sparc/kernel/sun4d_smp.c
@@ -93,7 +93,7 @@ void sun4d_cpu_pre_online(void *arg)
 	show_leds(cpuid);
 
 	/* Attach to the address space of init_task. */
-	atomic_inc(&init_mm.mm_count);
+	mmgrab(&init_mm);
 	current->active_mm = &init_mm;
 
 	local_ops->cache_all();
diff --git a/arch/sparc/kernel/sun4m_smp.c b/arch/sparc/kernel/sun4m_smp.c
index 278c40abce82..633c4cf6fdb0 100644
--- a/arch/sparc/kernel/sun4m_smp.c
+++ b/arch/sparc/kernel/sun4m_smp.c
@@ -59,7 +59,7 @@ void sun4m_cpu_pre_online(void *arg)
 			     : "memory" /* paranoid */);
 
 	/* Attach to the address space of init_task. */
-	atomic_inc(&init_mm.mm_count);
+	mmgrab(&init_mm);
 	current->active_mm = &init_mm;
 
 	while (!cpumask_test_cpu(cpuid, &smp_commenced_mask))
diff --git a/arch/sparc/kernel/traps_32.c b/arch/sparc/kernel/traps_32.c
index 4f21df7d4f13..ecddac5a4c96 100644
--- a/arch/sparc/kernel/traps_32.c
+++ b/arch/sparc/kernel/traps_32.c
@@ -448,7 +448,7 @@ void trap_init(void)
 		thread_info_offsets_are_bolixed_pete();
 
 	/* Attach to the address space of init_task. */
-	atomic_inc(&init_mm.mm_count);
+	mmgrab(&init_mm);
 	current->active_mm = &init_mm;
 
 	/* NOTE: Other cpus have this done as they are started
diff --git a/arch/sparc/kernel/traps_64.c b/arch/sparc/kernel/traps_64.c
index dfc97a47c9a0..e022d7b00390 100644
--- a/arch/sparc/kernel/traps_64.c
+++ b/arch/sparc/kernel/traps_64.c
@@ -2837,6 +2837,6 @@ void __init trap_init(void)
 	/* Attach to the address space of init_task.  On SMP we
 	 * do this in smp.c:smp_callin for other cpus.
 	 */
-	atomic_inc(&init_mm.mm_count);
+	mmgrab(&init_mm);
 	current->active_mm = &init_mm;
 }
diff --git a/arch/tile/kernel/smpboot.c b/arch/tile/kernel/smpboot.c
index 6c0abaacec33..53ce940a5016 100644
--- a/arch/tile/kernel/smpboot.c
+++ b/arch/tile/kernel/smpboot.c
@@ -160,7 +160,7 @@ static void start_secondary(void)
 	__this_cpu_write(current_asid, min_asid);
 
 	/* Set up this thread as another owner of the init_mm */
-	atomic_inc(&init_mm.mm_count);
+	mmgrab(&init_mm);
 	current->active_mm = &init_mm;
 	if (current->mm)
 		BUG();
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index f07005e6f461..c64ca5929cb5 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1510,7 +1510,7 @@ void cpu_init(void)
 	for (i = 0; i <= IO_BITMAP_LONGS; i++)
 		t->io_bitmap[i] = ~0UL;
 
-	atomic_inc(&init_mm.mm_count);
+	mmgrab(&init_mm);
 	me->active_mm = &init_mm;
 	BUG_ON(me->mm);
 	enter_lazy_tlb(&init_mm, me);
@@ -1561,7 +1561,7 @@ void cpu_init(void)
 	/*
 	 * Set up and load the per-CPU TSS and LDT
 	 */
-	atomic_inc(&init_mm.mm_count);
+	mmgrab(&init_mm);
 	curr->active_mm = &init_mm;
 	BUG_ON(curr->mm);
 	enter_lazy_tlb(&init_mm, curr);
diff --git a/arch/xtensa/kernel/smp.c b/arch/xtensa/kernel/smp.c
index fc4ad21a5ed4..9bf5cea3bae4 100644
--- a/arch/xtensa/kernel/smp.c
+++ b/arch/xtensa/kernel/smp.c
@@ -136,7 +136,7 @@ void secondary_start_kernel(void)
 	/* All kernel threads share the same mm context. */
 
 	atomic_inc(&mm->mm_users);
-	atomic_inc(&mm->mm_count);
+	mmgrab(mm);
 	current->active_mm = mm;
 	cpumask_set_cpu(cpu, mm_cpumask(mm));
 	enter_lazy_tlb(mm, current);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index ef7c8de7060e..ca5f2aa7232d 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -262,7 +262,7 @@ static void kfd_process_notifier_release(struct mmu_notifier *mn,
 	 * and because the mmu_notifier_unregister function also drop
 	 * mm_count we need to take an extra count here.
 	 */
-	atomic_inc(&p->mm->mm_count);
+	mmgrab(p->mm);
 	mmu_notifier_unregister_no_release(&p->mmu_notifier, p->mm);
 	mmu_notifier_call_srcu(&p->rcu, &kfd_process_destroy_delayed);
 }
diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c
index 6a8fa085b74e..65802d93fdc1 100644
--- a/drivers/gpu/drm/i915/i915_gem_userptr.c
+++ b/drivers/gpu/drm/i915/i915_gem_userptr.c
@@ -334,7 +334,7 @@ i915_gem_userptr_init__mm_struct(struct drm_i915_gem_object *obj)
 		mm->i915 = to_i915(obj->base.dev);
 
 		mm->mm = current->mm;
-		atomic_inc(&current->mm->mm_count);
+		mmgrab(current->mm);
 
 		mm->mn = NULL;
 
diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c
index f46033984d07..3b19c16a9e45 100644
--- a/drivers/infiniband/hw/hfi1/file_ops.c
+++ b/drivers/infiniband/hw/hfi1/file_ops.c
@@ -185,7 +185,7 @@ static int hfi1_file_open(struct inode *inode, struct file *fp)
 	if (fd) {
 		fd->rec_cpu_num = -1; /* no cpu affinity by default */
 		fd->mm = current->mm;
-		atomic_inc(&fd->mm->mm_count);
+		mmgrab(fd->mm);
 		fp->private_data = fd;
 	} else {
 		fp->private_data = NULL;
diff --git a/fs/proc/base.c b/fs/proc/base.c
index b8f06273353e..5d51a188871b 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -766,7 +766,7 @@ struct mm_struct *proc_mem_open(struct inode *inode, unsigned int mode)
 
 		if (!IS_ERR_OR_NULL(mm)) {
 			/* ensure this mm_struct can't be freed */
-			atomic_inc(&mm->mm_count);
+			mmgrab(mm);
 			/* but do not pin its memory */
 			mmput(mm);
 		}
@@ -1064,7 +1064,7 @@ static int __set_oom_adj(struct file *file, int oom_adj, bool legacy)
 		if (p) {
 			if (atomic_read(&p->mm->mm_users) > 1) {
 				mm = p->mm;
-				atomic_inc(&mm->mm_count);
+				mmgrab(mm);
 			}
 			task_unlock(p);
 		}
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index e6e0a619cb3a..3c421d06a18e 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -1847,7 +1847,7 @@ static struct file *userfaultfd_file_create(int flags)
 	ctx->released = false;
 	ctx->mm = current->mm;
 	/* prevent the mm struct to be freed */
-	atomic_inc(&ctx->mm->mm_count);
+	mmgrab(ctx->mm);
 
 	file = anon_inode_getfile("[userfaultfd]", &userfaultfd_fops, ctx,
 				  O_RDWR | (flags & UFFD_SHARED_FCNTL_FLAGS));
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 451e241f32c5..7cfa5546c840 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2904,6 +2904,28 @@ static inline unsigned long sigsp(unsigned long sp, struct ksignal *ksig)
  */
 extern struct mm_struct * mm_alloc(void);
 
+/**
+ * mmgrab() - Pin a &struct mm_struct.
+ * @mm: The &struct mm_struct to pin.
+ *
+ * Make sure that @mm will not get freed even after the owning task
+ * exits. This doesn't guarantee that the associated address space
+ * will still exist later on and mmget_not_zero() has to be used before
+ * accessing it.
+ *
+ * This is a preferred way to to pin @mm for a longer/unbounded amount
+ * of time.
+ *
+ * Use mmdrop() to release the reference acquired by mmgrab().
+ *
+ * See also <Documentation/vm/active_mm.txt> for an in-depth explanation
+ * of &mm_struct.mm_count vs &mm_struct.mm_users.
+ */
+static inline void mmgrab(struct mm_struct *mm)
+{
+	atomic_inc(&mm->mm_count);
+}
+
 /* mmdrop drops the mm and the page tables */
 extern void __mmdrop(struct mm_struct *);
 static inline void mmdrop(struct mm_struct *mm)
diff --git a/kernel/exit.c b/kernel/exit.c
index 90b09ca35c84..8a768a3672a5 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -539,7 +539,7 @@ static void exit_mm(void)
 		__set_current_state(TASK_RUNNING);
 		down_read(&mm->mmap_sem);
 	}
-	atomic_inc(&mm->mm_count);
+	mmgrab(mm);
 	BUG_ON(mm != current->active_mm);
 	/* more a memory barrier than a real lock */
 	task_lock(current);
diff --git a/kernel/futex.c b/kernel/futex.c
index cdf365036141..b687cb22301c 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -338,7 +338,7 @@ static inline bool should_fail_futex(bool fshared)
 
 static inline void futex_get_mm(union futex_key *key)
 {
-	atomic_inc(&key->private.mm->mm_count);
+	mmgrab(key->private.mm);
 	/*
 	 * Ensure futex_get_mm() implies a full barrier such that
 	 * get_futex_key() implies a full barrier. This is relied upon
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index e1ae6ac15eac..6ea1925ac5c0 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2847,7 +2847,7 @@ context_switch(struct rq *rq, struct task_struct *prev,
 
 	if (!mm) {
 		next->active_mm = oldmm;
-		atomic_inc(&oldmm->mm_count);
+		mmgrab(oldmm);
 		enter_lazy_tlb(oldmm, next);
 	} else
 		switch_mm_irqs_off(oldmm, mm, next);
@@ -6098,7 +6098,7 @@ void __init sched_init(void)
 	/*
 	 * The boot idle thread does lazy MMU switching as well:
 	 */
-	atomic_inc(&init_mm.mm_count);
+	mmgrab(&init_mm);
 	enter_lazy_tlb(&init_mm, current);
 
 	/*
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 77ae3239c3de..34bce5c308e3 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -420,7 +420,7 @@ int __khugepaged_enter(struct mm_struct *mm)
 	list_add_tail(&mm_slot->mm_node, &khugepaged_scan.mm_head);
 	spin_unlock(&khugepaged_mm_lock);
 
-	atomic_inc(&mm->mm_count);
+	mmgrab(mm);
 	if (wakeup)
 		wake_up_interruptible(&khugepaged_wait);
 
diff --git a/mm/ksm.c b/mm/ksm.c
index cf211c01ceac..520e4c37fec7 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -1854,7 +1854,7 @@ int __ksm_enter(struct mm_struct *mm)
 	spin_unlock(&ksm_mmlist_lock);
 
 	set_bit(MMF_VM_MERGEABLE, &mm->flags);
-	atomic_inc(&mm->mm_count);
+	mmgrab(mm);
 
 	if (needs_wakeup)
 		wake_up_interruptible(&ksm_thread_wait);
diff --git a/mm/mmu_context.c b/mm/mmu_context.c
index 6f4d27c5bb32..daf67bb02b4a 100644
--- a/mm/mmu_context.c
+++ b/mm/mmu_context.c
@@ -25,7 +25,7 @@ void use_mm(struct mm_struct *mm)
 	task_lock(tsk);
 	active_mm = tsk->active_mm;
 	if (active_mm != mm) {
-		atomic_inc(&mm->mm_count);
+		mmgrab(mm);
 		tsk->active_mm = mm;
 	}
 	tsk->mm = mm;
diff --git a/mm/mmu_notifier.c b/mm/mmu_notifier.c
index f4259e496f83..32bc9f2ff7eb 100644
--- a/mm/mmu_notifier.c
+++ b/mm/mmu_notifier.c
@@ -275,7 +275,7 @@ static int do_mmu_notifier_register(struct mmu_notifier *mn,
 		mm->mmu_notifier_mm = mmu_notifier_mm;
 		mmu_notifier_mm = NULL;
 	}
-	atomic_inc(&mm->mm_count);
+	mmgrab(mm);
 
 	/*
 	 * Serialize the update against mmu_notifier_unregister. A
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 578321f1c070..51c091849dcb 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -653,7 +653,7 @@ static void mark_oom_victim(struct task_struct *tsk)
 
 	/* oom_mm is bound to the signal struct life time. */
 	if (!cmpxchg(&tsk->signal->oom_mm, NULL, mm))
-		atomic_inc(&tsk->signal->oom_mm->mm_count);
+		mmgrab(tsk->signal->oom_mm);
 
 	/*
 	 * Make sure that the task is woken up from uninterruptible sleep
@@ -870,7 +870,7 @@ static void oom_kill_process(struct oom_control *oc, const char *message)
 
 	/* Get a reference to safely compare mm after task_unlock(victim) */
 	mm = victim->mm;
-	atomic_inc(&mm->mm_count);
+	mmgrab(mm);
 	/*
 	 * We should send SIGKILL before setting TIF_MEMDIE in order to prevent
 	 * the OOM victim from depleting the memory reserves from the user
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 5b0dd4a9b2cb..35f71409d9ee 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -611,7 +611,7 @@ static struct kvm *kvm_create_vm(unsigned long type)
 		return ERR_PTR(-ENOMEM);
 
 	spin_lock_init(&kvm->mmu_lock);
-	atomic_inc(&current->mm->mm_count);
+	mmgrab(current->mm);
 	kvm->mm = current->mm;
 	kvm_eventfd_init(kvm);
 	mutex_init(&kvm->lock);
-- 
cgit v1.2.3-70-g09d2


From 388f79345502232d335467e8fa6f8e55a18844e1 Mon Sep 17 00:00:00 2001
From: Vegard Nossum <vegard.nossum@oracle.com>
Date: Mon, 27 Feb 2017 14:30:13 -0800
Subject: mm: use mmget_not_zero() helper

We already have the helper, we can convert the rest of the kernel
mechanically using:

  git grep -l 'atomic_inc_not_zero.*mm_users' | xargs sed -i 's/atomic_inc_not_zero(&\(.*\)->mm_users)/mmget_not_zero\(\1\)/'

This is needed for a later patch that hooks into the helper, but might
be a worthwhile cleanup on its own.

Link: http://lkml.kernel.org/r/20161218123229.22952-3-vegard.nossum@oracle.com
Signed-off-by: Vegard Nossum <vegard.nossum@oracle.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/gpu/drm/i915/i915_gem_userptr.c | 2 +-
 drivers/iommu/intel-svm.c               | 2 +-
 fs/proc/base.c                          | 4 ++--
 fs/proc/task_mmu.c                      | 4 ++--
 fs/proc/task_nommu.c                    | 2 +-
 kernel/events/uprobes.c                 | 2 +-
 mm/swapfile.c                           | 2 +-
 7 files changed, 9 insertions(+), 9 deletions(-)

(limited to 'fs/proc')

diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c
index 65802d93fdc1..0115989e324a 100644
--- a/drivers/gpu/drm/i915/i915_gem_userptr.c
+++ b/drivers/gpu/drm/i915/i915_gem_userptr.c
@@ -507,7 +507,7 @@ __i915_gem_userptr_get_pages_worker(struct work_struct *_work)
 			flags |= FOLL_WRITE;
 
 		ret = -EFAULT;
-		if (atomic_inc_not_zero(&mm->mm_users)) {
+		if (mmget_not_zero(mm)) {
 			down_read(&mm->mmap_sem);
 			while (pinned < npages) {
 				ret = get_user_pages_remote
diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c
index cb72e0011310..51f2b228723f 100644
--- a/drivers/iommu/intel-svm.c
+++ b/drivers/iommu/intel-svm.c
@@ -579,7 +579,7 @@ static irqreturn_t prq_event_thread(int irq, void *d)
 		if (!svm->mm)
 			goto bad_req;
 		/* If the mm is already defunct, don't handle faults. */
-		if (!atomic_inc_not_zero(&svm->mm->mm_users))
+		if (!mmget_not_zero(svm->mm))
 			goto bad_req;
 		down_read(&svm->mm->mmap_sem);
 		vma = find_extend_vma(svm->mm, address);
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 5d51a188871b..1e1e182d571b 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -813,7 +813,7 @@ static ssize_t mem_rw(struct file *file, char __user *buf,
 		return -ENOMEM;
 
 	copied = 0;
-	if (!atomic_inc_not_zero(&mm->mm_users))
+	if (!mmget_not_zero(mm))
 		goto free;
 
 	/* Maybe we should limit FOLL_FORCE to actual ptrace users? */
@@ -921,7 +921,7 @@ static ssize_t environ_read(struct file *file, char __user *buf,
 		return -ENOMEM;
 
 	ret = 0;
-	if (!atomic_inc_not_zero(&mm->mm_users))
+	if (!mmget_not_zero(mm))
 		goto free;
 
 	down_read(&mm->mmap_sem);
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 8f96a49178d0..ee3efb229ef6 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -167,7 +167,7 @@ static void *m_start(struct seq_file *m, loff_t *ppos)
 		return ERR_PTR(-ESRCH);
 
 	mm = priv->mm;
-	if (!mm || !atomic_inc_not_zero(&mm->mm_users))
+	if (!mm || !mmget_not_zero(mm))
 		return NULL;
 
 	down_read(&mm->mmap_sem);
@@ -1352,7 +1352,7 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
 	unsigned long end_vaddr;
 	int ret = 0, copied = 0;
 
-	if (!mm || !atomic_inc_not_zero(&mm->mm_users))
+	if (!mm || !mmget_not_zero(mm))
 		goto out;
 
 	ret = -EINVAL;
diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c
index 37175621e890..1ef97cfcf422 100644
--- a/fs/proc/task_nommu.c
+++ b/fs/proc/task_nommu.c
@@ -219,7 +219,7 @@ static void *m_start(struct seq_file *m, loff_t *pos)
 		return ERR_PTR(-ESRCH);
 
 	mm = priv->mm;
-	if (!mm || !atomic_inc_not_zero(&mm->mm_users))
+	if (!mm || !mmget_not_zero(mm))
 		return NULL;
 
 	down_read(&mm->mmap_sem);
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 18c6b23edd3c..d630f8ac4d2f 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -747,7 +747,7 @@ build_map_info(struct address_space *mapping, loff_t offset, bool is_register)
 			continue;
 		}
 
-		if (!atomic_inc_not_zero(&vma->vm_mm->mm_users))
+		if (!mmget_not_zero(vma->vm_mm))
 			continue;
 
 		info = prev;
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 7a0713b76668..fadc6a1c0da0 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -1763,7 +1763,7 @@ int try_to_unuse(unsigned int type, bool frontswap,
 			while (swap_count(*swap_map) && !retval &&
 					(p = p->next) != &start_mm->mmlist) {
 				mm = list_entry(p, struct mm_struct, mmlist);
-				if (!atomic_inc_not_zero(&mm->mm_users))
+				if (!mmget_not_zero(mm))
 					continue;
 				spin_unlock(&mmlist_lock);
 				mmput(prev_mm);
-- 
cgit v1.2.3-70-g09d2