summary | refs | log | tree | commit | diff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r-- fs/Kconfig 2
-rw-r--r-- fs/fuse/virtio_fs.c 15
-rw-r--r-- fs/proc/Kconfig 2
-rw-r--r-- fs/proc/kcore.c 2
-rw-r--r-- fs/proc/task_mmu.c 17
-rw-r--r-- fs/userfaultfd.c 86
6 files changed, 44 insertions, 80 deletions
diff --git a/fs/Kconfig b/fs/Kconfig
index ea2f77446080..a46b0cbc4d8f 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -60,7 +60,6 @@ endif # BLOCK
config FS_DAX
bool "File system based Direct Access (DAX) support"
depends on MMU
- depends on !(ARM || MIPS || SPARC)
depends on ZONE_DEVICE || FS_DAX_LIMITED
select FS_IOMAP
select DAX
@@ -261,6 +260,7 @@ menuconfig HUGETLBFS
depends on X86 || SPARC64 || ARCH_SUPPORTS_HUGETLBFS || BROKEN
depends on (SYSFS || SYSCTL)
select MEMFD_CREATE
+ select PADATA if SMP
help
hugetlbfs is a filesystem backing for HugeTLB pages, based on
ramfs. For architectures that support it, say Y here and read
diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c
index 5f1be1da92ce..a28466c2da71 100644
--- a/fs/fuse/virtio_fs.c
+++ b/fs/fuse/virtio_fs.c
@@ -16,6 +16,7 @@
#include <linux/fs_context.h>
#include <linux/fs_parser.h>
#include <linux/highmem.h>
+#include <linux/cleanup.h>
#include <linux/uio.h>
#include "fuse_i.h"
@@ -795,8 +796,11 @@ static void virtio_fs_cleanup_dax(void *data)
put_dax(dax_dev);
}
+/* Scope-exit cleanup for a dax_device pointer; NULL and ERR_PTR values are left alone. */
+DEFINE_FREE(cleanup_dax, struct dax_device *, if (!IS_ERR_OR_NULL(_T)) virtio_fs_cleanup_dax(_T))
+
static int virtio_fs_setup_dax(struct virtio_device *vdev, struct virtio_fs *fs)
{
+ struct dax_device *dax_dev __free(cleanup_dax) = NULL;
struct virtio_shm_region cache_reg;
struct dev_pagemap *pgmap;
bool have_cache;
@@ -804,6 +808,12 @@ static int virtio_fs_setup_dax(struct virtio_device *vdev, struct virtio_fs *fs)
if (!IS_ENABLED(CONFIG_FUSE_DAX))
return 0;
+ dax_dev = alloc_dax(fs, &virtio_fs_dax_ops);
+ if (IS_ERR(dax_dev)) {
+ int rc = PTR_ERR(dax_dev);
+ return rc == -EOPNOTSUPP ? 0 : rc;
+ }
+
/* Get cache region */
have_cache = virtio_get_shm_region(vdev, &cache_reg,
(u8)VIRTIO_FS_SHMCAP_ID_CACHE);
@@ -849,10 +859,7 @@ static int virtio_fs_setup_dax(struct virtio_device *vdev, struct virtio_fs *fs)
dev_dbg(&vdev->dev, "%s: window kaddr 0x%px phys_addr 0x%llx len 0x%llx\n",
__func__, fs->window_kaddr, cache_reg.addr, cache_reg.len);
- fs->dax_dev = alloc_dax(fs, &virtio_fs_dax_ops);
- if (IS_ERR(fs->dax_dev))
- return PTR_ERR(fs->dax_dev);
-
+ fs->dax_dev = no_free_ptr(dax_dev);
return devm_add_action_or_reset(&vdev->dev, virtio_fs_cleanup_dax,
fs->dax_dev);
}
diff --git a/fs/proc/Kconfig b/fs/proc/Kconfig
index 32b1116ae137..d80a1431ef7b 100644
--- a/fs/proc/Kconfig
+++ b/fs/proc/Kconfig
@@ -32,7 +32,7 @@ config PROC_FS
config PROC_KCORE
bool "/proc/kcore support" if !ARM
depends on PROC_FS && MMU
- select CRASH_CORE
+ select VMCORE_INFO
help
Provides a virtual ELF core file of the live kernel. This can
be read with gdb and other ELF tools. No modifications can be
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index 6422e569b080..8e08a9a1b7ed 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -10,7 +10,7 @@
* Safe accesses to vmalloc/direct-mapped discontiguous areas, Kanoj Sarcar <kanoj@sgi.com>
*/
-#include <linux/crash_core.h>
+#include <linux/vmcore_info.h>
#include <linux/mm.h>
#include <linux/proc_fs.h>
#include <linux/kcore.h>
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 3f78ebbb795f..23fbab954c20 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -1352,8 +1352,7 @@ static inline pagemap_entry_t make_pme(u64 frame, u64 flags)
return (pagemap_entry_t) { .pme = (frame & PM_PFRAME_MASK) | flags };
}
-static int add_to_pagemap(unsigned long addr, pagemap_entry_t *pme,
- struct pagemapread *pm)
+static int add_to_pagemap(pagemap_entry_t *pme, struct pagemapread *pm)
{
pm->buffer[pm->pos++] = *pme;
if (pm->pos >= pm->len)
@@ -1380,7 +1379,7 @@ static int pagemap_pte_hole(unsigned long start, unsigned long end,
hole_end = end;
for (; addr < hole_end; addr += PAGE_SIZE) {
- err = add_to_pagemap(addr, &pme, pm);
+ err = add_to_pagemap(&pme, pm);
if (err)
goto out;
}
@@ -1392,7 +1391,7 @@ static int pagemap_pte_hole(unsigned long start, unsigned long end,
if (vma->vm_flags & VM_SOFTDIRTY)
pme = make_pme(0, PM_SOFT_DIRTY);
for (; addr < min(end, vma->vm_end); addr += PAGE_SIZE) {
- err = add_to_pagemap(addr, &pme, pm);
+ err = add_to_pagemap(&pme, pm);
if (err)
goto out;
}
@@ -1519,7 +1518,7 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end,
for (; addr != end; addr += PAGE_SIZE) {
pagemap_entry_t pme = make_pme(frame, flags);
- err = add_to_pagemap(addr, &pme, pm);
+ err = add_to_pagemap(&pme, pm);
if (err)
break;
if (pm->show_pfn) {
@@ -1547,7 +1546,7 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end,
pagemap_entry_t pme;
pme = pte_to_pagemap_entry(pm, vma, addr, ptep_get(pte));
- err = add_to_pagemap(addr, &pme, pm);
+ err = add_to_pagemap(&pme, pm);
if (err)
break;
}
@@ -1597,7 +1596,7 @@ static int pagemap_hugetlb_range(pte_t *ptep, unsigned long hmask,
for (; addr != end; addr += PAGE_SIZE) {
pagemap_entry_t pme = make_pme(frame, flags);
- err = add_to_pagemap(addr, &pme, pm);
+ err = add_to_pagemap(&pme, pm);
if (err)
return err;
if (pm->show_pfn && (flags & PM_PRESENT))
@@ -1807,7 +1806,7 @@ static unsigned long pagemap_page_category(struct pagemap_scan_private *p,
if (p->masks_of_interest & PAGE_IS_FILE) {
swp = pte_to_swp_entry(pte);
if (is_pfn_swap_entry(swp) &&
- !PageAnon(pfn_swap_entry_to_page(swp)))
+ !folio_test_anon(pfn_swap_entry_folio(swp)))
categories |= PAGE_IS_FILE;
}
if (pte_swp_soft_dirty(pte))
@@ -1873,7 +1872,7 @@ static unsigned long pagemap_thp_category(struct pagemap_scan_private *p,
if (p->masks_of_interest & PAGE_IS_FILE) {
swp = pmd_to_swp_entry(pmd);
if (is_pfn_swap_entry(swp) &&
- !PageAnon(pfn_swap_entry_to_page(swp)))
+ !folio_test_anon(pfn_swap_entry_folio(swp)))
categories |= PAGE_IS_FILE;
}
}
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index 959551ff9a95..60dcfafdc11a 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -50,45 +50,6 @@ static struct ctl_table vm_userfaultfd_table[] = {
static struct kmem_cache *userfaultfd_ctx_cachep __ro_after_init;
-/*
- * Start with fault_pending_wqh and fault_wqh so they're more likely
- * to be in the same cacheline.
- *
- * Locking order:
- * fd_wqh.lock
- * fault_pending_wqh.lock
- * fault_wqh.lock
- * event_wqh.lock
- *
- * To avoid deadlocks, IRQs must be disabled when taking any of the above locks,
- * since fd_wqh.lock is taken by aio_poll() while it's holding a lock that's
- * also taken in IRQ context.
- */
-struct userfaultfd_ctx {
- /* waitqueue head for the pending (i.e. not read) userfaults */
- wait_queue_head_t fault_pending_wqh;
- /* waitqueue head for the userfaults */
- wait_queue_head_t fault_wqh;
- /* waitqueue head for the pseudo fd to wakeup poll/read */
- wait_queue_head_t fd_wqh;
- /* waitqueue head for events */
- wait_queue_head_t event_wqh;
- /* a refile sequence protected by fault_pending_wqh lock */
- seqcount_spinlock_t refile_seq;
- /* pseudo fd refcounting */
- refcount_t refcount;
- /* userfaultfd syscall flags */
- unsigned int flags;
- /* features requested from the userspace */
- unsigned int features;
- /* released */
- bool released;
- /* memory mappings are changing because of non-cooperative event */
- atomic_t mmap_changing;
- /* mm with one ore more vmas attached to this userfaultfd_ctx */
- struct mm_struct *mm;
-};
-
struct userfaultfd_fork_ctx {
struct userfaultfd_ctx *orig;
struct userfaultfd_ctx *new;
@@ -724,12 +685,15 @@ int dup_userfaultfd(struct vm_area_struct *vma, struct list_head *fcs)
ctx->flags = octx->flags;
ctx->features = octx->features;
ctx->released = false;
+ init_rwsem(&ctx->map_changing_lock);
atomic_set(&ctx->mmap_changing, 0);
ctx->mm = vma->vm_mm;
mmgrab(ctx->mm);
userfaultfd_ctx_get(octx);
+ down_write(&octx->map_changing_lock);
atomic_inc(&octx->mmap_changing);
+ up_write(&octx->map_changing_lock);
fctx->orig = octx;
fctx->new = ctx;
list_add_tail(&fctx->list, fcs);
@@ -776,7 +740,9 @@ void mremap_userfaultfd_prep(struct vm_area_struct *vma,
if (ctx->features & UFFD_FEATURE_EVENT_REMAP) {
vm_ctx->ctx = ctx;
userfaultfd_ctx_get(ctx);
+ down_write(&ctx->map_changing_lock);
atomic_inc(&ctx->mmap_changing);
+ up_write(&ctx->map_changing_lock);
} else {
/* Drop uffd context if remap feature not enabled */
vma_start_write(vma);
@@ -822,7 +788,9 @@ bool userfaultfd_remove(struct vm_area_struct *vma,
return true;
userfaultfd_ctx_get(ctx);
+ down_write(&ctx->map_changing_lock);
atomic_inc(&ctx->mmap_changing);
+ up_write(&ctx->map_changing_lock);
mmap_read_unlock(mm);
msg_init(&ewq.msg);
@@ -864,7 +832,9 @@ int userfaultfd_unmap_prep(struct vm_area_struct *vma, unsigned long start,
return -ENOMEM;
userfaultfd_ctx_get(ctx);
+ down_write(&ctx->map_changing_lock);
atomic_inc(&ctx->mmap_changing);
+ up_write(&ctx->map_changing_lock);
unmap_ctx->ctx = ctx;
unmap_ctx->start = start;
unmap_ctx->end = end;
@@ -1748,9 +1718,8 @@ static int userfaultfd_copy(struct userfaultfd_ctx *ctx,
if (uffdio_copy.mode & UFFDIO_COPY_MODE_WP)
flags |= MFILL_ATOMIC_WP;
if (mmget_not_zero(ctx->mm)) {
- ret = mfill_atomic_copy(ctx->mm, uffdio_copy.dst, uffdio_copy.src,
- uffdio_copy.len, &ctx->mmap_changing,
- flags);
+ ret = mfill_atomic_copy(ctx, uffdio_copy.dst, uffdio_copy.src,
+ uffdio_copy.len, flags);
mmput(ctx->mm);
} else {
return -ESRCH;
@@ -1800,9 +1769,8 @@ static int userfaultfd_zeropage(struct userfaultfd_ctx *ctx,
goto out;
if (mmget_not_zero(ctx->mm)) {
- ret = mfill_atomic_zeropage(ctx->mm, uffdio_zeropage.range.start,
- uffdio_zeropage.range.len,
- &ctx->mmap_changing);
+ ret = mfill_atomic_zeropage(ctx, uffdio_zeropage.range.start,
+ uffdio_zeropage.range.len);
mmput(ctx->mm);
} else {
return -ESRCH;
@@ -1857,9 +1825,8 @@ static int userfaultfd_writeprotect(struct userfaultfd_ctx *ctx,
return -EINVAL;
if (mmget_not_zero(ctx->mm)) {
- ret = mwriteprotect_range(ctx->mm, uffdio_wp.range.start,
- uffdio_wp.range.len, mode_wp,
- &ctx->mmap_changing);
+ ret = mwriteprotect_range(ctx, uffdio_wp.range.start,
+ uffdio_wp.range.len, mode_wp);
mmput(ctx->mm);
} else {
return -ESRCH;
@@ -1909,9 +1876,8 @@ static int userfaultfd_continue(struct userfaultfd_ctx *ctx, unsigned long arg)
flags |= MFILL_ATOMIC_WP;
if (mmget_not_zero(ctx->mm)) {
- ret = mfill_atomic_continue(ctx->mm, uffdio_continue.range.start,
- uffdio_continue.range.len,
- &ctx->mmap_changing, flags);
+ ret = mfill_atomic_continue(ctx, uffdio_continue.range.start,
+ uffdio_continue.range.len, flags);
mmput(ctx->mm);
} else {
return -ESRCH;
@@ -1964,9 +1930,8 @@ static inline int userfaultfd_poison(struct userfaultfd_ctx *ctx, unsigned long
goto out;
if (mmget_not_zero(ctx->mm)) {
- ret = mfill_atomic_poison(ctx->mm, uffdio_poison.range.start,
- uffdio_poison.range.len,
- &ctx->mmap_changing, 0);
+ ret = mfill_atomic_poison(ctx, uffdio_poison.range.start,
+ uffdio_poison.range.len, 0);
mmput(ctx->mm);
} else {
return -ESRCH;
@@ -2040,16 +2005,8 @@ static int userfaultfd_move(struct userfaultfd_ctx *ctx,
return -EINVAL;
if (mmget_not_zero(mm)) {
- mmap_read_lock(mm);
-
- /* Re-check after taking mmap_lock */
- if (likely(!atomic_read(&ctx->mmap_changing)))
- ret = move_pages(ctx, mm, uffdio_move.dst, uffdio_move.src,
- uffdio_move.len, uffdio_move.mode);
- else
- ret = -EINVAL;
-
- mmap_read_unlock(mm);
+ ret = move_pages(ctx, uffdio_move.dst, uffdio_move.src,
+ uffdio_move.len, uffdio_move.mode);
mmput(mm);
} else {
return -ESRCH;
@@ -2255,6 +2212,7 @@ static int new_userfaultfd(int flags)
ctx->flags = flags;
ctx->features = 0;
ctx->released = false;
+ init_rwsem(&ctx->map_changing_lock);
atomic_set(&ctx->mmap_changing, 0);
ctx->mm = current->mm;
/* prevent the mm struct to be freed */