diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2023-06-26 12:47:20 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2023-06-26 12:47:20 -0700 |
commit | a0433f8cae3ac51f59b4b1863032822aaa2d8164 (patch) | |
tree | 9eb7b096aa9f7fa53921e6ff247488f3a55471f5 /mm | |
parent | 0aa69d53ac7c30f6184f88f2e310d808b32b35a5 (diff) | |
parent | fcaa174a9c995cf0af3967e55644a1543ea07e36 (diff) |
Merge tag 'for-6.5/block-2023-06-23' of git://git.kernel.dk/linux
Pull block updates from Jens Axboe:
- NVMe pull request via Keith:
- Various cleanups all around (Irvin, Chaitanya, Christophe)
- Better struct packing (Christophe JAILLET)
- Reduce controller error logs for optional commands (Keith)
- Support for >=64KiB block sizes (Daniel Gomez)
- Fabrics fixes and code organization (Max, Chaitanya, Daniel
Wagner)
- bcache updates via Coly:
- Fix a race at init time (Mingzhe Zou)
- Misc fixes and cleanups (Andrea, Thomas, Zheng, Ye)
- use page pinning in the block layer for dio (David)
- convert old block dio code to page pinning (David, Christoph)
- cleanups for pktcdvd (Andy)
- cleanups for rnbd (Guoqing)
- use the unchecked __bio_add_page() for the initial single page
additions (Johannes)
- fix overflows in the Amiga partition handling code (Michael)
- improve mq-deadline zoned device support (Bart)
- keep passthrough requests out of the IO schedulers (Christoph, Ming)
- improve support for flush requests, making them less special to deal
with (Christoph)
- add bdev holder ops and shutdown methods (Christoph)
- fix the name_to_dev_t() situation and use cases (Christoph)
- decouple the block open flags from fmode_t (Christoph)
- ublk updates and cleanups, including adding user copy support (Ming)
- BFQ sanity checking (Bart)
- convert brd from radix to xarray (Pankaj)
- constify various structures (Thomas, Ivan)
- more fine grained persistent reservation ioctl capability checks
(Jingbo)
- misc fixes and cleanups (Arnd, Azeem, Demi, Ed, Hengqi, Hou, Jan,
Jordy, Li, Min, Yu, Zhong, Waiman)
* tag 'for-6.5/block-2023-06-23' of git://git.kernel.dk/linux: (266 commits)
scsi/sg: don't grab scsi host module reference
ext4: Fix warning in blkdev_put()
block: don't return -EINVAL for not found names in devt_from_devname
cdrom: Fix spectre-v1 gadget
block: Improve kernel-doc headers
blk-mq: don't insert passthrough request into sw queue
bsg: make bsg_class a static const structure
ublk: make ublk_chr_class a static const structure
aoe: make aoe_class a static const structure
block/rnbd: make all 'class' structures const
block: fix the exclusive open mask in disk_scan_partitions
block: add overflow checks for Amiga partition support
block: change all __u32 annotations to __be32 in affs_hardblocks.h
block: fix signed int overflow in Amiga partition support
block: add capacity validation in bdev_add_partition()
block: fine-granular CAP_SYS_ADMIN for Persistent Reservation
block: disallow Persistent Reservation on partitions
reiserfs: fix blkdev_put() warning from release_journal_dev()
block: fix wrong mode for blkdev_get_by_dev() from disk_scan_partitions()
block: document the holder argument to blkdev_get_by_path
...
Diffstat (limited to 'mm')
-rw-r--r-- | mm/gup.c | 58 | ||||
-rw-r--r-- | mm/page_io.c | 8 | ||||
-rw-r--r-- | mm/swapfile.c | 6 |
3 files changed, 64 insertions, 8 deletions
@@ -51,7 +51,8 @@ static inline void sanity_check_pinned_pages(struct page **pages, struct page *page = *pages; struct folio *folio = page_folio(page); - if (!folio_test_anon(folio)) + if (is_zero_page(page) || + !folio_test_anon(folio)) continue; if (!folio_test_large(folio) || folio_test_hugetlb(folio)) VM_BUG_ON_PAGE(!PageAnonExclusive(&folio->page), page); @@ -132,6 +133,13 @@ struct folio *try_grab_folio(struct page *page, int refs, unsigned int flags) struct folio *folio; /* + * Don't take a pin on the zero page - it's not going anywhere + * and it is used in a *lot* of places. + */ + if (is_zero_page(page)) + return page_folio(page); + + /* * Can't do FOLL_LONGTERM + FOLL_PIN gup fast path if not in a * right zone, so fail and let the caller fall back to the slow * path. @@ -180,6 +188,8 @@ struct folio *try_grab_folio(struct page *page, int refs, unsigned int flags) static void gup_put_folio(struct folio *folio, int refs, unsigned int flags) { if (flags & FOLL_PIN) { + if (is_zero_folio(folio)) + return; node_stat_mod_folio(folio, NR_FOLL_PIN_RELEASED, refs); if (folio_test_large(folio)) atomic_sub(refs, &folio->_pincount); @@ -225,6 +235,13 @@ int __must_check try_grab_page(struct page *page, unsigned int flags) folio_ref_inc(folio); else if (flags & FOLL_PIN) { /* + * Don't take a pin on the zero page - it's not going anywhere + * and it is used in a *lot* of places. + */ + if (is_zero_page(page)) + return 0; + + /* * Similar to try_grab_folio(): be sure to *also* * increment the normal page refcount field at least once, * so that the page really is pinned. @@ -258,6 +275,33 @@ void unpin_user_page(struct page *page) } EXPORT_SYMBOL(unpin_user_page); +/** + * folio_add_pin - Try to get an additional pin on a pinned folio + * @folio: The folio to be pinned + * + * Get an additional pin on a folio we already have a pin on. Makes no change + * if the folio is a zero_page. + */ +void folio_add_pin(struct folio *folio) +{ + if (is_zero_folio(folio)) + return; + + /* + * Similar to try_grab_folio(): be sure to *also* increment the normal + * page refcount field at least once, so that the page really is + * pinned. + */ + if (folio_test_large(folio)) { + WARN_ON_ONCE(atomic_read(&folio->_pincount) < 1); + folio_ref_inc(folio); + atomic_inc(&folio->_pincount); + } else { + WARN_ON_ONCE(folio_ref_count(folio) < GUP_PIN_COUNTING_BIAS); + folio_ref_add(folio, GUP_PIN_COUNTING_BIAS); + } +} + static inline struct folio *gup_folio_range_next(struct page *start, unsigned long npages, unsigned long i, unsigned int *ntails) { @@ -3079,6 +3123,9 @@ EXPORT_SYMBOL_GPL(get_user_pages_fast); * * FOLL_PIN means that the pages must be released via unpin_user_page(). Please * see Documentation/core-api/pin_user_pages.rst for further details. + * + * Note that if a zero_page is amongst the returned pages, it will not have + * pins in it and unpin_user_page() will not remove pins from it. */ int pin_user_pages_fast(unsigned long start, int nr_pages, unsigned int gup_flags, struct page **pages) @@ -3110,6 +3157,9 @@ EXPORT_SYMBOL_GPL(pin_user_pages_fast); * * FOLL_PIN means that the pages must be released via unpin_user_page(). Please * see Documentation/core-api/pin_user_pages.rst for details. + * + * Note that if a zero_page is amongst the returned pages, it will not have + * pins in it and unpin_user_page*() will not remove pins from it. */ long pin_user_pages_remote(struct mm_struct *mm, unsigned long start, unsigned long nr_pages, @@ -3143,6 +3193,9 @@ EXPORT_SYMBOL(pin_user_pages_remote); * * FOLL_PIN means that the pages must be released via unpin_user_page(). Please * see Documentation/core-api/pin_user_pages.rst for details. + * + * Note that if a zero_page is amongst the returned pages, it will not have + * pins in it and unpin_user_page*() will not remove pins from it. */ long pin_user_pages(unsigned long start, unsigned long nr_pages, unsigned int gup_flags, struct page **pages, @@ -3161,6 +3214,9 @@ EXPORT_SYMBOL(pin_user_pages); * pin_user_pages_unlocked() is the FOLL_PIN variant of * get_user_pages_unlocked(). Behavior is the same, except that this one sets * FOLL_PIN and rejects FOLL_GET. + * + * Note that if a zero_page is amongst the returned pages, it will not have + * pins in it and unpin_user_page*() will not remove pins from it. */ long pin_user_pages_unlocked(unsigned long start, unsigned long nr_pages, struct page **pages, unsigned int gup_flags) diff --git a/mm/page_io.c b/mm/page_io.c index 87b682d18850..684cd3c7b59b 100644 --- a/mm/page_io.c +++ b/mm/page_io.c @@ -338,7 +338,7 @@ static void swap_writepage_bdev_sync(struct page *page, bio_init(&bio, sis->bdev, &bv, 1, REQ_OP_WRITE | REQ_SWAP | wbc_to_write_flags(wbc)); bio.bi_iter.bi_sector = swap_page_sector(page); - bio_add_page(&bio, page, thp_size(page), 0); + __bio_add_page(&bio, page, thp_size(page), 0); bio_associate_blkg_from_page(&bio, page); count_swpout_vm_event(page); @@ -360,7 +360,7 @@ static void swap_writepage_bdev_async(struct page *page, GFP_NOIO); bio->bi_iter.bi_sector = swap_page_sector(page); bio->bi_end_io = end_swap_bio_write; - bio_add_page(bio, page, thp_size(page), 0); + __bio_add_page(bio, page, thp_size(page), 0); bio_associate_blkg_from_page(bio, page); count_swpout_vm_event(page); @@ -468,7 +468,7 @@ static void swap_readpage_bdev_sync(struct page *page, bio_init(&bio, sis->bdev, &bv, 1, REQ_OP_READ); bio.bi_iter.bi_sector = swap_page_sector(page); - bio_add_page(&bio, page, thp_size(page), 0); + __bio_add_page(&bio, page, thp_size(page), 0); /* * Keep this task valid during swap readpage because the oom killer may * attempt to access it in the page fault retry time check. @@ -488,7 +488,7 @@ static void swap_readpage_bdev_async(struct page *page, bio = bio_alloc(sis->bdev, 1, REQ_OP_READ, GFP_KERNEL); bio->bi_iter.bi_sector = swap_page_sector(page); bio->bi_end_io = end_swap_bio_read; - bio_add_page(bio, page, thp_size(page), 0); + __bio_add_page(bio, page, thp_size(page), 0); count_vm_event(PSWPIN); submit_bio(bio); } diff --git a/mm/swapfile.c b/mm/swapfile.c index 274bbf797480..6bc83060df9a 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -2539,7 +2539,7 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile) struct block_device *bdev = I_BDEV(inode); set_blocksize(bdev, old_block_size); - blkdev_put(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL); + blkdev_put(bdev, p); } inode_lock(inode); @@ -2770,7 +2770,7 @@ static int claim_swapfile(struct swap_info_struct *p, struct inode *inode) if (S_ISBLK(inode->i_mode)) { p->bdev = blkdev_get_by_dev(inode->i_rdev, - FMODE_READ | FMODE_WRITE | FMODE_EXCL, p); + BLK_OPEN_READ | BLK_OPEN_WRITE, p, NULL); if (IS_ERR(p->bdev)) { error = PTR_ERR(p->bdev); p->bdev = NULL; @@ -3221,7 +3221,7 @@ bad_swap: p->cluster_next_cpu = NULL; if (inode && S_ISBLK(inode->i_mode) && p->bdev) { set_blocksize(p->bdev, p->old_block_size); - blkdev_put(p->bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL); + blkdev_put(p->bdev, p); } inode = NULL; destroy_swap_extents(p); |