diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2022-08-04 20:19:16 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2022-08-04 20:19:16 -0700 |
commit | b2a88c212e652e94f1e4b635910972ac57ba4e97 (patch) | |
tree | f575188c9788c091d896218946261f75be7b8eb8 /fs/xfs/xfs_buf.c | |
parent | 9daee913dc8d15eb65e0ff560803ab1c28bb480b (diff) | |
parent | 5e9466a5d0604e20082d828008047b3165592caf (diff) |
Merge tag 'xfs-5.20-merge-6' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux
Pull xfs updates from Darrick Wong:
"The biggest changes for this release are the log scalability
improvements, lockless lookups for the buffer cache, and making the
attr fork a permanent part of the incore inode in preparation for
directory parent pointers.
There's also a bunch of bug fixes that have accumulated since -rc5. I
might send you a second pull request with some more bug fixes that I'm
still working on.
Once the merge window ends, I will hand maintainership back to Dave
Chinner until the 6.1-rc1 release so that I can conduct the design
review for the online fsck feature, and try to get it merged.
Summary:
- Improve scalability of the XFS log by removing spinlocks and global
synchronization points.
- Add security labels to whiteout inodes to match the other
filesystems.
- Clean up per-ag pointer passing to simplify call sites.
- Reduce verifier overhead by precalculating more AG geometry.
- Implement fast-path lockless lookups in the buffer cache to reduce
spinlock hammering.
- Make attr forks a permanent part of the inode structure to fix a
UAF bug and because most files these days tend to have security
labels and soon will have parent pointers too.
- Clean up XFS_IFORK_Q usage and give it a better name.
- Fix more UAF bugs in the xattr code.
- SOB my tags.
- Fix some typos in the timestamp range documentation.
- Fix a few more memory leaks.
- Code cleanups and typo fixes.
- Fix an unlocked inode fork pointer access in getbmap"
* tag 'xfs-5.20-merge-6' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux: (61 commits)
xfs: delete extra space and tab in blank line
xfs: fix NULL pointer dereference in xfs_getbmap()
xfs: Fix typo 'the the' in comment
xfs: Fix comment typo
xfs: don't leak memory when attr fork loading fails
xfs: fix for variable set but not used warning
xfs: xfs_buf cache destroy isn't RCU safe
xfs: delete unnecessary NULL checks
xfs: fix comment for start time value of inode with bigtime enabled
xfs: fix use-after-free in xattr node block inactivation
xfs: lockless buffer lookup
xfs: remove a superflous hash lookup when inserting new buffers
xfs: reduce the number of atomic when locking a buffer after lookup
xfs: merge xfs_buf_find() and xfs_buf_get_map()
xfs: break up xfs_buf_find() into individual pieces
xfs: add in-memory iunlink log item
xfs: add log item precommit operation
xfs: combine iunlink inode update functions
xfs: clean up xfs_iunlink_update_inode()
xfs: double link the unlinked inode list
...
Diffstat (limited to 'fs/xfs/xfs_buf.c')
-rw-r--r-- | fs/xfs/xfs_buf.c | 288 |
1 files changed, 149 insertions, 139 deletions
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 5e8f40d8c052..584afe076923 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -21,7 +21,7 @@ #include "xfs_error.h" #include "xfs_ag.h" -static struct kmem_cache *xfs_buf_cache; +struct kmem_cache *xfs_buf_cache; /* * Locking orders @@ -295,6 +295,16 @@ xfs_buf_free_pages( } static void +xfs_buf_free_callback( + struct callback_head *cb) +{ + struct xfs_buf *bp = container_of(cb, struct xfs_buf, b_rcu); + + xfs_buf_free_maps(bp); + kmem_cache_free(xfs_buf_cache, bp); +} + +static void xfs_buf_free( struct xfs_buf *bp) { @@ -307,8 +317,7 @@ xfs_buf_free( else if (bp->b_flags & _XBF_KMEM) kmem_free(bp->b_addr); - xfs_buf_free_maps(bp); - kmem_cache_free(xfs_buf_cache, bp); + call_rcu(&bp->b_rcu, xfs_buf_free_callback); } static int @@ -503,100 +512,45 @@ xfs_buf_hash_destroy( rhashtable_destroy(&pag->pag_buf_hash); } -/* - * Look up a buffer in the buffer cache and return it referenced and locked - * in @found_bp. - * - * If @new_bp is supplied and we have a lookup miss, insert @new_bp into the - * cache. - * - * If XBF_TRYLOCK is set in @flags, only try to lock the buffer and return - * -EAGAIN if we fail to lock it. - * - * Return values are: - * -EFSCORRUPTED if have been supplied with an invalid address - * -EAGAIN on trylock failure - * -ENOENT if we fail to find a match and @new_bp was NULL - * 0, with @found_bp: - * - @new_bp if we inserted it into the cache - * - the buffer we found and locked. - */ static int -xfs_buf_find( +xfs_buf_map_verify( struct xfs_buftarg *btp, - struct xfs_buf_map *map, - int nmaps, - xfs_buf_flags_t flags, - struct xfs_buf *new_bp, - struct xfs_buf **found_bp) + struct xfs_buf_map *map) { - struct xfs_perag *pag; - struct xfs_buf *bp; - struct xfs_buf_map cmap = { .bm_bn = map[0].bm_bn }; xfs_daddr_t eofs; - int i; - - *found_bp = NULL; - - for (i = 0; i < nmaps; i++) - cmap.bm_len += map[i].bm_len; /* Check for IOs smaller than the sector size / not sector aligned */ - ASSERT(!(BBTOB(cmap.bm_len) < btp->bt_meta_sectorsize)); - ASSERT(!(BBTOB(cmap.bm_bn) & (xfs_off_t)btp->bt_meta_sectormask)); + ASSERT(!(BBTOB(map->bm_len) < btp->bt_meta_sectorsize)); + ASSERT(!(BBTOB(map->bm_bn) & (xfs_off_t)btp->bt_meta_sectormask)); /* * Corrupted block numbers can get through to here, unfortunately, so we * have to check that the buffer falls within the filesystem bounds. */ eofs = XFS_FSB_TO_BB(btp->bt_mount, btp->bt_mount->m_sb.sb_dblocks); - if (cmap.bm_bn < 0 || cmap.bm_bn >= eofs) { + if (map->bm_bn < 0 || map->bm_bn >= eofs) { xfs_alert(btp->bt_mount, "%s: daddr 0x%llx out of range, EOFS 0x%llx", - __func__, cmap.bm_bn, eofs); + __func__, map->bm_bn, eofs); WARN_ON(1); return -EFSCORRUPTED; } - - pag = xfs_perag_get(btp->bt_mount, - xfs_daddr_to_agno(btp->bt_mount, cmap.bm_bn)); - - spin_lock(&pag->pag_buf_lock); - bp = rhashtable_lookup_fast(&pag->pag_buf_hash, &cmap, - xfs_buf_hash_params); - if (bp) { - atomic_inc(&bp->b_hold); - goto found; - } - - /* No match found */ - if (!new_bp) { - XFS_STATS_INC(btp->bt_mount, xb_miss_locked); - spin_unlock(&pag->pag_buf_lock); - xfs_perag_put(pag); - return -ENOENT; - } - - /* the buffer keeps the perag reference until it is freed */ - new_bp->b_pag = pag; - rhashtable_insert_fast(&pag->pag_buf_hash, &new_bp->b_rhash_head, - xfs_buf_hash_params); - spin_unlock(&pag->pag_buf_lock); - *found_bp = new_bp; return 0; +} -found: - spin_unlock(&pag->pag_buf_lock); - xfs_perag_put(pag); - - if (!xfs_buf_trylock(bp)) { - if (flags & XBF_TRYLOCK) { - xfs_buf_rele(bp); - XFS_STATS_INC(btp->bt_mount, xb_busy_locked); +static int +xfs_buf_find_lock( + struct xfs_buf *bp, + xfs_buf_flags_t flags) +{ + if (flags & XBF_TRYLOCK) { + if (!xfs_buf_trylock(bp)) { + XFS_STATS_INC(bp->b_mount, xb_busy_locked); return -EAGAIN; } + } else { xfs_buf_lock(bp); - XFS_STATS_INC(btp->bt_mount, xb_get_locked_waited); + XFS_STATS_INC(bp->b_mount, xb_get_locked_waited); } /* @@ -609,57 +563,59 @@ found: bp->b_flags &= _XBF_KMEM | _XBF_PAGES; bp->b_ops = NULL; } - - trace_xfs_buf_find(bp, flags, _RET_IP_); - XFS_STATS_INC(btp->bt_mount, xb_get_locked); - *found_bp = bp; return 0; } -struct xfs_buf * -xfs_buf_incore( - struct xfs_buftarg *target, - xfs_daddr_t blkno, - size_t numblks, - xfs_buf_flags_t flags) +static inline int +xfs_buf_lookup( + struct xfs_perag *pag, + struct xfs_buf_map *map, + xfs_buf_flags_t flags, + struct xfs_buf **bpp) { - struct xfs_buf *bp; + struct xfs_buf *bp; int error; - DEFINE_SINGLE_BUF_MAP(map, blkno, numblks); - error = xfs_buf_find(target, &map, 1, flags, NULL, &bp); - if (error) - return NULL; - return bp; + rcu_read_lock(); + bp = rhashtable_lookup(&pag->pag_buf_hash, map, xfs_buf_hash_params); + if (!bp || !atomic_inc_not_zero(&bp->b_hold)) { + rcu_read_unlock(); + return -ENOENT; + } + rcu_read_unlock(); + + error = xfs_buf_find_lock(bp, flags); + if (error) { + xfs_buf_rele(bp); + return error; + } + + trace_xfs_buf_find(bp, flags, _RET_IP_); + *bpp = bp; + return 0; } /* - * Assembles a buffer covering the specified range. The code is optimised for - * cache hits, as metadata intensive workloads will see 3 orders of magnitude - * more hits than misses. + * Insert the new_bp into the hash table. This consumes the perag reference + * taken for the lookup regardless of the result of the insert. */ -int -xfs_buf_get_map( - struct xfs_buftarg *target, +static int +xfs_buf_find_insert( + struct xfs_buftarg *btp, + struct xfs_perag *pag, + struct xfs_buf_map *cmap, struct xfs_buf_map *map, int nmaps, xfs_buf_flags_t flags, struct xfs_buf **bpp) { - struct xfs_buf *bp; struct xfs_buf *new_bp; + struct xfs_buf *bp; int error; - *bpp = NULL; - error = xfs_buf_find(target, map, nmaps, flags, NULL, &bp); - if (!error) - goto found; - if (error != -ENOENT) - return error; - - error = _xfs_buf_alloc(target, map, nmaps, flags, &new_bp); + error = _xfs_buf_alloc(btp, map, nmaps, flags, &new_bp); if (error) - return error; + goto out_drop_pag; /* * For buffers that fit entirely within a single page, first attempt to @@ -674,18 +630,94 @@ xfs_buf_get_map( goto out_free_buf; } - error = xfs_buf_find(target, map, nmaps, flags, new_bp, &bp); - if (error) + spin_lock(&pag->pag_buf_lock); + bp = rhashtable_lookup_get_insert_fast(&pag->pag_buf_hash, + &new_bp->b_rhash_head, xfs_buf_hash_params); + if (IS_ERR(bp)) { + error = PTR_ERR(bp); + spin_unlock(&pag->pag_buf_lock); goto out_free_buf; + } + if (bp) { + /* found an existing buffer */ + atomic_inc(&bp->b_hold); + spin_unlock(&pag->pag_buf_lock); + error = xfs_buf_find_lock(bp, flags); + if (error) + xfs_buf_rele(bp); + else + *bpp = bp; + goto out_free_buf; + } + + /* The new buffer keeps the perag reference until it is freed. */ + new_bp->b_pag = pag; + spin_unlock(&pag->pag_buf_lock); + *bpp = new_bp; + return 0; - if (bp != new_bp) - xfs_buf_free(new_bp); +out_free_buf: + xfs_buf_free(new_bp); +out_drop_pag: + xfs_perag_put(pag); + return error; +} -found: +/* + * Assembles a buffer covering the specified range. The code is optimised for + * cache hits, as metadata intensive workloads will see 3 orders of magnitude + * more hits than misses. + */ +int +xfs_buf_get_map( + struct xfs_buftarg *btp, + struct xfs_buf_map *map, + int nmaps, + xfs_buf_flags_t flags, + struct xfs_buf **bpp) +{ + struct xfs_perag *pag; + struct xfs_buf *bp = NULL; + struct xfs_buf_map cmap = { .bm_bn = map[0].bm_bn }; + int error; + int i; + + for (i = 0; i < nmaps; i++) + cmap.bm_len += map[i].bm_len; + + error = xfs_buf_map_verify(btp, &cmap); + if (error) + return error; + + pag = xfs_perag_get(btp->bt_mount, + xfs_daddr_to_agno(btp->bt_mount, cmap.bm_bn)); + + error = xfs_buf_lookup(pag, &cmap, flags, &bp); + if (error && error != -ENOENT) + goto out_put_perag; + + /* cache hits always outnumber misses by at least 10:1 */ + if (unlikely(!bp)) { + XFS_STATS_INC(btp->bt_mount, xb_miss_locked); + + if (flags & XBF_INCORE) + goto out_put_perag; + + /* xfs_buf_find_insert() consumes the perag reference. */ + error = xfs_buf_find_insert(btp, pag, &cmap, map, nmaps, + flags, &bp); + if (error) + return error; + } else { + XFS_STATS_INC(btp->bt_mount, xb_get_locked); + xfs_perag_put(pag); + } + + /* We do not hold a perag reference anymore. */ if (!bp->b_addr) { error = _xfs_buf_map_pages(bp, flags); if (unlikely(error)) { - xfs_warn_ratelimited(target->bt_mount, + xfs_warn_ratelimited(btp->bt_mount, "%s: failed to map %u pages", __func__, bp->b_page_count); xfs_buf_relse(bp); @@ -700,12 +732,13 @@ found: if (!(flags & XBF_READ)) xfs_buf_ioerror(bp, 0); - XFS_STATS_INC(target->bt_mount, xb_get); + XFS_STATS_INC(btp->bt_mount, xb_get); trace_xfs_buf_get(bp, flags, _RET_IP_); *bpp = bp; return 0; -out_free_buf: - xfs_buf_free(new_bp); + +out_put_perag: + xfs_perag_put(pag); return error; } @@ -2275,29 +2308,6 @@ xfs_buf_delwri_pushbuf( return error; } -int __init -xfs_buf_init(void) -{ - xfs_buf_cache = kmem_cache_create("xfs_buf", sizeof(struct xfs_buf), 0, - SLAB_HWCACHE_ALIGN | - SLAB_RECLAIM_ACCOUNT | - SLAB_MEM_SPREAD, - NULL); - if (!xfs_buf_cache) - goto out; - - return 0; - - out: - return -ENOMEM; -} - -void -xfs_buf_terminate(void) -{ - kmem_cache_destroy(xfs_buf_cache); -} - void xfs_buf_set_ref(struct xfs_buf *bp, int lru_ref) { /* |