From d40c2865bdbbbba6418436b0a877daebe1d7c63e Mon Sep 17 00:00:00 2001 From: Gao Xiang Date: Tue, 28 May 2024 12:12:39 +0800 Subject: xfs: avoid redundant AGFL buffer invalidation Currently AGFL blocks can be filled from the following three sources: - allocbt free blocks, as in xfs_allocbt_free_block(); - rmapbt free blocks, as in xfs_rmapbt_free_block(); - refilled from freespace btrees, as in xfs_alloc_fix_freelist(). Originally, allocbt free blocks would be marked as stale only when they are put back in the general free space pool as Dave mentioned on IRC, "we don't stale AGF metadata btree blocks when they are returned to the AGFL .. but once they get put back in the general free space pool, we have to make sure the buffers are marked stale as the next user of those blocks might be user data...." However, after commit ca250b1b3d71 ("xfs: invalidate allocbt blocks moved to the free list") and commit edfd9dd54921 ("xfs: move buffer invalidation to xfs_btree_free_block"), even allocbt / bmapbt free blocks will be invalidated immediately since they may fail to pass V5 format validation on writeback even though writeback to free space would be safe. IOWs, IMHO currently there is actually no difference between free blocks in the AGFL freespace pool and those in the general free space pool. 
So let's avoid extra redundant AGFL buffer invalidation, since otherwise we're currently facing unnecessary xfs_log_force() due to xfs_trans_binval() again on buffers already marked as stale before as below: [ 333.507469] Call Trace: [ 333.507862] xfs_buf_find+0x371/0x6a0 <- xfs_buf_lock [ 333.508451] xfs_buf_get_map+0x3f/0x230 [ 333.509062] xfs_trans_get_buf_map+0x11a/0x280 [ 333.509751] xfs_free_agfl_block+0xa1/0xd0 [ 333.510403] xfs_agfl_free_finish_item+0x16e/0x1d0 [ 333.511157] xfs_defer_finish_noroll+0x1ef/0x5c0 [ 333.511871] xfs_defer_finish+0xc/0xa0 [ 333.512471] xfs_itruncate_extents_flags+0x18a/0x5e0 [ 333.513253] xfs_inactive_truncate+0xb8/0x130 [ 333.513930] xfs_inactive+0x223/0x270 xfs_log_force() will take tens of milliseconds with AGF buffer locked. It becomes an unnecessary long latency especially on our PMEM devices with FSDAX enabled and fsops like xfs_reflink_find_shared() at the same time are stuck due to the same AGF lock. Removing the double invalidation on the AGFL blocks does not make this issue go away, but this patch fixes for our workloads in reality and it should also work by the code analysis. Note that I'm not sure I need to remove another redundant one in xfs_alloc_ag_vextent_small() since it's unrelated to our workloads. Also fstests are passed with this patch. Signed-off-by: Gao Xiang Reviewed-by: Dave Chinner Signed-off-by: Chandan Babu R --- fs/xfs/libxfs/xfs_alloc.c | 28 +--------------------------- 1 file changed, 1 insertion(+), 27 deletions(-) (limited to 'fs/xfs/libxfs/xfs_alloc.c') diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index 6c55a6e88eba..63315ddc46c6 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c @@ -1932,7 +1932,7 @@ out_nominleft: /* * Free the extent starting at agno/bno for length. 
*/ -STATIC int +int xfs_free_ag_extent( struct xfs_trans *tp, struct xfs_buf *agbp, @@ -2422,32 +2422,6 @@ xfs_alloc_space_available( return true; } -int -xfs_free_agfl_block( - struct xfs_trans *tp, - xfs_agnumber_t agno, - xfs_agblock_t agbno, - struct xfs_buf *agbp, - struct xfs_owner_info *oinfo) -{ - int error; - struct xfs_buf *bp; - - error = xfs_free_ag_extent(tp, agbp, agno, agbno, 1, oinfo, - XFS_AG_RESV_AGFL); - if (error) - return error; - - error = xfs_trans_get_buf(tp, tp->t_mountp->m_ddev_targp, - XFS_AGB_TO_DADDR(tp->t_mountp, agno, agbno), - tp->t_mountp->m_bsize, 0, &bp); - if (error) - return error; - xfs_trans_binval(tp, bp); - - return 0; -} - /* * Check the agfl fields of the agf for inconsistency or corruption. * -- cgit v1.2.3-70-g09d2 From 4e0e2c0fe35b44cd4db6a138ed4316178ed60b5c Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 2 Jul 2024 11:22:50 -0700 Subject: xfs: clean up extent free log intent item tracepoint callsites Pass the incore EFI structure to the tracepoints instead of open-coding the argument passing. This cleans up the call sites a bit. Signed-off-by: Darrick J. 
Wong Reviewed-by: Christoph Hellwig --- fs/xfs/libxfs/xfs_alloc.c | 7 +++---- fs/xfs/xfs_extfree_item.c | 6 ++---- fs/xfs/xfs_trace.h | 33 +++++++++++++++------------------ 3 files changed, 20 insertions(+), 26 deletions(-) (limited to 'fs/xfs/libxfs/xfs_alloc.c') diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index 63315ddc46c6..4d4fc37d738c 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c @@ -2544,7 +2544,7 @@ xfs_defer_agfl_block( xefi->xefi_owner = oinfo->oi_owner; xefi->xefi_agresv = XFS_AG_RESV_AGFL; - trace_xfs_agfl_free_defer(mp, agno, 0, agbno, 1); + trace_xfs_agfl_free_defer(mp, xefi); xfs_extent_free_get_group(mp, xefi); xfs_defer_add(tp, &xefi->xefi_list, &xfs_agfl_free_defer_type); @@ -2606,9 +2606,8 @@ xfs_defer_extent_free( } else { xefi->xefi_owner = XFS_RMAP_OWN_NULL; } - trace_xfs_bmap_free_defer(mp, - XFS_FSB_TO_AGNO(tp->t_mountp, bno), 0, - XFS_FSB_TO_AGBNO(tp->t_mountp, bno), len); + + trace_xfs_extent_free_defer(mp, xefi); xfs_extent_free_get_group(mp, xefi); *dfpp = xfs_defer_add(tp, &xefi->xefi_list, &xfs_extent_free_defer_type); diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c index 01ebbd7691a5..5a76af9d8560 100644 --- a/fs/xfs/xfs_extfree_item.c +++ b/fs/xfs/xfs_extfree_item.c @@ -464,8 +464,7 @@ xfs_extent_free_finish_item( if (xefi->xefi_flags & XFS_EFI_BMBT_BLOCK) oinfo.oi_flags |= XFS_OWNER_INFO_BMBT_BLOCK; - trace_xfs_bmap_free_deferred(tp->t_mountp, xefi->xefi_pag->pag_agno, 0, - agbno, xefi->xefi_blockcount); + trace_xfs_extent_free_deferred(mp, xefi); /* * If we need a new transaction to make progress, the caller will log a @@ -542,8 +541,7 @@ xfs_agfl_free_finish_item( agbno = XFS_FSB_TO_AGBNO(mp, xefi->xefi_startblock); oinfo.oi_owner = xefi->xefi_owner; - trace_xfs_agfl_free_deferred(mp, xefi->xefi_pag->pag_agno, 0, agbno, - xefi->xefi_blockcount); + trace_xfs_agfl_free_deferred(mp, xefi); error = xfs_alloc_read_agf(xefi->xefi_pag, tp, 0, &agbp); if (!error) diff --git 
a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index ba839ce6a9cf..b2ea9d5141a7 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -90,6 +90,7 @@ struct xfs_exchrange; struct xfs_getparents; struct xfs_parent_irec; struct xfs_attrlist_cursor_kern; +struct xfs_extent_free_item; #define XFS_ATTR_FILTER_FLAGS \ { XFS_ATTR_ROOT, "ROOT" }, \ @@ -2710,41 +2711,37 @@ DEFINE_DEFER_PENDING_EVENT(xfs_defer_item_pause); DEFINE_DEFER_PENDING_EVENT(xfs_defer_item_unpause); DECLARE_EVENT_CLASS(xfs_free_extent_deferred_class, - TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, - int type, xfs_agblock_t agbno, xfs_extlen_t len), - TP_ARGS(mp, agno, type, agbno, len), + TP_PROTO(struct xfs_mount *mp, struct xfs_extent_free_item *free), + TP_ARGS(mp, free), TP_STRUCT__entry( __field(dev_t, dev) __field(xfs_agnumber_t, agno) - __field(int, type) __field(xfs_agblock_t, agbno) __field(xfs_extlen_t, len) + __field(unsigned int, flags) ), TP_fast_assign( __entry->dev = mp->m_super->s_dev; - __entry->agno = agno; - __entry->type = type; - __entry->agbno = agbno; - __entry->len = len; + __entry->agno = XFS_FSB_TO_AGNO(mp, free->xefi_startblock); + __entry->agbno = XFS_FSB_TO_AGBNO(mp, free->xefi_startblock); + __entry->len = free->xefi_blockcount; + __entry->flags = free->xefi_flags; ), - TP_printk("dev %d:%d op %d agno 0x%x agbno 0x%x fsbcount 0x%x", + TP_printk("dev %d:%d agno 0x%x agbno 0x%x fsbcount 0x%x flags 0x%x", MAJOR(__entry->dev), MINOR(__entry->dev), - __entry->type, __entry->agno, __entry->agbno, - __entry->len) + __entry->len, + __entry->flags) ); #define DEFINE_FREE_EXTENT_DEFERRED_EVENT(name) \ DEFINE_EVENT(xfs_free_extent_deferred_class, name, \ - TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, \ - int type, \ - xfs_agblock_t bno, \ - xfs_extlen_t len), \ - TP_ARGS(mp, agno, type, bno, len)) -DEFINE_FREE_EXTENT_DEFERRED_EVENT(xfs_bmap_free_defer); -DEFINE_FREE_EXTENT_DEFERRED_EVENT(xfs_bmap_free_deferred); + TP_PROTO(struct xfs_mount *mp, struct 
xfs_extent_free_item *free), \ + TP_ARGS(mp, free)) DEFINE_FREE_EXTENT_DEFERRED_EVENT(xfs_agfl_free_defer); DEFINE_FREE_EXTENT_DEFERRED_EVENT(xfs_agfl_free_deferred); +DEFINE_FREE_EXTENT_DEFERRED_EVENT(xfs_extent_free_defer); +DEFINE_FREE_EXTENT_DEFERRED_EVENT(xfs_extent_free_deferred); DECLARE_EVENT_CLASS(xfs_defer_pending_item_class, TP_PROTO(struct xfs_mount *mp, struct xfs_defer_pending *dfp, -- cgit v1.2.3-70-g09d2 From 980faece91a60c279e7c24cb1d1a378bbbb74bb9 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 2 Jul 2024 11:22:51 -0700 Subject: xfs: convert "skip_discard" to a proper flags bitset Convert the boolean to skip discard on free into a proper flags field so that we can add more flags in the next patch. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig --- fs/xfs/libxfs/xfs_ag.c | 2 +- fs/xfs/libxfs/xfs_alloc.c | 13 +++++++------ fs/xfs/libxfs/xfs_alloc.h | 9 +++++++-- fs/xfs/libxfs/xfs_bmap.c | 12 ++++++++---- fs/xfs/libxfs/xfs_bmap_btree.c | 2 +- fs/xfs/libxfs/xfs_ialloc.c | 5 ++--- fs/xfs/libxfs/xfs_ialloc_btree.c | 2 +- fs/xfs/libxfs/xfs_refcount.c | 6 +++--- fs/xfs/libxfs/xfs_refcount_btree.c | 2 +- fs/xfs/scrub/newbt.c | 5 +++-- fs/xfs/scrub/reap.c | 7 ++++--- fs/xfs/xfs_reflink.c | 2 +- 12 files changed, 39 insertions(+), 28 deletions(-) (limited to 'fs/xfs/libxfs/xfs_alloc.c') diff --git a/fs/xfs/libxfs/xfs_ag.c b/fs/xfs/libxfs/xfs_ag.c index 240e079cb3fb..7e80732cb547 100644 --- a/fs/xfs/libxfs/xfs_ag.c +++ b/fs/xfs/libxfs/xfs_ag.c @@ -1008,7 +1008,7 @@ xfs_ag_shrink_space( goto resv_err; err2 = xfs_free_extent_later(*tpp, args.fsbno, delta, NULL, - XFS_AG_RESV_NONE, true); + XFS_AG_RESV_NONE, XFS_FREE_EXTENT_SKIP_DISCARD); if (err2) goto resv_err; diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index 4d4fc37d738c..089031151eed 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c @@ -2562,7 +2562,7 @@ xfs_defer_extent_free( xfs_filblks_t len, const struct xfs_owner_info *oinfo, enum 
xfs_ag_resv_type type, - bool skip_discard, + unsigned int free_flags, struct xfs_defer_pending **dfpp) { struct xfs_extent_free_item *xefi; @@ -2582,6 +2582,7 @@ xfs_defer_extent_free( ASSERT(len < mp->m_sb.sb_agblocks); ASSERT(agbno + len <= mp->m_sb.sb_agblocks); #endif + ASSERT(!(free_flags & ~XFS_FREE_EXTENT_ALL_FLAGS)); ASSERT(xfs_extfree_item_cache != NULL); ASSERT(type != XFS_AG_RESV_AGFL); @@ -2593,7 +2594,7 @@ xfs_defer_extent_free( xefi->xefi_startblock = bno; xefi->xefi_blockcount = (xfs_extlen_t)len; xefi->xefi_agresv = type; - if (skip_discard) + if (free_flags & XFS_FREE_EXTENT_SKIP_DISCARD) xefi->xefi_flags |= XFS_EFI_SKIP_DISCARD; if (oinfo) { ASSERT(oinfo->oi_offset == 0); @@ -2621,11 +2622,11 @@ xfs_free_extent_later( xfs_filblks_t len, const struct xfs_owner_info *oinfo, enum xfs_ag_resv_type type, - bool skip_discard) + unsigned int free_flags) { struct xfs_defer_pending *dontcare = NULL; - return xfs_defer_extent_free(tp, bno, len, oinfo, type, skip_discard, + return xfs_defer_extent_free(tp, bno, len, oinfo, type, free_flags, &dontcare); } @@ -2650,13 +2651,13 @@ xfs_free_extent_later( int xfs_alloc_schedule_autoreap( const struct xfs_alloc_arg *args, - bool skip_discard, + unsigned int free_flags, struct xfs_alloc_autoreap *aarp) { int error; error = xfs_defer_extent_free(args->tp, args->fsbno, args->len, - &args->oinfo, args->resv, skip_discard, &aarp->dfp); + &args->oinfo, args->resv, free_flags, &aarp->dfp); if (error) return error; diff --git a/fs/xfs/libxfs/xfs_alloc.h b/fs/xfs/libxfs/xfs_alloc.h index 3dc8e44fea76..7f51b3cb0349 100644 --- a/fs/xfs/libxfs/xfs_alloc.h +++ b/fs/xfs/libxfs/xfs_alloc.h @@ -235,7 +235,12 @@ xfs_buf_to_agfl_bno( int xfs_free_extent_later(struct xfs_trans *tp, xfs_fsblock_t bno, xfs_filblks_t len, const struct xfs_owner_info *oinfo, - enum xfs_ag_resv_type type, bool skip_discard); + enum xfs_ag_resv_type type, unsigned int free_flags); + +/* Don't issue a discard for the blocks freed. 
*/ +#define XFS_FREE_EXTENT_SKIP_DISCARD (1U << 0) + +#define XFS_FREE_EXTENT_ALL_FLAGS (XFS_FREE_EXTENT_SKIP_DISCARD) /* * List of extents to be free "later". @@ -264,7 +269,7 @@ struct xfs_alloc_autoreap { }; int xfs_alloc_schedule_autoreap(const struct xfs_alloc_arg *args, - bool skip_discard, struct xfs_alloc_autoreap *aarp); + unsigned int free_flags, struct xfs_alloc_autoreap *aarp); void xfs_alloc_cancel_autoreap(struct xfs_trans *tp, struct xfs_alloc_autoreap *aarp); void xfs_alloc_commit_autoreap(struct xfs_trans *tp, diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 09e3302a4b72..7df74c35d9f9 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -605,7 +605,7 @@ xfs_bmap_btree_to_extents( xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, whichfork); error = xfs_free_extent_later(cur->bc_tp, cbno, 1, &oinfo, - XFS_AG_RESV_NONE, false); + XFS_AG_RESV_NONE, 0); if (error) return error; @@ -5381,11 +5381,15 @@ xfs_bmap_del_extent_real( error = xfs_rtfree_blocks(tp, del->br_startblock, del->br_blockcount); } else { + unsigned int efi_flags = 0; + + if ((bflags & XFS_BMAPI_NODISCARD) || + del->br_state == XFS_EXT_UNWRITTEN) + efi_flags |= XFS_FREE_EXTENT_SKIP_DISCARD; + error = xfs_free_extent_later(tp, del->br_startblock, del->br_blockcount, NULL, - XFS_AG_RESV_NONE, - ((bflags & XFS_BMAPI_NODISCARD) || - del->br_state == XFS_EXT_UNWRITTEN)); + XFS_AG_RESV_NONE, efi_flags); } if (error) return error; diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c index f5d84dcb58da..d1b06ccde19e 100644 --- a/fs/xfs/libxfs/xfs_bmap_btree.c +++ b/fs/xfs/libxfs/xfs_bmap_btree.c @@ -282,7 +282,7 @@ xfs_bmbt_free_block( xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, cur->bc_ino.whichfork); error = xfs_free_extent_later(cur->bc_tp, fsbno, 1, &oinfo, - XFS_AG_RESV_NONE, false); + XFS_AG_RESV_NONE, 0); if (error) return error; diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c index f8d5ed7aedde..0af5b7a33d05 
100644 --- a/fs/xfs/libxfs/xfs_ialloc.c +++ b/fs/xfs/libxfs/xfs_ialloc.c @@ -1990,7 +1990,7 @@ xfs_difree_inode_chunk( return xfs_free_extent_later(tp, XFS_AGB_TO_FSB(mp, agno, sagbno), M_IGEO(mp)->ialloc_blks, &XFS_RMAP_OINFO_INODES, - XFS_AG_RESV_NONE, false); + XFS_AG_RESV_NONE, 0); } /* holemask is only 16-bits (fits in an unsigned long) */ @@ -2036,8 +2036,7 @@ xfs_difree_inode_chunk( ASSERT(contigblk % mp->m_sb.sb_spino_align == 0); error = xfs_free_extent_later(tp, XFS_AGB_TO_FSB(mp, agno, agbno), contigblk, - &XFS_RMAP_OINFO_INODES, XFS_AG_RESV_NONE, - false); + &XFS_RMAP_OINFO_INODES, XFS_AG_RESV_NONE, 0); if (error) return error; diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.c b/fs/xfs/libxfs/xfs_ialloc_btree.c index 42e9fd47f6c7..496e2f72a85b 100644 --- a/fs/xfs/libxfs/xfs_ialloc_btree.c +++ b/fs/xfs/libxfs/xfs_ialloc_btree.c @@ -170,7 +170,7 @@ __xfs_inobt_free_block( xfs_inobt_mod_blockcount(cur, -1); fsbno = XFS_DADDR_TO_FSB(cur->bc_mp, xfs_buf_daddr(bp)); return xfs_free_extent_later(cur->bc_tp, fsbno, 1, - &XFS_RMAP_OINFO_INOBT, resv, false); + &XFS_RMAP_OINFO_INOBT, resv, 0); } STATIC int diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c index 511c912d515c..4d8bb760c723 100644 --- a/fs/xfs/libxfs/xfs_refcount.c +++ b/fs/xfs/libxfs/xfs_refcount.c @@ -1173,7 +1173,7 @@ xfs_refcount_adjust_extents( tmp.rc_startblock); error = xfs_free_extent_later(cur->bc_tp, fsbno, tmp.rc_blockcount, NULL, - XFS_AG_RESV_NONE, false); + XFS_AG_RESV_NONE, 0); if (error) goto out_error; } @@ -1237,7 +1237,7 @@ xfs_refcount_adjust_extents( ext.rc_startblock); error = xfs_free_extent_later(cur->bc_tp, fsbno, ext.rc_blockcount, NULL, - XFS_AG_RESV_NONE, false); + XFS_AG_RESV_NONE, 0); if (error) goto out_error; } @@ -2022,7 +2022,7 @@ xfs_refcount_recover_cow_leftovers( /* Free the block. 
*/ error = xfs_free_extent_later(tp, fsb, rr->rr_rrec.rc_blockcount, NULL, - XFS_AG_RESV_NONE, false); + XFS_AG_RESV_NONE, 0); if (error) goto out_trans; diff --git a/fs/xfs/libxfs/xfs_refcount_btree.c b/fs/xfs/libxfs/xfs_refcount_btree.c index ca59f6c89f3e..cb3b1d42ae9a 100644 --- a/fs/xfs/libxfs/xfs_refcount_btree.c +++ b/fs/xfs/libxfs/xfs_refcount_btree.c @@ -109,7 +109,7 @@ xfs_refcountbt_free_block( be32_add_cpu(&agf->agf_refcount_blocks, -1); xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_REFCOUNT_BLOCKS); return xfs_free_extent_later(cur->bc_tp, fsbno, 1, - &XFS_RMAP_OINFO_REFC, XFS_AG_RESV_METADATA, false); + &XFS_RMAP_OINFO_REFC, XFS_AG_RESV_METADATA, 0); } STATIC int diff --git a/fs/xfs/scrub/newbt.c b/fs/xfs/scrub/newbt.c index 4a0271123d94..2aa14b7ab630 100644 --- a/fs/xfs/scrub/newbt.c +++ b/fs/xfs/scrub/newbt.c @@ -160,7 +160,8 @@ xrep_newbt_add_blocks( if (args->tp) { ASSERT(xnr->oinfo.oi_offset == 0); - error = xfs_alloc_schedule_autoreap(args, true, &resv->autoreap); + error = xfs_alloc_schedule_autoreap(args, + XFS_FREE_EXTENT_SKIP_DISCARD, &resv->autoreap); if (error) goto out_pag; } @@ -414,7 +415,7 @@ xrep_newbt_free_extent( */ fsbno = XFS_AGB_TO_FSB(sc->mp, resv->pag->pag_agno, free_agbno); error = xfs_free_extent_later(sc->tp, fsbno, free_aglen, &xnr->oinfo, - xnr->resv, true); + xnr->resv, XFS_FREE_EXTENT_SKIP_DISCARD); if (error) return error; diff --git a/fs/xfs/scrub/reap.c b/fs/xfs/scrub/reap.c index be283153c254..53697f3c5e1b 100644 --- a/fs/xfs/scrub/reap.c +++ b/fs/xfs/scrub/reap.c @@ -451,7 +451,7 @@ xreap_agextent_iter( xfs_refcount_free_cow_extent(sc->tp, fsbno, *aglenp); error = xfs_free_extent_later(sc->tp, fsbno, *aglenp, NULL, - rs->resv, true); + rs->resv, XFS_FREE_EXTENT_SKIP_DISCARD); if (error) return error; @@ -477,7 +477,7 @@ xreap_agextent_iter( * system with large EFIs. 
*/ error = xfs_free_extent_later(sc->tp, fsbno, *aglenp, rs->oinfo, - rs->resv, true); + rs->resv, XFS_FREE_EXTENT_SKIP_DISCARD); if (error) return error; @@ -943,7 +943,8 @@ xrep_reap_bmapi_iter( xfs_trans_mod_dquot_byino(sc->tp, ip, XFS_TRANS_DQ_BCOUNT, -(int64_t)imap->br_blockcount); return xfs_free_extent_later(sc->tp, imap->br_startblock, - imap->br_blockcount, NULL, XFS_AG_RESV_NONE, true); + imap->br_blockcount, NULL, XFS_AG_RESV_NONE, + XFS_FREE_EXTENT_SKIP_DISCARD); } /* diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index 265a2a418bc7..6fde6ec8092f 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -603,7 +603,7 @@ xfs_reflink_cancel_cow_blocks( error = xfs_free_extent_later(*tpp, del.br_startblock, del.br_blockcount, NULL, - XFS_AG_RESV_NONE, false); + XFS_AG_RESV_NONE, 0); if (error) break; -- cgit v1.2.3-70-g09d2 From 851a6781895a0f6e0ba75168dc7aecc132d13e6a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 2 Jul 2024 11:22:55 -0700 Subject: xfs: remove duplicate asserts in xfs_defer_extent_free The bno/len verification is already done by the calls to xfs_verify_rtbext / xfs_verify_fsbext, and reporting a corruption error seem like the better handling than tripping an assert anyway. Signed-off-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. 
Wong --- fs/xfs/libxfs/xfs_alloc.c | 13 ------------- 1 file changed, 13 deletions(-) (limited to 'fs/xfs/libxfs/xfs_alloc.c') diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index 089031151eed..adae37eb3d88 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c @@ -2567,23 +2567,10 @@ xfs_defer_extent_free( { struct xfs_extent_free_item *xefi; struct xfs_mount *mp = tp->t_mountp; -#ifdef DEBUG - xfs_agnumber_t agno; - xfs_agblock_t agbno; - ASSERT(bno != NULLFSBLOCK); - ASSERT(len > 0); ASSERT(len <= XFS_MAX_BMBT_EXTLEN); ASSERT(!isnullstartblock(bno)); - agno = XFS_FSB_TO_AGNO(mp, bno); - agbno = XFS_FSB_TO_AGBNO(mp, bno); - ASSERT(agno < mp->m_sb.sb_agcount); - ASSERT(agbno < mp->m_sb.sb_agblocks); - ASSERT(len < mp->m_sb.sb_agblocks); - ASSERT(agbno + len <= mp->m_sb.sb_agblocks); -#endif ASSERT(!(free_flags & ~XFS_FREE_EXTENT_ALL_FLAGS)); - ASSERT(xfs_extfree_item_cache != NULL); ASSERT(type != XFS_AG_RESV_AGFL); if (XFS_IS_CORRUPT(mp, !xfs_verify_fsbext(mp, bno, len))) -- cgit v1.2.3-70-g09d2 From 7272f77c67c0710918e5678266f8dad6e3bfc8d2 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 2 Jul 2024 11:22:56 -0700 Subject: xfs: remove xfs_defer_agfl_block xfs_free_extent_later can handle the extra AGFL special casing with very little extra logic. Signed-off-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/libxfs/xfs_alloc.c | 68 +++++++++++++++-------------------------------- 1 file changed, 22 insertions(+), 46 deletions(-) (limited to 'fs/xfs/libxfs/xfs_alloc.c') diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index adae37eb3d88..fecfd61f5de8 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c @@ -2509,48 +2509,6 @@ xfs_agfl_reset( clear_bit(XFS_AGSTATE_AGFL_NEEDS_RESET, &pag->pag_opstate); } -/* - * Defer an AGFL block free. 
This is effectively equivalent to - * xfs_free_extent_later() with some special handling particular to AGFL blocks. - * - * Deferring AGFL frees helps prevent log reservation overruns due to too many - * allocation operations in a transaction. AGFL frees are prone to this problem - * because for one they are always freed one at a time. Further, an immediate - * AGFL block free can cause a btree join and require another block free before - * the real allocation can proceed. Deferring the free disconnects freeing up - * the AGFL slot from freeing the block. - */ -static int -xfs_defer_agfl_block( - struct xfs_trans *tp, - xfs_agnumber_t agno, - xfs_agblock_t agbno, - struct xfs_owner_info *oinfo) -{ - struct xfs_mount *mp = tp->t_mountp; - struct xfs_extent_free_item *xefi; - xfs_fsblock_t fsbno = XFS_AGB_TO_FSB(mp, agno, agbno); - - ASSERT(xfs_extfree_item_cache != NULL); - ASSERT(oinfo != NULL); - - if (XFS_IS_CORRUPT(mp, !xfs_verify_fsbno(mp, fsbno))) - return -EFSCORRUPTED; - - xefi = kmem_cache_zalloc(xfs_extfree_item_cache, - GFP_KERNEL | __GFP_NOFAIL); - xefi->xefi_startblock = fsbno; - xefi->xefi_blockcount = 1; - xefi->xefi_owner = oinfo->oi_owner; - xefi->xefi_agresv = XFS_AG_RESV_AGFL; - - trace_xfs_agfl_free_defer(mp, xefi); - - xfs_extent_free_get_group(mp, xefi); - xfs_defer_add(tp, &xefi->xefi_list, &xfs_agfl_free_defer_type); - return 0; -} - /* * Add the extent to the list of extents to be free at transaction end. * The list is maintained sorted (by block number). 
@@ -2571,7 +2529,6 @@ xfs_defer_extent_free( ASSERT(len <= XFS_MAX_BMBT_EXTLEN); ASSERT(!isnullstartblock(bno)); ASSERT(!(free_flags & ~XFS_FREE_EXTENT_ALL_FLAGS)); - ASSERT(type != XFS_AG_RESV_AGFL); if (XFS_IS_CORRUPT(mp, !xfs_verify_fsbext(mp, bno, len))) return -EFSCORRUPTED; @@ -2598,7 +2555,13 @@ xfs_defer_extent_free( trace_xfs_extent_free_defer(mp, xefi); xfs_extent_free_get_group(mp, xefi); - *dfpp = xfs_defer_add(tp, &xefi->xefi_list, &xfs_extent_free_defer_type); + + if (xefi->xefi_agresv == XFS_AG_RESV_AGFL) + *dfpp = xfs_defer_add(tp, &xefi->xefi_list, + &xfs_agfl_free_defer_type); + else + *dfpp = xfs_defer_add(tp, &xefi->xefi_list, + &xfs_extent_free_defer_type); return 0; } @@ -2856,8 +2819,21 @@ xfs_alloc_fix_freelist( if (error) goto out_agbp_relse; - /* defer agfl frees */ - error = xfs_defer_agfl_block(tp, args->agno, bno, &targs.oinfo); + /* + * Defer the AGFL block free. + * + * This helps to prevent log reservation overruns due to too + * many allocation operations in a transaction. AGFL frees are + * prone to this problem because for one they are always freed + * one at a time. Further, an immediate AGFL block free can + * cause a btree join and require another block free before the + * real allocation can proceed. + * Deferring the free disconnects freeing up the AGFL slot from + * freeing the block. + */ + error = xfs_free_extent_later(tp, + XFS_AGB_TO_FSB(mp, args->agno, bno), 1, + &targs.oinfo, XFS_AG_RESV_AGFL, 0); if (error) goto out_agbp_relse; } -- cgit v1.2.3-70-g09d2 From 84a3c1576c5aade32170fae6c61d51bd2d16010f Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 2 Jul 2024 11:22:56 -0700 Subject: xfs: move xfs_extent_free_defer_add to xfs_extfree_item.c Move the code that adds the incore xfs_extent_free_item deferred work data to a transaction to live with the EFI log item code. This means that the allocator code no longer has to know about the inner workings of the EFI log items. 
As a consequence, we can get rid of the _{get,put}_group helpers. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig --- fs/xfs/libxfs/xfs_alloc.c | 12 ++---------- fs/xfs/libxfs/xfs_alloc.h | 3 --- fs/xfs/xfs_extfree_item.c | 31 +++++++++++++++++-------------- fs/xfs/xfs_extfree_item.h | 6 ++++++ 4 files changed, 25 insertions(+), 27 deletions(-) (limited to 'fs/xfs/libxfs/xfs_alloc.c') diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index fecfd61f5de8..ef4f5972da5d 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c @@ -27,6 +27,7 @@ #include "xfs_ag_resv.h" #include "xfs_bmap.h" #include "xfs_health.h" +#include "xfs_extfree_item.h" struct kmem_cache *xfs_extfree_item_cache; @@ -2552,16 +2553,7 @@ xfs_defer_extent_free( xefi->xefi_owner = XFS_RMAP_OWN_NULL; } - trace_xfs_extent_free_defer(mp, xefi); - - xfs_extent_free_get_group(mp, xefi); - - if (xefi->xefi_agresv == XFS_AG_RESV_AGFL) - *dfpp = xfs_defer_add(tp, &xefi->xefi_list, - &xfs_agfl_free_defer_type); - else - *dfpp = xfs_defer_add(tp, &xefi->xefi_list, - &xfs_extent_free_defer_type); + xfs_extent_free_defer_add(tp, xefi, dfpp); return 0; } diff --git a/fs/xfs/libxfs/xfs_alloc.h b/fs/xfs/libxfs/xfs_alloc.h index 7f51b3cb0349..fae170825be0 100644 --- a/fs/xfs/libxfs/xfs_alloc.h +++ b/fs/xfs/libxfs/xfs_alloc.h @@ -256,9 +256,6 @@ struct xfs_extent_free_item { enum xfs_ag_resv_type xefi_agresv; }; -void xfs_extent_free_get_group(struct xfs_mount *mp, - struct xfs_extent_free_item *xefi); - #define XFS_EFI_SKIP_DISCARD (1U << 0) /* don't issue discard */ #define XFS_EFI_ATTR_FORK (1U << 1) /* freeing attr fork block */ #define XFS_EFI_BMBT_BLOCK (1U << 2) /* freeing bmap btree block */ diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c index c755037a64d2..abffc74a924f 100644 --- a/fs/xfs/xfs_extfree_item.c +++ b/fs/xfs/xfs_extfree_item.c @@ -436,21 +436,24 @@ xfs_extent_free_create_done( return &efdp->efd_item; } -/* Take a passive ref to the 
AG containing the space we're freeing. */ +/* Add this deferred EFI to the transaction. */ void -xfs_extent_free_get_group( - struct xfs_mount *mp, - struct xfs_extent_free_item *xefi) +xfs_extent_free_defer_add( + struct xfs_trans *tp, + struct xfs_extent_free_item *xefi, + struct xfs_defer_pending **dfpp) { - xefi->xefi_pag = xfs_perag_intent_get(mp, xefi->xefi_startblock); -} + struct xfs_mount *mp = tp->t_mountp; -/* Release a passive AG ref after some freeing work. */ -static inline void -xfs_extent_free_put_group( - struct xfs_extent_free_item *xefi) -{ - xfs_perag_intent_put(xefi->xefi_pag); + trace_xfs_extent_free_defer(mp, xefi); + + xefi->xefi_pag = xfs_perag_intent_get(mp, xefi->xefi_startblock); + if (xefi->xefi_agresv == XFS_AG_RESV_AGFL) + *dfpp = xfs_defer_add(tp, &xefi->xefi_list, + &xfs_agfl_free_defer_type); + else + *dfpp = xfs_defer_add(tp, &xefi->xefi_list, + &xfs_extent_free_defer_type); } /* Cancel a free extent. */ @@ -460,7 +463,7 @@ xfs_extent_free_cancel_item( { struct xfs_extent_free_item *xefi = xefi_entry(item); - xfs_extent_free_put_group(xefi); + xfs_perag_intent_put(xefi->xefi_pag); kmem_cache_free(xfs_extfree_item_cache, xefi); } @@ -575,7 +578,7 @@ xfs_efi_recover_work( xefi->xefi_blockcount = extp->ext_len; xefi->xefi_agresv = XFS_AG_RESV_NONE; xefi->xefi_owner = XFS_RMAP_OWN_UNKNOWN; - xfs_extent_free_get_group(mp, xefi); + xefi->xefi_pag = xfs_perag_intent_get(mp, extp->ext_start); xfs_defer_add_item(dfp, &xefi->xefi_list); } diff --git a/fs/xfs/xfs_extfree_item.h b/fs/xfs/xfs_extfree_item.h index da6a5afa607c..41b7c4306079 100644 --- a/fs/xfs/xfs_extfree_item.h +++ b/fs/xfs/xfs_extfree_item.h @@ -88,4 +88,10 @@ xfs_efd_log_item_sizeof( extern struct kmem_cache *xfs_efi_cache; extern struct kmem_cache *xfs_efd_cache; +struct xfs_extent_free_item; + +void xfs_extent_free_defer_add(struct xfs_trans *tp, + struct xfs_extent_free_item *xefi, + struct xfs_defer_pending **dfpp); + #endif /* __XFS_EXTFREE_ITEM_H__ */ -- cgit 
v1.2.3-70-g09d2 From 94a0333b9212a114d19096a77903f76d0d5bca26 Mon Sep 17 00:00:00 2001 From: Zizhi Wo Date: Mon, 1 Jul 2024 14:02:36 +0800 Subject: xfs: Avoid races with cnt_btree lastrec updates A concurrent file creation and small write could unexpectedly return an -ENOSPC error since there is a race window in which the allocator could get the wrong agf->agf_longest. Write file process steps: 1) Find the entry that best meets the conditions, then calculate the start address and length of the remaining part of the entry after allocation. 2) Delete this entry and update the -current- agf->agf_longest. 3) Insert the remaining unused parts of this entry based on the calculations in 1), and update the agf->agf_longest again if necessary. Create file process steps: 1) Check whether there are free inodes in the inode chunk. 2) If there is no free inode, check whether there is space for creating inode chunks, performing the no-lock judgment first. 3) If the judgment succeeds, the judgment is performed again with the agf lock held. Otherwise, an error is returned directly. If the write process is in step 2) but has not reached 3) yet, and the create file process goes to 2) at this time, it may be mistaken for no space, so the file creation fails even though the file system still has space. We have sent two different commits to the community in order to fix this problem[1][2]. Unfortunately, both solutions have flaws. In [2], I discussed with Dave and Darrick, and realized that a better solution to this problem requires the "last cnt record tracking" to be ripped out of the generic btree code. And surprisingly, Dave directly provided his fix code. This patch includes appropriate modifications based on his tmp-code to address this issue. The entire fix can be roughly divided into two parts: 1) Delete the code related to lastrec-update in the generic btree code. 2) Place the process of updating longest freespace with cntbt separately to the end of the cntbt modifications. 
First move the cursor to the rightmost record, and update the longest free extent based on that record. Note that we can not update the longest with xfs_alloc_get_rec() after finding the longest record, as xfs_verify_agbno() may not pass because pag->block_count is updated on the outside. Therefore, use xfs_btree_get_rec() as a replacement. [1] https://lore.kernel.org/all/20240419061848.1032366-2-yebin10@huawei.com [2] https://lore.kernel.org/all/20240604071121.3981686-1-wozizhi@huawei.com Reported-by: Ye Bin Signed-off-by: Zizhi Wo Reviewed-by: Darrick J. Wong Signed-off-by: Chandan Babu R --- fs/xfs/libxfs/xfs_alloc.c | 114 ++++++++++++++++++++++++++++++++++++++++ fs/xfs/libxfs/xfs_alloc_btree.c | 64 ---------------------- fs/xfs/libxfs/xfs_btree.c | 51 ------------------ fs/xfs/libxfs/xfs_btree.h | 16 +----- 4 files changed, 115 insertions(+), 130 deletions(-) (limited to 'fs/xfs/libxfs/xfs_alloc.c') diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index ef4f5972da5d..59326f84f6a5 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c @@ -466,6 +466,97 @@ xfs_alloc_fix_len( args->len = rlen; } +/* + * Determine if the cursor points to the block that contains the right-most + * block of records in the by-count btree. This block contains the largest + * contiguous free extent in the AG, so if we modify a record in this block we + * need to call xfs_alloc_fixup_longest() once the modifications are done to + * ensure the agf->agf_longest field is kept up to date with the longest free + * extent tracked by the by-count btree. 
+ */ +static bool +xfs_alloc_cursor_at_lastrec( + struct xfs_btree_cur *cnt_cur) +{ + struct xfs_btree_block *block; + union xfs_btree_ptr ptr; + struct xfs_buf *bp; + + block = xfs_btree_get_block(cnt_cur, 0, &bp); + + xfs_btree_get_sibling(cnt_cur, block, &ptr, XFS_BB_RIGHTSIB); + return xfs_btree_ptr_is_null(cnt_cur, &ptr); +} + +/* + * Find the rightmost record of the cntbt, and return the longest free space + * recorded in it. Simply set both the block number and the length to their + * maximum values before searching. + */ +static int +xfs_cntbt_longest( + struct xfs_btree_cur *cnt_cur, + xfs_extlen_t *longest) +{ + struct xfs_alloc_rec_incore irec; + union xfs_btree_rec *rec; + int stat = 0; + int error; + + memset(&cnt_cur->bc_rec, 0xFF, sizeof(cnt_cur->bc_rec)); + error = xfs_btree_lookup(cnt_cur, XFS_LOOKUP_LE, &stat); + if (error) + return error; + if (!stat) { + /* totally empty tree */ + *longest = 0; + return 0; + } + + error = xfs_btree_get_rec(cnt_cur, &rec, &stat); + if (error) + return error; + if (XFS_IS_CORRUPT(cnt_cur->bc_mp, !stat)) { + xfs_btree_mark_sick(cnt_cur); + return -EFSCORRUPTED; + } + + xfs_alloc_btrec_to_irec(rec, &irec); + *longest = irec.ar_blockcount; + return 0; +} + +/* + * Update the longest contiguous free extent in the AG from the by-count cursor + * that is passed to us. This should be done at the end of any allocation or + * freeing operation that touches the longest extent in the btree. + * + * Needing to update the longest extent can be determined by calling + * xfs_alloc_cursor_at_lastrec() after the cursor is positioned for record + * modification but before the modification begins. + */ +static int +xfs_alloc_fixup_longest( + struct xfs_btree_cur *cnt_cur) +{ + struct xfs_perag *pag = cnt_cur->bc_ag.pag; + struct xfs_buf *bp = cnt_cur->bc_ag.agbp; + struct xfs_agf *agf = bp->b_addr; + xfs_extlen_t longest = 0; + int error; + + /* Lookup last rec in order to update AGF. 
*/ + error = xfs_cntbt_longest(cnt_cur, &longest); + if (error) + return error; + + pag->pagf_longest = longest; + agf->agf_longest = cpu_to_be32(pag->pagf_longest); + xfs_alloc_log_agf(cnt_cur->bc_tp, bp, XFS_AGF_LONGEST); + + return 0; +} + /* * Update the two btrees, logically removing from freespace the extent * starting at rbno, rlen blocks. The extent is contained within the @@ -490,6 +581,7 @@ xfs_alloc_fixup_trees( xfs_extlen_t nflen1=0; /* first new free length */ xfs_extlen_t nflen2=0; /* second new free length */ struct xfs_mount *mp; + bool fixup_longest = false; mp = cnt_cur->bc_mp; @@ -578,6 +670,10 @@ xfs_alloc_fixup_trees( nfbno2 = rbno + rlen; nflen2 = (fbno + flen) - nfbno2; } + + if (xfs_alloc_cursor_at_lastrec(cnt_cur)) + fixup_longest = true; + /* * Delete the entry from the by-size btree. */ @@ -655,6 +751,10 @@ xfs_alloc_fixup_trees( return -EFSCORRUPTED; } } + + if (fixup_longest) + return xfs_alloc_fixup_longest(cnt_cur); + return 0; } @@ -1957,6 +2057,7 @@ xfs_free_ag_extent( int i; int error; struct xfs_perag *pag = agbp->b_pag; + bool fixup_longest = false; bno_cur = cnt_cur = NULL; mp = tp->t_mountp; @@ -2220,8 +2321,13 @@ xfs_free_ag_extent( } xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR); bno_cur = NULL; + /* * In all cases we need to insert the new freespace in the by-size tree. + * + * If this new freespace is being inserted in the block that contains + * the largest free space in the btree, make sure we also fix up the + * agf->agf-longest tracker field. 
*/ if ((error = xfs_alloc_lookup_eq(cnt_cur, nbno, nlen, &i))) goto error0; @@ -2230,6 +2336,8 @@ xfs_free_ag_extent( error = -EFSCORRUPTED; goto error0; } + if (xfs_alloc_cursor_at_lastrec(cnt_cur)) + fixup_longest = true; if ((error = xfs_btree_insert(cnt_cur, &i))) goto error0; if (XFS_IS_CORRUPT(mp, i != 1)) { @@ -2237,6 +2345,12 @@ xfs_free_ag_extent( error = -EFSCORRUPTED; goto error0; } + if (fixup_longest) { + error = xfs_alloc_fixup_longest(cnt_cur); + if (error) + goto error0; + } + xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); cnt_cur = NULL; diff --git a/fs/xfs/libxfs/xfs_alloc_btree.c b/fs/xfs/libxfs/xfs_alloc_btree.c index 6ef5ddd89600..585e98e87ef9 100644 --- a/fs/xfs/libxfs/xfs_alloc_btree.c +++ b/fs/xfs/libxfs/xfs_alloc_btree.c @@ -115,67 +115,6 @@ xfs_allocbt_free_block( return 0; } -/* - * Update the longest extent in the AGF - */ -STATIC void -xfs_allocbt_update_lastrec( - struct xfs_btree_cur *cur, - const struct xfs_btree_block *block, - const union xfs_btree_rec *rec, - int ptr, - int reason) -{ - struct xfs_agf *agf = cur->bc_ag.agbp->b_addr; - struct xfs_perag *pag; - __be32 len; - int numrecs; - - ASSERT(!xfs_btree_is_bno(cur->bc_ops)); - - switch (reason) { - case LASTREC_UPDATE: - /* - * If this is the last leaf block and it's the last record, - * then update the size of the longest extent in the AG. 
- */ - if (ptr != xfs_btree_get_numrecs(block)) - return; - len = rec->alloc.ar_blockcount; - break; - case LASTREC_INSREC: - if (be32_to_cpu(rec->alloc.ar_blockcount) <= - be32_to_cpu(agf->agf_longest)) - return; - len = rec->alloc.ar_blockcount; - break; - case LASTREC_DELREC: - numrecs = xfs_btree_get_numrecs(block); - if (ptr <= numrecs) - return; - ASSERT(ptr == numrecs + 1); - - if (numrecs) { - xfs_alloc_rec_t *rrp; - - rrp = XFS_ALLOC_REC_ADDR(cur->bc_mp, block, numrecs); - len = rrp->ar_blockcount; - } else { - len = 0; - } - - break; - default: - ASSERT(0); - return; - } - - agf->agf_longest = len; - pag = cur->bc_ag.agbp->b_pag; - pag->pagf_longest = be32_to_cpu(len); - xfs_alloc_log_agf(cur->bc_tp, cur->bc_ag.agbp, XFS_AGF_LONGEST); -} - STATIC int xfs_allocbt_get_minrecs( struct xfs_btree_cur *cur, @@ -493,7 +432,6 @@ const struct xfs_btree_ops xfs_bnobt_ops = { .set_root = xfs_allocbt_set_root, .alloc_block = xfs_allocbt_alloc_block, .free_block = xfs_allocbt_free_block, - .update_lastrec = xfs_allocbt_update_lastrec, .get_minrecs = xfs_allocbt_get_minrecs, .get_maxrecs = xfs_allocbt_get_maxrecs, .init_key_from_rec = xfs_allocbt_init_key_from_rec, @@ -511,7 +449,6 @@ const struct xfs_btree_ops xfs_bnobt_ops = { const struct xfs_btree_ops xfs_cntbt_ops = { .name = "cnt", .type = XFS_BTREE_TYPE_AG, - .geom_flags = XFS_BTGEO_LASTREC_UPDATE, .rec_len = sizeof(xfs_alloc_rec_t), .key_len = sizeof(xfs_alloc_key_t), @@ -525,7 +462,6 @@ const struct xfs_btree_ops xfs_cntbt_ops = { .set_root = xfs_allocbt_set_root, .alloc_block = xfs_allocbt_alloc_block, .free_block = xfs_allocbt_free_block, - .update_lastrec = xfs_allocbt_update_lastrec, .get_minrecs = xfs_allocbt_get_minrecs, .get_maxrecs = xfs_allocbt_get_maxrecs, .init_key_from_rec = xfs_allocbt_init_key_from_rec, diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c index d29547572a68..a5c4af148853 100644 --- a/fs/xfs/libxfs/xfs_btree.c +++ b/fs/xfs/libxfs/xfs_btree.c @@ -1331,30 +1331,6 @@ 
xfs_btree_init_block_cur( xfs_btree_owner(cur)); } -/* - * Return true if ptr is the last record in the btree and - * we need to track updates to this record. The decision - * will be further refined in the update_lastrec method. - */ -STATIC int -xfs_btree_is_lastrec( - struct xfs_btree_cur *cur, - struct xfs_btree_block *block, - int level) -{ - union xfs_btree_ptr ptr; - - if (level > 0) - return 0; - if (!(cur->bc_ops->geom_flags & XFS_BTGEO_LASTREC_UPDATE)) - return 0; - - xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB); - if (!xfs_btree_ptr_is_null(cur, &ptr)) - return 0; - return 1; -} - STATIC void xfs_btree_buf_to_ptr( struct xfs_btree_cur *cur, @@ -2420,15 +2396,6 @@ xfs_btree_update( xfs_btree_copy_recs(cur, rp, rec, 1); xfs_btree_log_recs(cur, bp, ptr, ptr); - /* - * If we are tracking the last record in the tree and - * we are at the far right edge of the tree, update it. - */ - if (xfs_btree_is_lastrec(cur, block, 0)) { - cur->bc_ops->update_lastrec(cur, block, rec, - ptr, LASTREC_UPDATE); - } - /* Pass new key value up to our parent. */ if (xfs_btree_needs_key_update(cur, ptr)) { error = xfs_btree_update_keys(cur, 0); @@ -3617,15 +3584,6 @@ xfs_btree_insrec( goto error0; } - /* - * If we are tracking the last record in the tree and - * we are at the far right edge of the tree, update it. - */ - if (xfs_btree_is_lastrec(cur, block, level)) { - cur->bc_ops->update_lastrec(cur, block, rec, - ptr, LASTREC_INSREC); - } - /* * Return the new block number, if any. * If there is one, give back a record value and a cursor too. @@ -3983,15 +3941,6 @@ xfs_btree_delrec( xfs_btree_set_numrecs(block, --numrecs); xfs_btree_log_block(cur, bp, XFS_BB_NUMRECS); - /* - * If we are tracking the last record in the tree and - * we are at the far right edge of the tree, update it. - */ - if (xfs_btree_is_lastrec(cur, block, level)) { - cur->bc_ops->update_lastrec(cur, block, NULL, - ptr, LASTREC_DELREC); - } - /* * We're at the root level. 
First, shrink the root block in-memory. * Try to get rid of the next level down. If we can't then there's diff --git a/fs/xfs/libxfs/xfs_btree.h b/fs/xfs/libxfs/xfs_btree.h index f93374278aa1..10b7ddc3b2b3 100644 --- a/fs/xfs/libxfs/xfs_btree.h +++ b/fs/xfs/libxfs/xfs_btree.h @@ -154,12 +154,6 @@ struct xfs_btree_ops { int *stat); int (*free_block)(struct xfs_btree_cur *cur, struct xfs_buf *bp); - /* update last record information */ - void (*update_lastrec)(struct xfs_btree_cur *cur, - const struct xfs_btree_block *block, - const union xfs_btree_rec *rec, - int ptr, int reason); - /* records in block/level */ int (*get_minrecs)(struct xfs_btree_cur *cur, int level); int (*get_maxrecs)(struct xfs_btree_cur *cur, int level); @@ -222,15 +216,7 @@ struct xfs_btree_ops { }; /* btree geometry flags */ -#define XFS_BTGEO_LASTREC_UPDATE (1U << 0) /* track last rec externally */ -#define XFS_BTGEO_OVERLAPPING (1U << 1) /* overlapping intervals */ - -/* - * Reasons for the update_lastrec method to be called. - */ -#define LASTREC_UPDATE 0 -#define LASTREC_INSREC 1 -#define LASTREC_DELREC 2 +#define XFS_BTGEO_OVERLAPPING (1U << 0) /* overlapping intervals */ union xfs_btree_irec { -- cgit v1.2.3-70-g09d2