diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2024-10-10 09:45:45 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2024-10-10 09:45:45 -0700 |
commit | 825ec756afeeb082395ac6430e7b07e3a9997665 (patch) | |
tree | 19f64ce7c6ac48b36cc392919b295c126d9e9613 | |
parent | d3d1556696c1a993eec54ac585fe5bf677e07474 (diff) | |
parent | 77bfe1b11ea0c0c4b0ce19b742cd1aa82f60e45d (diff) |
Merge tag 'xfs-6.12-fixes-3' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux
Pull xfs fixes from Carlos Maiolino:
- A few small typo fixes
- fstests xfs/538 DEBUG-only fix
- Performance fix on blockgc on COW'ed files, by skipping trims on
cowblock inodes currently opened for write
- Prevent cowblocks from being freed under dirty pagecache during unshare
- Update MAINTAINERS file to list the new maintainer
* tag 'xfs-6.12-fixes-3' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux:
xfs: fix a typo
xfs: don't free cowblocks from under dirty pagecache on unshare
xfs: skip background cowblock trims on inodes open for write
xfs: support lowmode allocations in xfs_bmap_exact_minlen_extent_alloc
xfs: call xfs_bmap_exact_minlen_extent_alloc from xfs_bmap_btalloc
xfs: don't ifdef around the exact minlen allocations
xfs: fold xfs_bmap_alloc_userdata into xfs_bmapi_allocate
xfs: distinguish extra split from real ENOSPC from xfs_attr_node_try_addname
xfs: distinguish extra split from real ENOSPC from xfs_attr3_leaf_split
xfs: return bool from xfs_attr3_leaf_add
xfs: merge xfs_attr_leaf_try_add into xfs_attr_leaf_addname
xfs: Use try_cmpxchg() in xlog_cil_insert_pcp_aggregate()
xfs: scrub: convert comma to semicolon
xfs: Remove empty declaration in header file
MAINTAINERS: add Carlos Maiolino as XFS release manager
-rw-r--r-- | MAINTAINERS | 2 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_alloc.c | 7 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_alloc.h | 4 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_attr.c | 190 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_attr_leaf.c | 40 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_attr_leaf.h | 2 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_bmap.c | 140 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_da_btree.c | 5 | ||||
-rw-r--r-- | fs/xfs/scrub/ialloc_repair.c | 4 | ||||
-rw-r--r-- | fs/xfs/xfs_icache.c | 37 | ||||
-rw-r--r-- | fs/xfs/xfs_log.h | 2 | ||||
-rw-r--r-- | fs/xfs/xfs_log_cil.c | 11 | ||||
-rw-r--r-- | fs/xfs/xfs_log_recover.c | 2 | ||||
-rw-r--r-- | fs/xfs/xfs_reflink.c | 3 | ||||
-rw-r--r-- | fs/xfs/xfs_reflink.h | 19 |
15 files changed, 207 insertions, 261 deletions
diff --git a/MAINTAINERS b/MAINTAINERS index a097afd76ded..d01256208c9f 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -25404,7 +25404,7 @@ F: include/xen/arm/swiotlb-xen.h F: include/xen/swiotlb-xen.h XFS FILESYSTEM -M: Chandan Babu R <chandan.babu@oracle.com> +M: Carlos Maiolino <cem@kernel.org> R: Darrick J. Wong <djwong@kernel.org> L: linux-xfs@vger.kernel.org S: Supported diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index 59326f84f6a5..04f64cf9777e 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c @@ -2766,7 +2766,6 @@ xfs_alloc_commit_autoreap( xfs_defer_item_unpause(tp, aarp->dfp); } -#ifdef DEBUG /* * Check if an AGF has a free extent record whose length is equal to * args->minlen. @@ -2806,7 +2805,6 @@ out: return error; } -#endif /* * Decide whether to use this allocation group for this allocation. @@ -2880,15 +2878,14 @@ xfs_alloc_fix_freelist( if (!xfs_alloc_space_available(args, need, alloc_flags)) goto out_agbp_relse; -#ifdef DEBUG - if (args->alloc_minlen_only) { + if (IS_ENABLED(CONFIG_XFS_DEBUG) && args->alloc_minlen_only) { int stat; error = xfs_exact_minlen_extent_available(args, agbp, &stat); if (error || !stat) goto out_agbp_relse; } -#endif + /* * Make the freelist shorter if it's too long. 
* diff --git a/fs/xfs/libxfs/xfs_alloc.h b/fs/xfs/libxfs/xfs_alloc.h index fae170825be0..0165452e7cd0 100644 --- a/fs/xfs/libxfs/xfs_alloc.h +++ b/fs/xfs/libxfs/xfs_alloc.h @@ -53,11 +53,9 @@ typedef struct xfs_alloc_arg { int datatype; /* mask defining data type treatment */ char wasdel; /* set if allocation was prev delayed */ char wasfromfl; /* set if allocation is from freelist */ + bool alloc_minlen_only; /* allocate exact minlen extent */ struct xfs_owner_info oinfo; /* owner of blocks being allocated */ enum xfs_ag_resv_type resv; /* block reservation to use */ -#ifdef DEBUG - bool alloc_minlen_only; /* allocate exact minlen extent */ -#endif } xfs_alloc_arg_t; /* diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c index f30bcc64100d..c63da14eee04 100644 --- a/fs/xfs/libxfs/xfs_attr.c +++ b/fs/xfs/libxfs/xfs_attr.c @@ -51,7 +51,6 @@ STATIC int xfs_attr_shortform_addname(xfs_da_args_t *args); STATIC int xfs_attr_leaf_get(xfs_da_args_t *args); STATIC int xfs_attr_leaf_removename(xfs_da_args_t *args); STATIC int xfs_attr_leaf_hasname(struct xfs_da_args *args, struct xfs_buf **bp); -STATIC int xfs_attr_leaf_try_add(struct xfs_da_args *args); /* * Internal routines when attribute list is more than one block. @@ -437,6 +436,33 @@ xfs_attr_hashval( return xfs_attr_hashname(name, namelen); } +/* Save the current remote block info and clear the current pointers. 
*/ +static void +xfs_attr_save_rmt_blk( + struct xfs_da_args *args) +{ + args->blkno2 = args->blkno; + args->index2 = args->index; + args->rmtblkno2 = args->rmtblkno; + args->rmtblkcnt2 = args->rmtblkcnt; + args->rmtvaluelen2 = args->rmtvaluelen; + args->rmtblkno = 0; + args->rmtblkcnt = 0; + args->rmtvaluelen = 0; +} + +/* Set stored info about a remote block */ +static void +xfs_attr_restore_rmt_blk( + struct xfs_da_args *args) +{ + args->blkno = args->blkno2; + args->index = args->index2; + args->rmtblkno = args->rmtblkno2; + args->rmtblkcnt = args->rmtblkcnt2; + args->rmtvaluelen = args->rmtvaluelen2; +} + /* * PPTR_REPLACE operations require the caller to set the old and new names and * values explicitly. Update the canonical fields to the new name and value @@ -482,48 +508,73 @@ xfs_attr_complete_op( return replace_state; } +/* + * Try to add an attribute to an inode in leaf form. + */ static int xfs_attr_leaf_addname( struct xfs_attr_intent *attr) { struct xfs_da_args *args = attr->xattri_da_args; + struct xfs_buf *bp; int error; ASSERT(xfs_attr_is_leaf(args->dp)); + error = xfs_attr3_leaf_read(args->trans, args->dp, args->owner, 0, &bp); + if (error) + return error; + /* - * Use the leaf buffer we may already hold locked as a result of - * a sf-to-leaf conversion. + * Look up the xattr name to set the insertion point for the new xattr. */ - error = xfs_attr_leaf_try_add(args); - - if (error == -ENOSPC) { - error = xfs_attr3_leaf_to_node(args); - if (error) - return error; + error = xfs_attr3_leaf_lookup_int(bp, args); + switch (error) { + case -ENOATTR: + if (args->op_flags & XFS_DA_OP_REPLACE) + goto out_brelse; + break; + case -EEXIST: + if (!(args->op_flags & XFS_DA_OP_REPLACE)) + goto out_brelse; + trace_xfs_attr_leaf_replace(args); /* - * We're not in leaf format anymore, so roll the transaction and - * retry the add to the newly allocated node block. 
+ * Save the existing remote attr state so that the current + * values reflect the state of the new attribute we are about to + * add, not the attribute we just found and will remove later. */ - attr->xattri_dela_state = XFS_DAS_NODE_ADD; - goto out; + xfs_attr_save_rmt_blk(args); + break; + case 0: + break; + default: + goto out_brelse; } - if (error) - return error; /* * We need to commit and roll if we need to allocate remote xattr blocks * or perform more xattr manipulations. Otherwise there is nothing more * to do and we can return success. */ - if (args->rmtblkno) + if (!xfs_attr3_leaf_add(bp, args)) { + error = xfs_attr3_leaf_to_node(args); + if (error) + return error; + + attr->xattri_dela_state = XFS_DAS_NODE_ADD; + } else if (args->rmtblkno) { attr->xattri_dela_state = XFS_DAS_LEAF_SET_RMT; - else - attr->xattri_dela_state = xfs_attr_complete_op(attr, - XFS_DAS_LEAF_REPLACE); -out: + } else { + attr->xattri_dela_state = + xfs_attr_complete_op(attr, XFS_DAS_LEAF_REPLACE); + } + trace_xfs_attr_leaf_addname_return(attr->xattri_dela_state, args->dp); + return 0; + +out_brelse: + xfs_trans_brelse(args->trans, bp); return error; } @@ -546,7 +597,7 @@ xfs_attr_node_addname( return error; error = xfs_attr_node_try_addname(attr); - if (error == -ENOSPC) { + if (error == 1) { error = xfs_attr3_leaf_to_node(args); if (error) return error; @@ -1170,88 +1221,6 @@ xfs_attr_shortform_addname( * External routines when attribute list is one block *========================================================================*/ -/* Save the current remote block info and clear the current pointers. 
*/ -static void -xfs_attr_save_rmt_blk( - struct xfs_da_args *args) -{ - args->blkno2 = args->blkno; - args->index2 = args->index; - args->rmtblkno2 = args->rmtblkno; - args->rmtblkcnt2 = args->rmtblkcnt; - args->rmtvaluelen2 = args->rmtvaluelen; - args->rmtblkno = 0; - args->rmtblkcnt = 0; - args->rmtvaluelen = 0; -} - -/* Set stored info about a remote block */ -static void -xfs_attr_restore_rmt_blk( - struct xfs_da_args *args) -{ - args->blkno = args->blkno2; - args->index = args->index2; - args->rmtblkno = args->rmtblkno2; - args->rmtblkcnt = args->rmtblkcnt2; - args->rmtvaluelen = args->rmtvaluelen2; -} - -/* - * Tries to add an attribute to an inode in leaf form - * - * This function is meant to execute as part of a delayed operation and leaves - * the transaction handling to the caller. On success the attribute is added - * and the inode and transaction are left dirty. If there is not enough space, - * the attr data is converted to node format and -ENOSPC is returned. Caller is - * responsible for handling the dirty inode and transaction or adding the attr - * in node format. - */ -STATIC int -xfs_attr_leaf_try_add( - struct xfs_da_args *args) -{ - struct xfs_buf *bp; - int error; - - error = xfs_attr3_leaf_read(args->trans, args->dp, args->owner, 0, &bp); - if (error) - return error; - - /* - * Look up the xattr name to set the insertion point for the new xattr. - */ - error = xfs_attr3_leaf_lookup_int(bp, args); - switch (error) { - case -ENOATTR: - if (args->op_flags & XFS_DA_OP_REPLACE) - goto out_brelse; - break; - case -EEXIST: - if (!(args->op_flags & XFS_DA_OP_REPLACE)) - goto out_brelse; - - trace_xfs_attr_leaf_replace(args); - /* - * Save the existing remote attr state so that the current - * values reflect the state of the new attribute we are about to - * add, not the attribute we just found and will remove later. 
- */ - xfs_attr_save_rmt_blk(args); - break; - case 0: - break; - default: - goto out_brelse; - } - - return xfs_attr3_leaf_add(bp, args); - -out_brelse: - xfs_trans_brelse(args->trans, bp); - return error; -} - /* * Return EEXIST if attr is found, or ENOATTR if not */ @@ -1417,9 +1386,12 @@ error: /* * Add a name to a Btree-format attribute list. * - * This will involve walking down the Btree, and may involve splitting - * leaf nodes and even splitting intermediate nodes up to and including - * the root node (a special case of an intermediate node). + * This will involve walking down the Btree, and may involve splitting leaf + * nodes and even splitting intermediate nodes up to and including the root + * node (a special case of an intermediate node). + * + * If the tree was still in single leaf format and needs to converted to + * real node format return 1 and let the caller handle that. */ static int xfs_attr_node_try_addname( @@ -1427,21 +1399,21 @@ xfs_attr_node_try_addname( { struct xfs_da_state *state = attr->xattri_da_state; struct xfs_da_state_blk *blk; - int error; + int error = 0; trace_xfs_attr_node_addname(state->args); blk = &state->path.blk[state->path.active-1]; ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC); - error = xfs_attr3_leaf_add(blk->bp, state->args); - if (error == -ENOSPC) { + if (!xfs_attr3_leaf_add(blk->bp, state->args)) { if (state->path.active == 1) { /* * Its really a single leaf node, but it had * out-of-line values so it looked like it *might* * have been a b-tree. Let the caller deal with this. 
*/ + error = 1; goto out; } diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c index e50d913ad32f..fddb55605e0c 100644 --- a/fs/xfs/libxfs/xfs_attr_leaf.c +++ b/fs/xfs/libxfs/xfs_attr_leaf.c @@ -47,7 +47,7 @@ */ STATIC int xfs_attr3_leaf_create(struct xfs_da_args *args, xfs_dablk_t which_block, struct xfs_buf **bpp); -STATIC int xfs_attr3_leaf_add_work(struct xfs_buf *leaf_buffer, +STATIC void xfs_attr3_leaf_add_work(struct xfs_buf *leaf_buffer, struct xfs_attr3_icleaf_hdr *ichdr, struct xfs_da_args *args, int freemap_index); STATIC void xfs_attr3_leaf_compact(struct xfs_da_args *args, @@ -995,10 +995,8 @@ xfs_attr_shortform_to_leaf( xfs_attr_sethash(&nargs); error = xfs_attr3_leaf_lookup_int(bp, &nargs); /* set a->index */ ASSERT(error == -ENOATTR); - error = xfs_attr3_leaf_add(bp, &nargs); - ASSERT(error != -ENOSPC); - if (error) - goto out; + if (!xfs_attr3_leaf_add(bp, &nargs)) + ASSERT(0); sfe = xfs_attr_sf_nextentry(sfe); } error = 0; @@ -1333,6 +1331,9 @@ xfs_attr3_leaf_create( /* * Split the leaf node, rebalance, then add the new entry. + * + * Returns 0 if the entry was added, 1 if a further split is needed or a + * negative error number otherwise. 
*/ int xfs_attr3_leaf_split( @@ -1340,8 +1341,9 @@ xfs_attr3_leaf_split( struct xfs_da_state_blk *oldblk, struct xfs_da_state_blk *newblk) { - xfs_dablk_t blkno; - int error; + bool added; + xfs_dablk_t blkno; + int error; trace_xfs_attr_leaf_split(state->args); @@ -1376,10 +1378,10 @@ xfs_attr3_leaf_split( */ if (state->inleaf) { trace_xfs_attr_leaf_add_old(state->args); - error = xfs_attr3_leaf_add(oldblk->bp, state->args); + added = xfs_attr3_leaf_add(oldblk->bp, state->args); } else { trace_xfs_attr_leaf_add_new(state->args); - error = xfs_attr3_leaf_add(newblk->bp, state->args); + added = xfs_attr3_leaf_add(newblk->bp, state->args); } /* @@ -1387,13 +1389,15 @@ xfs_attr3_leaf_split( */ oldblk->hashval = xfs_attr_leaf_lasthash(oldblk->bp, NULL); newblk->hashval = xfs_attr_leaf_lasthash(newblk->bp, NULL); - return error; + if (!added) + return 1; + return 0; } /* * Add a name to the leaf attribute list structure. */ -int +bool xfs_attr3_leaf_add( struct xfs_buf *bp, struct xfs_da_args *args) @@ -1402,6 +1406,7 @@ xfs_attr3_leaf_add( struct xfs_attr3_icleaf_hdr ichdr; int tablesize; int entsize; + bool added = true; int sum; int tmp; int i; @@ -1430,7 +1435,7 @@ xfs_attr3_leaf_add( if (ichdr.freemap[i].base < ichdr.firstused) tmp += sizeof(xfs_attr_leaf_entry_t); if (ichdr.freemap[i].size >= tmp) { - tmp = xfs_attr3_leaf_add_work(bp, &ichdr, args, i); + xfs_attr3_leaf_add_work(bp, &ichdr, args, i); goto out_log_hdr; } sum += ichdr.freemap[i].size; @@ -1442,7 +1447,7 @@ xfs_attr3_leaf_add( * no good and we should just give up. */ if (!ichdr.holes && sum < entsize) - return -ENOSPC; + return false; /* * Compact the entries to coalesce free space. @@ -1455,24 +1460,24 @@ xfs_attr3_leaf_add( * free region, in freemap[0]. If it is not big enough, give up. 
*/ if (ichdr.freemap[0].size < (entsize + sizeof(xfs_attr_leaf_entry_t))) { - tmp = -ENOSPC; + added = false; goto out_log_hdr; } - tmp = xfs_attr3_leaf_add_work(bp, &ichdr, args, 0); + xfs_attr3_leaf_add_work(bp, &ichdr, args, 0); out_log_hdr: xfs_attr3_leaf_hdr_to_disk(args->geo, leaf, &ichdr); xfs_trans_log_buf(args->trans, bp, XFS_DA_LOGRANGE(leaf, &leaf->hdr, xfs_attr3_leaf_hdr_size(leaf))); - return tmp; + return added; } /* * Add a name to a leaf attribute list structure. */ -STATIC int +STATIC void xfs_attr3_leaf_add_work( struct xfs_buf *bp, struct xfs_attr3_icleaf_hdr *ichdr, @@ -1590,7 +1595,6 @@ xfs_attr3_leaf_add_work( } } ichdr->usedbytes += xfs_attr_leaf_entsize(leaf, args->index); - return 0; } /* diff --git a/fs/xfs/libxfs/xfs_attr_leaf.h b/fs/xfs/libxfs/xfs_attr_leaf.h index bac219589896..589f810eedc0 100644 --- a/fs/xfs/libxfs/xfs_attr_leaf.h +++ b/fs/xfs/libxfs/xfs_attr_leaf.h @@ -76,7 +76,7 @@ int xfs_attr3_leaf_split(struct xfs_da_state *state, int xfs_attr3_leaf_lookup_int(struct xfs_buf *leaf, struct xfs_da_args *args); int xfs_attr3_leaf_getvalue(struct xfs_buf *bp, struct xfs_da_args *args); -int xfs_attr3_leaf_add(struct xfs_buf *leaf_buffer, +bool xfs_attr3_leaf_add(struct xfs_buf *leaf_buffer, struct xfs_da_args *args); int xfs_attr3_leaf_remove(struct xfs_buf *leaf_buffer, struct xfs_da_args *args); diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 8090e8249116..36dd08d13293 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -3477,31 +3477,19 @@ xfs_bmap_process_allocated_extent( xfs_bmap_alloc_account(ap); } -#ifdef DEBUG static int xfs_bmap_exact_minlen_extent_alloc( - struct xfs_bmalloca *ap) + struct xfs_bmalloca *ap, + struct xfs_alloc_arg *args) { - struct xfs_mount *mp = ap->ip->i_mount; - struct xfs_alloc_arg args = { .tp = ap->tp, .mp = mp }; - xfs_fileoff_t orig_offset; - xfs_extlen_t orig_length; - int error; - - ASSERT(ap->length); - if (ap->minlen != 1) { - ap->blkno = NULLFSBLOCK; 
- ap->length = 0; + args->fsbno = NULLFSBLOCK; return 0; } - orig_offset = ap->offset; - orig_length = ap->length; - - args.alloc_minlen_only = 1; - - xfs_bmap_compute_alignments(ap, &args); + args->alloc_minlen_only = 1; + args->minlen = args->maxlen = ap->minlen; + args->total = ap->total; /* * Unlike the longest extent available in an AG, we don't track @@ -3511,39 +3499,16 @@ xfs_bmap_exact_minlen_extent_alloc( * we need not be concerned about a drop in performance in * "debug only" code paths. */ - ap->blkno = XFS_AGB_TO_FSB(mp, 0, 0); + ap->blkno = XFS_AGB_TO_FSB(ap->ip->i_mount, 0, 0); - args.oinfo = XFS_RMAP_OINFO_SKIP_UPDATE; - args.minlen = args.maxlen = ap->minlen; - args.total = ap->total; - - args.alignment = 1; - args.minalignslop = 0; - - args.minleft = ap->minleft; - args.wasdel = ap->wasdel; - args.resv = XFS_AG_RESV_NONE; - args.datatype = ap->datatype; - - error = xfs_alloc_vextent_first_ag(&args, ap->blkno); - if (error) - return error; - - if (args.fsbno != NULLFSBLOCK) { - xfs_bmap_process_allocated_extent(ap, &args, orig_offset, - orig_length); - } else { - ap->blkno = NULLFSBLOCK; - ap->length = 0; - } - - return 0; + /* + * Call xfs_bmap_btalloc_low_space here as it first does a "normal" AG + * iteration and then drops args->total to args->minlen, which might be + * required to find an allocation for the transaction reservation when + * the file system is very full. + */ + return xfs_bmap_btalloc_low_space(ap, args); } -#else - -#define xfs_bmap_exact_minlen_extent_alloc(bma) (-EFSCORRUPTED) - -#endif /* * If we are not low on available data blocks and we are allocating at @@ -3801,8 +3766,11 @@ xfs_bmap_btalloc( /* Trim the allocation back to the maximum an AG can fit. 
*/ args.maxlen = min(ap->length, mp->m_ag_max_usable); - if ((ap->datatype & XFS_ALLOC_USERDATA) && - xfs_inode_is_filestream(ap->ip)) + if (unlikely(XFS_TEST_ERROR(false, mp, + XFS_ERRTAG_BMAP_ALLOC_MINLEN_EXTENT))) + error = xfs_bmap_exact_minlen_extent_alloc(ap, &args); + else if ((ap->datatype & XFS_ALLOC_USERDATA) && + xfs_inode_is_filestream(ap->ip)) error = xfs_bmap_btalloc_filestreams(ap, &args, stripe_align); else error = xfs_bmap_btalloc_best_length(ap, &args, stripe_align); @@ -4177,43 +4145,6 @@ out: } static int -xfs_bmap_alloc_userdata( - struct xfs_bmalloca *bma) -{ - struct xfs_mount *mp = bma->ip->i_mount; - int whichfork = xfs_bmapi_whichfork(bma->flags); - int error; - - /* - * Set the data type being allocated. For the data fork, the first data - * in the file is treated differently to all other allocations. For the - * attribute fork, we only need to ensure the allocated range is not on - * the busy list. - */ - bma->datatype = XFS_ALLOC_NOBUSY; - if (whichfork == XFS_DATA_FORK || whichfork == XFS_COW_FORK) { - bma->datatype |= XFS_ALLOC_USERDATA; - if (bma->offset == 0) - bma->datatype |= XFS_ALLOC_INITIAL_USER_DATA; - - if (mp->m_dalign && bma->length >= mp->m_dalign) { - error = xfs_bmap_isaeof(bma, whichfork); - if (error) - return error; - } - - if (XFS_IS_REALTIME_INODE(bma->ip)) - return xfs_bmap_rtalloc(bma); - } - - if (unlikely(XFS_TEST_ERROR(false, mp, - XFS_ERRTAG_BMAP_ALLOC_MINLEN_EXTENT))) - return xfs_bmap_exact_minlen_extent_alloc(bma); - - return xfs_bmap_btalloc(bma); -} - -static int xfs_bmapi_allocate( struct xfs_bmalloca *bma) { @@ -4230,15 +4161,32 @@ xfs_bmapi_allocate( else bma->minlen = 1; - if (bma->flags & XFS_BMAPI_METADATA) { - if (unlikely(XFS_TEST_ERROR(false, mp, - XFS_ERRTAG_BMAP_ALLOC_MINLEN_EXTENT))) - error = xfs_bmap_exact_minlen_extent_alloc(bma); - else - error = xfs_bmap_btalloc(bma); - } else { - error = xfs_bmap_alloc_userdata(bma); + if (!(bma->flags & XFS_BMAPI_METADATA)) { + /* + * For the data and 
COW fork, the first data in the file is + * treated differently to all other allocations. For the + * attribute fork, we only need to ensure the allocated range + * is not on the busy list. + */ + bma->datatype = XFS_ALLOC_NOBUSY; + if (whichfork == XFS_DATA_FORK || whichfork == XFS_COW_FORK) { + bma->datatype |= XFS_ALLOC_USERDATA; + if (bma->offset == 0) + bma->datatype |= XFS_ALLOC_INITIAL_USER_DATA; + + if (mp->m_dalign && bma->length >= mp->m_dalign) { + error = xfs_bmap_isaeof(bma, whichfork); + if (error) + return error; + } + } } + + if ((bma->datatype & XFS_ALLOC_USERDATA) && + XFS_IS_REALTIME_INODE(bma->ip)) + error = xfs_bmap_rtalloc(bma); + else + error = xfs_bmap_btalloc(bma); if (error) return error; if (bma->blkno == NULLFSBLOCK) diff --git a/fs/xfs/libxfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c index 16a529a88780..17d9e6154f19 100644 --- a/fs/xfs/libxfs/xfs_da_btree.c +++ b/fs/xfs/libxfs/xfs_da_btree.c @@ -593,9 +593,8 @@ xfs_da3_split( switch (oldblk->magic) { case XFS_ATTR_LEAF_MAGIC: error = xfs_attr3_leaf_split(state, oldblk, newblk); - if ((error != 0) && (error != -ENOSPC)) { + if (error < 0) return error; /* GROT: attr is inconsistent */ - } if (!error) { addblk = newblk; break; @@ -617,6 +616,8 @@ xfs_da3_split( error = xfs_attr3_leaf_split(state, newblk, &state->extrablk); } + if (error == 1) + return -ENOSPC; if (error) return error; /* GROT: attr inconsistent */ addblk = newblk; diff --git a/fs/xfs/scrub/ialloc_repair.c b/fs/xfs/scrub/ialloc_repair.c index a00ec7ae1792..c8d2196a04e1 100644 --- a/fs/xfs/scrub/ialloc_repair.c +++ b/fs/xfs/scrub/ialloc_repair.c @@ -657,7 +657,7 @@ xrep_ibt_build_new_trees( * Start by setting up the inobt staging cursor. 
*/ fsbno = XFS_AGB_TO_FSB(sc->mp, sc->sa.pag->pag_agno, - XFS_IBT_BLOCK(sc->mp)), + XFS_IBT_BLOCK(sc->mp)); xrep_newbt_init_ag(&ri->new_inobt, sc, &XFS_RMAP_OINFO_INOBT, fsbno, XFS_AG_RESV_NONE); ri->new_inobt.bload.claim_block = xrep_ibt_claim_block; @@ -678,7 +678,7 @@ xrep_ibt_build_new_trees( resv = XFS_AG_RESV_NONE; fsbno = XFS_AGB_TO_FSB(sc->mp, sc->sa.pag->pag_agno, - XFS_FIBT_BLOCK(sc->mp)), + XFS_FIBT_BLOCK(sc->mp)); xrep_newbt_init_ag(&ri->new_finobt, sc, &XFS_RMAP_OINFO_INOBT, fsbno, resv); ri->new_finobt.bload.claim_block = xrep_fibt_claim_block; diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index a680e5b82672..6b119a7a324f 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c @@ -1280,14 +1280,17 @@ xfs_inode_clear_eofblocks_tag( } /* - * Set ourselves up to free CoW blocks from this file. If it's already clean - * then we can bail out quickly, but otherwise we must back off if the file - * is undergoing some kind of write. + * Prepare to free COW fork blocks from an inode. */ static bool xfs_prep_free_cowblocks( - struct xfs_inode *ip) + struct xfs_inode *ip, + struct xfs_icwalk *icw) { + bool sync; + + sync = icw && (icw->icw_flags & XFS_ICWALK_FLAG_SYNC); + /* * Just clear the tag if we have an empty cow fork or none at all. It's * possible the inode was fully unshared since it was originally tagged. @@ -1299,16 +1302,22 @@ xfs_prep_free_cowblocks( } /* - * If the mapping is dirty or under writeback we cannot touch the - * CoW fork. Leave it alone if we're in the midst of a directio. + * A cowblocks trim of an inode can have a significant effect on + * fragmentation even when a reasonable COW extent size hint is set. + * Therefore, we prefer to not process cowblocks unless they are clean + * and idle. 
We can never process a cowblocks inode that is dirty or has + * in-flight I/O under any circumstances, because outstanding writeback + * or dio expects targeted COW fork blocks exist through write + * completion where they can be remapped into the data fork. + * + * Therefore, the heuristic used here is to never process inodes + * currently opened for write from background (i.e. non-sync) scans. For + * sync scans, use the pagecache/dio state of the inode to ensure we + * never free COW fork blocks out from under pending I/O. */ - if ((VFS_I(ip)->i_state & I_DIRTY_PAGES) || - mapping_tagged(VFS_I(ip)->i_mapping, PAGECACHE_TAG_DIRTY) || - mapping_tagged(VFS_I(ip)->i_mapping, PAGECACHE_TAG_WRITEBACK) || - atomic_read(&VFS_I(ip)->i_dio_count)) + if (!sync && inode_is_open_for_write(VFS_I(ip))) return false; - - return true; + return xfs_can_free_cowblocks(ip); } /* @@ -1337,7 +1346,7 @@ xfs_inode_free_cowblocks( if (!xfs_iflags_test(ip, XFS_ICOWBLOCKS)) return 0; - if (!xfs_prep_free_cowblocks(ip)) + if (!xfs_prep_free_cowblocks(ip, icw)) return 0; if (!xfs_icwalk_match(ip, icw)) @@ -1366,7 +1375,7 @@ xfs_inode_free_cowblocks( * Check again, nobody else should be able to dirty blocks or change * the reflink iflag now that we have the first two locks held. 
*/ - if (xfs_prep_free_cowblocks(ip)) + if (xfs_prep_free_cowblocks(ip, icw)) ret = xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF, false); return ret; } diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h index 67c539cc9305..13455854365f 100644 --- a/fs/xfs/xfs_log.h +++ b/fs/xfs/xfs_log.h @@ -158,6 +158,4 @@ bool xfs_log_check_lsn(struct xfs_mount *, xfs_lsn_t); bool xlog_force_shutdown(struct xlog *log, uint32_t shutdown_flags); -int xfs_attr_use_log_assist(struct xfs_mount *mp); - #endif /* __XFS_LOG_H__ */ diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c index 391a938d690c..80da0cf87d7a 100644 --- a/fs/xfs/xfs_log_cil.c +++ b/fs/xfs/xfs_log_cil.c @@ -156,7 +156,6 @@ xlog_cil_insert_pcp_aggregate( struct xfs_cil *cil, struct xfs_cil_ctx *ctx) { - struct xlog_cil_pcp *cilpcp; int cpu; int count = 0; @@ -171,13 +170,11 @@ xlog_cil_insert_pcp_aggregate( * structures that could have a nonzero space_used. */ for_each_cpu(cpu, &ctx->cil_pcpmask) { - int old, prev; + struct xlog_cil_pcp *cilpcp = per_cpu_ptr(cil->xc_pcp, cpu); + int old = READ_ONCE(cilpcp->space_used); - cilpcp = per_cpu_ptr(cil->xc_pcp, cpu); - do { - old = cilpcp->space_used; - prev = cmpxchg(&cilpcp->space_used, old, 0); - } while (old != prev); + while (!try_cmpxchg(&cilpcp->space_used, &old, 0)) + ; count += old; } atomic_add(count, &ctx->space_used); diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index ec766b4bc853..a13bf53fea49 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -1849,7 +1849,7 @@ xlog_find_item_ops( * from the transaction. However, we can't do that until after we've * replayed all the other items because they may be dependent on the * cancelled buffer and replaying the cancelled buffer can remove it - * form the cancelled buffer table. Hence they have tobe done last. + * form the cancelled buffer table. Hence they have to be done last. * * 3. 
Inode allocation buffers must be replayed before inode items that * read the buffer and replay changes into it. For filesystems using the diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index 6fde6ec8092f..5bf6682e701b 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -1595,6 +1595,9 @@ xfs_reflink_clear_inode_flag( ASSERT(xfs_is_reflink_inode(ip)); + if (!xfs_can_free_cowblocks(ip)) + return 0; + error = xfs_reflink_inode_has_shared_extents(*tpp, ip, &needs_flag); if (error || needs_flag) return error; diff --git a/fs/xfs/xfs_reflink.h b/fs/xfs/xfs_reflink.h index fb55e4ce49fa..4a58e4533671 100644 --- a/fs/xfs/xfs_reflink.h +++ b/fs/xfs/xfs_reflink.h @@ -6,6 +6,25 @@ #ifndef __XFS_REFLINK_H #define __XFS_REFLINK_H 1 +/* + * Check whether it is safe to free COW fork blocks from an inode. It is unsafe + * to do so when an inode has dirty cache or I/O in-flight, even if no shared + * extents exist in the data fork, because outstanding I/O may target blocks + * that were speculatively allocated to the COW fork. + */ +static inline bool +xfs_can_free_cowblocks(struct xfs_inode *ip) +{ + struct inode *inode = VFS_I(ip); + + if ((inode->i_state & I_DIRTY_PAGES) || + mapping_tagged(inode->i_mapping, PAGECACHE_TAG_DIRTY) || + mapping_tagged(inode->i_mapping, PAGECACHE_TAG_WRITEBACK) || + atomic_read(&inode->i_dio_count)) + return false; + return true; +} + extern int xfs_reflink_trim_around_shared(struct xfs_inode *ip, struct xfs_bmbt_irec *irec, bool *shared); int xfs_bmap_trim_cow(struct xfs_inode *ip, struct xfs_bmbt_irec *imap, |