diff options
Diffstat (limited to 'fs/xfs')
| -rw-r--r-- | fs/xfs/libxfs/xfs_ag.c | 2 | ||||
| -rw-r--r-- | fs/xfs/libxfs/xfs_alloc.c | 289 | ||||
| -rw-r--r-- | fs/xfs/libxfs/xfs_alloc.h | 24 | ||||
| -rw-r--r-- | fs/xfs/libxfs/xfs_attr_leaf.c | 2 | ||||
| -rw-r--r-- | fs/xfs/libxfs/xfs_bmap.c | 8 | ||||
| -rw-r--r-- | fs/xfs/libxfs/xfs_bmap_btree.c | 3 | ||||
| -rw-r--r-- | fs/xfs/libxfs/xfs_ialloc.c | 32 | ||||
| -rw-r--r-- | fs/xfs/libxfs/xfs_ialloc_btree.c | 3 | ||||
| -rw-r--r-- | fs/xfs/libxfs/xfs_refcount.c | 22 | ||||
| -rw-r--r-- | fs/xfs/libxfs/xfs_refcount_btree.c | 8 | ||||
| -rw-r--r-- | fs/xfs/libxfs/xfs_rmap.c | 10 | ||||
| -rw-r--r-- | fs/xfs/libxfs/xfs_sb.c | 56 | ||||
| -rw-r--r-- | fs/xfs/xfs_extent_busy.c | 36 | ||||
| -rw-r--r-- | fs/xfs/xfs_extent_busy.h | 6 | ||||
| -rw-r--r-- | fs/xfs/xfs_extfree_item.c | 75 | ||||
| -rw-r--r-- | fs/xfs/xfs_fsmap.c | 255 | ||||
| -rw-r--r-- | fs/xfs/xfs_log.c | 47 | ||||
| -rw-r--r-- | fs/xfs/xfs_notify_failure.c | 9 | ||||
| -rw-r--r-- | fs/xfs/xfs_reflink.c | 3 | ||||
| -rw-r--r-- | fs/xfs/xfs_trace.h | 25 | ||||
| -rw-r--r-- | fs/xfs/xfs_trans_ail.c | 2 | 
21 files changed, 586 insertions, 331 deletions
| diff --git a/fs/xfs/libxfs/xfs_ag.c b/fs/xfs/libxfs/xfs_ag.c index ee84835ebc66..e9cc481b4ddf 100644 --- a/fs/xfs/libxfs/xfs_ag.c +++ b/fs/xfs/libxfs/xfs_ag.c @@ -985,7 +985,7 @@ xfs_ag_shrink_space(  			goto resv_err;  		err2 = __xfs_free_extent_later(*tpp, args.fsbno, delta, NULL, -				true); +				XFS_AG_RESV_NONE, true);  		if (err2)  			goto resv_err; diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index c20fe99405d8..3069194527dd 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c @@ -1536,7 +1536,8 @@ xfs_alloc_ag_vextent_lastblock(   */  STATIC int  xfs_alloc_ag_vextent_near( -	struct xfs_alloc_arg	*args) +	struct xfs_alloc_arg	*args, +	uint32_t		alloc_flags)  {  	struct xfs_alloc_cur	acur = {};  	int			error;		/* error code */ @@ -1555,6 +1556,8 @@ xfs_alloc_ag_vextent_near(  	if (args->agbno > args->max_agbno)  		args->agbno = args->max_agbno; +	/* Retry once quickly if we find busy extents before blocking. */ +	alloc_flags |= XFS_ALLOC_FLAG_TRYFLUSH;  restart:  	len = 0; @@ -1610,9 +1613,20 @@ restart:  	 */  	if (!acur.len) {  		if (acur.busy) { +			/* +			 * Our only valid extents must have been busy. Flush and +			 * retry the allocation again. If we get an -EAGAIN +			 * error, we're being told that a deadlock was avoided +			 * and the current transaction needs committing before +			 * the allocation can be retried. +			 */  			trace_xfs_alloc_near_busy(args); -			xfs_extent_busy_flush(args->mp, args->pag, -					      acur.busy_gen); +			error = xfs_extent_busy_flush(args->tp, args->pag, +					acur.busy_gen, alloc_flags); +			if (error) +				goto out; + +			alloc_flags &= ~XFS_ALLOC_FLAG_TRYFLUSH;  			goto restart;  		}  		trace_xfs_alloc_size_neither(args); @@ -1635,22 +1649,25 @@ out:   * and of the form k * prod + mod unless there's nothing that large.   * Return the starting a.g. block, or NULLAGBLOCK if we can't do it.   
*/ -STATIC int				/* error */ +static int  xfs_alloc_ag_vextent_size( -	xfs_alloc_arg_t	*args)		/* allocation argument structure */ +	struct xfs_alloc_arg	*args, +	uint32_t		alloc_flags)  { -	struct xfs_agf	*agf = args->agbp->b_addr; -	struct xfs_btree_cur *bno_cur;	/* cursor for bno btree */ -	struct xfs_btree_cur *cnt_cur;	/* cursor for cnt btree */ -	int		error;		/* error result */ -	xfs_agblock_t	fbno;		/* start of found freespace */ -	xfs_extlen_t	flen;		/* length of found freespace */ -	int		i;		/* temp status variable */ -	xfs_agblock_t	rbno;		/* returned block number */ -	xfs_extlen_t	rlen;		/* length of returned extent */ -	bool		busy; -	unsigned	busy_gen; +	struct xfs_agf		*agf = args->agbp->b_addr; +	struct xfs_btree_cur	*bno_cur; +	struct xfs_btree_cur	*cnt_cur; +	xfs_agblock_t		fbno;		/* start of found freespace */ +	xfs_extlen_t		flen;		/* length of found freespace */ +	xfs_agblock_t		rbno;		/* returned block number */ +	xfs_extlen_t		rlen;		/* length of returned extent */ +	bool			busy; +	unsigned		busy_gen; +	int			error; +	int			i; +	/* Retry once quickly if we find busy extents before blocking. */ +	alloc_flags |= XFS_ALLOC_FLAG_TRYFLUSH;  restart:  	/*  	 * Allocate and initialize a cursor for the by-size btree. @@ -1708,19 +1725,25 @@ restart:  			error = xfs_btree_increment(cnt_cur, 0, &i);  			if (error)  				goto error0; -			if (i == 0) { -				/* -				 * Our only valid extents must have been busy. -				 * Make it unbusy by forcing the log out and -				 * retrying. -				 */ -				xfs_btree_del_cursor(cnt_cur, -						     XFS_BTREE_NOERROR); -				trace_xfs_alloc_size_busy(args); -				xfs_extent_busy_flush(args->mp, -							args->pag, busy_gen); -				goto restart; -			} +			if (i) +				continue; + +			/* +			 * Our only valid extents must have been busy. Flush and +			 * retry the allocation again. 
If we get an -EAGAIN +			 * error, we're being told that a deadlock was avoided +			 * and the current transaction needs committing before +			 * the allocation can be retried. +			 */ +			trace_xfs_alloc_size_busy(args); +			error = xfs_extent_busy_flush(args->tp, args->pag, +					busy_gen, alloc_flags); +			if (error) +				goto error0; + +			alloc_flags &= ~XFS_ALLOC_FLAG_TRYFLUSH; +			xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); +			goto restart;  		}  	} @@ -1800,9 +1823,21 @@ restart:  	args->len = rlen;  	if (rlen < args->minlen) {  		if (busy) { -			xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); +			/* +			 * Our only valid extents must have been busy. Flush and +			 * retry the allocation again. If we get an -EAGAIN +			 * error, we're being told that a deadlock was avoided +			 * and the current transaction needs committing before +			 * the allocation can be retried. +			 */  			trace_xfs_alloc_size_busy(args); -			xfs_extent_busy_flush(args->mp, args->pag, busy_gen); +			error = xfs_extent_busy_flush(args->tp, args->pag, +					busy_gen, alloc_flags); +			if (error) +				goto error0; + +			alloc_flags &= ~XFS_ALLOC_FLAG_TRYFLUSH; +			xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);  			goto restart;  		}  		goto out_nominleft; @@ -2435,23 +2470,25 @@ static int  xfs_defer_agfl_block(  	struct xfs_trans		*tp,  	xfs_agnumber_t			agno, -	xfs_fsblock_t			agbno, +	xfs_agblock_t			agbno,  	struct xfs_owner_info		*oinfo)  {  	struct xfs_mount		*mp = tp->t_mountp;  	struct xfs_extent_free_item	*xefi; +	xfs_fsblock_t			fsbno = XFS_AGB_TO_FSB(mp, agno, agbno);  	ASSERT(xfs_extfree_item_cache != NULL);  	ASSERT(oinfo != NULL); +	if (XFS_IS_CORRUPT(mp, !xfs_verify_fsbno(mp, fsbno))) +		return -EFSCORRUPTED; +  	xefi = kmem_cache_zalloc(xfs_extfree_item_cache,  			       GFP_KERNEL | __GFP_NOFAIL); -	xefi->xefi_startblock = XFS_AGB_TO_FSB(mp, agno, agbno); +	xefi->xefi_startblock = fsbno;  	xefi->xefi_blockcount = 1;  	xefi->xefi_owner = oinfo->oi_owner; - -	
if (XFS_IS_CORRUPT(mp, !xfs_verify_fsbno(mp, xefi->xefi_startblock))) -		return -EFSCORRUPTED; +	xefi->xefi_agresv = XFS_AG_RESV_AGFL;  	trace_xfs_agfl_free_defer(mp, agno, 0, agbno, 1); @@ -2470,6 +2507,7 @@ __xfs_free_extent_later(  	xfs_fsblock_t			bno,  	xfs_filblks_t			len,  	const struct xfs_owner_info	*oinfo, +	enum xfs_ag_resv_type		type,  	bool				skip_discard)  {  	struct xfs_extent_free_item	*xefi; @@ -2490,6 +2528,7 @@ __xfs_free_extent_later(  	ASSERT(agbno + len <= mp->m_sb.sb_agblocks);  #endif  	ASSERT(xfs_extfree_item_cache != NULL); +	ASSERT(type != XFS_AG_RESV_AGFL);  	if (XFS_IS_CORRUPT(mp, !xfs_verify_fsbext(mp, bno, len)))  		return -EFSCORRUPTED; @@ -2498,6 +2537,7 @@ __xfs_free_extent_later(  			       GFP_KERNEL | __GFP_NOFAIL);  	xefi->xefi_startblock = bno;  	xefi->xefi_blockcount = (xfs_extlen_t)len; +	xefi->xefi_agresv = type;  	if (skip_discard)  		xefi->xefi_flags |= XFS_EFI_SKIP_DISCARD;  	if (oinfo) { @@ -2568,7 +2608,7 @@ out:  int			/* error */  xfs_alloc_fix_freelist(  	struct xfs_alloc_arg	*args,	/* allocation argument structure */ -	int			flags)	/* XFS_ALLOC_FLAG_... */ +	uint32_t		alloc_flags)  {  	struct xfs_mount	*mp = args->mp;  	struct xfs_perag	*pag = args->pag; @@ -2584,7 +2624,7 @@ xfs_alloc_fix_freelist(  	ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);  	if (!xfs_perag_initialised_agf(pag)) { -		error = xfs_alloc_read_agf(pag, tp, flags, &agbp); +		error = xfs_alloc_read_agf(pag, tp, alloc_flags, &agbp);  		if (error) {  			/* Couldn't lock the AGF so skip this AG. 
*/  			if (error == -EAGAIN) @@ -2600,13 +2640,13 @@ xfs_alloc_fix_freelist(  	 */  	if (xfs_perag_prefers_metadata(pag) &&  	    (args->datatype & XFS_ALLOC_USERDATA) && -	    (flags & XFS_ALLOC_FLAG_TRYLOCK)) { -		ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING)); +	    (alloc_flags & XFS_ALLOC_FLAG_TRYLOCK)) { +		ASSERT(!(alloc_flags & XFS_ALLOC_FLAG_FREEING));  		goto out_agbp_relse;  	}  	need = xfs_alloc_min_freelist(mp, pag); -	if (!xfs_alloc_space_available(args, need, flags | +	if (!xfs_alloc_space_available(args, need, alloc_flags |  			XFS_ALLOC_FLAG_CHECK))  		goto out_agbp_relse; @@ -2615,7 +2655,7 @@ xfs_alloc_fix_freelist(  	 * Can fail if we're not blocking on locks, and it's held.  	 */  	if (!agbp) { -		error = xfs_alloc_read_agf(pag, tp, flags, &agbp); +		error = xfs_alloc_read_agf(pag, tp, alloc_flags, &agbp);  		if (error) {  			/* Couldn't lock the AGF so skip this AG. */  			if (error == -EAGAIN) @@ -2630,7 +2670,7 @@ xfs_alloc_fix_freelist(  	/* If there isn't enough total space or single-extent, reject it. */  	need = xfs_alloc_min_freelist(mp, pag); -	if (!xfs_alloc_space_available(args, need, flags)) +	if (!xfs_alloc_space_available(args, need, alloc_flags))  		goto out_agbp_relse;  #ifdef DEBUG @@ -2668,11 +2708,12 @@ xfs_alloc_fix_freelist(  	 */  	memset(&targs, 0, sizeof(targs));  	/* struct copy below */ -	if (flags & XFS_ALLOC_FLAG_NORMAP) +	if (alloc_flags & XFS_ALLOC_FLAG_NORMAP)  		targs.oinfo = XFS_RMAP_OINFO_SKIP_UPDATE;  	else  		targs.oinfo = XFS_RMAP_OINFO_AG; -	while (!(flags & XFS_ALLOC_FLAG_NOSHRINK) && pag->pagf_flcount > need) { +	while (!(alloc_flags & XFS_ALLOC_FLAG_NOSHRINK) && +			pag->pagf_flcount > need) {  		error = xfs_alloc_get_freelist(pag, tp, agbp, &bno, 0);  		if (error)  			goto out_agbp_relse; @@ -2700,7 +2741,7 @@ xfs_alloc_fix_freelist(  		targs.resv = XFS_AG_RESV_AGFL;  		/* Allocate as many blocks as possible at once. 
*/ -		error = xfs_alloc_ag_vextent_size(&targs); +		error = xfs_alloc_ag_vextent_size(&targs, alloc_flags);  		if (error)  			goto out_agflbp_relse; @@ -2710,7 +2751,7 @@ xfs_alloc_fix_freelist(  		 * on a completely full ag.  		 */  		if (targs.agbno == NULLAGBLOCK) { -			if (flags & XFS_ALLOC_FLAG_FREEING) +			if (alloc_flags & XFS_ALLOC_FLAG_FREEING)  				break;  			goto out_agflbp_relse;  		} @@ -2916,6 +2957,47 @@ xfs_alloc_put_freelist(  }  /* + * Check that this AGF/AGI header's sequence number and length matches the AG + * number and size in fsblocks. + */ +xfs_failaddr_t +xfs_validate_ag_length( +	struct xfs_buf		*bp, +	uint32_t		seqno, +	uint32_t		length) +{ +	struct xfs_mount	*mp = bp->b_mount; +	/* +	 * During growfs operations, the perag is not fully initialised, +	 * so we can't use it for any useful checking. growfs ensures we can't +	 * use it by using uncached buffers that don't have the perag attached +	 * so we can detect and avoid this problem. +	 */ +	if (bp->b_pag && seqno != bp->b_pag->pag_agno) +		return __this_address; + +	/* +	 * Only the last AG in the filesystem is allowed to be shorter +	 * than the AG size recorded in the superblock. +	 */ +	if (length != mp->m_sb.sb_agblocks) { +		/* +		 * During growfs, the new last AG can get here before we +		 * have updated the superblock. Give it a pass on the seqno +		 * check. +		 */ +		if (bp->b_pag && seqno != mp->m_sb.sb_agcount - 1) +			return __this_address; +		if (length < XFS_MIN_AG_BLOCKS) +			return __this_address; +		if (length > mp->m_sb.sb_agblocks) +			return __this_address; +	} + +	return NULL; +} + +/*   * Verify the AGF is consistent.   
*   * We do not verify the AGFL indexes in the AGF are fully consistent here @@ -2934,6 +3016,9 @@ xfs_agf_verify(  {  	struct xfs_mount	*mp = bp->b_mount;  	struct xfs_agf		*agf = bp->b_addr; +	xfs_failaddr_t		fa; +	uint32_t		agf_seqno = be32_to_cpu(agf->agf_seqno); +	uint32_t		agf_length = be32_to_cpu(agf->agf_length);  	if (xfs_has_crc(mp)) {  		if (!uuid_equal(&agf->agf_uuid, &mp->m_sb.sb_meta_uuid)) @@ -2945,18 +3030,26 @@ xfs_agf_verify(  	if (!xfs_verify_magic(bp, agf->agf_magicnum))  		return __this_address; -	if (!(XFS_AGF_GOOD_VERSION(be32_to_cpu(agf->agf_versionnum)) && -	      be32_to_cpu(agf->agf_freeblks) <= be32_to_cpu(agf->agf_length) && -	      be32_to_cpu(agf->agf_flfirst) < xfs_agfl_size(mp) && -	      be32_to_cpu(agf->agf_fllast) < xfs_agfl_size(mp) && -	      be32_to_cpu(agf->agf_flcount) <= xfs_agfl_size(mp))) +	if (!XFS_AGF_GOOD_VERSION(be32_to_cpu(agf->agf_versionnum)))  		return __this_address; -	if (be32_to_cpu(agf->agf_length) > mp->m_sb.sb_dblocks) +	/* +	 * Both agf_seqno and agf_length need to be validated before anything else +	 * block number related in the AGF or AGFL can be checked. 
+	 */ +	fa = xfs_validate_ag_length(bp, agf_seqno, agf_length); +	if (fa) +		return fa; + +	if (be32_to_cpu(agf->agf_flfirst) >= xfs_agfl_size(mp)) +		return __this_address; +	if (be32_to_cpu(agf->agf_fllast) >= xfs_agfl_size(mp)) +		return __this_address; +	if (be32_to_cpu(agf->agf_flcount) > xfs_agfl_size(mp))  		return __this_address;  	if (be32_to_cpu(agf->agf_freeblks) < be32_to_cpu(agf->agf_longest) || -	    be32_to_cpu(agf->agf_freeblks) > be32_to_cpu(agf->agf_length)) +	    be32_to_cpu(agf->agf_freeblks) > agf_length)  		return __this_address;  	if (be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) < 1 || @@ -2967,38 +3060,28 @@ xfs_agf_verify(  						mp->m_alloc_maxlevels)  		return __this_address; -	if (xfs_has_rmapbt(mp) && -	    (be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) < 1 || -	     be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) > -						mp->m_rmap_maxlevels)) -		return __this_address; - -	if (xfs_has_rmapbt(mp) && -	    be32_to_cpu(agf->agf_rmap_blocks) > be32_to_cpu(agf->agf_length)) +	if (xfs_has_lazysbcount(mp) && +	    be32_to_cpu(agf->agf_btreeblks) > agf_length)  		return __this_address; -	/* -	 * during growfs operations, the perag is not fully initialised, -	 * so we can't use it for any useful checking. growfs ensures we can't -	 * use it by using uncached buffers that don't have the perag attached -	 * so we can detect and avoid this problem. 
-	 */ -	if (bp->b_pag && be32_to_cpu(agf->agf_seqno) != bp->b_pag->pag_agno) -		return __this_address; +	if (xfs_has_rmapbt(mp)) { +		if (be32_to_cpu(agf->agf_rmap_blocks) > agf_length) +			return __this_address; -	if (xfs_has_lazysbcount(mp) && -	    be32_to_cpu(agf->agf_btreeblks) > be32_to_cpu(agf->agf_length)) -		return __this_address; +		if (be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) < 1 || +		    be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) > +							mp->m_rmap_maxlevels) +			return __this_address; +	} -	if (xfs_has_reflink(mp) && -	    be32_to_cpu(agf->agf_refcount_blocks) > -	    be32_to_cpu(agf->agf_length)) -		return __this_address; +	if (xfs_has_reflink(mp)) { +		if (be32_to_cpu(agf->agf_refcount_blocks) > agf_length) +			return __this_address; -	if (xfs_has_reflink(mp) && -	    (be32_to_cpu(agf->agf_refcount_level) < 1 || -	     be32_to_cpu(agf->agf_refcount_level) > mp->m_refc_maxlevels)) -		return __this_address; +		if (be32_to_cpu(agf->agf_refcount_level) < 1 || +		    be32_to_cpu(agf->agf_refcount_level) > mp->m_refc_maxlevels) +			return __this_address; +	}  	return NULL;  } @@ -3226,7 +3309,7 @@ xfs_alloc_vextent_check_args(  static int  xfs_alloc_vextent_prepare_ag(  	struct xfs_alloc_arg	*args, -	uint32_t		flags) +	uint32_t		alloc_flags)  {  	bool			need_pag = !args->pag;  	int			error; @@ -3235,7 +3318,7 @@ xfs_alloc_vextent_prepare_ag(  		args->pag = xfs_perag_get(args->mp, args->agno);  	args->agbp = NULL; -	error = xfs_alloc_fix_freelist(args, flags); +	error = xfs_alloc_fix_freelist(args, alloc_flags);  	if (error) {  		trace_xfs_alloc_vextent_nofix(args);  		if (need_pag) @@ -3357,6 +3440,7 @@ xfs_alloc_vextent_this_ag(  {  	struct xfs_mount	*mp = args->mp;  	xfs_agnumber_t		minimum_agno; +	uint32_t		alloc_flags = 0;  	int			error;  	ASSERT(args->pag != NULL); @@ -3375,9 +3459,9 @@ xfs_alloc_vextent_this_ag(  		return error;  	} -	error = xfs_alloc_vextent_prepare_ag(args, 0); +	error = xfs_alloc_vextent_prepare_ag(args, alloc_flags);  	
if (!error && args->agbp) -		error = xfs_alloc_ag_vextent_size(args); +		error = xfs_alloc_ag_vextent_size(args, alloc_flags);  	return xfs_alloc_vextent_finish(args, minimum_agno, error, false);  } @@ -3406,20 +3490,20 @@ xfs_alloc_vextent_iterate_ags(  	xfs_agnumber_t		minimum_agno,  	xfs_agnumber_t		start_agno,  	xfs_agblock_t		target_agbno, -	uint32_t		flags) +	uint32_t		alloc_flags)  {  	struct xfs_mount	*mp = args->mp;  	xfs_agnumber_t		restart_agno = minimum_agno;  	xfs_agnumber_t		agno;  	int			error = 0; -	if (flags & XFS_ALLOC_FLAG_TRYLOCK) +	if (alloc_flags & XFS_ALLOC_FLAG_TRYLOCK)  		restart_agno = 0;  restart:  	for_each_perag_wrap_range(mp, start_agno, restart_agno,  			mp->m_sb.sb_agcount, agno, args->pag) {  		args->agno = agno; -		error = xfs_alloc_vextent_prepare_ag(args, flags); +		error = xfs_alloc_vextent_prepare_ag(args, alloc_flags);  		if (error)  			break;  		if (!args->agbp) { @@ -3433,10 +3517,10 @@ restart:  		 */  		if (args->agno == start_agno && target_agbno) {  			args->agbno = target_agbno; -			error = xfs_alloc_ag_vextent_near(args); +			error = xfs_alloc_ag_vextent_near(args, alloc_flags);  		} else {  			args->agbno = 0; -			error = xfs_alloc_ag_vextent_size(args); +			error = xfs_alloc_ag_vextent_size(args, alloc_flags);  		}  		break;  	} @@ -3453,8 +3537,8 @@ restart:  	 * constraining flags by the caller, drop them and retry the allocation  	 * without any constraints being set.  	 
*/ -	if (flags) { -		flags = 0; +	if (alloc_flags & XFS_ALLOC_FLAG_TRYLOCK) { +		alloc_flags &= ~XFS_ALLOC_FLAG_TRYLOCK;  		restart_agno = minimum_agno;  		goto restart;  	} @@ -3482,6 +3566,7 @@ xfs_alloc_vextent_start_ag(  	xfs_agnumber_t		start_agno;  	xfs_agnumber_t		rotorstep = xfs_rotorstep;  	bool			bump_rotor = false; +	uint32_t		alloc_flags = XFS_ALLOC_FLAG_TRYLOCK;  	int			error;  	ASSERT(args->pag == NULL); @@ -3508,7 +3593,7 @@ xfs_alloc_vextent_start_ag(  	start_agno = max(minimum_agno, XFS_FSB_TO_AGNO(mp, target));  	error = xfs_alloc_vextent_iterate_ags(args, minimum_agno, start_agno, -			XFS_FSB_TO_AGBNO(mp, target), XFS_ALLOC_FLAG_TRYLOCK); +			XFS_FSB_TO_AGBNO(mp, target), alloc_flags);  	if (bump_rotor) {  		if (args->agno == start_agno) @@ -3535,6 +3620,7 @@ xfs_alloc_vextent_first_ag(  	struct xfs_mount	*mp = args->mp;  	xfs_agnumber_t		minimum_agno;  	xfs_agnumber_t		start_agno; +	uint32_t		alloc_flags = XFS_ALLOC_FLAG_TRYLOCK;  	int			error;  	ASSERT(args->pag == NULL); @@ -3553,7 +3639,7 @@ xfs_alloc_vextent_first_ag(  	start_agno = max(minimum_agno, XFS_FSB_TO_AGNO(mp, target));  	error = xfs_alloc_vextent_iterate_ags(args, minimum_agno, start_agno, -			XFS_FSB_TO_AGBNO(mp, target), 0); +			XFS_FSB_TO_AGBNO(mp, target), alloc_flags);  	return xfs_alloc_vextent_finish(args, minimum_agno, error, true);  } @@ -3606,6 +3692,7 @@ xfs_alloc_vextent_near_bno(  	struct xfs_mount	*mp = args->mp;  	xfs_agnumber_t		minimum_agno;  	bool			needs_perag = args->pag == NULL; +	uint32_t		alloc_flags = 0;  	int			error;  	if (!needs_perag) @@ -3626,9 +3713,9 @@ xfs_alloc_vextent_near_bno(  	if (needs_perag)  		args->pag = xfs_perag_grab(mp, args->agno); -	error = xfs_alloc_vextent_prepare_ag(args, 0); +	error = xfs_alloc_vextent_prepare_ag(args, alloc_flags);  	if (!error && args->agbp) -		error = xfs_alloc_ag_vextent_near(args); +		error = xfs_alloc_ag_vextent_near(args, alloc_flags);  	return xfs_alloc_vextent_finish(args, minimum_agno, error, 
needs_perag);  } @@ -3756,15 +3843,11 @@ xfs_alloc_query_range(  	xfs_alloc_query_range_fn		fn,  	void					*priv)  { -	union xfs_btree_irec			low_brec; -	union xfs_btree_irec			high_brec; -	struct xfs_alloc_query_range_info	query; +	union xfs_btree_irec			low_brec = { .a = *low_rec }; +	union xfs_btree_irec			high_brec = { .a = *high_rec }; +	struct xfs_alloc_query_range_info	query = { .priv = priv, .fn = fn };  	ASSERT(cur->bc_btnum == XFS_BTNUM_BNO); -	low_brec.a = *low_rec; -	high_brec.a = *high_rec; -	query.priv = priv; -	query.fn = fn;  	return xfs_btree_query_range(cur, &low_brec, &high_brec,  			xfs_alloc_query_range_helper, &query);  } diff --git a/fs/xfs/libxfs/xfs_alloc.h b/fs/xfs/libxfs/xfs_alloc.h index 85ac470be0da..6bb8d295c321 100644 --- a/fs/xfs/libxfs/xfs_alloc.h +++ b/fs/xfs/libxfs/xfs_alloc.h @@ -19,11 +19,12 @@ unsigned int xfs_agfl_size(struct xfs_mount *mp);  /*   * Flags for xfs_alloc_fix_freelist.   */ -#define	XFS_ALLOC_FLAG_TRYLOCK	0x00000001  /* use trylock for buffer locking */ -#define	XFS_ALLOC_FLAG_FREEING	0x00000002  /* indicate caller is freeing extents*/ -#define	XFS_ALLOC_FLAG_NORMAP	0x00000004  /* don't modify the rmapbt */ -#define	XFS_ALLOC_FLAG_NOSHRINK	0x00000008  /* don't shrink the freelist */ -#define	XFS_ALLOC_FLAG_CHECK	0x00000010  /* test only, don't modify args */ +#define	XFS_ALLOC_FLAG_TRYLOCK	(1U << 0)  /* use trylock for buffer locking */ +#define	XFS_ALLOC_FLAG_FREEING	(1U << 1)  /* indicate caller is freeing extents*/ +#define	XFS_ALLOC_FLAG_NORMAP	(1U << 2)  /* don't modify the rmapbt */ +#define	XFS_ALLOC_FLAG_NOSHRINK	(1U << 3)  /* don't shrink the freelist */ +#define	XFS_ALLOC_FLAG_CHECK	(1U << 4)  /* test only, don't modify args */ +#define	XFS_ALLOC_FLAG_TRYFLUSH	(1U << 5)  /* don't wait in busy extent flush */  /*   * Argument structure for xfs_alloc routines. 
@@ -195,7 +196,7 @@ int xfs_alloc_read_agfl(struct xfs_perag *pag, struct xfs_trans *tp,  		struct xfs_buf **bpp);  int xfs_free_agfl_block(struct xfs_trans *, xfs_agnumber_t, xfs_agblock_t,  			struct xfs_buf *, struct xfs_owner_info *); -int xfs_alloc_fix_freelist(struct xfs_alloc_arg *args, int flags); +int xfs_alloc_fix_freelist(struct xfs_alloc_arg *args, uint32_t alloc_flags);  int xfs_free_extent_fix_freelist(struct xfs_trans *tp, struct xfs_perag *pag,  		struct xfs_buf **agbp); @@ -232,7 +233,7 @@ xfs_buf_to_agfl_bno(  int __xfs_free_extent_later(struct xfs_trans *tp, xfs_fsblock_t bno,  		xfs_filblks_t len, const struct xfs_owner_info *oinfo, -		bool skip_discard); +		enum xfs_ag_resv_type type, bool skip_discard);  /*   * List of extents to be free "later". @@ -245,6 +246,7 @@ struct xfs_extent_free_item {  	xfs_extlen_t		xefi_blockcount;/* number of blocks in extent */  	struct xfs_perag	*xefi_pag;  	unsigned int		xefi_flags; +	enum xfs_ag_resv_type	xefi_agresv;  };  void xfs_extent_free_get_group(struct xfs_mount *mp, @@ -259,9 +261,10 @@ xfs_free_extent_later(  	struct xfs_trans		*tp,  	xfs_fsblock_t			bno,  	xfs_filblks_t			len, -	const struct xfs_owner_info	*oinfo) +	const struct xfs_owner_info	*oinfo, +	enum xfs_ag_resv_type		type)  { -	return __xfs_free_extent_later(tp, bno, len, oinfo, false); +	return __xfs_free_extent_later(tp, bno, len, oinfo, type, false);  } @@ -270,4 +273,7 @@ extern struct kmem_cache	*xfs_extfree_item_cache;  int __init xfs_extfree_intent_init_cache(void);  void xfs_extfree_intent_destroy_cache(void); +xfs_failaddr_t xfs_validate_ag_length(struct xfs_buf *bp, uint32_t seqno, +		uint32_t length); +  #endif	/* __XFS_ALLOC_H__ */ diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c index beee51ad75ce..2580ae47209a 100644 --- a/fs/xfs/libxfs/xfs_attr_leaf.c +++ b/fs/xfs/libxfs/xfs_attr_leaf.c @@ -2293,8 +2293,6 @@ xfs_attr3_leaf_unbalance(  	trace_xfs_attr_leaf_unbalance(state->args); -	drop_leaf = 
drop_blk->bp->b_addr; -	save_leaf = save_blk->bp->b_addr;  	xfs_attr3_leaf_hdr_from_disk(state->args->geo, &drophdr, drop_leaf);  	xfs_attr3_leaf_hdr_from_disk(state->args->geo, &savehdr, save_leaf);  	entry = xfs_attr3_leaf_entryp(drop_leaf); diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index fef35696adb7..30c931b38853 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -574,7 +574,8 @@ xfs_bmap_btree_to_extents(  		return error;  	xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, whichfork); -	error = xfs_free_extent_later(cur->bc_tp, cbno, 1, &oinfo); +	error = xfs_free_extent_later(cur->bc_tp, cbno, 1, &oinfo, +			XFS_AG_RESV_NONE);  	if (error)  		return error; @@ -5236,8 +5237,9 @@ xfs_bmap_del_extent_real(  		} else {  			error = __xfs_free_extent_later(tp, del->br_startblock,  					del->br_blockcount, NULL, -					(bflags & XFS_BMAPI_NODISCARD) || -					del->br_state == XFS_EXT_UNWRITTEN); +					XFS_AG_RESV_NONE, +					((bflags & XFS_BMAPI_NODISCARD) || +					del->br_state == XFS_EXT_UNWRITTEN));  			if (error)  				goto done;  		} diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c index 36564ae3084f..bf3f1b36fdd2 100644 --- a/fs/xfs/libxfs/xfs_bmap_btree.c +++ b/fs/xfs/libxfs/xfs_bmap_btree.c @@ -271,7 +271,8 @@ xfs_bmbt_free_block(  	int			error;  	xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, cur->bc_ino.whichfork); -	error = xfs_free_extent_later(cur->bc_tp, fsbno, 1, &oinfo); +	error = xfs_free_extent_later(cur->bc_tp, fsbno, 1, &oinfo, +			XFS_AG_RESV_NONE);  	if (error)  		return error; diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c index 34600f94c2f4..b83e54c70906 100644 --- a/fs/xfs/libxfs/xfs_ialloc.c +++ b/fs/xfs/libxfs/xfs_ialloc.c @@ -1853,8 +1853,8 @@ xfs_difree_inode_chunk(  		/* not sparse, calculate extent info directly */  		return xfs_free_extent_later(tp,  				XFS_AGB_TO_FSB(mp, agno, sagbno), -				M_IGEO(mp)->ialloc_blks, -				&XFS_RMAP_OINFO_INODES); +				
M_IGEO(mp)->ialloc_blks, &XFS_RMAP_OINFO_INODES, +				XFS_AG_RESV_NONE);  	}  	/* holemask is only 16-bits (fits in an unsigned long) */ @@ -1899,8 +1899,8 @@ xfs_difree_inode_chunk(  		ASSERT(agbno % mp->m_sb.sb_spino_align == 0);  		ASSERT(contigblk % mp->m_sb.sb_spino_align == 0);  		error = xfs_free_extent_later(tp, -				XFS_AGB_TO_FSB(mp, agno, agbno), -				contigblk, &XFS_RMAP_OINFO_INODES); +				XFS_AGB_TO_FSB(mp, agno, agbno), contigblk, +				&XFS_RMAP_OINFO_INODES, XFS_AG_RESV_NONE);  		if (error)  			return error; @@ -2486,11 +2486,14 @@ xfs_ialloc_log_agi(  static xfs_failaddr_t  xfs_agi_verify( -	struct xfs_buf	*bp) +	struct xfs_buf		*bp)  { -	struct xfs_mount *mp = bp->b_mount; -	struct xfs_agi	*agi = bp->b_addr; -	int		i; +	struct xfs_mount	*mp = bp->b_mount; +	struct xfs_agi		*agi = bp->b_addr; +	xfs_failaddr_t		fa; +	uint32_t		agi_seqno = be32_to_cpu(agi->agi_seqno); +	uint32_t		agi_length = be32_to_cpu(agi->agi_length); +	int			i;  	if (xfs_has_crc(mp)) {  		if (!uuid_equal(&agi->agi_uuid, &mp->m_sb.sb_meta_uuid)) @@ -2507,6 +2510,10 @@ xfs_agi_verify(  	if (!XFS_AGI_GOOD_VERSION(be32_to_cpu(agi->agi_versionnum)))  		return __this_address; +	fa = xfs_validate_ag_length(bp, agi_seqno, agi_length); +	if (fa) +		return fa; +  	if (be32_to_cpu(agi->agi_level) < 1 ||  	    be32_to_cpu(agi->agi_level) > M_IGEO(mp)->inobt_maxlevels)  		return __this_address; @@ -2516,15 +2523,6 @@ xfs_agi_verify(  	     be32_to_cpu(agi->agi_free_level) > M_IGEO(mp)->inobt_maxlevels))  		return __this_address; -	/* -	 * during growfs operations, the perag is not fully initialised, -	 * so we can't use it for any useful checking. growfs ensures we can't -	 * use it by using uncached buffers that don't have the perag attached -	 * so we can detect and avoid this problem. 
-	 */ -	if (bp->b_pag && be32_to_cpu(agi->agi_seqno) != bp->b_pag->pag_agno) -		return __this_address; -  	for (i = 0; i < XFS_AGI_UNLINKED_BUCKETS; i++) {  		if (agi->agi_unlinked[i] == cpu_to_be32(NULLAGINO))  			continue; diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.c b/fs/xfs/libxfs/xfs_ialloc_btree.c index 5a945ae21b5d..9258f01c0015 100644 --- a/fs/xfs/libxfs/xfs_ialloc_btree.c +++ b/fs/xfs/libxfs/xfs_ialloc_btree.c @@ -160,8 +160,7 @@ __xfs_inobt_free_block(  	xfs_inobt_mod_blockcount(cur, -1);  	fsbno = XFS_DADDR_TO_FSB(cur->bc_mp, xfs_buf_daddr(bp)); -	return xfs_free_extent(cur->bc_tp, cur->bc_ag.pag, -			XFS_FSB_TO_AGBNO(cur->bc_mp, fsbno), 1, +	return xfs_free_extent_later(cur->bc_tp, fsbno, 1,  			&XFS_RMAP_OINFO_INOBT, resv);  } diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c index b6e21433925c..646b3fa362ad 100644 --- a/fs/xfs/libxfs/xfs_refcount.c +++ b/fs/xfs/libxfs/xfs_refcount.c @@ -1152,7 +1152,8 @@ xfs_refcount_adjust_extents(  						cur->bc_ag.pag->pag_agno,  						tmp.rc_startblock);  				error = xfs_free_extent_later(cur->bc_tp, fsbno, -						  tmp.rc_blockcount, NULL); +						  tmp.rc_blockcount, NULL, +						  XFS_AG_RESV_NONE);  				if (error)  					goto out_error;  			} @@ -1213,7 +1214,8 @@ xfs_refcount_adjust_extents(  					cur->bc_ag.pag->pag_agno,  					ext.rc_startblock);  			error = xfs_free_extent_later(cur->bc_tp, fsbno, -					ext.rc_blockcount, NULL); +					ext.rc_blockcount, NULL, +					XFS_AG_RESV_NONE);  			if (error)  				goto out_error;  		} @@ -1919,8 +1921,13 @@ xfs_refcount_recover_cow_leftovers(  	struct xfs_buf			*agbp;  	struct xfs_refcount_recovery	*rr, *n;  	struct list_head		debris; -	union xfs_btree_irec		low; -	union xfs_btree_irec		high; +	union xfs_btree_irec		low = { +		.rc.rc_domain		= XFS_REFC_DOMAIN_COW, +	}; +	union xfs_btree_irec		high = { +		.rc.rc_domain		= XFS_REFC_DOMAIN_COW, +		.rc.rc_startblock	= -1U, +	};  	xfs_fsblock_t			fsb;  	int				error; @@ -1951,10 +1958,6 @@ 
xfs_refcount_recover_cow_leftovers(  	cur = xfs_refcountbt_init_cursor(mp, tp, agbp, pag);  	/* Find all the leftover CoW staging extents. */ -	memset(&low, 0, sizeof(low)); -	memset(&high, 0, sizeof(high)); -	low.rc.rc_domain = high.rc.rc_domain = XFS_REFC_DOMAIN_COW; -	high.rc.rc_startblock = -1U;  	error = xfs_btree_query_range(cur, &low, &high,  			xfs_refcount_recover_extent, &debris);  	xfs_btree_del_cursor(cur, error); @@ -1981,7 +1984,8 @@ xfs_refcount_recover_cow_leftovers(  		/* Free the block. */  		error = xfs_free_extent_later(tp, fsb, -				rr->rr_rrec.rc_blockcount, NULL); +				rr->rr_rrec.rc_blockcount, NULL, +				XFS_AG_RESV_NONE);  		if (error)  			goto out_trans; diff --git a/fs/xfs/libxfs/xfs_refcount_btree.c b/fs/xfs/libxfs/xfs_refcount_btree.c index d4afc5f4e6a5..5c3987d8dc24 100644 --- a/fs/xfs/libxfs/xfs_refcount_btree.c +++ b/fs/xfs/libxfs/xfs_refcount_btree.c @@ -106,19 +106,13 @@ xfs_refcountbt_free_block(  	struct xfs_buf		*agbp = cur->bc_ag.agbp;  	struct xfs_agf		*agf = agbp->b_addr;  	xfs_fsblock_t		fsbno = XFS_DADDR_TO_FSB(mp, xfs_buf_daddr(bp)); -	int			error;  	trace_xfs_refcountbt_free_block(cur->bc_mp, cur->bc_ag.pag->pag_agno,  			XFS_FSB_TO_AGBNO(cur->bc_mp, fsbno), 1);  	be32_add_cpu(&agf->agf_refcount_blocks, -1);  	xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_REFCOUNT_BLOCKS); -	error = xfs_free_extent(cur->bc_tp, cur->bc_ag.pag, -			XFS_FSB_TO_AGBNO(cur->bc_mp, fsbno), 1, +	return xfs_free_extent_later(cur->bc_tp, fsbno, 1,  			&XFS_RMAP_OINFO_REFC, XFS_AG_RESV_METADATA); -	if (error) -		return error; - -	return error;  }  STATIC int diff --git a/fs/xfs/libxfs/xfs_rmap.c b/fs/xfs/libxfs/xfs_rmap.c index f4dc23b3b837..fbb0b2637463 100644 --- a/fs/xfs/libxfs/xfs_rmap.c +++ b/fs/xfs/libxfs/xfs_rmap.c @@ -2389,14 +2389,10 @@ xfs_rmap_query_range(  	xfs_rmap_query_range_fn			fn,  	void					*priv)  { -	union xfs_btree_irec			low_brec; -	union xfs_btree_irec			high_brec; -	struct xfs_rmap_query_range_info	query; +	union 
xfs_btree_irec			low_brec = { .r = *low_rec }; +	union xfs_btree_irec			high_brec = { .r = *high_rec }; +	struct xfs_rmap_query_range_info	query = { .priv = priv, .fn = fn }; -	low_brec.r = *low_rec; -	high_brec.r = *high_rec; -	query.priv = priv; -	query.fn = fn;  	return xfs_btree_query_range(cur, &low_brec, &high_brec,  			xfs_rmap_query_range_helper, &query);  } diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c index ba0f17bc1dc0..5e174685a77c 100644 --- a/fs/xfs/libxfs/xfs_sb.c +++ b/fs/xfs/libxfs/xfs_sb.c @@ -412,7 +412,6 @@ xfs_validate_sb_common(  	    sbp->sb_inodelog < XFS_DINODE_MIN_LOG			||  	    sbp->sb_inodelog > XFS_DINODE_MAX_LOG			||  	    sbp->sb_inodesize != (1 << sbp->sb_inodelog)		|| -	    sbp->sb_logsunit > XLOG_MAX_RECORD_BSIZE			||  	    sbp->sb_inopblock != howmany(sbp->sb_blocksize,sbp->sb_inodesize) ||  	    XFS_FSB_TO_B(mp, sbp->sb_agblocks) < XFS_MIN_AG_BYTES	||  	    XFS_FSB_TO_B(mp, sbp->sb_agblocks) > XFS_MAX_AG_BYTES	|| @@ -430,6 +429,61 @@ xfs_validate_sb_common(  		return -EFSCORRUPTED;  	} +	/* +	 * Logs that are too large are not supported at all. Reject them +	 * outright. Logs that are too small are tolerated on v4 filesystems, +	 * but we can only check that when mounting the log. Hence we skip +	 * those checks here. +	 */ +	if (sbp->sb_logblocks > XFS_MAX_LOG_BLOCKS) { +		xfs_notice(mp, +		"Log size 0x%x blocks too large, maximum size is 0x%llx blocks", +			 sbp->sb_logblocks, XFS_MAX_LOG_BLOCKS); +		return -EFSCORRUPTED; +	} + +	if (XFS_FSB_TO_B(mp, sbp->sb_logblocks) > XFS_MAX_LOG_BYTES) { +		xfs_warn(mp, +		"log size 0x%llx bytes too large, maximum size is 0x%llx bytes", +			 XFS_FSB_TO_B(mp, sbp->sb_logblocks), +			 XFS_MAX_LOG_BYTES); +		return -EFSCORRUPTED; +	} + +	/* +	 * Do not allow filesystems with corrupted log sector or stripe units to +	 * be mounted. We cannot safely size the iclogs or write to the log if +	 * the log stripe unit is not valid. 
+	 */ +	if (sbp->sb_versionnum & XFS_SB_VERSION_SECTORBIT) { +		if (sbp->sb_logsectsize != (1U << sbp->sb_logsectlog)) { +			xfs_notice(mp, +			"log sector size in bytes/log2 (0x%x/0x%x) must match", +				sbp->sb_logsectsize, 1U << sbp->sb_logsectlog); +			return -EFSCORRUPTED; +		} +	} else if (sbp->sb_logsectsize || sbp->sb_logsectlog) { +		xfs_notice(mp, +		"log sector size in bytes/log2 (0x%x/0x%x) are not zero", +			sbp->sb_logsectsize, sbp->sb_logsectlog); +		return -EFSCORRUPTED; +	} + +	if (sbp->sb_logsunit > 1) { +		if (sbp->sb_logsunit % sbp->sb_blocksize) { +			xfs_notice(mp, +		"log stripe unit 0x%x bytes must be a multiple of block size", +				sbp->sb_logsunit); +			return -EFSCORRUPTED; +		} +		if (sbp->sb_logsunit > XLOG_MAX_RECORD_BSIZE) { +			xfs_notice(mp, +		"log stripe unit 0x%x bytes over maximum size (0x%x bytes)", +				sbp->sb_logsunit, XLOG_MAX_RECORD_BSIZE); +			return -EFSCORRUPTED; +		} +	} +  	/* Validate the realtime geometry; stolen from xfs_repair */  	if (sbp->sb_rextsize * sbp->sb_blocksize > XFS_MAX_RTEXTSIZE ||  	    sbp->sb_rextsize * sbp->sb_blocksize < XFS_MIN_RTEXTSIZE) { diff --git a/fs/xfs/xfs_extent_busy.c b/fs/xfs/xfs_extent_busy.c index f3d328e4a440..7c2fdc71e42d 100644 --- a/fs/xfs/xfs_extent_busy.c +++ b/fs/xfs/xfs_extent_busy.c @@ -566,20 +566,45 @@ xfs_extent_busy_clear(  /*   * Flush out all busy extents for this AG. + * + * If the current transaction is holding busy extents, the caller may not want + * to wait for committed busy extents to resolve. If we are being told just to + * try a flush or progress has been made since we last skipped a busy extent, + * return immediately to allow the caller to try again. + * + * If we are freeing extents, we might actually be holding the only free extents + * in the transaction busy list and the log force won't resolve that situation. 
+ * In this case, we must return -EAGAIN to avoid a deadlock by informing the + * caller it needs to commit the busy extents it holds before retrying the + * extent free operation.   */ -void +int  xfs_extent_busy_flush( -	struct xfs_mount	*mp, +	struct xfs_trans	*tp,  	struct xfs_perag	*pag, -	unsigned		busy_gen) +	unsigned		busy_gen, +	uint32_t		alloc_flags)  {  	DEFINE_WAIT		(wait);  	int			error; -	error = xfs_log_force(mp, XFS_LOG_SYNC); +	error = xfs_log_force(tp->t_mountp, XFS_LOG_SYNC);  	if (error) -		return; +		return error; + +	/* Avoid deadlocks on uncommitted busy extents. */ +	if (!list_empty(&tp->t_busy)) { +		if (alloc_flags & XFS_ALLOC_FLAG_TRYFLUSH) +			return 0; + +		if (busy_gen != READ_ONCE(pag->pagb_gen)) +			return 0; + +		if (alloc_flags & XFS_ALLOC_FLAG_FREEING) +			return -EAGAIN; +	} +	/* Wait for committed busy extents to resolve. */  	do {  		prepare_to_wait(&pag->pagb_wait, &wait, TASK_KILLABLE);  		if  (busy_gen != READ_ONCE(pag->pagb_gen)) @@ -588,6 +613,7 @@ xfs_extent_busy_flush(  	} while (1);  	finish_wait(&pag->pagb_wait, &wait); +	return 0;  }  void diff --git a/fs/xfs/xfs_extent_busy.h b/fs/xfs/xfs_extent_busy.h index 4a118131059f..c37bf87e6781 100644 --- a/fs/xfs/xfs_extent_busy.h +++ b/fs/xfs/xfs_extent_busy.h @@ -51,9 +51,9 @@ bool  xfs_extent_busy_trim(struct xfs_alloc_arg *args, xfs_agblock_t *bno,  		xfs_extlen_t *len, unsigned *busy_gen); -void -xfs_extent_busy_flush(struct xfs_mount *mp, struct xfs_perag *pag, -	unsigned busy_gen); +int +xfs_extent_busy_flush(struct xfs_trans *tp, struct xfs_perag *pag, +		unsigned busy_gen, uint32_t alloc_flags);  void  xfs_extent_busy_wait_all(struct xfs_mount *mp); diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c index f9e36b810663..f1a5ecf099aa 100644 --- a/fs/xfs/xfs_extfree_item.c +++ b/fs/xfs/xfs_extfree_item.c @@ -337,6 +337,34 @@ xfs_trans_get_efd(  }  /* + * Fill the EFD with all extents from the EFI when we need to roll the + * transaction and continue with 
a new EFI. + * + * This simply copies all the extents in the EFI to the EFD rather than make + * assumptions about which extents in the EFI have already been processed. We + * currently keep the xefi list in the same order as the EFI extent list, but + * that may not always be the case. Copying everything avoids leaving a landmine + * where we fail to cancel all the extents in an EFI if the xefi list is + * processed in a different order to the extents in the EFI. + */ +static void +xfs_efd_from_efi( +	struct xfs_efd_log_item	*efdp) +{ +	struct xfs_efi_log_item *efip = efdp->efd_efip; +	uint                    i; + +	ASSERT(efip->efi_format.efi_nextents > 0); +	ASSERT(efdp->efd_next_extent < efip->efi_format.efi_nextents); + +	for (i = 0; i < efip->efi_format.efi_nextents; i++) { +	       efdp->efd_format.efd_extents[i] = +		       efip->efi_format.efi_extents[i]; +	} +	efdp->efd_next_extent = efip->efi_format.efi_nextents; +} + +/*   * Free an extent and log it to the EFD. Note that the transaction is marked   * dirty regardless of whether the extent free succeeds or fails to support the   * EFI/EFD lifecycle rules. @@ -365,7 +393,7 @@ xfs_trans_free_extent(  			agbno, xefi->xefi_blockcount);  	error = __xfs_free_extent(tp, xefi->xefi_pag, agbno, -			xefi->xefi_blockcount, &oinfo, XFS_AG_RESV_NONE, +			xefi->xefi_blockcount, &oinfo, xefi->xefi_agresv,  			xefi->xefi_flags & XFS_EFI_SKIP_DISCARD);  	/* @@ -378,6 +406,17 @@ xfs_trans_free_extent(  	tp->t_flags |= XFS_TRANS_DIRTY | XFS_TRANS_HAS_INTENT_DONE;  	set_bit(XFS_LI_DIRTY, &efdp->efd_item.li_flags); +	/* +	 * If we need a new transaction to make progress, the caller will log a +	 * new EFI with the current contents. It will also log an EFD to cancel +	 * the existing EFI, and so we need to copy all the unprocessed extents +	 * in this EFI to the EFD so this works correctly. 
+	 */ +	if (error == -EAGAIN) { +		xfs_efd_from_efi(efdp); +		return error; +	} +  	next_extent = efdp->efd_next_extent;  	ASSERT(next_extent < efdp->efd_format.efd_nextents);  	extp = &(efdp->efd_format.efd_extents[next_extent]); @@ -495,6 +534,13 @@ xfs_extent_free_finish_item(  	error = xfs_trans_free_extent(tp, EFD_ITEM(done), xefi); +	/* +	 * Don't free the XEFI if we need a new transaction to complete +	 * processing of it. +	 */ +	if (error == -EAGAIN) +		return error; +  	xfs_extent_free_put_group(xefi);  	kmem_cache_free(xfs_extfree_item_cache, xefi);  	return error; @@ -620,6 +666,7 @@ xfs_efi_item_recover(  	struct xfs_trans		*tp;  	int				i;  	int				error = 0; +	bool				requeue_only = false;  	/*  	 * First check the validity of the extents described by the @@ -644,6 +691,7 @@ xfs_efi_item_recover(  	for (i = 0; i < efip->efi_format.efi_nextents; i++) {  		struct xfs_extent_free_item	fake = {  			.xefi_owner		= XFS_RMAP_OWN_UNKNOWN, +			.xefi_agresv		= XFS_AG_RESV_NONE,  		};  		struct xfs_extent		*extp; @@ -652,9 +700,28 @@ xfs_efi_item_recover(  		fake.xefi_startblock = extp->ext_start;  		fake.xefi_blockcount = extp->ext_len; -		xfs_extent_free_get_group(mp, &fake); -		error = xfs_trans_free_extent(tp, efdp, &fake); -		xfs_extent_free_put_group(&fake); +		if (!requeue_only) { +			xfs_extent_free_get_group(mp, &fake); +			error = xfs_trans_free_extent(tp, efdp, &fake); +			xfs_extent_free_put_group(&fake); +		} + +		/* +		 * If we can't free the extent without potentially deadlocking, +		 * requeue the rest of the extents to a new EFI so that they get +		 * run again later with a new transaction context. 
+		 */ +		if (error == -EAGAIN || requeue_only) { +			error = xfs_free_extent_later(tp, fake.xefi_startblock, +					fake.xefi_blockcount, +					&XFS_RMAP_OINFO_ANY_OWNER, +					fake.xefi_agresv); +			if (!error) { +				requeue_only = true; +				continue; +			} +		} +  		if (error == -EFSCORRUPTED)  			XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,  					extp, sizeof(*extp)); diff --git a/fs/xfs/xfs_fsmap.c b/fs/xfs/xfs_fsmap.c index 59e7d1a14b67..10403ba9b58f 100644 --- a/fs/xfs/xfs_fsmap.c +++ b/fs/xfs/xfs_fsmap.c @@ -160,9 +160,18 @@ struct xfs_getfsmap_info {  	struct xfs_buf		*agf_bp;	/* AGF, for refcount queries */  	struct xfs_perag	*pag;		/* AG info, if applicable */  	xfs_daddr_t		next_daddr;	/* next daddr we expect */ +	/* daddr of low fsmap key when we're using the rtbitmap */ +	xfs_daddr_t		low_daddr;  	u64			missing_owner;	/* owner of holes */  	u32			dev;		/* device id */ -	struct xfs_rmap_irec	low;		/* low rmap key */ +	/* +	 * Low rmap key for the query.  If low.rm_blockcount is nonzero, this +	 * is the second (or later) call to retrieve the recordset in pieces. +	 * xfs_getfsmap_rec_before_start will compare all records retrieved +	 * by the rmapbt query to filter out any records that start before +	 * the last record. +	 */ +	struct xfs_rmap_irec	low;  	struct xfs_rmap_irec	high;		/* high rmap key */  	bool			last;		/* last extent? */  }; @@ -237,16 +246,31 @@ xfs_getfsmap_format(  	xfs_fsmap_from_internal(rec, xfm);  } +static inline bool +xfs_getfsmap_rec_before_start( +	struct xfs_getfsmap_info	*info, +	const struct xfs_rmap_irec	*rec, +	xfs_daddr_t			rec_daddr) +{ +	if (info->low_daddr != -1ULL) +		return rec_daddr < info->low_daddr; +	if (info->low.rm_blockcount) +		return xfs_rmap_compare(rec, &info->low) < 0; +	return false; +} +  /*   * Format a reverse mapping for getfsmap, having translated rm_startblock - * into the appropriate daddr units. + * into the appropriate daddr units.  
Pass in a nonzero @len_daddr if the + * length could be larger than rm_blockcount in struct xfs_rmap_irec.   */  STATIC int  xfs_getfsmap_helper(  	struct xfs_trans		*tp,  	struct xfs_getfsmap_info	*info,  	const struct xfs_rmap_irec	*rec, -	xfs_daddr_t			rec_daddr) +	xfs_daddr_t			rec_daddr, +	xfs_daddr_t			len_daddr)  {  	struct xfs_fsmap		fmr;  	struct xfs_mount		*mp = tp->t_mountp; @@ -256,12 +280,15 @@ xfs_getfsmap_helper(  	if (fatal_signal_pending(current))  		return -EINTR; +	if (len_daddr == 0) +		len_daddr = XFS_FSB_TO_BB(mp, rec->rm_blockcount); +  	/*  	 * Filter out records that start before our startpoint, if the  	 * caller requested that.  	 */ -	if (xfs_rmap_compare(rec, &info->low) < 0) { -		rec_daddr += XFS_FSB_TO_BB(mp, rec->rm_blockcount); +	if (xfs_getfsmap_rec_before_start(info, rec, rec_daddr)) { +		rec_daddr += len_daddr;  		if (info->next_daddr < rec_daddr)  			info->next_daddr = rec_daddr;  		return 0; @@ -280,7 +307,7 @@ xfs_getfsmap_helper(  		info->head->fmh_entries++; -		rec_daddr += XFS_FSB_TO_BB(mp, rec->rm_blockcount); +		rec_daddr += len_daddr;  		if (info->next_daddr < rec_daddr)  			info->next_daddr = rec_daddr;  		return 0; @@ -320,7 +347,7 @@ xfs_getfsmap_helper(  	if (error)  		return error;  	fmr.fmr_offset = XFS_FSB_TO_BB(mp, rec->rm_offset); -	fmr.fmr_length = XFS_FSB_TO_BB(mp, rec->rm_blockcount); +	fmr.fmr_length = len_daddr;  	if (rec->rm_flags & XFS_RMAP_UNWRITTEN)  		fmr.fmr_flags |= FMR_OF_PREALLOC;  	if (rec->rm_flags & XFS_RMAP_ATTR_FORK) @@ -337,7 +364,7 @@ xfs_getfsmap_helper(  	xfs_getfsmap_format(mp, &fmr, info);  out: -	rec_daddr += XFS_FSB_TO_BB(mp, rec->rm_blockcount); +	rec_daddr += len_daddr;  	if (info->next_daddr < rec_daddr)  		info->next_daddr = rec_daddr;  	return 0; @@ -358,7 +385,7 @@ xfs_getfsmap_datadev_helper(  	fsb = XFS_AGB_TO_FSB(mp, cur->bc_ag.pag->pag_agno, rec->rm_startblock);  	rec_daddr = XFS_FSB_TO_DADDR(mp, fsb); -	return xfs_getfsmap_helper(cur->bc_tp, info, rec, rec_daddr); +	return 
xfs_getfsmap_helper(cur->bc_tp, info, rec, rec_daddr, 0);  }  /* Transform a bnobt irec into a fsmap */ @@ -382,7 +409,7 @@ xfs_getfsmap_datadev_bnobt_helper(  	irec.rm_offset = 0;  	irec.rm_flags = 0; -	return xfs_getfsmap_helper(cur->bc_tp, info, &irec, rec_daddr); +	return xfs_getfsmap_helper(cur->bc_tp, info, &irec, rec_daddr, 0);  }  /* Set rmap flags based on the getfsmap flags */ @@ -409,31 +436,25 @@ xfs_getfsmap_logdev(  {  	struct xfs_mount		*mp = tp->t_mountp;  	struct xfs_rmap_irec		rmap; -	int				error; +	xfs_daddr_t			rec_daddr, len_daddr; +	xfs_fsblock_t			start_fsb, end_fsb; +	uint64_t			eofs; -	/* Set up search keys */ -	info->low.rm_startblock = XFS_BB_TO_FSBT(mp, keys[0].fmr_physical); -	info->low.rm_offset = XFS_BB_TO_FSBT(mp, keys[0].fmr_offset); -	error = xfs_fsmap_owner_to_rmap(&info->low, keys); -	if (error) -		return error; -	info->low.rm_blockcount = 0; -	xfs_getfsmap_set_irec_flags(&info->low, &keys[0]); +	eofs = XFS_FSB_TO_BB(mp, mp->m_sb.sb_logblocks); +	if (keys[0].fmr_physical >= eofs) +		return 0; +	start_fsb = XFS_BB_TO_FSBT(mp, +				keys[0].fmr_physical + keys[0].fmr_length); +	end_fsb = XFS_BB_TO_FSB(mp, min(eofs - 1, keys[1].fmr_physical)); -	error = xfs_fsmap_owner_to_rmap(&info->high, keys + 1); -	if (error) -		return error; -	info->high.rm_startblock = -1U; -	info->high.rm_owner = ULLONG_MAX; -	info->high.rm_offset = ULLONG_MAX; -	info->high.rm_blockcount = 0; -	info->high.rm_flags = XFS_RMAP_KEY_FLAGS | XFS_RMAP_REC_FLAGS; -	info->missing_owner = XFS_FMR_OWN_FREE; +	/* Adjust the low key if we are continuing from where we left off. 
*/ +	if (keys[0].fmr_length > 0) +		info->low_daddr = XFS_FSB_TO_BB(mp, start_fsb); -	trace_xfs_fsmap_low_key(mp, info->dev, NULLAGNUMBER, &info->low); -	trace_xfs_fsmap_high_key(mp, info->dev, NULLAGNUMBER, &info->high); +	trace_xfs_fsmap_low_key_linear(mp, info->dev, start_fsb); +	trace_xfs_fsmap_high_key_linear(mp, info->dev, end_fsb); -	if (keys[0].fmr_physical > 0) +	if (start_fsb > 0)  		return 0;  	/* Fabricate an rmap entry for the external log device. */ @@ -443,7 +464,9 @@ xfs_getfsmap_logdev(  	rmap.rm_offset = 0;  	rmap.rm_flags = 0; -	return xfs_getfsmap_helper(tp, info, &rmap, 0); +	rec_daddr = XFS_FSB_TO_BB(mp, rmap.rm_startblock); +	len_daddr = XFS_FSB_TO_BB(mp, rmap.rm_blockcount); +	return xfs_getfsmap_helper(tp, info, &rmap, rec_daddr, len_daddr);  }  #ifdef CONFIG_XFS_RT @@ -457,72 +480,58 @@ xfs_getfsmap_rtdev_rtbitmap_helper(  {  	struct xfs_getfsmap_info	*info = priv;  	struct xfs_rmap_irec		irec; -	xfs_daddr_t			rec_daddr; +	xfs_rtblock_t			rtbno; +	xfs_daddr_t			rec_daddr, len_daddr; + +	rtbno = rec->ar_startext * mp->m_sb.sb_rextsize; +	rec_daddr = XFS_FSB_TO_BB(mp, rtbno); +	irec.rm_startblock = rtbno; + +	rtbno = rec->ar_extcount * mp->m_sb.sb_rextsize; +	len_daddr = XFS_FSB_TO_BB(mp, rtbno); +	irec.rm_blockcount = rtbno; -	irec.rm_startblock = rec->ar_startext * mp->m_sb.sb_rextsize; -	rec_daddr = XFS_FSB_TO_BB(mp, irec.rm_startblock); -	irec.rm_blockcount = rec->ar_extcount * mp->m_sb.sb_rextsize;  	irec.rm_owner = XFS_RMAP_OWN_NULL;	/* "free" */  	irec.rm_offset = 0;  	irec.rm_flags = 0; -	return xfs_getfsmap_helper(tp, info, &irec, rec_daddr); +	return xfs_getfsmap_helper(tp, info, &irec, rec_daddr, len_daddr);  } -/* Execute a getfsmap query against the realtime device. */ +/* Execute a getfsmap query against the realtime device rtbitmap. 
*/  STATIC int -__xfs_getfsmap_rtdev( +xfs_getfsmap_rtdev_rtbitmap(  	struct xfs_trans		*tp,  	const struct xfs_fsmap		*keys, -	int				(*query_fn)(struct xfs_trans *, -						    struct xfs_getfsmap_info *),  	struct xfs_getfsmap_info	*info)  { + +	struct xfs_rtalloc_rec		alow = { 0 }; +	struct xfs_rtalloc_rec		ahigh = { 0 };  	struct xfs_mount		*mp = tp->t_mountp; -	xfs_fsblock_t			start_fsb; -	xfs_fsblock_t			end_fsb; +	xfs_rtblock_t			start_rtb; +	xfs_rtblock_t			end_rtb;  	uint64_t			eofs; -	int				error = 0; +	int				error; -	eofs = XFS_FSB_TO_BB(mp, mp->m_sb.sb_rblocks); +	eofs = XFS_FSB_TO_BB(mp, mp->m_sb.sb_rextents * mp->m_sb.sb_rextsize);  	if (keys[0].fmr_physical >= eofs)  		return 0; -	start_fsb = XFS_BB_TO_FSBT(mp, keys[0].fmr_physical); -	end_fsb = XFS_BB_TO_FSB(mp, min(eofs - 1, keys[1].fmr_physical)); - -	/* Set up search keys */ -	info->low.rm_startblock = start_fsb; -	error = xfs_fsmap_owner_to_rmap(&info->low, &keys[0]); -	if (error) -		return error; -	info->low.rm_offset = XFS_BB_TO_FSBT(mp, keys[0].fmr_offset); -	info->low.rm_blockcount = 0; -	xfs_getfsmap_set_irec_flags(&info->low, &keys[0]); +	start_rtb = XFS_BB_TO_FSBT(mp, +				keys[0].fmr_physical + keys[0].fmr_length); +	end_rtb = XFS_BB_TO_FSB(mp, min(eofs - 1, keys[1].fmr_physical)); -	info->high.rm_startblock = end_fsb; -	error = xfs_fsmap_owner_to_rmap(&info->high, &keys[1]); -	if (error) -		return error; -	info->high.rm_offset = XFS_BB_TO_FSBT(mp, keys[1].fmr_offset); -	info->high.rm_blockcount = 0; -	xfs_getfsmap_set_irec_flags(&info->high, &keys[1]); - -	trace_xfs_fsmap_low_key(mp, info->dev, NULLAGNUMBER, &info->low); -	trace_xfs_fsmap_high_key(mp, info->dev, NULLAGNUMBER, &info->high); +	info->missing_owner = XFS_FMR_OWN_UNKNOWN; -	return query_fn(tp, info); -} +	/* Adjust the low key if we are continuing from where we left off. 
*/ +	if (keys[0].fmr_length > 0) { +		info->low_daddr = XFS_FSB_TO_BB(mp, start_rtb); +		if (info->low_daddr >= eofs) +			return 0; +	} -/* Actually query the realtime bitmap. */ -STATIC int -xfs_getfsmap_rtdev_rtbitmap_query( -	struct xfs_trans		*tp, -	struct xfs_getfsmap_info	*info) -{ -	struct xfs_rtalloc_rec		alow = { 0 }; -	struct xfs_rtalloc_rec		ahigh = { 0 }; -	struct xfs_mount		*mp = tp->t_mountp; -	int				error; +	trace_xfs_fsmap_low_key_linear(mp, info->dev, start_rtb); +	trace_xfs_fsmap_high_key_linear(mp, info->dev, end_rtb);  	xfs_ilock(mp->m_rbmip, XFS_ILOCK_SHARED | XFS_ILOCK_RTBITMAP); @@ -530,8 +539,8 @@ xfs_getfsmap_rtdev_rtbitmap_query(  	 * Set up query parameters to return free rtextents covering the range  	 * we want.  	 */ -	alow.ar_startext = info->low.rm_startblock; -	ahigh.ar_startext = info->high.rm_startblock; +	alow.ar_startext = start_rtb; +	ahigh.ar_startext = end_rtb;  	do_div(alow.ar_startext, mp->m_sb.sb_rextsize);  	if (do_div(ahigh.ar_startext, mp->m_sb.sb_rextsize))  		ahigh.ar_startext++; @@ -554,18 +563,6 @@ err:  	xfs_iunlock(mp->m_rbmip, XFS_ILOCK_SHARED | XFS_ILOCK_RTBITMAP);  	return error;  } - -/* Execute a getfsmap query against the realtime device rtbitmap. */ -STATIC int -xfs_getfsmap_rtdev_rtbitmap( -	struct xfs_trans		*tp, -	const struct xfs_fsmap		*keys, -	struct xfs_getfsmap_info	*info) -{ -	info->missing_owner = XFS_FMR_OWN_UNKNOWN; -	return __xfs_getfsmap_rtdev(tp, keys, xfs_getfsmap_rtdev_rtbitmap_query, -			info); -}  #endif /* CONFIG_XFS_RT */  /* Execute a getfsmap query against the regular data device. */ @@ -606,9 +603,27 @@ __xfs_getfsmap_datadev(  	error = xfs_fsmap_owner_to_rmap(&info->low, &keys[0]);  	if (error)  		return error; -	info->low.rm_blockcount = 0; +	info->low.rm_blockcount = XFS_BB_TO_FSBT(mp, keys[0].fmr_length);  	xfs_getfsmap_set_irec_flags(&info->low, &keys[0]); +	/* Adjust the low key if we are continuing from where we left off. 
*/ +	if (info->low.rm_blockcount == 0) { +		/* empty */ +	} else if (XFS_RMAP_NON_INODE_OWNER(info->low.rm_owner) || +		   (info->low.rm_flags & (XFS_RMAP_ATTR_FORK | +					  XFS_RMAP_BMBT_BLOCK | +					  XFS_RMAP_UNWRITTEN))) { +		info->low.rm_startblock += info->low.rm_blockcount; +		info->low.rm_owner = 0; +		info->low.rm_offset = 0; + +		start_fsb += info->low.rm_blockcount; +		if (XFS_FSB_TO_DADDR(mp, start_fsb) >= eofs) +			return 0; +	} else { +		info->low.rm_offset += info->low.rm_blockcount; +	} +  	info->high.rm_startblock = -1U;  	info->high.rm_owner = ULLONG_MAX;  	info->high.rm_offset = ULLONG_MAX; @@ -659,12 +674,8 @@ __xfs_getfsmap_datadev(  		 * Set the AG low key to the start of the AG prior to  		 * moving on to the next AG.  		 */ -		if (pag->pag_agno == start_ag) { -			info->low.rm_startblock = 0; -			info->low.rm_owner = 0; -			info->low.rm_offset = 0; -			info->low.rm_flags = 0; -		} +		if (pag->pag_agno == start_ag) +			memset(&info->low, 0, sizeof(info->low));  		/*  		 * If this is the last AG, report any gap at the end of it @@ -791,6 +802,19 @@ xfs_getfsmap_check_keys(  	struct xfs_fsmap		*low_key,  	struct xfs_fsmap		*high_key)  { +	if (low_key->fmr_flags & (FMR_OF_SPECIAL_OWNER | FMR_OF_EXTENT_MAP)) { +		if (low_key->fmr_offset) +			return false; +	} +	if (high_key->fmr_flags != -1U && +	    (high_key->fmr_flags & (FMR_OF_SPECIAL_OWNER | +				    FMR_OF_EXTENT_MAP))) { +		if (high_key->fmr_offset && high_key->fmr_offset != -1ULL) +			return false; +	} +	if (high_key->fmr_length && high_key->fmr_length != -1ULL) +		return false; +  	if (low_key->fmr_device > high_key->fmr_device)  		return false;  	if (low_key->fmr_device < high_key->fmr_device) @@ -834,15 +858,15 @@ xfs_getfsmap_check_keys(   * ----------------   * There are multiple levels of keys and counters at work here:   * xfs_fsmap_head.fmh_keys	-- low and high fsmap keys passed in; - * 				   these reflect fs-wide sector addrs. + *				   these reflect fs-wide sector addrs.   
* dkeys			-- fmh_keys used to query each device; - * 				   these are fmh_keys but w/ the low key - * 				   bumped up by fmr_length. + *				   these are fmh_keys but w/ the low key + *				   bumped up by fmr_length.   * xfs_getfsmap_info.next_daddr	-- next disk addr we expect to see; this   *				   is how we detect gaps in the fsmap  				   records and report them.   * xfs_getfsmap_info.low/high	-- per-AG low/high keys computed from - * 				   dkeys; used to query the metadata. + *				   dkeys; used to query the metadata.   */  int  xfs_getfsmap( @@ -863,6 +887,8 @@ xfs_getfsmap(  	if (!xfs_getfsmap_is_valid_device(mp, &head->fmh_keys[0]) ||  	    !xfs_getfsmap_is_valid_device(mp, &head->fmh_keys[1]))  		return -EINVAL; +	if (!xfs_getfsmap_check_keys(&head->fmh_keys[0], &head->fmh_keys[1])) +		return -EINVAL;  	use_rmap = xfs_has_rmapbt(mp) &&  		   has_capability_noaudit(current, CAP_SYS_ADMIN); @@ -901,26 +927,15 @@ xfs_getfsmap(  	 * blocks could be mapped to several other files/offsets.  	 * According to rmapbt record ordering, the minimal next  	 * possible record for the block range is the next starting -	 * offset in the same inode. Therefore, bump the file offset to -	 * continue the search appropriately.  For all other low key -	 * mapping types (attr blocks, metadata), bump the physical -	 * offset as there can be no other mapping for the same physical -	 * block range. +	 * offset in the same inode. Therefore, each fsmap backend bumps +	 * the file offset to continue the search appropriately.  For +	 * all other low key mapping types (attr blocks, metadata), each +	 * fsmap backend bumps the physical offset as there can be no +	 * other mapping for the same physical block range.  	 
*/  	dkeys[0] = head->fmh_keys[0]; -	if (dkeys[0].fmr_flags & (FMR_OF_SPECIAL_OWNER | FMR_OF_EXTENT_MAP)) { -		dkeys[0].fmr_physical += dkeys[0].fmr_length; -		dkeys[0].fmr_owner = 0; -		if (dkeys[0].fmr_offset) -			return -EINVAL; -	} else -		dkeys[0].fmr_offset += dkeys[0].fmr_length; -	dkeys[0].fmr_length = 0;  	memset(&dkeys[1], 0xFF, sizeof(struct xfs_fsmap)); -	if (!xfs_getfsmap_check_keys(dkeys, &head->fmh_keys[1])) -		return -EINVAL; -  	info.next_daddr = head->fmh_keys[0].fmr_physical +  			  head->fmh_keys[0].fmr_length;  	info.fsmap_recs = fsmap_recs; @@ -960,6 +975,8 @@ xfs_getfsmap(  		info.dev = handlers[i].dev;  		info.last = false;  		info.pag = NULL; +		info.low_daddr = -1ULL; +		info.low.rm_blockcount = 0;  		error = handlers[i].fn(tp, dkeys, &info);  		if (error)  			break; diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index fc61cc024023..79004d193e54 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -639,7 +639,6 @@ xfs_log_mount(  	int		num_bblks)  {  	struct xlog	*log; -	bool		fatal = xfs_has_crc(mp);  	int		error = 0;  	int		min_logfsbs; @@ -663,53 +662,37 @@ xfs_log_mount(  	mp->m_log = log;  	/* -	 * Validate the given log space and drop a critical message via syslog -	 * if the log size is too small that would lead to some unexpected -	 * situations in transaction log space reservation stage. +	 * Now that we have set up the log and it's internal geometry +	 * parameters, we can validate the given log space and drop a critical +	 * message via syslog if the log size is too small. A log that is too +	 * small can lead to unexpected situations in transaction log space +	 * reservation stage. The superblock verifier has already validated all +	 * the other log geometry constraints, so we don't have to check those +	 * here.  	 * -	 * Note: we can't just reject the mount if the validation fails.  
This -	 * would mean that people would have to downgrade their kernel just to -	 * remedy the situation as there is no way to grow the log (short of -	 * black magic surgery with xfs_db). +	 * Note: For v4 filesystems, we can't just reject the mount if the +	 * validation fails.  This would mean that people would have to +	 * downgrade their kernel just to remedy the situation as there is no +	 * way to grow the log (short of black magic surgery with xfs_db).  	 * -	 * We can, however, reject mounts for CRC format filesystems, as the +	 * We can, however, reject mounts for V5 format filesystems, as the  	 * mkfs binary being used to make the filesystem should never create a  	 * filesystem with a log that is too small.  	 */  	min_logfsbs = xfs_log_calc_minimum_size(mp); -  	if (mp->m_sb.sb_logblocks < min_logfsbs) {  		xfs_warn(mp,  		"Log size %d blocks too small, minimum size is %d blocks",  			 mp->m_sb.sb_logblocks, min_logfsbs); -		error = -EINVAL; -	} else if (mp->m_sb.sb_logblocks > XFS_MAX_LOG_BLOCKS) { -		xfs_warn(mp, -		"Log size %d blocks too large, maximum size is %lld blocks", -			 mp->m_sb.sb_logblocks, XFS_MAX_LOG_BLOCKS); -		error = -EINVAL; -	} else if (XFS_FSB_TO_B(mp, mp->m_sb.sb_logblocks) > XFS_MAX_LOG_BYTES) { -		xfs_warn(mp, -		"log size %lld bytes too large, maximum size is %lld bytes", -			 XFS_FSB_TO_B(mp, mp->m_sb.sb_logblocks), -			 XFS_MAX_LOG_BYTES); -		error = -EINVAL; -	} else if (mp->m_sb.sb_logsunit > 1 && -		   mp->m_sb.sb_logsunit % mp->m_sb.sb_blocksize) { -		xfs_warn(mp, -		"log stripe unit %u bytes must be a multiple of block size", -			 mp->m_sb.sb_logsunit); -		error = -EINVAL; -		fatal = true; -	} -	if (error) { +  		/*  		 * Log check errors are always fatal on v5; or whenever bad  		 * metadata leads to a crash.  		 */ -		if (fatal) { +		if (xfs_has_crc(mp)) {  			xfs_crit(mp, "AAIEEE! Log failed size checks. 
Abort!");  			ASSERT(0); +			error = -EINVAL;  			goto out_free_log;  		}  		xfs_crit(mp, "Log size out of supported range."); diff --git a/fs/xfs/xfs_notify_failure.c b/fs/xfs/xfs_notify_failure.c index c4078d0ec108..4a9bbd3fe120 100644 --- a/fs/xfs/xfs_notify_failure.c +++ b/fs/xfs/xfs_notify_failure.c @@ -114,7 +114,8 @@ xfs_dax_notify_ddev_failure(  	int			error = 0;  	xfs_fsblock_t		fsbno = XFS_DADDR_TO_FSB(mp, daddr);  	xfs_agnumber_t		agno = XFS_FSB_TO_AGNO(mp, fsbno); -	xfs_fsblock_t		end_fsbno = XFS_DADDR_TO_FSB(mp, daddr + bblen); +	xfs_fsblock_t		end_fsbno = XFS_DADDR_TO_FSB(mp, +							     daddr + bblen - 1);  	xfs_agnumber_t		end_agno = XFS_FSB_TO_AGNO(mp, end_fsbno);  	error = xfs_trans_alloc_empty(mp, &tp); @@ -210,7 +211,7 @@ xfs_dax_notify_failure(  	ddev_end = ddev_start + bdev_nr_bytes(mp->m_ddev_targp->bt_bdev) - 1;  	/* Ignore the range out of filesystem area */ -	if (offset + len < ddev_start) +	if (offset + len - 1 < ddev_start)  		return -ENXIO;  	if (offset > ddev_end)  		return -ENXIO; @@ -222,8 +223,8 @@ xfs_dax_notify_failure(  		len -= ddev_start - offset;  		offset = 0;  	} -	if (offset + len > ddev_end) -		len -= ddev_end - offset; +	if (offset + len - 1 > ddev_end) +		len = ddev_end - offset + 1;  	return xfs_dax_notify_ddev_failure(mp, BTOBB(offset), BTOBB(len),  			mf_flags); diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index abcc559f3c64..eb9102453aff 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -617,7 +617,8 @@ xfs_reflink_cancel_cow_blocks(  					del.br_blockcount);  			error = xfs_free_extent_later(*tpp, del.br_startblock, -					  del.br_blockcount, NULL); +					del.br_blockcount, NULL, +					XFS_AG_RESV_NONE);  			if (error)  				break; diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index 4db669203149..f3cc204bb4bf 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -3623,6 +3623,31 @@ DEFINE_FSMAP_EVENT(xfs_fsmap_low_key);  DEFINE_FSMAP_EVENT(xfs_fsmap_high_key);  
DEFINE_FSMAP_EVENT(xfs_fsmap_mapping); +DECLARE_EVENT_CLASS(xfs_fsmap_linear_class, +	TP_PROTO(struct xfs_mount *mp, u32 keydev, uint64_t bno), +	TP_ARGS(mp, keydev, bno), +	TP_STRUCT__entry( +		__field(dev_t, dev) +		__field(dev_t, keydev) +		__field(xfs_fsblock_t, bno) +	), +	TP_fast_assign( +		__entry->dev = mp->m_super->s_dev; +		__entry->keydev = new_decode_dev(keydev); +		__entry->bno = bno; +	), +	TP_printk("dev %d:%d keydev %d:%d bno 0x%llx", +		  MAJOR(__entry->dev), MINOR(__entry->dev), +		  MAJOR(__entry->keydev), MINOR(__entry->keydev), +		  __entry->bno) +) +#define DEFINE_FSMAP_LINEAR_EVENT(name) \ +DEFINE_EVENT(xfs_fsmap_linear_class, name, \ +	TP_PROTO(struct xfs_mount *mp, u32 keydev, uint64_t bno), \ +	TP_ARGS(mp, keydev, bno)) +DEFINE_FSMAP_LINEAR_EVENT(xfs_fsmap_low_key_linear); +DEFINE_FSMAP_LINEAR_EVENT(xfs_fsmap_high_key_linear); +  DECLARE_EVENT_CLASS(xfs_getfsmap_class,  	TP_PROTO(struct xfs_mount *mp, struct xfs_fsmap *fsmap),  	TP_ARGS(mp, fsmap), diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c index 7d4109af193e..1098452e7f95 100644 --- a/fs/xfs/xfs_trans_ail.c +++ b/fs/xfs/xfs_trans_ail.c @@ -823,7 +823,7 @@ xfs_trans_ail_update_bulk(  			trace_xfs_ail_insert(lip, 0, lsn);  		}  		lip->li_lsn = lsn; -		list_add(&lip->li_ail, &tmp); +		list_add_tail(&lip->li_ail, &tmp);  	}  	if (!list_empty(&tmp)) | 
