diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2016-03-21 11:53:05 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2016-03-21 11:53:05 -0700 |
commit | 53d2e6976bd4042672ed7b90dfbf4b31635b7dcf (patch) | |
tree | 1d137e044c52433df6b4785fed1c7aafbb9333bc /fs/xfs/libxfs/xfs_bmap.c | |
parent | d407574e7948210223a7adca5ff26e3b0ec8143e (diff) | |
parent | 2cdb958aba6afbced5bc563167813b972b6acbfe (diff) |
Merge tag 'xfs-for-linus-4.6-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/dgc/linux-xfs
Pull xfs updates from Dave Chinner:
"There's quite a lot in this request, and there's some cross-over with
ext4, dax and quota code due to the nature of the changes being made.
As for the rest of the XFS changes, there are lots of little things
all over the place, which add up to a lot of changes in the end.
The major changes are that we've reduced the size of the struct
xfs_inode by ~100 bytes (gives an inode cache footprint reduction of
>10%), the writepage code now only does a single set of mapping tree
lockups so uses less CPU, delayed allocation reservations won't
overrun under random write loads anymore, and we added compile time
verification for on-disk structure sizes so we find out when a commit
or platform/compiler change breaks the on disk structure as early as
possible.
Change summary:
- error propagation for direct IO failures fixes for both XFS and
ext4
- new quota interfaces and XFS implementation for iterating all the
quota IDs in the filesystem
- locking fixes for real-time device extent allocation
- reduction of duplicate information in the xfs and vfs inode, saving
roughly 100 bytes of memory per cached inode.
- buffer flag cleanup
- rework of the writepage code to use the generic write clustering
mechanisms
- several fixes for inode flag based DAX enablement
- rework of remount option parsing
- compile time verification of on-disk format structure sizes
- delayed allocation reservation overrun fixes
- lots of little error handling fixes
- small memory leak fixes
- enable xfsaild freezing again"
* tag 'xfs-for-linus-4.6-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/dgc/linux-xfs: (66 commits)
xfs: always set rvalp in xfs_dir2_node_trim_free
xfs: ensure committed is initialized in xfs_trans_roll
xfs: borrow indirect blocks from freed extent when available
xfs: refactor delalloc indlen reservation split into helper
xfs: update freeblocks counter after extent deletion
xfs: debug mode forced buffered write failure
xfs: remove impossible condition
xfs: check sizes of XFS on-disk structures at compile time
xfs: ioends require logically contiguous file offsets
xfs: use named array initializers for log item dumping
xfs: fix computation of inode btree maxlevels
xfs: reinitialise per-AG structures if geometry changes during recovery
xfs: remove xfs_trans_get_block_res
xfs: fix up inode32/64 (re)mount handling
xfs: fix format specifier , should be %llx and not %llu
xfs: sanitize remount options
xfs: convert mount option parsing to tokens
xfs: fix two memory leaks in xfs_attr_list.c error paths
xfs: XFS_DIFLAG2_DAX limited by PAGE_SIZE
xfs: dynamically switch modes when XFS_DIFLAG2_DAX is set/cleared
...
Diffstat (limited to 'fs/xfs/libxfs/xfs_bmap.c')
-rw-r--r-- | fs/xfs/libxfs/xfs_bmap.c | 172 |
1 files changed, 120 insertions, 52 deletions
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index ef00156f4f96..041b6948aecc 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -477,10 +477,7 @@ xfs_bmap_check_leaf_extents( } block = XFS_BUF_TO_BLOCK(bp); } - if (bp_release) { - bp_release = 0; - xfs_trans_brelse(NULL, bp); - } + return; error0: @@ -912,7 +909,7 @@ xfs_bmap_local_to_extents( * We don't want to deal with the case of keeping inode data inline yet. * So sending the data fork of a regular inode is invalid. */ - ASSERT(!(S_ISREG(ip->i_d.di_mode) && whichfork == XFS_DATA_FORK)); + ASSERT(!(S_ISREG(VFS_I(ip)->i_mode) && whichfork == XFS_DATA_FORK)); ifp = XFS_IFORK_PTR(ip, whichfork); ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL); @@ -1079,7 +1076,7 @@ xfs_bmap_add_attrfork_local( if (ip->i_df.if_bytes <= XFS_IFORK_DSIZE(ip)) return 0; - if (S_ISDIR(ip->i_d.di_mode)) { + if (S_ISDIR(VFS_I(ip)->i_mode)) { memset(&dargs, 0, sizeof(dargs)); dargs.geo = ip->i_mount->m_dir_geo; dargs.dp = ip; @@ -1091,7 +1088,7 @@ xfs_bmap_add_attrfork_local( return xfs_dir2_sf_to_block(&dargs); } - if (S_ISLNK(ip->i_d.di_mode)) + if (S_ISLNK(VFS_I(ip)->i_mode)) return xfs_bmap_local_to_extents(tp, ip, firstblock, 1, flags, XFS_DATA_FORK, xfs_symlink_local_to_remote); @@ -4721,6 +4718,66 @@ error0: } /* + * When a delalloc extent is split (e.g., due to a hole punch), the original + * indlen reservation must be shared across the two new extents that are left + * behind. + * + * Given the original reservation and the worst case indlen for the two new + * extents (as calculated by xfs_bmap_worst_indlen()), split the original + * reservation fairly across the two new extents. If necessary, steal available + * blocks from a deleted extent to make up a reservation deficiency (e.g., if + * ores == 1). The number of stolen blocks is returned. The availability and + * subsequent accounting of stolen blocks is the responsibility of the caller. + */ +static xfs_filblks_t +xfs_bmap_split_indlen( + xfs_filblks_t ores, /* original res. */ + xfs_filblks_t *indlen1, /* ext1 worst indlen */ + xfs_filblks_t *indlen2, /* ext2 worst indlen */ + xfs_filblks_t avail) /* stealable blocks */ +{ + xfs_filblks_t len1 = *indlen1; + xfs_filblks_t len2 = *indlen2; + xfs_filblks_t nres = len1 + len2; /* new total res. */ + xfs_filblks_t stolen = 0; + + /* + * Steal as many blocks as we can to try and satisfy the worst case + * indlen for both new extents. + */ + while (nres > ores && avail) { + nres--; + avail--; + stolen++; + } + + /* + * The only blocks available are those reserved for the original + * extent and what we can steal from the extent being removed. + * If this still isn't enough to satisfy the combined + * requirements for the two new extents, skim blocks off of each + * of the new reservations until they match what is available. + */ + while (nres > ores) { + if (len1) { + len1--; + nres--; + } + if (nres == ores) + break; + if (len2) { + len2--; + nres--; + } + } + + *indlen1 = len1; + *indlen2 = len2; + + return stolen; +} + +/* * Called by xfs_bmapi to update file extent records and the btree * after removing space (or undoing a delayed allocation). */ @@ -4984,28 +5041,29 @@ xfs_bmap_del_extent( XFS_IFORK_NEXT_SET(ip, whichfork, XFS_IFORK_NEXTENTS(ip, whichfork) + 1); } else { + xfs_filblks_t stolen; ASSERT(whichfork == XFS_DATA_FORK); - temp = xfs_bmap_worst_indlen(ip, temp); + + /* + * Distribute the original indlen reservation across the + * two new extents. Steal blocks from the deleted extent + * if necessary. Stealing blocks simply fudges the + * fdblocks accounting in xfs_bunmapi(). + */ + temp = xfs_bmap_worst_indlen(ip, got.br_blockcount); + temp2 = xfs_bmap_worst_indlen(ip, new.br_blockcount); + stolen = xfs_bmap_split_indlen(da_old, &temp, &temp2, + del->br_blockcount); + da_new = temp + temp2 - stolen; + del->br_blockcount -= stolen; + + /* + * Set the reservation for each extent. Warn if either + * is zero as this can lead to delalloc problems. + */ + WARN_ON_ONCE(!temp || !temp2); xfs_bmbt_set_startblock(ep, nullstartblock((int)temp)); - temp2 = xfs_bmap_worst_indlen(ip, temp2); new.br_startblock = nullstartblock((int)temp2); - da_new = temp + temp2; - while (da_new > da_old) { - if (temp) { - temp--; - da_new--; - xfs_bmbt_set_startblock(ep, - nullstartblock((int)temp)); - } - if (da_new == da_old) - break; - if (temp2) { - temp2--; - da_new--; - new.br_startblock = - nullstartblock((int)temp2); - } - } } trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); xfs_iext_insert(ip, *idx + 1, 1, &new, state); @@ -5210,7 +5268,7 @@ xfs_bunmapi( * This is better than zeroing it. */ ASSERT(del.br_state == XFS_EXT_NORM); - ASSERT(xfs_trans_get_block_res(tp) > 0); + ASSERT(tp->t_blk_res > 0); /* * If this spans a realtime extent boundary, * chop it back to the start of the one we end at. @@ -5241,7 +5299,7 @@ xfs_bunmapi( del.br_startblock += mod; } else if ((del.br_startoff == start && (del.br_state == XFS_EXT_UNWRITTEN || - xfs_trans_get_block_res(tp) == 0)) || + tp->t_blk_res == 0)) || !xfs_sb_version_hasextflgbit(&mp->m_sb)) { /* * Can't make it unwritten. There isn't @@ -5296,9 +5354,37 @@ xfs_bunmapi( goto nodelete; } } + + /* + * If it's the case where the directory code is running + * with no block reservation, and the deleted block is in + * the middle of its extent, and the resulting insert + * of an extent would cause transformation to btree format, + * then reject it. The calling code will then swap + * blocks around instead. + * We have to do this now, rather than waiting for the + * conversion to btree format, since the transaction + * will be dirty. + */ + if (!wasdel && tp->t_blk_res == 0 && + XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS && + XFS_IFORK_NEXTENTS(ip, whichfork) >= /* Note the >= */ + XFS_IFORK_MAXEXT(ip, whichfork) && + del.br_startoff > got.br_startoff && + del.br_startoff + del.br_blockcount < + got.br_startoff + got.br_blockcount) { + error = -ENOSPC; + goto error0; + } + + /* + * Unreserve quota and update realtime free space, if + * appropriate. If delayed allocation, update the inode delalloc + * counter now and wait to update the sb counters as + * xfs_bmap_del_extent() might need to borrow some blocks. + */ if (wasdel) { ASSERT(startblockval(del.br_startblock) > 0); - /* Update realtime/data freespace, unreserve quota */ if (isrt) { xfs_filblks_t rtexts; @@ -5309,8 +5395,6 @@ xfs_bunmapi( ip, -((long)del.br_blockcount), 0, XFS_QMOPT_RES_RTBLKS); } else { - xfs_mod_fdblocks(mp, (int64_t)del.br_blockcount, - false); (void)xfs_trans_reserve_quota_nblks(NULL, ip, -((long)del.br_blockcount), 0, XFS_QMOPT_RES_REGBLKS); @@ -5321,32 +5405,16 @@ xfs_bunmapi( XFS_BTCUR_BPRV_WASDEL; } else if (cur) cur->bc_private.b.flags &= ~XFS_BTCUR_BPRV_WASDEL; - /* - * If it's the case where the directory code is running - * with no block reservation, and the deleted block is in - * the middle of its extent, and the resulting insert - * of an extent would cause transformation to btree format, - * then reject it. The calling code will then swap - * blocks around instead. - * We have to do this now, rather than waiting for the - * conversion to btree format, since the transaction - * will be dirty. - */ - if (!wasdel && xfs_trans_get_block_res(tp) == 0 && - XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS && - XFS_IFORK_NEXTENTS(ip, whichfork) >= /* Note the >= */ - XFS_IFORK_MAXEXT(ip, whichfork) && - del.br_startoff > got.br_startoff && - del.br_startoff + del.br_blockcount < - got.br_startoff + got.br_blockcount) { - error = -ENOSPC; - goto error0; - } + error = xfs_bmap_del_extent(ip, tp, &lastx, flist, cur, &del, &tmp_logflags, whichfork); logflags |= tmp_logflags; if (error) goto error0; + + if (!isrt && wasdel) + xfs_mod_fdblocks(mp, (int64_t)del.br_blockcount, false); + bno = del.br_startoff - 1; nodelete: /* |