summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--fs/xfs/Makefile1
-rw-r--r--fs/xfs/libxfs/xfs_ag.c22
-rw-r--r--fs/xfs/libxfs/xfs_ag.h16
-rw-r--r--fs/xfs/libxfs/xfs_alloc.c15
-rw-r--r--fs/xfs/libxfs/xfs_alloc.h12
-rw-r--r--fs/xfs/libxfs/xfs_bmap.c84
-rw-r--r--fs/xfs/libxfs/xfs_defer.c6
-rw-r--r--fs/xfs/libxfs/xfs_defer.h1
-rw-r--r--fs/xfs/libxfs/xfs_format.h74
-rw-r--r--fs/xfs/libxfs/xfs_fs.h28
-rw-r--r--fs/xfs/libxfs/xfs_group.h33
-rw-r--r--fs/xfs/libxfs/xfs_health.h42
-rw-r--r--fs/xfs/libxfs/xfs_ialloc_btree.c2
-rw-r--r--fs/xfs/libxfs/xfs_log_format.h6
-rw-r--r--fs/xfs/libxfs/xfs_log_recover.h2
-rw-r--r--fs/xfs/libxfs/xfs_ondisk.h4
-rw-r--r--fs/xfs/libxfs/xfs_rtbitmap.c225
-rw-r--r--fs/xfs/libxfs/xfs_rtbitmap.h114
-rw-r--r--fs/xfs/libxfs/xfs_rtgroup.c223
-rw-r--r--fs/xfs/libxfs/xfs_rtgroup.h98
-rw-r--r--fs/xfs/libxfs/xfs_sb.c232
-rw-r--r--fs/xfs/libxfs/xfs_sb.h6
-rw-r--r--fs/xfs/libxfs/xfs_shared.h4
-rw-r--r--fs/xfs/libxfs/xfs_types.c35
-rw-r--r--fs/xfs/scrub/agheader.c15
-rw-r--r--fs/xfs/scrub/agheader_repair.c4
-rw-r--r--fs/xfs/scrub/bmap.c16
-rw-r--r--fs/xfs/scrub/common.h2
-rw-r--r--fs/xfs/scrub/fscounters_repair.c9
-rw-r--r--fs/xfs/scrub/health.c32
-rw-r--r--fs/xfs/scrub/metapath.c92
-rw-r--r--fs/xfs/scrub/repair.c6
-rw-r--r--fs/xfs/scrub/repair.h3
-rw-r--r--fs/xfs/scrub/rgsuper.c84
-rw-r--r--fs/xfs/scrub/rtsummary.c5
-rw-r--r--fs/xfs/scrub/rtsummary_repair.c15
-rw-r--r--fs/xfs/scrub/scrub.c7
-rw-r--r--fs/xfs/scrub/scrub.h2
-rw-r--r--fs/xfs/scrub/stats.c1
-rw-r--r--fs/xfs/scrub/trace.h4
-rw-r--r--fs/xfs/xfs_bmap_item.c25
-rw-r--r--fs/xfs/xfs_bmap_util.c18
-rw-r--r--fs/xfs/xfs_buf_item_recover.c37
-rw-r--r--fs/xfs/xfs_discard.c187
-rw-r--r--fs/xfs/xfs_exchrange.c2
-rw-r--r--fs/xfs/xfs_extent_busy.c6
-rw-r--r--fs/xfs/xfs_extfree_item.c270
-rw-r--r--fs/xfs/xfs_health.c183
-rw-r--r--fs/xfs/xfs_ioctl.c39
-rw-r--r--fs/xfs/xfs_iomap.c13
-rw-r--r--fs/xfs/xfs_log_recover.c2
-rw-r--r--fs/xfs/xfs_mount.h15
-rw-r--r--fs/xfs/xfs_rtalloc.c573
-rw-r--r--fs/xfs/xfs_rtalloc.h6
-rw-r--r--fs/xfs/xfs_super.c12
-rw-r--r--fs/xfs/xfs_trace.h150
-rw-r--r--fs/xfs/xfs_trans.c64
-rw-r--r--fs/xfs/xfs_trans.h2
-rw-r--r--fs/xfs/xfs_trans_buf.c25
59 files changed, 2699 insertions, 512 deletions
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index 6814debac299..ed9b0dabc1f1 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -191,6 +191,7 @@ xfs-y += $(addprefix scrub/, \
xfs-$(CONFIG_XFS_ONLINE_SCRUB_STATS) += scrub/stats.o
xfs-$(CONFIG_XFS_RT) += $(addprefix scrub/, \
+ rgsuper.o \
rtbitmap.o \
rtsummary.o \
)
diff --git a/fs/xfs/libxfs/xfs_ag.c b/fs/xfs/libxfs/xfs_ag.c
index 47e90dbb852b..b59cb461e096 100644
--- a/fs/xfs/libxfs/xfs_ag.c
+++ b/fs/xfs/libxfs/xfs_ag.c
@@ -205,9 +205,10 @@ xfs_update_last_ag_size(
if (!pag)
return -EFSCORRUPTED;
- pag->block_count = __xfs_ag_block_count(mp, prev_agcount - 1,
- mp->m_sb.sb_agcount, mp->m_sb.sb_dblocks);
- __xfs_agino_range(mp, pag->block_count, &pag->agino_min,
+ pag_group(pag)->xg_block_count = __xfs_ag_block_count(mp,
+ prev_agcount - 1, mp->m_sb.sb_agcount,
+ mp->m_sb.sb_dblocks);
+ __xfs_agino_range(mp, pag_group(pag)->xg_block_count, &pag->agino_min,
&pag->agino_max);
xfs_perag_rele(pag);
return 0;
@@ -241,9 +242,10 @@ xfs_perag_alloc(
/*
* Pre-calculated geometry
*/
- pag->block_count = __xfs_ag_block_count(mp, index, agcount, dblocks);
- pag->min_block = XFS_AGFL_BLOCK(mp);
- __xfs_agino_range(mp, pag->block_count, &pag->agino_min,
+ pag_group(pag)->xg_block_count = __xfs_ag_block_count(mp, index, agcount,
+ dblocks);
+ pag_group(pag)->xg_min_gbno = XFS_AGFL_BLOCK(mp) + 1;
+ __xfs_agino_range(mp, pag_group(pag)->xg_block_count, &pag->agino_min,
&pag->agino_max);
error = xfs_group_insert(mp, pag_group(pag), index, XG_TYPE_AG);
@@ -852,8 +854,8 @@ xfs_ag_shrink_space(
}
/* Update perag geometry */
- pag->block_count -= delta;
- __xfs_agino_range(mp, pag->block_count, &pag->agino_min,
+ pag_group(pag)->xg_block_count -= delta;
+ __xfs_agino_range(mp, pag_group(pag)->xg_block_count, &pag->agino_min,
&pag->agino_max);
xfs_ialloc_log_agi(*tpp, agibp, XFS_AGI_LENGTH);
@@ -924,8 +926,8 @@ xfs_ag_extend_space(
return error;
/* Update perag geometry */
- pag->block_count = be32_to_cpu(agf->agf_length);
- __xfs_agino_range(mp, pag->block_count, &pag->agino_min,
+ pag_group(pag)->xg_block_count = be32_to_cpu(agf->agf_length);
+ __xfs_agino_range(mp, pag_group(pag)->xg_block_count, &pag->agino_min,
&pag->agino_max);
return 0;
}
diff --git a/fs/xfs/libxfs/xfs_ag.h b/fs/xfs/libxfs/xfs_ag.h
index 7290148fa6e6..1f24cfa27321 100644
--- a/fs/xfs/libxfs/xfs_ag.h
+++ b/fs/xfs/libxfs/xfs_ag.h
@@ -61,8 +61,6 @@ struct xfs_perag {
struct xfs_ag_resv pag_rmapbt_resv;
/* Precalculated geometry info */
- xfs_agblock_t block_count;
- xfs_agblock_t min_block;
xfs_agino_t agino_min;
xfs_agino_t agino_max;
@@ -220,11 +218,7 @@ void xfs_agino_range(struct xfs_mount *mp, xfs_agnumber_t agno,
static inline bool
xfs_verify_agbno(struct xfs_perag *pag, xfs_agblock_t agbno)
{
- if (agbno >= pag->block_count)
- return false;
- if (agbno <= pag->min_block)
- return false;
- return true;
+ return xfs_verify_gbno(pag_group(pag), agbno);
}
static inline bool
@@ -233,13 +227,7 @@ xfs_verify_agbext(
xfs_agblock_t agbno,
xfs_agblock_t len)
{
- if (agbno + len <= agbno)
- return false;
-
- if (!xfs_verify_agbno(pag, agbno))
- return false;
-
- return xfs_verify_agbno(pag, agbno + len - 1);
+ return xfs_verify_gbext(pag_group(pag), agbno, len);
}
/*
diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c
index a3bf7352a383..3d33e17f2e5c 100644
--- a/fs/xfs/libxfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -2648,8 +2648,17 @@ xfs_defer_extent_free(
ASSERT(!isnullstartblock(bno));
ASSERT(!(free_flags & ~XFS_FREE_EXTENT_ALL_FLAGS));
- if (XFS_IS_CORRUPT(mp, !xfs_verify_fsbext(mp, bno, len)))
- return -EFSCORRUPTED;
+ if (free_flags & XFS_FREE_EXTENT_REALTIME) {
+ if (type != XFS_AG_RESV_NONE) {
+ ASSERT(type == XFS_AG_RESV_NONE);
+ return -EFSCORRUPTED;
+ }
+ if (XFS_IS_CORRUPT(mp, !xfs_verify_rtbext(mp, bno, len)))
+ return -EFSCORRUPTED;
+ } else {
+ if (XFS_IS_CORRUPT(mp, !xfs_verify_fsbext(mp, bno, len)))
+ return -EFSCORRUPTED;
+ }
xefi = kmem_cache_zalloc(xfs_extfree_item_cache,
GFP_KERNEL | __GFP_NOFAIL);
@@ -2658,6 +2667,8 @@ xfs_defer_extent_free(
xefi->xefi_agresv = type;
if (free_flags & XFS_FREE_EXTENT_SKIP_DISCARD)
xefi->xefi_flags |= XFS_EFI_SKIP_DISCARD;
+ if (free_flags & XFS_FREE_EXTENT_REALTIME)
+ xefi->xefi_flags |= XFS_EFI_REALTIME;
if (oinfo) {
ASSERT(oinfo->oi_offset == 0);
diff --git a/fs/xfs/libxfs/xfs_alloc.h b/fs/xfs/libxfs/xfs_alloc.h
index efbde04fbbb1..50ef79a1ed41 100644
--- a/fs/xfs/libxfs/xfs_alloc.h
+++ b/fs/xfs/libxfs/xfs_alloc.h
@@ -237,7 +237,11 @@ int xfs_free_extent_later(struct xfs_trans *tp, xfs_fsblock_t bno,
/* Don't issue a discard for the blocks freed. */
#define XFS_FREE_EXTENT_SKIP_DISCARD (1U << 0)
-#define XFS_FREE_EXTENT_ALL_FLAGS (XFS_FREE_EXTENT_SKIP_DISCARD)
+/* Free blocks on the realtime device. */
+#define XFS_FREE_EXTENT_REALTIME (1U << 1)
+
+#define XFS_FREE_EXTENT_ALL_FLAGS (XFS_FREE_EXTENT_SKIP_DISCARD | \
+ XFS_FREE_EXTENT_REALTIME)
/*
* List of extents to be free "later".
@@ -257,6 +261,12 @@ struct xfs_extent_free_item {
#define XFS_EFI_ATTR_FORK (1U << 1) /* freeing attr fork block */
#define XFS_EFI_BMBT_BLOCK (1U << 2) /* freeing bmap btree block */
#define XFS_EFI_CANCELLED (1U << 3) /* dont actually free the space */
+#define XFS_EFI_REALTIME (1U << 4) /* freeing realtime extent */
+
+static inline bool xfs_efi_is_realtime(const struct xfs_extent_free_item *xefi)
+{
+ return xefi->xefi_flags & XFS_EFI_REALTIME;
+}
struct xfs_alloc_autoreap {
struct xfs_defer_pending *dfp;
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 9bfa8247854d..9052839305e2 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -40,6 +40,7 @@
#include "xfs_bmap_item.h"
#include "xfs_symlink_remote.h"
#include "xfs_inode_util.h"
+#include "xfs_rtgroup.h"
struct kmem_cache *xfs_bmap_intent_cache;
@@ -1426,6 +1427,24 @@ xfs_bmap_last_offset(
* Extent tree manipulation functions used during allocation.
*/
+static inline bool
+xfs_bmap_same_rtgroup(
+ struct xfs_inode *ip,
+ int whichfork,
+ struct xfs_bmbt_irec *left,
+ struct xfs_bmbt_irec *right)
+{
+ struct xfs_mount *mp = ip->i_mount;
+
+ if (xfs_ifork_is_realtime(ip, whichfork) && xfs_has_rtgroups(mp)) {
+ if (xfs_rtb_to_rgno(mp, left->br_startblock) !=
+ xfs_rtb_to_rgno(mp, right->br_startblock))
+ return false;
+ }
+
+ return true;
+}
+
/*
* Convert a delayed allocation to a real allocation.
*/
@@ -1495,7 +1514,8 @@ xfs_bmap_add_extent_delay_real(
LEFT.br_startoff + LEFT.br_blockcount == new->br_startoff &&
LEFT.br_startblock + LEFT.br_blockcount == new->br_startblock &&
LEFT.br_state == new->br_state &&
- LEFT.br_blockcount + new->br_blockcount <= XFS_MAX_BMBT_EXTLEN)
+ LEFT.br_blockcount + new->br_blockcount <= XFS_MAX_BMBT_EXTLEN &&
+ xfs_bmap_same_rtgroup(bma->ip, whichfork, &LEFT, new))
state |= BMAP_LEFT_CONTIG;
/*
@@ -1519,7 +1539,8 @@ xfs_bmap_add_extent_delay_real(
(BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING |
BMAP_RIGHT_FILLING) ||
LEFT.br_blockcount + new->br_blockcount + RIGHT.br_blockcount
- <= XFS_MAX_BMBT_EXTLEN))
+ <= XFS_MAX_BMBT_EXTLEN) &&
+ xfs_bmap_same_rtgroup(bma->ip, whichfork, new, &RIGHT))
state |= BMAP_RIGHT_CONTIG;
error = 0;
@@ -2064,7 +2085,8 @@ xfs_bmap_add_extent_unwritten_real(
LEFT.br_startoff + LEFT.br_blockcount == new->br_startoff &&
LEFT.br_startblock + LEFT.br_blockcount == new->br_startblock &&
LEFT.br_state == new->br_state &&
- LEFT.br_blockcount + new->br_blockcount <= XFS_MAX_BMBT_EXTLEN)
+ LEFT.br_blockcount + new->br_blockcount <= XFS_MAX_BMBT_EXTLEN &&
+ xfs_bmap_same_rtgroup(ip, whichfork, &LEFT, new))
state |= BMAP_LEFT_CONTIG;
/*
@@ -2088,7 +2110,8 @@ xfs_bmap_add_extent_unwritten_real(
(BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING |
BMAP_RIGHT_FILLING) ||
LEFT.br_blockcount + new->br_blockcount + RIGHT.br_blockcount
- <= XFS_MAX_BMBT_EXTLEN))
+ <= XFS_MAX_BMBT_EXTLEN) &&
+ xfs_bmap_same_rtgroup(ip, whichfork, new, &RIGHT))
state |= BMAP_RIGHT_CONTIG;
/*
@@ -2597,7 +2620,8 @@ xfs_bmap_add_extent_hole_delay(
*/
if ((state & BMAP_LEFT_VALID) && (state & BMAP_LEFT_DELAY) &&
left.br_startoff + left.br_blockcount == new->br_startoff &&
- left.br_blockcount + new->br_blockcount <= XFS_MAX_BMBT_EXTLEN)
+ left.br_blockcount + new->br_blockcount <= XFS_MAX_BMBT_EXTLEN &&
+ xfs_bmap_same_rtgroup(ip, whichfork, &left, new))
state |= BMAP_LEFT_CONTIG;
if ((state & BMAP_RIGHT_VALID) && (state & BMAP_RIGHT_DELAY) &&
@@ -2605,7 +2629,8 @@ xfs_bmap_add_extent_hole_delay(
new->br_blockcount + right.br_blockcount <= XFS_MAX_BMBT_EXTLEN &&
(!(state & BMAP_LEFT_CONTIG) ||
(left.br_blockcount + new->br_blockcount +
- right.br_blockcount <= XFS_MAX_BMBT_EXTLEN)))
+ right.br_blockcount <= XFS_MAX_BMBT_EXTLEN)) &&
+ xfs_bmap_same_rtgroup(ip, whichfork, new, &right))
state |= BMAP_RIGHT_CONTIG;
/*
@@ -2748,7 +2773,8 @@ xfs_bmap_add_extent_hole_real(
left.br_startoff + left.br_blockcount == new->br_startoff &&
left.br_startblock + left.br_blockcount == new->br_startblock &&
left.br_state == new->br_state &&
- left.br_blockcount + new->br_blockcount <= XFS_MAX_BMBT_EXTLEN)
+ left.br_blockcount + new->br_blockcount <= XFS_MAX_BMBT_EXTLEN &&
+ xfs_bmap_same_rtgroup(ip, whichfork, &left, new))
state |= BMAP_LEFT_CONTIG;
if ((state & BMAP_RIGHT_VALID) && !(state & BMAP_RIGHT_DELAY) &&
@@ -2758,7 +2784,8 @@ xfs_bmap_add_extent_hole_real(
new->br_blockcount + right.br_blockcount <= XFS_MAX_BMBT_EXTLEN &&
(!(state & BMAP_LEFT_CONTIG) ||
left.br_blockcount + new->br_blockcount +
- right.br_blockcount <= XFS_MAX_BMBT_EXTLEN))
+ right.br_blockcount <= XFS_MAX_BMBT_EXTLEN) &&
+ xfs_bmap_same_rtgroup(ip, whichfork, new, &right))
state |= BMAP_RIGHT_CONTIG;
error = 0;
@@ -3124,8 +3151,15 @@ xfs_bmap_adjacent_valid(
struct xfs_mount *mp = ap->ip->i_mount;
if (XFS_IS_REALTIME_INODE(ap->ip) &&
- (ap->datatype & XFS_ALLOC_USERDATA))
- return x < mp->m_sb.sb_rblocks;
+ (ap->datatype & XFS_ALLOC_USERDATA)) {
+ if (!xfs_has_rtgroups(mp))
+ return x < mp->m_sb.sb_rblocks;
+
+ return xfs_rtb_to_rgno(mp, x) == xfs_rtb_to_rgno(mp, y) &&
+ xfs_rtb_to_rgno(mp, x) < mp->m_sb.sb_rgcount &&
+ xfs_rtb_to_rtx(mp, x) < mp->m_sb.sb_rgextents;
+
+ }
return XFS_FSB_TO_AGNO(mp, x) == XFS_FSB_TO_AGNO(mp, y) &&
XFS_FSB_TO_AGNO(mp, x) < mp->m_sb.sb_agcount &&
@@ -5356,9 +5390,11 @@ xfs_bmap_del_extent_real(
* If we need to, add to list of extents to delete.
*/
if (!(bflags & XFS_BMAPI_REMAP)) {
+ bool isrt = xfs_ifork_is_realtime(ip, whichfork);
+
if (xfs_is_reflink_inode(ip) && whichfork == XFS_DATA_FORK) {
xfs_refcount_decrease_extent(tp, del);
- } else if (xfs_ifork_is_realtime(ip, whichfork)) {
+ } else if (isrt && !xfs_has_rtgroups(mp)) {
error = xfs_bmap_free_rtblocks(tp, del);
} else {
unsigned int efi_flags = 0;
@@ -5367,6 +5403,19 @@ xfs_bmap_del_extent_real(
del->br_state == XFS_EXT_UNWRITTEN)
efi_flags |= XFS_FREE_EXTENT_SKIP_DISCARD;
+ /*
+ * Historically, we did not use EFIs to free realtime
+ * extents. However, when reverse mapping is enabled,
+ * we must maintain the same order of operations as the
+ * data device, which is: Remove the file mapping,
+ * remove the reverse mapping, and then free the
+ * blocks. Reflink for realtime volumes requires the
+ * same sort of ordering. Both features rely on
+ * rtgroups, so let's gate rt EFI usage on rtgroups.
+ */
+ if (isrt)
+ efi_flags |= XFS_FREE_EXTENT_REALTIME;
+
error = xfs_free_extent_later(tp, del->br_startblock,
del->br_blockcount, NULL,
XFS_AG_RESV_NONE, efi_flags);
@@ -5715,6 +5764,8 @@ xfs_bunmapi(
*/
STATIC bool
xfs_bmse_can_merge(
+ struct xfs_inode *ip,
+ int whichfork,
struct xfs_bmbt_irec *left, /* preceding extent */
struct xfs_bmbt_irec *got, /* current extent to shift */
xfs_fileoff_t shift) /* shift fsb */
@@ -5730,7 +5781,8 @@ xfs_bmse_can_merge(
if ((left->br_startoff + left->br_blockcount != startoff) ||
(left->br_startblock + left->br_blockcount != got->br_startblock) ||
(left->br_state != got->br_state) ||
- (left->br_blockcount + got->br_blockcount > XFS_MAX_BMBT_EXTLEN))
+ (left->br_blockcount + got->br_blockcount > XFS_MAX_BMBT_EXTLEN) ||
+ !xfs_bmap_same_rtgroup(ip, whichfork, left, got))
return false;
return true;
@@ -5766,7 +5818,7 @@ xfs_bmse_merge(
blockcount = left->br_blockcount + got->br_blockcount;
xfs_assert_ilocked(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
- ASSERT(xfs_bmse_can_merge(left, got, shift));
+ ASSERT(xfs_bmse_can_merge(ip, whichfork, left, got, shift));
new = *left;
new.br_blockcount = blockcount;
@@ -5928,7 +5980,8 @@ xfs_bmap_collapse_extents(
goto del_cursor;
}
- if (xfs_bmse_can_merge(&prev, &got, offset_shift_fsb)) {
+ if (xfs_bmse_can_merge(ip, whichfork, &prev, &got,
+ offset_shift_fsb)) {
error = xfs_bmse_merge(tp, ip, whichfork,
offset_shift_fsb, &icur, &got, &prev,
cur, &logflags);
@@ -6064,7 +6117,8 @@ xfs_bmap_insert_extents(
* never find mergeable extents in this scenario. Check anyways
* and warn if we encounter two extents that could be one.
*/
- if (xfs_bmse_can_merge(&got, &next, offset_shift_fsb))
+ if (xfs_bmse_can_merge(ip, whichfork, &got, &next,
+ offset_shift_fsb))
WARN_ON_ONCE(1);
}
diff --git a/fs/xfs/libxfs/xfs_defer.c b/fs/xfs/libxfs/xfs_defer.c
index 2cd212ad2c1d..5b377cbbb1f7 100644
--- a/fs/xfs/libxfs/xfs_defer.c
+++ b/fs/xfs/libxfs/xfs_defer.c
@@ -846,6 +846,12 @@ xfs_defer_add(
ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
+ if (!ops->finish_item) {
+ ASSERT(ops->finish_item != NULL);
+ xfs_force_shutdown(tp->t_mountp, SHUTDOWN_CORRUPT_INCORE);
+ return NULL;
+ }
+
dfp = xfs_defer_find_last(tp, ops);
if (!dfp || !xfs_defer_can_append(dfp, ops))
dfp = xfs_defer_alloc(&tp->t_dfops, ops);
diff --git a/fs/xfs/libxfs/xfs_defer.h b/fs/xfs/libxfs/xfs_defer.h
index 8b338031e487..ec51b8465e61 100644
--- a/fs/xfs/libxfs/xfs_defer.h
+++ b/fs/xfs/libxfs/xfs_defer.h
@@ -71,6 +71,7 @@ extern const struct xfs_defer_op_type xfs_refcount_update_defer_type;
extern const struct xfs_defer_op_type xfs_rmap_update_defer_type;
extern const struct xfs_defer_op_type xfs_extent_free_defer_type;
extern const struct xfs_defer_op_type xfs_agfl_free_defer_type;
+extern const struct xfs_defer_op_type xfs_rtextent_free_defer_type;
extern const struct xfs_defer_op_type xfs_attr_defer_type;
extern const struct xfs_defer_op_type xfs_exchmaps_defer_type;
diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h
index 867060d60e85..d6c10855ab02 100644
--- a/fs/xfs/libxfs/xfs_format.h
+++ b/fs/xfs/libxfs/xfs_format.h
@@ -179,6 +179,9 @@ typedef struct xfs_sb {
xfs_rgnumber_t sb_rgcount; /* number of realtime groups */
xfs_rtxlen_t sb_rgextents; /* size of a realtime group in rtx */
+ uint8_t sb_rgblklog; /* rt group number shift */
+ uint8_t sb_pad[7]; /* zeroes */
+
/* must be padded to 64 bit alignment */
} xfs_sb_t;
@@ -265,8 +268,18 @@ struct xfs_dsb {
uuid_t sb_meta_uuid; /* metadata file system unique id */
__be64 sb_metadirino; /* metadata directory tree root */
+ __be32 sb_rgcount; /* # of realtime groups */
+ __be32 sb_rgextents; /* size of rtgroup in rtx */
- /* must be padded to 64 bit alignment */
+ __u8 sb_rgblklog; /* rt group number shift */
+ __u8 sb_pad[7]; /* zeroes */
+
+ /*
+ * The size of this structure must be padded to 64 bit alignment.
+ *
+ * NOTE: Don't forget to update secondary_sb_whack in xfs_repair when
+ * adding new fields here.
+ */
};
#define XFS_SB_CRC_OFF offsetof(struct xfs_dsb, sb_crc)
@@ -702,21 +715,58 @@ struct xfs_agfl {
/*
* Realtime bitmap information is accessed by the word, which is currently
- * stored in host-endian format.
+ * stored in host-endian format. Starting with the realtime groups feature,
+ * the words are stored in be32 ondisk.
*/
union xfs_rtword_raw {
__u32 old;
+ __be32 rtg;
};
/*
* Realtime summary counts are accessed by the word, which is currently
- * stored in host-endian format.
+ * stored in host-endian format. Starting with the realtime groups feature,
+ * the words are stored in be32 ondisk.
*/
union xfs_suminfo_raw {
__u32 old;
+ __be32 rtg;
};
/*
+ * Realtime allocation groups break the rt section into multiple pieces that
+ * could be locked independently. Realtime block group numbers are 32-bit
+ * quantities. Block numbers within a group are also 32-bit quantities, but
+ * the upper bit must never be set. rtgroup 0 might have a superblock in it,
+ * so the minimum size of an rtgroup is 2 rtx.
+ */
+#define XFS_MAX_RGBLOCKS ((xfs_rgblock_t)(1U << 31) - 1)
+#define XFS_MIN_RGEXTENTS ((xfs_rtxlen_t)2)
+#define XFS_MAX_RGNUMBER ((xfs_rgnumber_t)(-1U))
+
+#define XFS_RTSB_MAGIC 0x46726F67 /* 'Frog' */
+
+/*
+ * Realtime superblock - on disk version. Must be padded to 64 bit alignment.
+ * The first block of the realtime volume contains this superblock.
+ */
+struct xfs_rtsb {
+ __be32 rsb_magicnum; /* magic number == XFS_RTSB_MAGIC */
+ __le32 rsb_crc; /* superblock crc */
+
+ __be32 rsb_pad; /* zero */
+ unsigned char rsb_fname[XFSLABEL_MAX]; /* file system name */
+
+ uuid_t rsb_uuid; /* user-visible file system unique id */
+ uuid_t rsb_meta_uuid; /* metadata file system unique id */
+
+ /* must be padded to 64 bit alignment */
+};
+
+#define XFS_RTSB_CRC_OFF offsetof(struct xfs_rtsb, rsb_crc)
+#define XFS_RTSB_DADDR ((xfs_daddr_t)0) /* daddr in rt section */
+
+/*
* XFS Timestamps
* ==============
*
@@ -1239,6 +1289,24 @@ static inline bool xfs_dinode_is_metadir(const struct xfs_dinode *dip)
#define XFS_MIN_RTEXTSIZE (4 * 1024) /* 4kB */
/*
+ * RT bit manipulation macros.
+ */
+#define XFS_RTBITMAP_MAGIC 0x424D505A /* BMPZ */
+#define XFS_RTSUMMARY_MAGIC 0x53554D59 /* SUMY */
+
+struct xfs_rtbuf_blkinfo {
+ __be32 rt_magic; /* validity check on block */
+ __be32 rt_crc; /* CRC of block */
+ __be64 rt_owner; /* inode that owns the block */
+ __be64 rt_blkno; /* first block of the buffer */
+ __be64 rt_lsn; /* sequence number of last write */
+ uuid_t rt_uuid; /* filesystem we belong to */
+};
+
+#define XFS_RTBUF_CRC_OFF \
+ offsetof(struct xfs_rtbuf_blkinfo, rt_crc)
+
+/*
* Dquot and dquot block format definitions
*/
#define XFS_DQUOT_MAGIC 0x4451 /* 'DQ' */
diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h
index faa38a7d1eb0..96f7d3c95fb4 100644
--- a/fs/xfs/libxfs/xfs_fs.h
+++ b/fs/xfs/libxfs/xfs_fs.h
@@ -187,7 +187,9 @@ struct xfs_fsop_geom {
__u32 logsunit; /* log stripe unit, bytes */
uint32_t sick; /* o: unhealthy fs & rt metadata */
uint32_t checked; /* o: checked fs & rt metadata */
- __u64 reserved[17]; /* reserved space */
+ __u32 rgextents; /* rt extents in a realtime group */
+ __u32 rgcount; /* number of realtime groups */
+ __u64 reserved[16]; /* reserved space */
};
#define XFS_FSOP_GEOM_SICK_COUNTERS (1 << 0) /* summary counters */
@@ -734,9 +736,10 @@ struct xfs_scrub_metadata {
#define XFS_SCRUB_TYPE_HEALTHY 27 /* everything checked out ok */
#define XFS_SCRUB_TYPE_DIRTREE 28 /* directory tree structure */
#define XFS_SCRUB_TYPE_METAPATH 29 /* metadata directory tree paths */
+#define XFS_SCRUB_TYPE_RGSUPER 30 /* realtime superblock */
/* Number of scrub subcommands. */
-#define XFS_SCRUB_TYPE_NR 30
+#define XFS_SCRUB_TYPE_NR 31
/*
* This special type code only applies to the vectored scrub implementation.
@@ -819,9 +822,12 @@ struct xfs_scrub_vec_head {
* path checking.
*/
#define XFS_SCRUB_METAPATH_PROBE (0) /* do we have a metapath scrubber? */
+#define XFS_SCRUB_METAPATH_RTDIR (1) /* rtrgroups metadir */
+#define XFS_SCRUB_METAPATH_RTBITMAP (2) /* per-rtg bitmap */
+#define XFS_SCRUB_METAPATH_RTSUMMARY (3) /* per-rtg summary */
/* Number of metapath sm_ino values */
-#define XFS_SCRUB_METAPATH_NR (1)
+#define XFS_SCRUB_METAPATH_NR (4)
/*
* ioctl limits
@@ -970,6 +976,21 @@ struct xfs_getparents_by_handle {
};
/*
+ * Output for XFS_IOC_RTGROUP_GEOMETRY
+ */
+struct xfs_rtgroup_geometry {
+ __u32 rg_number; /* i/o: rtgroup number */
+ __u32 rg_length; /* o: length in blocks */
+ __u32 rg_sick; /* o: sick things in ag */
+ __u32 rg_checked; /* o: checked metadata in ag */
+ __u32 rg_flags; /* i/o: flags for this ag */
+ __u32 rg_reserved[27]; /* o: zero */
+};
+#define XFS_RTGROUP_GEOM_SICK_SUPER (1U << 0) /* superblock */
+#define XFS_RTGROUP_GEOM_SICK_BITMAP (1U << 1) /* rtbitmap */
+#define XFS_RTGROUP_GEOM_SICK_SUMMARY (1U << 2) /* rtsummary */
+
+/*
* ioctl commands that are used by Linux filesystems
*/
#define XFS_IOC_GETXFLAGS FS_IOC_GETFLAGS
@@ -1007,6 +1028,7 @@ struct xfs_getparents_by_handle {
#define XFS_IOC_GETPARENTS _IOWR('X', 62, struct xfs_getparents)
#define XFS_IOC_GETPARENTS_BY_HANDLE _IOWR('X', 63, struct xfs_getparents_by_handle)
#define XFS_IOC_SCRUBV_METADATA _IOWR('X', 64, struct xfs_scrub_vec_head)
+#define XFS_IOC_RTGROUP_GEOMETRY _IOWR('X', 65, struct xfs_rtgroup_geometry)
/*
* ioctl commands that replace IRIX syssgi()'s
diff --git a/fs/xfs/libxfs/xfs_group.h b/fs/xfs/libxfs/xfs_group.h
index 5b7362277c3f..242b05627c7a 100644
--- a/fs/xfs/libxfs/xfs_group.h
+++ b/fs/xfs/libxfs/xfs_group.h
@@ -12,6 +12,10 @@ struct xfs_group {
atomic_t xg_ref; /* passive reference count */
atomic_t xg_active_ref; /* active reference count */
+ /* Precalculated geometry info */
+ uint32_t xg_block_count; /* max usable gbno */
+ uint32_t xg_min_gbno; /* min usable gbno */
+
#ifdef __KERNEL__
/* -- kernel only structures below this line -- */
@@ -128,4 +132,33 @@ xfs_fsb_to_gbno(
return fsbno & mp->m_groups[type].blkmask;
}
+static inline bool
+xfs_verify_gbno(
+ struct xfs_group *xg,
+ uint32_t gbno)
+{
+ if (gbno >= xg->xg_block_count)
+ return false;
+ if (gbno < xg->xg_min_gbno)
+ return false;
+ return true;
+}
+
+static inline bool
+xfs_verify_gbext(
+ struct xfs_group *xg,
+ uint32_t gbno,
+ uint32_t glen)
+{
+ uint32_t end;
+
+ if (!xfs_verify_gbno(xg, gbno))
+ return false;
+ if (glen == 0 || check_add_overflow(gbno, glen - 1, &end))
+ return false;
+ if (!xfs_verify_gbno(xg, end))
+ return false;
+ return true;
+}
+
#endif /* __LIBXFS_GROUP_H */
diff --git a/fs/xfs/libxfs/xfs_health.h b/fs/xfs/libxfs/xfs_health.h
index a23df94319e5..d34986ac18c3 100644
--- a/fs/xfs/libxfs/xfs_health.h
+++ b/fs/xfs/libxfs/xfs_health.h
@@ -54,6 +54,7 @@ struct xfs_inode;
struct xfs_fsop_geom;
struct xfs_btree_cur;
struct xfs_da_args;
+struct xfs_rtgroup;
/* Observable health issues for metadata spanning the entire filesystem. */
#define XFS_SICK_FS_COUNTERS (1 << 0) /* summary counters */
@@ -65,9 +66,10 @@ struct xfs_da_args;
#define XFS_SICK_FS_METADIR (1 << 6) /* metadata directory tree */
#define XFS_SICK_FS_METAPATH (1 << 7) /* metadata directory tree path */
-/* Observable health issues for realtime volume metadata. */
-#define XFS_SICK_RT_BITMAP (1 << 0) /* realtime bitmap */
-#define XFS_SICK_RT_SUMMARY (1 << 1) /* realtime summary */
+/* Observable health issues for realtime group metadata. */
+#define XFS_SICK_RG_SUPER (1 << 0) /* rt group superblock */
+#define XFS_SICK_RG_BITMAP (1 << 1) /* rt group bitmap */
+#define XFS_SICK_RG_SUMMARY (1 << 2) /* rt groups summary */
/* Observable health issues for AG metadata. */
#define XFS_SICK_AG_SB (1 << 0) /* superblock */
@@ -111,8 +113,9 @@ struct xfs_da_args;
XFS_SICK_FS_METADIR | \
XFS_SICK_FS_METAPATH)
-#define XFS_SICK_RT_PRIMARY (XFS_SICK_RT_BITMAP | \
- XFS_SICK_RT_SUMMARY)
+#define XFS_SICK_RG_PRIMARY (XFS_SICK_RG_SUPER | \
+ XFS_SICK_RG_BITMAP | \
+ XFS_SICK_RG_SUMMARY)
#define XFS_SICK_AG_PRIMARY (XFS_SICK_AG_SB | \
XFS_SICK_AG_AGF | \
@@ -142,26 +145,26 @@ struct xfs_da_args;
/* Secondary state related to (but not primary evidence of) health problems. */
#define XFS_SICK_FS_SECONDARY (0)
-#define XFS_SICK_RT_SECONDARY (0)
+#define XFS_SICK_RG_SECONDARY (0)
#define XFS_SICK_AG_SECONDARY (0)
#define XFS_SICK_INO_SECONDARY (XFS_SICK_INO_FORGET)
/* Evidence of health problems elsewhere. */
#define XFS_SICK_FS_INDIRECT (0)
-#define XFS_SICK_RT_INDIRECT (0)
+#define XFS_SICK_RG_INDIRECT (0)
#define XFS_SICK_AG_INDIRECT (XFS_SICK_AG_INODES)
#define XFS_SICK_INO_INDIRECT (0)
/* All health masks. */
-#define XFS_SICK_FS_ALL (XFS_SICK_FS_PRIMARY | \
+#define XFS_SICK_FS_ALL (XFS_SICK_FS_PRIMARY | \
XFS_SICK_FS_SECONDARY | \
XFS_SICK_FS_INDIRECT)
-#define XFS_SICK_RT_ALL (XFS_SICK_RT_PRIMARY | \
- XFS_SICK_RT_SECONDARY | \
- XFS_SICK_RT_INDIRECT)
+#define XFS_SICK_RG_ALL (XFS_SICK_RG_PRIMARY | \
+ XFS_SICK_RG_SECONDARY | \
+ XFS_SICK_RG_INDIRECT)
-#define XFS_SICK_AG_ALL (XFS_SICK_AG_PRIMARY | \
+#define XFS_SICK_AG_ALL (XFS_SICK_AG_PRIMARY | \
XFS_SICK_AG_SECONDARY | \
XFS_SICK_AG_INDIRECT)
@@ -195,11 +198,8 @@ void xfs_fs_mark_healthy(struct xfs_mount *mp, unsigned int mask);
void xfs_fs_measure_sickness(struct xfs_mount *mp, unsigned int *sick,
unsigned int *checked);
-void xfs_rt_mark_sick(struct xfs_mount *mp, unsigned int mask);
-void xfs_rt_mark_corrupt(struct xfs_mount *mp, unsigned int mask);
-void xfs_rt_mark_healthy(struct xfs_mount *mp, unsigned int mask);
-void xfs_rt_measure_sickness(struct xfs_mount *mp, unsigned int *sick,
- unsigned int *checked);
+void xfs_rgno_mark_sick(struct xfs_mount *mp, xfs_rgnumber_t rgno,
+ unsigned int mask);
void xfs_agno_mark_sick(struct xfs_mount *mp, xfs_agnumber_t agno,
unsigned int mask);
@@ -244,11 +244,17 @@ xfs_group_has_sickness(
xfs_group_measure_sickness(xg, &sick, &checked);
return sick & mask;
}
+
#define xfs_ag_has_sickness(pag, mask) \
xfs_group_has_sickness(pag_group(pag), (mask))
#define xfs_ag_is_healthy(pag) \
(!xfs_ag_has_sickness((pag), UINT_MAX))
+#define xfs_rtgroup_has_sickness(rtg, mask) \
+ xfs_group_has_sickness(rtg_group(rtg), (mask))
+#define xfs_rtgroup_is_healthy(rtg) \
+ (!xfs_rtgroup_has_sickness((rtg), UINT_MAX))
+
static inline bool
xfs_inode_has_sickness(struct xfs_inode *ip, unsigned int mask)
{
@@ -272,6 +278,8 @@ xfs_inode_is_healthy(struct xfs_inode *ip)
void xfs_fsop_geom_health(struct xfs_mount *mp, struct xfs_fsop_geom *geo);
void xfs_ag_geom_health(struct xfs_perag *pag, struct xfs_ag_geometry *ageo);
+void xfs_rtgroup_geom_health(struct xfs_rtgroup *rtg,
+ struct xfs_rtgroup_geometry *rgeo);
void xfs_bulkstat_health(struct xfs_inode *ip, struct xfs_bulkstat *bs);
#define xfs_metadata_is_sick(error) \
diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.c b/fs/xfs/libxfs/xfs_ialloc_btree.c
index 4c28deb3e988..9b34896dd1a3 100644
--- a/fs/xfs/libxfs/xfs_ialloc_btree.c
+++ b/fs/xfs/libxfs/xfs_ialloc_btree.c
@@ -717,7 +717,7 @@ xfs_inobt_max_size(
struct xfs_perag *pag)
{
struct xfs_mount *mp = pag_mount(pag);
- xfs_agblock_t agblocks = pag->block_count;
+ xfs_agblock_t agblocks = pag_group(pag)->xg_block_count;
/* Bail out if we're uninitialized, which can happen in mkfs. */
if (M_IGEO(mp)->inobt_mxr[0] == 0)
diff --git a/fs/xfs/libxfs/xfs_log_format.h b/fs/xfs/libxfs/xfs_log_format.h
index ace7384a275b..15dec19b6c32 100644
--- a/fs/xfs/libxfs/xfs_log_format.h
+++ b/fs/xfs/libxfs/xfs_log_format.h
@@ -248,6 +248,8 @@ typedef struct xfs_trans_header {
#define XFS_LI_ATTRD 0x1247 /* attr set/remove done */
#define XFS_LI_XMI 0x1248 /* mapping exchange intent */
#define XFS_LI_XMD 0x1249 /* mapping exchange done */
+#define XFS_LI_EFI_RT 0x124a /* realtime extent free intent */
+#define XFS_LI_EFD_RT 0x124b /* realtime extent free done */
#define XFS_LI_TYPE_DESC \
{ XFS_LI_EFI, "XFS_LI_EFI" }, \
@@ -267,7 +269,9 @@ typedef struct xfs_trans_header {
{ XFS_LI_ATTRI, "XFS_LI_ATTRI" }, \
{ XFS_LI_ATTRD, "XFS_LI_ATTRD" }, \
{ XFS_LI_XMI, "XFS_LI_XMI" }, \
- { XFS_LI_XMD, "XFS_LI_XMD" }
+ { XFS_LI_XMD, "XFS_LI_XMD" }, \
+ { XFS_LI_EFI_RT, "XFS_LI_EFI_RT" }, \
+ { XFS_LI_EFD_RT, "XFS_LI_EFD_RT" }
/*
* Inode Log Item Format definitions.
diff --git a/fs/xfs/libxfs/xfs_log_recover.h b/fs/xfs/libxfs/xfs_log_recover.h
index 521d327e4c89..5397a8ff004d 100644
--- a/fs/xfs/libxfs/xfs_log_recover.h
+++ b/fs/xfs/libxfs/xfs_log_recover.h
@@ -77,6 +77,8 @@ extern const struct xlog_recover_item_ops xlog_attri_item_ops;
extern const struct xlog_recover_item_ops xlog_attrd_item_ops;
extern const struct xlog_recover_item_ops xlog_xmi_item_ops;
extern const struct xlog_recover_item_ops xlog_xmd_item_ops;
+extern const struct xlog_recover_item_ops xlog_rtefi_item_ops;
+extern const struct xlog_recover_item_ops xlog_rtefd_item_ops;
/*
* Macros, structures, prototypes for internal log manager use.
diff --git a/fs/xfs/libxfs/xfs_ondisk.h b/fs/xfs/libxfs/xfs_ondisk.h
index 8bca86e350fd..99eae7f67e96 100644
--- a/fs/xfs/libxfs/xfs_ondisk.h
+++ b/fs/xfs/libxfs/xfs_ondisk.h
@@ -37,7 +37,7 @@ xfs_check_ondisk_structs(void)
XFS_CHECK_STRUCT_SIZE(struct xfs_dinode, 176);
XFS_CHECK_STRUCT_SIZE(struct xfs_disk_dquot, 104);
XFS_CHECK_STRUCT_SIZE(struct xfs_dqblk, 136);
- XFS_CHECK_STRUCT_SIZE(struct xfs_dsb, 272);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_dsb, 288);
XFS_CHECK_STRUCT_SIZE(struct xfs_dsymlink_hdr, 56);
XFS_CHECK_STRUCT_SIZE(struct xfs_inobt_key, 4);
XFS_CHECK_STRUCT_SIZE(struct xfs_inobt_rec, 16);
@@ -53,6 +53,7 @@ xfs_check_ondisk_structs(void)
XFS_CHECK_STRUCT_SIZE(xfs_inobt_ptr_t, 4);
XFS_CHECK_STRUCT_SIZE(xfs_refcount_ptr_t, 4);
XFS_CHECK_STRUCT_SIZE(xfs_rmap_ptr_t, 4);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_rtsb, 56);
/* dir/attr trees */
XFS_CHECK_STRUCT_SIZE(struct xfs_attr3_leaf_hdr, 80);
@@ -75,6 +76,7 @@ xfs_check_ondisk_structs(void)
/* realtime structures */
XFS_CHECK_STRUCT_SIZE(union xfs_rtword_raw, 4);
XFS_CHECK_STRUCT_SIZE(union xfs_suminfo_raw, 4);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_rtbuf_blkinfo, 48);
/*
* m68k has problems with xfs_attr_leaf_name_remote_t, but we pad it to
diff --git a/fs/xfs/libxfs/xfs_rtbitmap.c b/fs/xfs/libxfs/xfs_rtbitmap.c
index 54079edfe10f..4ddfb7e395b3 100644
--- a/fs/xfs/libxfs/xfs_rtbitmap.c
+++ b/fs/xfs/libxfs/xfs_rtbitmap.c
@@ -21,28 +21,86 @@
#include "xfs_rtbitmap.h"
#include "xfs_health.h"
#include "xfs_sb.h"
+#include "xfs_errortag.h"
+#include "xfs_log.h"
+#include "xfs_buf_item.h"
+#include "xfs_extent_busy.h"
/*
* Realtime allocator bitmap functions shared with userspace.
*/
-/*
- * Real time buffers need verifiers to avoid runtime warnings during IO.
- * We don't have anything to verify, however, so these are just dummy
- * operations.
- */
+static xfs_failaddr_t
+xfs_rtbuf_verify(
+ struct xfs_buf *bp)
+{
+ struct xfs_mount *mp = bp->b_mount;
+ struct xfs_rtbuf_blkinfo *hdr = bp->b_addr;
+
+ if (!xfs_verify_magic(bp, hdr->rt_magic))
+ return __this_address;
+ if (!xfs_has_rtgroups(mp))
+ return __this_address;
+ if (!xfs_has_crc(mp))
+ return __this_address;
+ if (!uuid_equal(&hdr->rt_uuid, &mp->m_sb.sb_meta_uuid))
+ return __this_address;
+ if (hdr->rt_blkno != cpu_to_be64(xfs_buf_daddr(bp)))
+ return __this_address;
+ return NULL;
+}
+
static void
xfs_rtbuf_verify_read(
- struct xfs_buf *bp)
+ struct xfs_buf *bp)
{
+ struct xfs_mount *mp = bp->b_mount;
+ struct xfs_rtbuf_blkinfo *hdr = bp->b_addr;
+ xfs_failaddr_t fa;
+
+ if (!xfs_has_rtgroups(mp))
+ return;
+
+ if (!xfs_log_check_lsn(mp, be64_to_cpu(hdr->rt_lsn))) {
+ fa = __this_address;
+ goto fail;
+ }
+
+ if (!xfs_buf_verify_cksum(bp, XFS_RTBUF_CRC_OFF)) {
+ fa = __this_address;
+ goto fail;
+ }
+
+ fa = xfs_rtbuf_verify(bp);
+ if (fa)
+ goto fail;
+
return;
+fail:
+ xfs_verifier_error(bp, -EFSCORRUPTED, fa);
}
static void
xfs_rtbuf_verify_write(
struct xfs_buf *bp)
{
- return;
+ struct xfs_mount *mp = bp->b_mount;
+ struct xfs_rtbuf_blkinfo *hdr = bp->b_addr;
+ struct xfs_buf_log_item *bip = bp->b_log_item;
+ xfs_failaddr_t fa;
+
+ if (!xfs_has_rtgroups(mp))
+ return;
+
+ fa = xfs_rtbuf_verify(bp);
+ if (fa) {
+ xfs_verifier_error(bp, -EFSCORRUPTED, fa);
+ return;
+ }
+
+ if (bip)
+ hdr->rt_lsn = cpu_to_be64(bip->bli_item.li_lsn);
+ xfs_buf_update_cksum(bp, XFS_RTBUF_CRC_OFF);
}
const struct xfs_buf_ops xfs_rtbuf_ops = {
@@ -51,6 +109,22 @@ const struct xfs_buf_ops xfs_rtbuf_ops = {
.verify_write = xfs_rtbuf_verify_write,
};
+const struct xfs_buf_ops xfs_rtbitmap_buf_ops = {
+ .name = "xfs_rtbitmap",
+ .magic = { 0, cpu_to_be32(XFS_RTBITMAP_MAGIC) },
+ .verify_read = xfs_rtbuf_verify_read,
+ .verify_write = xfs_rtbuf_verify_write,
+ .verify_struct = xfs_rtbuf_verify,
+};
+
+const struct xfs_buf_ops xfs_rtsummary_buf_ops = {
+ .name = "xfs_rtsummary",
+ .magic = { 0, cpu_to_be32(XFS_RTSUMMARY_MAGIC) },
+ .verify_read = xfs_rtbuf_verify_read,
+ .verify_write = xfs_rtbuf_verify_write,
+ .verify_struct = xfs_rtbuf_verify,
+};
+
/* Release cached rt bitmap and summary buffers. */
void
xfs_rtbuf_cache_relse(
@@ -76,28 +150,31 @@ static int
xfs_rtbuf_get(
struct xfs_rtalloc_args *args,
xfs_fileoff_t block, /* block number in bitmap or summary */
- int issum) /* is summary not bitmap */
+ enum xfs_rtg_inodes type)
{
+ struct xfs_inode *ip = args->rtg->rtg_inodes[type];
struct xfs_mount *mp = args->mp;
struct xfs_buf **cbpp; /* cached block buffer */
xfs_fileoff_t *coffp; /* cached block number */
struct xfs_buf *bp; /* block buffer, result */
- struct xfs_inode *ip; /* bitmap or summary inode */
struct xfs_bmbt_irec map;
- enum xfs_blft type;
+ enum xfs_blft buf_type;
int nmap = 1;
int error;
- if (issum) {
+ switch (type) {
+ case XFS_RTGI_SUMMARY:
cbpp = &args->sumbp;
coffp = &args->sumoff;
- ip = args->rtg->rtg_inodes[XFS_RTGI_SUMMARY];
- type = XFS_BLFT_RTSUMMARY_BUF;
- } else {
+ buf_type = XFS_BLFT_RTSUMMARY_BUF;
+ break;
+ case XFS_RTGI_BITMAP:
cbpp = &args->rbmbp;
coffp = &args->rbmoff;
- ip = args->rtg->rtg_inodes[XFS_RTGI_BITMAP];
- type = XFS_BLFT_RTBITMAP_BUF;
+ buf_type = XFS_BLFT_RTBITMAP_BUF;
+ break;
+ default:
+ return -EINVAL;
}
/*
@@ -120,22 +197,32 @@ xfs_rtbuf_get(
return error;
if (XFS_IS_CORRUPT(mp, nmap == 0 || !xfs_bmap_is_written_extent(&map))) {
- xfs_rt_mark_sick(mp, issum ? XFS_SICK_RT_SUMMARY :
- XFS_SICK_RT_BITMAP);
+ xfs_rtginode_mark_sick(args->rtg, type);
return -EFSCORRUPTED;
}
ASSERT(map.br_startblock != NULLFSBLOCK);
error = xfs_trans_read_buf(mp, args->tp, mp->m_ddev_targp,
XFS_FSB_TO_DADDR(mp, map.br_startblock),
- mp->m_bsize, 0, &bp, &xfs_rtbuf_ops);
+ mp->m_bsize, 0, &bp,
+ xfs_rtblock_ops(mp, type));
if (xfs_metadata_is_sick(error))
- xfs_rt_mark_sick(mp, issum ? XFS_SICK_RT_SUMMARY :
- XFS_SICK_RT_BITMAP);
+ xfs_rtginode_mark_sick(args->rtg, type);
if (error)
return error;
- xfs_trans_buf_set_type(args->tp, bp, type);
+ if (xfs_has_rtgroups(mp)) {
+ struct xfs_rtbuf_blkinfo *hdr = bp->b_addr;
+
+ if (hdr->rt_owner != cpu_to_be64(ip->i_ino)) {
+ xfs_buf_mark_corrupt(bp);
+ xfs_trans_brelse(args->tp, bp);
+ xfs_rtginode_mark_sick(args->rtg, type);
+ return -EFSCORRUPTED;
+ }
+ }
+
+ xfs_trans_buf_set_type(args->tp, bp, buf_type);
*cbpp = bp;
*coffp = block;
return 0;
@@ -149,11 +236,11 @@ xfs_rtbitmap_read_buf(
struct xfs_mount *mp = args->mp;
if (XFS_IS_CORRUPT(mp, block >= mp->m_sb.sb_rbmblocks)) {
- xfs_rt_mark_sick(mp, XFS_SICK_RT_BITMAP);
+ xfs_rtginode_mark_sick(args->rtg, XFS_RTGI_BITMAP);
return -EFSCORRUPTED;
}
- return xfs_rtbuf_get(args, block, 0);
+ return xfs_rtbuf_get(args, block, XFS_RTGI_BITMAP);
}
int
@@ -164,10 +251,10 @@ xfs_rtsummary_read_buf(
struct xfs_mount *mp = args->mp;
if (XFS_IS_CORRUPT(mp, block >= mp->m_rsumblocks)) {
- xfs_rt_mark_sick(args->mp, XFS_SICK_RT_SUMMARY);
+ xfs_rtginode_mark_sick(args->rtg, XFS_RTGI_SUMMARY);
return -EFSCORRUPTED;
}
- return xfs_rtbuf_get(args, block, 1);
+ return xfs_rtbuf_get(args, block, XFS_RTGI_SUMMARY);
}
/*
@@ -980,6 +1067,9 @@ xfs_rtfree_extent(
ASSERT(rbmip->i_itemp != NULL);
xfs_assert_ilocked(rbmip, XFS_ILOCK_EXCL);
+ if (XFS_TEST_ERROR(false, mp, XFS_ERRTAG_FREE_EXTENT))
+ return -EIO;
+
error = xfs_rtcheck_alloc_range(&args, start, len);
if (error)
return error;
@@ -995,11 +1085,13 @@ xfs_rtfree_extent(
* Mark more blocks free in the superblock.
*/
xfs_trans_mod_sb(tp, XFS_TRANS_SB_FREXTENTS, (long)len);
+
/*
* If we've now freed all the blocks, reset the file sequence
- * number to 0.
+ * number to 0 for pre-RTG file systems.
*/
- if (tp->t_frextents_delta + mp->m_sb.sb_frextents ==
+ if (!xfs_has_rtgroups(mp) &&
+ tp->t_frextents_delta + mp->m_sb.sb_frextents ==
mp->m_sb.sb_rextents) {
if (!(rbmip->i_diflags & XFS_DIFLAG_NEWRTBM))
rbmip->i_diflags |= XFS_DIFLAG_NEWRTBM;
@@ -1029,10 +1121,11 @@ xfs_rtfree_blocks(
{
struct xfs_mount *mp = tp->t_mountp;
xfs_extlen_t mod;
+ int error;
ASSERT(rtlen <= XFS_MAX_BMBT_EXTLEN);
- mod = xfs_rtb_to_rtxoff(mp, rtlen);
+ mod = xfs_blen_to_rtxoff(mp, rtlen);
if (mod) {
ASSERT(mod == 0);
return -EIO;
@@ -1044,8 +1137,16 @@ xfs_rtfree_blocks(
return -EIO;
}
- return xfs_rtfree_extent(tp, rtg, xfs_rtb_to_rtx(mp, rtbno),
+ error = xfs_rtfree_extent(tp, rtg, xfs_rtb_to_rtx(mp, rtbno),
xfs_extlen_to_rtxlen(mp, rtlen));
+ if (error)
+ return error;
+
+ if (xfs_has_rtgroups(mp))
+ xfs_extent_busy_insert(tp, rtg_group(rtg),
+ xfs_rtb_to_rgbno(mp, rtbno), rtlen, 0);
+
+ return 0;
}
/* Find all the free records within a given range. */
@@ -1145,6 +1246,19 @@ xfs_rtalloc_extent_is_free(
return 0;
}
+/* Compute the number of rt extents tracked by a single bitmap block. */
+xfs_rtxnum_t
+xfs_rtbitmap_rtx_per_rbmblock(
+ struct xfs_mount *mp)
+{
+ unsigned int rbmblock_bytes = mp->m_sb.sb_blocksize;
+
+ if (xfs_has_rtgroups(mp))
+ rbmblock_bytes -= sizeof(struct xfs_rtbuf_blkinfo);
+
+ return rbmblock_bytes * NBBY;
+}
+
/*
* Compute the number of rtbitmap blocks needed to track the given number of rt
* extents.
@@ -1154,7 +1268,22 @@ xfs_rtbitmap_blockcount_len(
struct xfs_mount *mp,
xfs_rtbxlen_t rtextents)
{
- return howmany_64(rtextents, NBBY * mp->m_sb.sb_blocksize);
+ return howmany_64(rtextents, xfs_rtbitmap_rtx_per_rbmblock(mp));
+}
+
+/* How many rt extents does each rtbitmap file track? */
+static inline xfs_rtbxlen_t
+xfs_rtbitmap_bitcount(
+ struct xfs_mount *mp)
+{
+ if (!mp->m_sb.sb_rextents)
+ return 0;
+
+ /* rtgroup size can be nonzero even if rextents is zero */
+ if (xfs_has_rtgroups(mp))
+ return mp->m_sb.sb_rgextents;
+
+ return mp->m_sb.sb_rextents;
}
/*
@@ -1164,7 +1293,7 @@ xfs_filblks_t
xfs_rtbitmap_blockcount(
struct xfs_mount *mp)
{
- return xfs_rtbitmap_blockcount_len(mp, mp->m_sb.sb_rextents);
+ return xfs_rtbitmap_blockcount_len(mp, xfs_rtbitmap_bitcount(mp));
}
/*
@@ -1176,12 +1305,12 @@ xfs_rtsummary_blockcount(
struct xfs_mount *mp,
unsigned int *rsumlevels)
{
+ xfs_rtbxlen_t rextents = xfs_rtbitmap_bitcount(mp);
unsigned long long rsumwords;
- *rsumlevels = xfs_compute_rextslog(mp->m_sb.sb_rextents) + 1;
-
- rsumwords = xfs_rtbitmap_blockcount(mp) * (*rsumlevels);
- return XFS_B_TO_FSB(mp, rsumwords << XFS_WORDLOG);
+ *rsumlevels = xfs_compute_rextslog(rextents) + 1;
+ rsumwords = xfs_rtbitmap_blockcount_len(mp, rextents) * (*rsumlevels);
+ return howmany_64(rsumwords, mp->m_blockwsize);
}
static int
@@ -1233,6 +1362,7 @@ xfs_rtfile_initialize_block(
struct xfs_inode *ip = rtg->rtg_inodes[type];
struct xfs_trans *tp;
struct xfs_buf *bp;
+ void *bufdata;
const size_t copylen = mp->m_blockwsize << XFS_WORDLOG;
enum xfs_blft buf_type;
int error;
@@ -1256,13 +1386,30 @@ xfs_rtfile_initialize_block(
xfs_trans_cancel(tp);
return error;
}
+ bufdata = bp->b_addr;
xfs_trans_buf_set_type(tp, bp, buf_type);
- bp->b_ops = &xfs_rtbuf_ops;
+ bp->b_ops = xfs_rtblock_ops(mp, type);
+
+ if (xfs_has_rtgroups(mp)) {
+ struct xfs_rtbuf_blkinfo *hdr = bp->b_addr;
+
+ if (type == XFS_RTGI_BITMAP)
+ hdr->rt_magic = cpu_to_be32(XFS_RTBITMAP_MAGIC);
+ else
+ hdr->rt_magic = cpu_to_be32(XFS_RTSUMMARY_MAGIC);
+ hdr->rt_owner = cpu_to_be64(ip->i_ino);
+ hdr->rt_blkno = cpu_to_be64(XFS_FSB_TO_DADDR(mp, fsbno));
+ hdr->rt_lsn = 0;
+ uuid_copy(&hdr->rt_uuid, &mp->m_sb.sb_meta_uuid);
+
+ bufdata += sizeof(*hdr);
+ }
+
if (data)
- memcpy(bp->b_addr, data, copylen);
+ memcpy(bufdata, data, copylen);
else
- memset(bp->b_addr, 0, copylen);
+ memset(bufdata, 0, copylen);
xfs_trans_log_buf(tp, bp, 0, mp->m_sb.sb_blocksize - 1);
return xfs_trans_commit(tp);
}
diff --git a/fs/xfs/libxfs/xfs_rtbitmap.h b/fs/xfs/libxfs/xfs_rtbitmap.h
index b2b9e59a87a2..16563a44bd13 100644
--- a/fs/xfs/libxfs/xfs_rtbitmap.h
+++ b/fs/xfs/libxfs/xfs_rtbitmap.h
@@ -26,7 +26,7 @@ xfs_rtx_to_rtb(
xfs_rtxnum_t rtx)
{
struct xfs_mount *mp = rtg_mount(rtg);
- xfs_rtblock_t start = xfs_rgno_start_rtb(mp, rtg_rgno(rtg));
+ xfs_rtblock_t start = xfs_group_start_fsb(rtg_group(rtg));
if (mp->m_rtxblklog >= 0)
return start + (rtx << mp->m_rtxblklog);
@@ -101,17 +101,38 @@ xfs_blen_to_rtbxlen(
return div_u64(blen, mp->m_sb.sb_rextsize);
}
+/* Return the offset of a file block length within an rt extent. */
+static inline xfs_extlen_t
+xfs_blen_to_rtxoff(
+ struct xfs_mount *mp,
+ xfs_filblks_t blen)
+{
+ if (likely(mp->m_rtxblklog >= 0))
+ return blen & mp->m_rtxblkmask;
+
+ return do_div(blen, mp->m_sb.sb_rextsize);
+}
+
+/* Round this block count up to the nearest rt extent size. */
+static inline xfs_filblks_t
+xfs_blen_roundup_rtx(
+ struct xfs_mount *mp,
+ xfs_filblks_t blen)
+{
+ return roundup_64(blen, mp->m_sb.sb_rextsize);
+}
+
/* Convert an rt block number into an rt extent number. */
static inline xfs_rtxnum_t
xfs_rtb_to_rtx(
struct xfs_mount *mp,
xfs_rtblock_t rtbno)
{
- uint64_t __rgbno = __xfs_rtb_to_rgbno(mp, rtbno);
-
+ /* open-coded 64-bit masking operation */
+ rtbno &= mp->m_groups[XG_TYPE_RTG].blkmask;
if (likely(mp->m_rtxblklog >= 0))
- return __rgbno >> mp->m_rtxblklog;
- return div_u64(__rgbno, mp->m_sb.sb_rextsize);
+ return rtbno >> mp->m_rtxblklog;
+ return div_u64(rtbno, mp->m_sb.sb_rextsize);
}
/* Return the offset of an rt block number within an rt extent. */
@@ -120,28 +141,29 @@ xfs_rtb_to_rtxoff(
struct xfs_mount *mp,
xfs_rtblock_t rtbno)
{
+ /* open-coded 64-bit masking operation */
+ rtbno &= mp->m_groups[XG_TYPE_RTG].blkmask;
if (likely(mp->m_rtxblklog >= 0))
return rtbno & mp->m_rtxblkmask;
-
return do_div(rtbno, mp->m_sb.sb_rextsize);
}
-/* Round this rtblock up to the nearest rt extent size. */
+/* Round this file block offset up to the nearest rt extent size. */
static inline xfs_rtblock_t
-xfs_rtb_roundup_rtx(
+xfs_fileoff_roundup_rtx(
struct xfs_mount *mp,
- xfs_rtblock_t rtbno)
+ xfs_fileoff_t off)
{
- return roundup_64(rtbno, mp->m_sb.sb_rextsize);
+ return roundup_64(off, mp->m_sb.sb_rextsize);
}
-/* Round this rtblock down to the nearest rt extent size. */
+/* Round this file block offset down to the nearest rt extent size. */
static inline xfs_rtblock_t
-xfs_rtb_rounddown_rtx(
+xfs_fileoff_rounddown_rtx(
struct xfs_mount *mp,
- xfs_rtblock_t rtbno)
+ xfs_fileoff_t off)
{
- return rounddown_64(rtbno, mp->m_sb.sb_rextsize);
+ return rounddown_64(off, mp->m_sb.sb_rextsize);
}
/* Convert an rt extent number to a file block offset in the rt bitmap file. */
@@ -150,6 +172,9 @@ xfs_rtx_to_rbmblock(
struct xfs_mount *mp,
xfs_rtxnum_t rtx)
{
+ if (xfs_has_rtgroups(mp))
+ return div_u64(rtx, mp->m_rtx_per_rbmblock);
+
return rtx >> mp->m_blkbit_log;
}
@@ -159,6 +184,13 @@ xfs_rtx_to_rbmword(
struct xfs_mount *mp,
xfs_rtxnum_t rtx)
{
+ if (xfs_has_rtgroups(mp)) {
+ unsigned int mod;
+
+ div_u64_rem(rtx >> XFS_NBWORDLOG, mp->m_blockwsize, &mod);
+ return mod;
+ }
+
return (rtx >> XFS_NBWORDLOG) & (mp->m_blockwsize - 1);
}
@@ -168,6 +200,9 @@ xfs_rbmblock_to_rtx(
struct xfs_mount *mp,
xfs_fileoff_t rbmoff)
{
+ if (xfs_has_rtgroups(mp))
+ return rbmoff * mp->m_rtx_per_rbmblock;
+
return rbmoff << mp->m_blkbit_log;
}
@@ -177,7 +212,14 @@ xfs_rbmblock_wordptr(
struct xfs_rtalloc_args *args,
unsigned int index)
{
- union xfs_rtword_raw *words = args->rbmbp->b_addr;
+ struct xfs_mount *mp = args->mp;
+ union xfs_rtword_raw *words;
+ struct xfs_rtbuf_blkinfo *hdr = args->rbmbp->b_addr;
+
+ if (xfs_has_rtgroups(mp))
+ words = (union xfs_rtword_raw *)(hdr + 1);
+ else
+ words = args->rbmbp->b_addr;
return words + index;
}
@@ -190,6 +232,8 @@ xfs_rtbitmap_getword(
{
union xfs_rtword_raw *word = xfs_rbmblock_wordptr(args, index);
+ if (xfs_has_rtgroups(args->mp))
+ return be32_to_cpu(word->rtg);
return word->old;
}
@@ -202,7 +246,10 @@ xfs_rtbitmap_setword(
{
union xfs_rtword_raw *word = xfs_rbmblock_wordptr(args, index);
- word->old = value;
+ if (xfs_has_rtgroups(args->mp))
+ word->rtg = cpu_to_be32(value);
+ else
+ word->old = value;
}
/*
@@ -227,6 +274,9 @@ xfs_rtsumoffs_to_block(
struct xfs_mount *mp,
xfs_rtsumoff_t rsumoff)
{
+ if (xfs_has_rtgroups(mp))
+ return rsumoff / mp->m_blockwsize;
+
return XFS_B_TO_FSBT(mp, rsumoff * sizeof(xfs_suminfo_t));
}
@@ -241,6 +291,9 @@ xfs_rtsumoffs_to_infoword(
{
unsigned int mask = mp->m_blockmask >> XFS_SUMINFOLOG;
+ if (xfs_has_rtgroups(mp))
+ return rsumoff % mp->m_blockwsize;
+
return rsumoff & mask;
}
@@ -250,7 +303,13 @@ xfs_rsumblock_infoptr(
struct xfs_rtalloc_args *args,
unsigned int index)
{
- union xfs_suminfo_raw *info = args->sumbp->b_addr;
+ union xfs_suminfo_raw *info;
+ struct xfs_rtbuf_blkinfo *hdr = args->sumbp->b_addr;
+
+ if (xfs_has_rtgroups(args->mp))
+ info = (union xfs_suminfo_raw *)(hdr + 1);
+ else
+ info = args->sumbp->b_addr;
return info + index;
}
@@ -263,6 +322,8 @@ xfs_suminfo_get(
{
union xfs_suminfo_raw *info = xfs_rsumblock_infoptr(args, index);
+ if (xfs_has_rtgroups(args->mp))
+ return be32_to_cpu(info->rtg);
return info->old;
}
@@ -275,10 +336,28 @@ xfs_suminfo_add(
{
union xfs_suminfo_raw *info = xfs_rsumblock_infoptr(args, index);
+ if (xfs_has_rtgroups(args->mp)) {
+ be32_add_cpu(&info->rtg, delta);
+ return be32_to_cpu(info->rtg);
+ }
+
info->old += delta;
return info->old;
}
+static inline const struct xfs_buf_ops *
+xfs_rtblock_ops(
+ struct xfs_mount *mp,
+ enum xfs_rtg_inodes type)
+{
+ if (xfs_has_rtgroups(mp)) {
+ if (type == XFS_RTGI_SUMMARY)
+ return &xfs_rtsummary_buf_ops;
+ return &xfs_rtbitmap_buf_ops;
+ }
+ return &xfs_rtbuf_ops;
+}
+
/*
* Functions for walking free space rtextents in the realtime bitmap.
*/
@@ -324,6 +403,7 @@ int xfs_rtfree_extent(struct xfs_trans *tp, struct xfs_rtgroup *rtg,
int xfs_rtfree_blocks(struct xfs_trans *tp, struct xfs_rtgroup *rtg,
xfs_fsblock_t rtbno, xfs_filblks_t rtlen);
+xfs_rtxnum_t xfs_rtbitmap_rtx_per_rbmblock(struct xfs_mount *mp);
xfs_filblks_t xfs_rtbitmap_blockcount(struct xfs_mount *mp);
xfs_filblks_t xfs_rtbitmap_blockcount_len(struct xfs_mount *mp,
xfs_rtbxlen_t rtextents);
diff --git a/fs/xfs/libxfs/xfs_rtgroup.c b/fs/xfs/libxfs/xfs_rtgroup.c
index da29f41e51f1..e74bb059f24f 100644
--- a/fs/xfs/libxfs/xfs_rtgroup.c
+++ b/fs/xfs/libxfs/xfs_rtgroup.c
@@ -28,11 +28,38 @@
#include "xfs_trace.h"
#include "xfs_inode.h"
#include "xfs_icache.h"
+#include "xfs_buf_item.h"
#include "xfs_rtgroup.h"
#include "xfs_rtbitmap.h"
#include "xfs_metafile.h"
#include "xfs_metadir.h"
+/* Find the first usable fsblock in this rtgroup. */
+static inline uint32_t
+xfs_rtgroup_min_block(
+ struct xfs_mount *mp,
+ xfs_rgnumber_t rgno)
+{
+ if (xfs_has_rtsb(mp) && rgno == 0)
+ return mp->m_sb.sb_rextsize;
+
+ return 0;
+}
+
+/* Precompute this group's geometry */
+void
+xfs_rtgroup_calc_geometry(
+ struct xfs_mount *mp,
+ struct xfs_rtgroup *rtg,
+ xfs_rgnumber_t rgno,
+ xfs_rgnumber_t rgcount,
+ xfs_rtbxlen_t rextents)
+{
+ rtg->rtg_extents = __xfs_rtgroup_extents(mp, rgno, rgcount, rextents);
+ rtg_group(rtg)->xg_block_count = rtg->rtg_extents * mp->m_sb.sb_rextsize;
+ rtg_group(rtg)->xg_min_gbno = xfs_rtgroup_min_block(mp, rgno);
+}
+
int
xfs_rtgroup_alloc(
struct xfs_mount *mp,
@@ -47,6 +74,8 @@ xfs_rtgroup_alloc(
if (!rtg)
return -ENOMEM;
+ xfs_rtgroup_calc_geometry(mp, rtg, rgno, rgcount, rextents);
+
error = xfs_group_insert(mp, rtg_group(rtg), rgno, XG_TYPE_RTG);
if (error)
goto out_free_rtg;
@@ -148,6 +177,7 @@ xfs_update_last_rtgroup_size(
return -EFSCORRUPTED;
rtg->rtg_extents = __xfs_rtgroup_extents(mp, prev_rgcount - 1,
mp->m_sb.sb_rgcount, mp->m_sb.sb_rextents);
+ rtg_group(rtg)->xg_block_count = rtg->rtg_extents * mp->m_sb.sb_rextsize;
xfs_rtgroup_rele(rtg);
return 0;
}
@@ -213,6 +243,20 @@ xfs_rtgroup_trans_join(
}
}
+/* Retrieve rt group geometry. */
+int
+xfs_rtgroup_get_geometry(
+ struct xfs_rtgroup *rtg,
+ struct xfs_rtgroup_geometry *rgeo)
+{
+ /* Fill out form. */
+ memset(rgeo, 0, sizeof(*rgeo));
+ rgeo->rg_number = rtg_rgno(rtg);
+ rgeo->rg_length = rtg_group(rtg)->xg_block_count;
+ xfs_rtgroup_geom_health(rtg, rgeo);
+ return 0;
+}
+
#ifdef CONFIG_PROVE_LOCKING
static struct lock_class_key xfs_rtginode_lock_class;
@@ -270,6 +314,8 @@ struct xfs_rtginode_ops {
enum xfs_metafile_type metafile_type;
+ unsigned int sick; /* rtgroup sickness flag */
+
/* Does the fs have this feature? */
bool (*enabled)(struct xfs_mount *mp);
@@ -284,11 +330,13 @@ static const struct xfs_rtginode_ops xfs_rtginode_ops[XFS_RTGI_MAX] = {
[XFS_RTGI_BITMAP] = {
.name = "bitmap",
.metafile_type = XFS_METAFILE_RTBITMAP,
+ .sick = XFS_SICK_RG_BITMAP,
.create = xfs_rtbitmap_create,
},
[XFS_RTGI_SUMMARY] = {
.name = "summary",
.metafile_type = XFS_METAFILE_RTSUMMARY,
+ .sick = XFS_SICK_RG_SUMMARY,
.create = xfs_rtsummary_create,
},
};
@@ -322,6 +370,17 @@ xfs_rtginode_enabled(
return ops->enabled(rtg_mount(rtg));
}
+/* Mark an rtgroup inode sick */
+void
+xfs_rtginode_mark_sick(
+ struct xfs_rtgroup *rtg,
+ enum xfs_rtg_inodes type)
+{
+ const struct xfs_rtginode_ops *ops = &xfs_rtginode_ops[type];
+
+ xfs_group_mark_sick(rtg_group(rtg), ops->sick);
+}
+
/* Load and existing rtgroup inode into the rtgroup structure. */
int
xfs_rtginode_load(
@@ -357,8 +416,10 @@ xfs_rtginode_load(
} else {
const char *path;
- if (!mp->m_rtdirip)
+ if (!mp->m_rtdirip) {
+ xfs_fs_mark_sick(mp, XFS_SICK_FS_METADIR);
return -EFSCORRUPTED;
+ }
path = xfs_rtginode_path(rtg_rgno(rtg), type);
if (!path)
@@ -368,17 +429,22 @@ xfs_rtginode_load(
kfree(path);
}
- if (error)
+ if (error) {
+ if (xfs_metadata_is_sick(error))
+ xfs_rtginode_mark_sick(rtg, type);
return error;
+ }
if (XFS_IS_CORRUPT(mp, ip->i_df.if_format != XFS_DINODE_FMT_EXTENTS &&
ip->i_df.if_format != XFS_DINODE_FMT_BTREE)) {
xfs_irele(ip);
+ xfs_rtginode_mark_sick(rtg, type);
return -EFSCORRUPTED;
}
if (XFS_IS_CORRUPT(mp, ip->i_projid != rtg_rgno(rtg))) {
xfs_irele(ip);
+ xfs_rtginode_mark_sick(rtg, type);
return -EFSCORRUPTED;
}
@@ -415,8 +481,10 @@ xfs_rtginode_create(
if (!xfs_rtginode_enabled(rtg, type))
return 0;
- if (!mp->m_rtdirip)
+ if (!mp->m_rtdirip) {
+ xfs_fs_mark_sick(mp, XFS_SICK_FS_METADIR);
return -EFSCORRUPTED;
+ }
upd.path = xfs_rtginode_path(rtg_rgno(rtg), type);
if (!upd.path)
@@ -463,8 +531,10 @@ int
xfs_rtginode_mkdir_parent(
struct xfs_mount *mp)
{
- if (!mp->m_metadirip)
+ if (!mp->m_metadirip) {
+ xfs_fs_mark_sick(mp, XFS_SICK_FS_METADIR);
return -EFSCORRUPTED;
+ }
return xfs_metadir_mkdir(mp->m_metadirip, "rtgroups", &mp->m_rtdirip);
}
@@ -476,9 +546,152 @@ xfs_rtginode_load_parent(
{
struct xfs_mount *mp = tp->t_mountp;
- if (!mp->m_metadirip)
+ if (!mp->m_metadirip) {
+ xfs_fs_mark_sick(mp, XFS_SICK_FS_METADIR);
return -EFSCORRUPTED;
+ }
return xfs_metadir_load(tp, mp->m_metadirip, "rtgroups",
XFS_METAFILE_DIR, &mp->m_rtdirip);
}
+
+/* Check superblock fields for a read or a write. */
+static xfs_failaddr_t
+xfs_rtsb_verify_common(
+ struct xfs_buf *bp)
+{
+ struct xfs_rtsb *rsb = bp->b_addr;
+
+ if (!xfs_verify_magic(bp, rsb->rsb_magicnum))
+ return __this_address;
+ if (rsb->rsb_pad)
+ return __this_address;
+
+ /* Everything to the end of the fs block must be zero */
+ if (memchr_inv(rsb + 1, 0, BBTOB(bp->b_length) - sizeof(*rsb)))
+ return __this_address;
+
+ return NULL;
+}
+
+/* Check superblock fields for a read or revalidation. */
+static inline xfs_failaddr_t
+xfs_rtsb_verify_all(
+ struct xfs_buf *bp)
+{
+ struct xfs_rtsb *rsb = bp->b_addr;
+ struct xfs_mount *mp = bp->b_mount;
+ xfs_failaddr_t fa;
+
+ fa = xfs_rtsb_verify_common(bp);
+ if (fa)
+ return fa;
+
+ if (memcmp(&rsb->rsb_fname, &mp->m_sb.sb_fname, XFSLABEL_MAX))
+ return __this_address;
+ if (!uuid_equal(&rsb->rsb_uuid, &mp->m_sb.sb_uuid))
+ return __this_address;
+ if (!uuid_equal(&rsb->rsb_meta_uuid, &mp->m_sb.sb_meta_uuid))
+ return __this_address;
+
+ return NULL;
+}
+
+static void
+xfs_rtsb_read_verify(
+ struct xfs_buf *bp)
+{
+ xfs_failaddr_t fa;
+
+ if (!xfs_buf_verify_cksum(bp, XFS_RTSB_CRC_OFF)) {
+ xfs_verifier_error(bp, -EFSBADCRC, __this_address);
+ return;
+ }
+
+ fa = xfs_rtsb_verify_all(bp);
+ if (fa)
+ xfs_verifier_error(bp, -EFSCORRUPTED, fa);
+}
+
+static void
+xfs_rtsb_write_verify(
+ struct xfs_buf *bp)
+{
+ xfs_failaddr_t fa;
+
+ fa = xfs_rtsb_verify_common(bp);
+ if (fa) {
+ xfs_verifier_error(bp, -EFSCORRUPTED, fa);
+ return;
+ }
+
+ xfs_buf_update_cksum(bp, XFS_RTSB_CRC_OFF);
+}
+
+const struct xfs_buf_ops xfs_rtsb_buf_ops = {
+ .name = "xfs_rtsb",
+ .magic = { 0, cpu_to_be32(XFS_RTSB_MAGIC) },
+ .verify_read = xfs_rtsb_read_verify,
+ .verify_write = xfs_rtsb_write_verify,
+ .verify_struct = xfs_rtsb_verify_all,
+};
+
+/* Update a realtime superblock from the primary fs super */
+void
+xfs_update_rtsb(
+ struct xfs_buf *rtsb_bp,
+ const struct xfs_buf *sb_bp)
+{
+ const struct xfs_dsb *dsb = sb_bp->b_addr;
+ struct xfs_rtsb *rsb = rtsb_bp->b_addr;
+ const uuid_t *meta_uuid;
+
+ rsb->rsb_magicnum = cpu_to_be32(XFS_RTSB_MAGIC);
+
+ rsb->rsb_pad = 0;
+ memcpy(&rsb->rsb_fname, &dsb->sb_fname, XFSLABEL_MAX);
+
+ memcpy(&rsb->rsb_uuid, &dsb->sb_uuid, sizeof(rsb->rsb_uuid));
+
+ /*
+ * The metadata uuid is the fs uuid if the metauuid feature is not
+ * enabled.
+ */
+ if (dsb->sb_features_incompat &
+ cpu_to_be32(XFS_SB_FEAT_INCOMPAT_META_UUID))
+ meta_uuid = &dsb->sb_meta_uuid;
+ else
+ meta_uuid = &dsb->sb_uuid;
+ memcpy(&rsb->rsb_meta_uuid, meta_uuid, sizeof(rsb->rsb_meta_uuid));
+}
+
+/*
+ * Update the realtime superblock from a filesystem superblock and log it to
+ * the given transaction.
+ */
+struct xfs_buf *
+xfs_log_rtsb(
+ struct xfs_trans *tp,
+ const struct xfs_buf *sb_bp)
+{
+ struct xfs_buf *rtsb_bp;
+
+ if (!xfs_has_rtsb(tp->t_mountp))
+ return NULL;
+
+ rtsb_bp = xfs_trans_getrtsb(tp);
+ if (!rtsb_bp) {
+ /*
+ * It's possible for the rtgroups feature to be enabled but
+ * there is no incore rt superblock buffer if the rt geometry
+ * was specified at mkfs time but the rt section has not yet
+ * been attached. In this case, rblocks must be zero.
+ */
+ ASSERT(tp->t_mountp->m_sb.sb_rblocks == 0);
+ return NULL;
+ }
+
+ xfs_update_rtsb(rtsb_bp, sb_bp);
+ xfs_trans_ordered_buf(tp, rtsb_bp);
+ return rtsb_bp;
+}
diff --git a/fs/xfs/libxfs/xfs_rtgroup.h b/fs/xfs/libxfs/xfs_rtgroup.h
index 6ccf31bb6bc7..7e7e491ff06f 100644
--- a/fs/xfs/libxfs/xfs_rtgroup.h
+++ b/fs/xfs/libxfs/xfs_rtgroup.h
@@ -123,30 +123,11 @@ xfs_rtgroup_next(
}
static inline xfs_rtblock_t
-xfs_rgno_start_rtb(
- struct xfs_mount *mp,
- xfs_rgnumber_t rgno)
-{
- if (mp->m_rgblklog >= 0)
- return ((xfs_rtblock_t)rgno << mp->m_rgblklog);
- return ((xfs_rtblock_t)rgno * mp->m_rgblocks);
-}
-
-static inline xfs_rtblock_t
-__xfs_rgbno_to_rtb(
- struct xfs_mount *mp,
- xfs_rgnumber_t rgno,
- xfs_rgblock_t rgbno)
-{
- return xfs_rgno_start_rtb(mp, rgno) + rgbno;
-}
-
-static inline xfs_rtblock_t
xfs_rgbno_to_rtb(
struct xfs_rtgroup *rtg,
xfs_rgblock_t rgbno)
{
- return __xfs_rgbno_to_rtb(rtg_mount(rtg), rtg_rgno(rtg), rgbno);
+ return xfs_gbno_to_fsb(rtg_group(rtg), rgbno);
}
static inline xfs_rgnumber_t
@@ -154,38 +135,37 @@ xfs_rtb_to_rgno(
struct xfs_mount *mp,
xfs_rtblock_t rtbno)
{
- if (!xfs_has_rtgroups(mp))
- return 0;
-
- if (mp->m_rgblklog >= 0)
- return rtbno >> mp->m_rgblklog;
-
- return div_u64(rtbno, mp->m_rgblocks);
+ return xfs_fsb_to_gno(mp, rtbno, XG_TYPE_RTG);
}
-static inline uint64_t
-__xfs_rtb_to_rgbno(
+static inline xfs_rgblock_t
+xfs_rtb_to_rgbno(
struct xfs_mount *mp,
xfs_rtblock_t rtbno)
{
- uint32_t rem;
-
- if (!xfs_has_rtgroups(mp))
- return rtbno;
-
- if (mp->m_rgblklog >= 0)
- return rtbno & mp->m_rgblkmask;
-
- div_u64_rem(rtbno, mp->m_rgblocks, &rem);
- return rem;
+ return xfs_fsb_to_gbno(mp, rtbno, XG_TYPE_RTG);
}
-static inline xfs_rgblock_t
-xfs_rtb_to_rgbno(
+/* Is rtbno the start of a RT group? */
+static inline bool
+xfs_rtbno_is_group_start(
struct xfs_mount *mp,
xfs_rtblock_t rtbno)
{
- return __xfs_rtb_to_rgbno(mp, rtbno);
+ return (rtbno & mp->m_groups[XG_TYPE_RTG].blkmask) == 0;
+}
+
+/* Convert an rtgroups rt extent number into an rgbno. */
+static inline xfs_rgblock_t
+xfs_rtx_to_rgbno(
+ struct xfs_rtgroup *rtg,
+ xfs_rtxnum_t rtx)
+{
+ struct xfs_mount *mp = rtg_mount(rtg);
+
+ if (likely(mp->m_rtxblklog >= 0))
+ return rtx << mp->m_rtxblklog;
+ return rtx * mp->m_sb.sb_rextsize;
}
static inline xfs_daddr_t
@@ -193,7 +173,11 @@ xfs_rtb_to_daddr(
struct xfs_mount *mp,
xfs_rtblock_t rtbno)
{
- return rtbno << mp->m_blkbb_log;
+ struct xfs_groups *g = &mp->m_groups[XG_TYPE_RTG];
+ xfs_rgnumber_t rgno = xfs_rtb_to_rgno(mp, rtbno);
+ uint64_t start_bno = (xfs_rtblock_t)rgno * g->blocks;
+
+ return XFS_FSB_TO_BB(mp, start_bno + (rtbno & g->blkmask));
}
static inline xfs_rtblock_t
@@ -201,7 +185,18 @@ xfs_daddr_to_rtb(
struct xfs_mount *mp,
xfs_daddr_t daddr)
{
- return daddr >> mp->m_blkbb_log;
+ xfs_rfsblock_t bno = XFS_BB_TO_FSBT(mp, daddr);
+
+ if (xfs_has_rtgroups(mp)) {
+ struct xfs_groups *g = &mp->m_groups[XG_TYPE_RTG];
+ xfs_rgnumber_t rgno;
+ uint32_t rgbno;
+
+ rgno = div_u64_rem(bno, g->blocks, &rgbno);
+ return ((xfs_rtblock_t)rgno << g->blklog) + rgbno;
+ }
+
+ return bno;
}
#ifdef CONFIG_XFS_RT
@@ -217,6 +212,9 @@ int xfs_initialize_rtgroups(struct xfs_mount *mp, xfs_rgnumber_t first_rgno,
xfs_rtxnum_t __xfs_rtgroup_extents(struct xfs_mount *mp, xfs_rgnumber_t rgno,
xfs_rgnumber_t rgcount, xfs_rtbxlen_t rextents);
xfs_rtxnum_t xfs_rtgroup_extents(struct xfs_mount *mp, xfs_rgnumber_t rgno);
+void xfs_rtgroup_calc_geometry(struct xfs_mount *mp, struct xfs_rtgroup *rtg,
+ xfs_rgnumber_t rgno, xfs_rgnumber_t rgcount,
+ xfs_rtbxlen_t rextents);
int xfs_update_last_rtgroup_size(struct xfs_mount *mp,
xfs_rgnumber_t prev_rgcount);
@@ -234,12 +232,16 @@ void xfs_rtgroup_unlock(struct xfs_rtgroup *rtg, unsigned int rtglock_flags);
void xfs_rtgroup_trans_join(struct xfs_trans *tp, struct xfs_rtgroup *rtg,
unsigned int rtglock_flags);
+int xfs_rtgroup_get_geometry(struct xfs_rtgroup *rtg,
+ struct xfs_rtgroup_geometry *rgeo);
+
int xfs_rtginode_mkdir_parent(struct xfs_mount *mp);
int xfs_rtginode_load_parent(struct xfs_trans *tp);
const char *xfs_rtginode_name(enum xfs_rtg_inodes type);
enum xfs_metafile_type xfs_rtginode_metafile_type(enum xfs_rtg_inodes type);
bool xfs_rtginode_enabled(struct xfs_rtgroup *rtg, enum xfs_rtg_inodes type);
+void xfs_rtginode_mark_sick(struct xfs_rtgroup *rtg, enum xfs_rtg_inodes type);
int xfs_rtginode_load(struct xfs_rtgroup *rtg, enum xfs_rtg_inodes type,
struct xfs_trans *tp);
int xfs_rtginode_create(struct xfs_rtgroup *rtg, enum xfs_rtg_inodes type,
@@ -251,6 +253,11 @@ static inline const char *xfs_rtginode_path(xfs_rgnumber_t rgno,
{
return kasprintf(GFP_KERNEL, "%u.%s", rgno, xfs_rtginode_name(type));
}
+
+void xfs_update_rtsb(struct xfs_buf *rtsb_bp,
+ const struct xfs_buf *sb_bp);
+struct xfs_buf *xfs_log_rtsb(struct xfs_trans *tp,
+ const struct xfs_buf *sb_bp);
#else
static inline void xfs_free_rtgroups(struct xfs_mount *mp,
xfs_rgnumber_t first_rgno, xfs_rgnumber_t end_rgno)
@@ -269,6 +276,9 @@ static inline int xfs_initialize_rtgroups(struct xfs_mount *mp,
# define xfs_rtgroup_lock(rtg, gf) ((void)0)
# define xfs_rtgroup_unlock(rtg, gf) ((void)0)
# define xfs_rtgroup_trans_join(tp, rtg, gf) ((void)0)
+# define xfs_update_rtsb(bp, sb_bp) ((void)0)
+# define xfs_log_rtsb(tp, sb_bp) (NULL)
+# define xfs_rtgroup_get_geometry(rtg, rgeo) (-EOPNOTSUPP)
#endif /* CONFIG_XFS_RT */
#endif /* __LIBXFS_RTGROUP_H */
diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c
index 5ca11c3c4711..c1254f019e53 100644
--- a/fs/xfs/libxfs/xfs_sb.c
+++ b/fs/xfs/libxfs/xfs_sb.c
@@ -27,6 +27,7 @@
#include "xfs_ag.h"
#include "xfs_rtbitmap.h"
#include "xfs_exchrange.h"
+#include "xfs_rtgroup.h"
/*
* Physical superblock buffer manipulations. Shared with libxfs in userspace.
@@ -234,11 +235,37 @@ xfs_validate_sb_read(
return 0;
}
+/* Return the number of extents covered by a single rt bitmap file */
+static xfs_rtbxlen_t
+xfs_extents_per_rbm(
+ struct xfs_sb *sbp)
+{
+ if (xfs_sb_is_v5(sbp) &&
+ (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_METADIR))
+ return sbp->sb_rgextents;
+ return sbp->sb_rextents;
+}
+
+/*
+ * Return the payload size of a single rt bitmap block (without the metadata
+ * header if any).
+ */
+static inline unsigned int
+xfs_rtbmblock_size(
+ struct xfs_sb *sbp)
+{
+ if (xfs_sb_is_v5(sbp) &&
+ (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_METADIR))
+ return sbp->sb_blocksize - sizeof(struct xfs_rtbuf_blkinfo);
+ return sbp->sb_blocksize;
+}
+
static uint64_t
-xfs_sb_calc_rbmblocks(
+xfs_expected_rbmblocks(
struct xfs_sb *sbp)
{
- return howmany_64(sbp->sb_rextents, NBBY * sbp->sb_blocksize);
+ return howmany_64(xfs_extents_per_rbm(sbp),
+ NBBY * xfs_rtbmblock_size(sbp));
}
/* Validate the realtime geometry */
@@ -260,7 +287,7 @@ xfs_validate_rt_geometry(
if (sbp->sb_rextents == 0 ||
sbp->sb_rextents != div_u64(sbp->sb_rblocks, sbp->sb_rextsize) ||
sbp->sb_rextslog != xfs_compute_rextslog(sbp->sb_rextents) ||
- sbp->sb_rbmblocks != xfs_sb_calc_rbmblocks(sbp))
+ sbp->sb_rbmblocks != xfs_expected_rbmblocks(sbp))
return false;
return true;
@@ -341,6 +368,78 @@ xfs_validate_sb_write(
return 0;
}
+int
+xfs_compute_rgblklog(
+ xfs_rtxlen_t rgextents,
+ xfs_rgblock_t rextsize)
+{
+ uint64_t rgblocks = (uint64_t)rgextents * rextsize;
+
+ return xfs_highbit64(rgblocks - 1) + 1;
+}
+
+static int
+xfs_validate_sb_rtgroups(
+ struct xfs_mount *mp,
+ struct xfs_sb *sbp)
+{
+ uint64_t groups;
+ int rgblklog;
+
+ if (sbp->sb_rextsize == 0) {
+ xfs_warn(mp,
+"Realtime extent size must not be zero.");
+ return -EINVAL;
+ }
+
+ if (sbp->sb_rgextents > XFS_MAX_RGBLOCKS / sbp->sb_rextsize) {
+ xfs_warn(mp,
+"Realtime group size (%u) must be less than %u rt extents.",
+ sbp->sb_rgextents,
+ XFS_MAX_RGBLOCKS / sbp->sb_rextsize);
+ return -EINVAL;
+ }
+
+ if (sbp->sb_rgextents < XFS_MIN_RGEXTENTS) {
+ xfs_warn(mp,
+"Realtime group size (%u) must be at least %u rt extents.",
+ sbp->sb_rgextents, XFS_MIN_RGEXTENTS);
+ return -EINVAL;
+ }
+
+ if (sbp->sb_rgcount > XFS_MAX_RGNUMBER) {
+ xfs_warn(mp,
+"Realtime groups (%u) must be less than %u.",
+ sbp->sb_rgcount, XFS_MAX_RGNUMBER);
+ return -EINVAL;
+ }
+
+ groups = howmany_64(sbp->sb_rextents, sbp->sb_rgextents);
+ if (groups != sbp->sb_rgcount) {
+ xfs_warn(mp,
+"Realtime groups (%u) do not cover the entire rt section; need (%llu) groups.",
+ sbp->sb_rgcount, groups);
+ return -EINVAL;
+ }
+
+ /* Exchange-range is required for fsr to work on realtime files */
+ if (!(sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_EXCHRANGE)) {
+ xfs_warn(mp,
+"Realtime groups feature requires exchange-range support.");
+ return -EINVAL;
+ }
+
+ rgblklog = xfs_compute_rgblklog(sbp->sb_rgextents, sbp->sb_rextsize);
+ if (sbp->sb_rgblklog != rgblklog) {
+ xfs_warn(mp,
+"Realtime group log (%d) does not match expected value (%d).",
+ sbp->sb_rgblklog, rgblklog);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
/* Check the validity of the SB. */
STATIC int
xfs_validate_sb_common(
@@ -352,6 +451,7 @@ xfs_validate_sb_common(
uint32_t agcount = 0;
uint32_t rem;
bool has_dalign;
+ int error;
if (!xfs_verify_magic(bp, dsb->sb_magicnum)) {
xfs_warn(mp,
@@ -415,6 +515,18 @@ xfs_validate_sb_common(
sbp->sb_spino_align);
return -EINVAL;
}
+
+ if (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_METADIR) {
+ if (memchr_inv(sbp->sb_pad, 0, sizeof(sbp->sb_pad))) {
+ xfs_warn(mp,
+"Metadir superblock padding fields must be zero.");
+ return -EINVAL;
+ }
+
+ error = xfs_validate_sb_rtgroups(mp, sbp);
+ if (error)
+ return error;
+ }
} else if (sbp->sb_qflags & (XFS_PQUOTA_ENFD | XFS_GQUOTA_ENFD |
XFS_PQUOTA_CHKD | XFS_GQUOTA_CHKD)) {
xfs_notice(mp,
@@ -582,6 +694,14 @@ xfs_validate_sb_common(
void
xfs_sb_quota_from_disk(struct xfs_sb *sbp)
{
+ if (xfs_sb_is_v5(sbp) &&
+ (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_METADIR)) {
+ sbp->sb_uquotino = NULLFSINO;
+ sbp->sb_gquotino = NULLFSINO;
+ sbp->sb_pquotino = NULLFSINO;
+ return;
+ }
+
/*
* older mkfs doesn't initialize quota inodes to NULLFSINO. This
* leads to in-core values having two different values for a quota
@@ -706,13 +826,19 @@ __xfs_sb_from_disk(
if (convert_xquota)
xfs_sb_quota_from_disk(to);
- if (to->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_METADIR)
+ if (to->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_METADIR) {
to->sb_metadirino = be64_to_cpu(from->sb_metadirino);
- else
+ to->sb_rgblklog = from->sb_rgblklog;
+ memcpy(to->sb_pad, from->sb_pad, sizeof(to->sb_pad));
+ to->sb_rgcount = be32_to_cpu(from->sb_rgcount);
+ to->sb_rgextents = be32_to_cpu(from->sb_rgextents);
+ to->sb_rbmino = NULLFSINO;
+ to->sb_rsumino = NULLFSINO;
+ } else {
to->sb_metadirino = NULLFSINO;
-
- to->sb_rgcount = 1;
- to->sb_rgextents = 0;
+ to->sb_rgcount = 1;
+ to->sb_rgextents = 0;
+ }
}
void
@@ -730,6 +856,14 @@ xfs_sb_quota_to_disk(
{
uint16_t qflags = from->sb_qflags;
+ if (xfs_sb_is_v5(from) &&
+ (from->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_METADIR)) {
+ to->sb_uquotino = cpu_to_be64(0);
+ to->sb_gquotino = cpu_to_be64(0);
+ to->sb_pquotino = cpu_to_be64(0);
+ return;
+ }
+
to->sb_uquotino = cpu_to_be64(from->sb_uquotino);
/*
@@ -861,8 +995,15 @@ xfs_sb_to_disk(
if (from->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_META_UUID)
uuid_copy(&to->sb_meta_uuid, &from->sb_meta_uuid);
- if (from->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_METADIR)
+ if (from->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_METADIR) {
to->sb_metadirino = cpu_to_be64(from->sb_metadirino);
+ to->sb_rgblklog = from->sb_rgblklog;
+ memset(to->sb_pad, 0, sizeof(to->sb_pad));
+ to->sb_rgcount = cpu_to_be32(from->sb_rgcount);
+ to->sb_rgextents = cpu_to_be32(from->sb_rgextents);
+ to->sb_rbmino = cpu_to_be64(0);
+ to->sb_rsumino = cpu_to_be64(0);
+ }
}
/*
@@ -992,8 +1133,9 @@ const struct xfs_buf_ops xfs_sb_quiet_buf_ops = {
.verify_write = xfs_sb_write_verify,
};
+/* Compute cached rt geometry from the incore sb. */
void
-xfs_mount_sb_set_rextsize(
+xfs_sb_mount_rextsize(
struct xfs_mount *mp,
struct xfs_sb *sbp)
{
@@ -1002,13 +1144,32 @@ xfs_mount_sb_set_rextsize(
mp->m_rtxblklog = log2_if_power2(sbp->sb_rextsize);
mp->m_rtxblkmask = mask64_if_power2(sbp->sb_rextsize);
- mp->m_rgblocks = 0;
- mp->m_rgblklog = 0;
- mp->m_rgblkmask = (uint64_t)-1;
+ if (xfs_sb_is_v5(sbp) &&
+ (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_METADIR)) {
+ rgs->blocks = sbp->sb_rgextents * sbp->sb_rextsize;
+ rgs->blklog = mp->m_sb.sb_rgblklog;
+ rgs->blkmask = xfs_mask32lo(mp->m_sb.sb_rgblklog);
+ } else {
+ rgs->blocks = 0;
+ rgs->blklog = 0;
+ rgs->blkmask = (uint64_t)-1;
+ }
+}
+
+/* Update incore sb rt extent size, then recompute the cached rt geometry. */
+void
+xfs_mount_sb_set_rextsize(
+ struct xfs_mount *mp,
+ struct xfs_sb *sbp,
+ xfs_agblock_t rextsize)
+{
+ sbp->sb_rextsize = rextsize;
+ if (xfs_sb_is_v5(sbp) &&
+ (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_METADIR))
+ sbp->sb_rgblklog = xfs_compute_rgblklog(sbp->sb_rgextents,
+ rextsize);
- rgs->blocks = 0;
- rgs->blklog = 0;
- rgs->blkmask = (uint64_t)-1;
+ xfs_sb_mount_rextsize(mp, sbp);
}
/*
@@ -1035,14 +1196,14 @@ xfs_sb_mount_common(
mp->m_sectbb_log = sbp->sb_sectlog - BBSHIFT;
mp->m_agno_log = xfs_highbit32(sbp->sb_agcount - 1) + 1;
mp->m_blockmask = sbp->sb_blocksize - 1;
- mp->m_blockwsize = sbp->sb_blocksize >> XFS_WORDLOG;
- mp->m_blockwmask = mp->m_blockwsize - 1;
+ mp->m_blockwsize = xfs_rtbmblock_size(sbp) >> XFS_WORDLOG;
+ mp->m_rtx_per_rbmblock = mp->m_blockwsize << XFS_NBWORDLOG;
ags->blocks = mp->m_sb.sb_agblocks;
ags->blklog = mp->m_sb.sb_agblklog;
ags->blkmask = xfs_mask32lo(mp->m_sb.sb_agblklog);
- xfs_mount_sb_set_rextsize(mp, sbp);
+ xfs_sb_mount_rextsize(mp, sbp);
mp->m_alloc_mxr[0] = xfs_allocbt_maxrecs(mp, sbp->sb_blocksize, true);
mp->m_alloc_mxr[1] = xfs_allocbt_maxrecs(mp, sbp->sb_blocksize, false);
@@ -1089,11 +1250,6 @@ xfs_log_sb(
* reservations that have been taken out percpu counters. If we have an
* unclean shutdown, this will be corrected by log recovery rebuilding
* the counters from the AGF block counts.
- *
- * Do not update sb_frextents here because it is not part of the lazy
- * sb counters, despite having a percpu counter. It is always kept
- * consistent with the ondisk rtbitmap by xfs_trans_apply_sb_deltas()
- * and hence we don't need have to update it here.
*/
if (xfs_has_lazysbcount(mp)) {
mp->m_sb.sb_icount = percpu_counter_sum_positive(&mp->m_icount);
@@ -1104,6 +1260,16 @@ xfs_log_sb(
percpu_counter_sum_positive(&mp->m_fdblocks);
}
+ /*
+ * sb_frextents was added to the lazy sb counters when the rt groups
+ * feature was introduced. This counter can go negative due to the way
+ * we handle nearly-lockless reservations, so we must use the _positive
+ * variant here to avoid writing out nonsense frextents.
+ */
+ if (xfs_has_rtgroups(mp))
+ mp->m_sb.sb_frextents =
+ percpu_counter_sum_positive(&mp->m_frextents);
+
xfs_sb_to_disk(bp->b_addr, &mp->m_sb);
xfs_trans_buf_set_type(tp, bp, XFS_BLFT_SB_BUF);
xfs_trans_log_buf(tp, bp, 0, sizeof(struct xfs_dsb) - 1);
@@ -1214,10 +1380,12 @@ xfs_update_secondary_sbs(
*/
int
xfs_sync_sb_buf(
- struct xfs_mount *mp)
+ struct xfs_mount *mp,
+ bool update_rtsb)
{
struct xfs_trans *tp;
struct xfs_buf *bp;
+ struct xfs_buf *rtsb_bp = NULL;
int error;
error = xfs_trans_alloc(mp, &M_RES(mp)->tr_sb, 0, 0, 0, &tp);
@@ -1227,6 +1395,11 @@ xfs_sync_sb_buf(
bp = xfs_trans_getsb(tp);
xfs_log_sb(tp);
xfs_trans_bhold(tp, bp);
+ if (update_rtsb) {
+ rtsb_bp = xfs_log_rtsb(tp, bp);
+ if (rtsb_bp)
+ xfs_trans_bhold(tp, rtsb_bp);
+ }
xfs_trans_set_sync(tp);
error = xfs_trans_commit(tp);
if (error)
@@ -1235,7 +1408,11 @@ xfs_sync_sb_buf(
* write out the sb buffer to get the changes to disk
*/
error = xfs_bwrite(bp);
+ if (!error && rtsb_bp)
+ error = xfs_bwrite(rtsb_bp);
out:
+ if (rtsb_bp)
+ xfs_buf_relse(rtsb_bp);
xfs_buf_relse(bp);
return error;
}
@@ -1339,6 +1516,11 @@ xfs_fs_geometry(
return;
geo->version = XFS_FSOP_GEOM_VERSION_V5;
+
+ if (xfs_has_rtgroups(mp)) {
+ geo->rgcount = sbp->sb_rgcount;
+ geo->rgextents = sbp->sb_rgextents;
+ }
}
/* Read a secondary superblock. */
diff --git a/fs/xfs/libxfs/xfs_sb.h b/fs/xfs/libxfs/xfs_sb.h
index 885c83755991..34d0dd374e9b 100644
--- a/fs/xfs/libxfs/xfs_sb.h
+++ b/fs/xfs/libxfs/xfs_sb.h
@@ -15,10 +15,11 @@ struct xfs_perag;
extern void xfs_log_sb(struct xfs_trans *tp);
extern int xfs_sync_sb(struct xfs_mount *mp, bool wait);
-extern int xfs_sync_sb_buf(struct xfs_mount *mp);
+extern int xfs_sync_sb_buf(struct xfs_mount *mp, bool update_rtsb);
extern void xfs_sb_mount_common(struct xfs_mount *mp, struct xfs_sb *sbp);
+void xfs_sb_mount_rextsize(struct xfs_mount *mp, struct xfs_sb *sbp);
void xfs_mount_sb_set_rextsize(struct xfs_mount *mp,
- struct xfs_sb *sbp);
+ struct xfs_sb *sbp, xfs_agblock_t rextsize);
extern void xfs_sb_from_disk(struct xfs_sb *to, struct xfs_dsb *from);
extern void xfs_sb_to_disk(struct xfs_dsb *to, struct xfs_sb *from);
extern void xfs_sb_quota_from_disk(struct xfs_sb *sbp);
@@ -43,5 +44,6 @@ bool xfs_validate_stripe_geometry(struct xfs_mount *mp,
bool xfs_validate_rt_geometry(struct xfs_sb *sbp);
uint8_t xfs_compute_rextslog(xfs_rtbxlen_t rtextents);
+int xfs_compute_rgblklog(xfs_rtxlen_t rgextents, xfs_rgblock_t rextsize);
#endif /* __XFS_SB_H__ */
diff --git a/fs/xfs/libxfs/xfs_shared.h b/fs/xfs/libxfs/xfs_shared.h
index 33b84a3a83ff..e7efdb9ceaf3 100644
--- a/fs/xfs/libxfs/xfs_shared.h
+++ b/fs/xfs/libxfs/xfs_shared.h
@@ -38,7 +38,10 @@ extern const struct xfs_buf_ops xfs_inode_buf_ops;
extern const struct xfs_buf_ops xfs_inode_buf_ra_ops;
extern const struct xfs_buf_ops xfs_refcountbt_buf_ops;
extern const struct xfs_buf_ops xfs_rmapbt_buf_ops;
+extern const struct xfs_buf_ops xfs_rtbitmap_buf_ops;
+extern const struct xfs_buf_ops xfs_rtsummary_buf_ops;
extern const struct xfs_buf_ops xfs_rtbuf_ops;
+extern const struct xfs_buf_ops xfs_rtsb_buf_ops;
extern const struct xfs_buf_ops xfs_sb_buf_ops;
extern const struct xfs_buf_ops xfs_sb_quiet_buf_ops;
extern const struct xfs_buf_ops xfs_symlink_buf_ops;
@@ -157,6 +160,7 @@ void xfs_log_get_max_trans_res(struct xfs_mount *mp,
#define XFS_TRANS_SB_RBLOCKS 0x00000800
#define XFS_TRANS_SB_REXTENTS 0x00001000
#define XFS_TRANS_SB_REXTSLOG 0x00002000
+#define XFS_TRANS_SB_RGCOUNT 0x00004000
/*
* Here we centralize the specification of XFS meta-data buffer reference count
diff --git a/fs/xfs/libxfs/xfs_types.c b/fs/xfs/libxfs/xfs_types.c
index 1cbfe57e971d..1faf04204c5d 100644
--- a/fs/xfs/libxfs/xfs_types.c
+++ b/fs/xfs/libxfs/xfs_types.c
@@ -12,6 +12,8 @@
#include "xfs_bit.h"
#include "xfs_mount.h"
#include "xfs_ag.h"
+#include "xfs_rtbitmap.h"
+#include "xfs_rtgroup.h"
/*
@@ -135,18 +137,36 @@ xfs_verify_dir_ino(
}
/*
- * Verify that an realtime block number pointer doesn't point off the
- * end of the realtime device.
+ * Verify that a realtime block number pointer neither points outside the
+ * allocatable areas of the rtgroup nor off the end of the realtime
+ * device.
*/
inline bool
xfs_verify_rtbno(
struct xfs_mount *mp,
xfs_rtblock_t rtbno)
{
+ if (xfs_has_rtgroups(mp)) {
+ xfs_rgnumber_t rgno = xfs_rtb_to_rgno(mp, rtbno);
+ xfs_rtxnum_t rtx = xfs_rtb_to_rtx(mp, rtbno);
+
+ if (rgno >= mp->m_sb.sb_rgcount)
+ return false;
+ if (rtx >= xfs_rtgroup_extents(mp, rgno))
+ return false;
+ if (xfs_has_rtsb(mp) && rgno == 0 && rtx == 0)
+ return false;
+ return true;
+ }
+
return rtbno < mp->m_sb.sb_rblocks;
}
-/* Verify that a realtime device extent is fully contained inside the volume. */
+/*
+ * Verify that an allocated realtime device extent neither points outside
+ * allocatable areas of the rtgroup, across an rtgroup boundary, nor off the
+ * end of the realtime device.
+ */
bool
xfs_verify_rtbext(
struct xfs_mount *mp,
@@ -159,7 +179,14 @@ xfs_verify_rtbext(
if (!xfs_verify_rtbno(mp, rtbno))
return false;
- return xfs_verify_rtbno(mp, rtbno + len - 1);
+ if (!xfs_verify_rtbno(mp, rtbno + len - 1))
+ return false;
+
+ if (xfs_has_rtgroups(mp) &&
+ xfs_rtb_to_rgno(mp, rtbno) != xfs_rtb_to_rgno(mp, rtbno + len - 1))
+ return false;
+
+ return true;
}
/* Calculate the range of valid icount values. */
diff --git a/fs/xfs/scrub/agheader.c b/fs/xfs/scrub/agheader.c
index cad997f38a42..d037de6dd821 100644
--- a/fs/xfs/scrub/agheader.c
+++ b/fs/xfs/scrub/agheader.c
@@ -279,8 +279,15 @@ xchk_superblock(
if (!!(sb->sb_features2 & cpu_to_be32(~v2_ok)))
xchk_block_set_corrupt(sc, bp);
- if (sb->sb_features2 != sb->sb_bad_features2)
- xchk_block_set_preen(sc, bp);
+ if (xfs_has_metadir(mp)) {
+ if (sb->sb_rgblklog != mp->m_sb.sb_rgblklog)
+ xchk_block_set_corrupt(sc, bp);
+ if (memchr_inv(sb->sb_pad, 0, sizeof(sb->sb_pad)))
+ xchk_block_set_preen(sc, bp);
+ } else {
+ if (sb->sb_features2 != sb->sb_bad_features2)
+ xchk_block_set_preen(sc, bp);
+ }
}
/* Check sb_features2 flags that are set at mkfs time. */
@@ -557,7 +564,7 @@ xchk_agf(
/* Check the AG length */
eoag = be32_to_cpu(agf->agf_length);
- if (eoag != pag->block_count)
+ if (eoag != pag_group(pag)->xg_block_count)
xchk_block_set_corrupt(sc, sc->sa.agf_bp);
/* Check the AGF btree roots and levels */
@@ -937,7 +944,7 @@ xchk_agi(
/* Check the AG length */
eoag = be32_to_cpu(agi->agi_length);
- if (eoag != pag->block_count)
+ if (eoag != pag_group(pag)->xg_block_count)
xchk_block_set_corrupt(sc, sc->sa.agi_bp);
/* Check btree roots and levels */
diff --git a/fs/xfs/scrub/agheader_repair.c b/fs/xfs/scrub/agheader_repair.c
index 0ea04d6e21cd..0fad0baaba2f 100644
--- a/fs/xfs/scrub/agheader_repair.c
+++ b/fs/xfs/scrub/agheader_repair.c
@@ -209,7 +209,7 @@ xrep_agf_init_header(
agf->agf_magicnum = cpu_to_be32(XFS_AGF_MAGIC);
agf->agf_versionnum = cpu_to_be32(XFS_AGF_VERSION);
agf->agf_seqno = cpu_to_be32(pag_agno(pag));
- agf->agf_length = cpu_to_be32(pag->block_count);
+ agf->agf_length = cpu_to_be32(pag_group(pag)->xg_block_count);
agf->agf_flfirst = old_agf->agf_flfirst;
agf->agf_fllast = old_agf->agf_fllast;
agf->agf_flcount = old_agf->agf_flcount;
@@ -898,7 +898,7 @@ xrep_agi_init_header(
agi->agi_magicnum = cpu_to_be32(XFS_AGI_MAGIC);
agi->agi_versionnum = cpu_to_be32(XFS_AGI_VERSION);
agi->agi_seqno = cpu_to_be32(pag_agno(pag));
- agi->agi_length = cpu_to_be32(pag->block_count);
+ agi->agi_length = cpu_to_be32(pag_group(pag)->xg_block_count);
agi->agi_newino = cpu_to_be32(NULLAGINO);
agi->agi_dirino = cpu_to_be32(NULLAGINO);
if (xfs_has_crc(mp))
diff --git a/fs/xfs/scrub/bmap.c b/fs/xfs/scrub/bmap.c
index 008630b2b752..7e00312225ed 100644
--- a/fs/xfs/scrub/bmap.c
+++ b/fs/xfs/scrub/bmap.c
@@ -834,9 +834,12 @@ xchk_bmap_iext_mapping(
/* Are these two mappings contiguous with each other? */
static inline bool
xchk_are_bmaps_contiguous(
+ const struct xchk_bmap_info *info,
const struct xfs_bmbt_irec *b1,
const struct xfs_bmbt_irec *b2)
{
+ struct xfs_mount *mp = info->sc->mp;
+
/* Don't try to combine unallocated mappings. */
if (!xfs_bmap_is_real_extent(b1))
return false;
@@ -850,6 +853,17 @@ xchk_are_bmaps_contiguous(
return false;
if (b1->br_state != b2->br_state)
return false;
+
+ /*
+ * Don't combine bmaps that would cross rtgroup boundaries. This is a
+ * valid state, but if combined they will fail rtb extent checks.
+ */
+ if (info->is_rt && xfs_has_rtgroups(mp)) {
+ if (xfs_rtb_to_rgno(mp, b1->br_startblock) !=
+ xfs_rtb_to_rgno(mp, b2->br_startblock))
+ return false;
+ }
+
return true;
}
@@ -887,7 +901,7 @@ xchk_bmap_iext_iter(
* that we just read, if possible.
*/
while (xfs_iext_peek_next_extent(ifp, &info->icur, &got)) {
- if (!xchk_are_bmaps_contiguous(irec, &got))
+ if (!xchk_are_bmaps_contiguous(info, irec, &got))
break;
if (!xchk_bmap_iext_mapping(info, &got)) {
diff --git a/fs/xfs/scrub/common.h b/fs/xfs/scrub/common.h
index 672ed48d4a9f..9ff3cafd8679 100644
--- a/fs/xfs/scrub/common.h
+++ b/fs/xfs/scrub/common.h
@@ -79,9 +79,11 @@ int xchk_setup_metapath(struct xfs_scrub *sc);
#ifdef CONFIG_XFS_RT
int xchk_setup_rtbitmap(struct xfs_scrub *sc);
int xchk_setup_rtsummary(struct xfs_scrub *sc);
+int xchk_setup_rgsuperblock(struct xfs_scrub *sc);
#else
# define xchk_setup_rtbitmap xchk_setup_nothing
# define xchk_setup_rtsummary xchk_setup_nothing
+# define xchk_setup_rgsuperblock xchk_setup_nothing
#endif
#ifdef CONFIG_XFS_QUOTA
int xchk_ino_dqattach(struct xfs_scrub *sc);
diff --git a/fs/xfs/scrub/fscounters_repair.c b/fs/xfs/scrub/fscounters_repair.c
index 469bf645dbea..cda13447a373 100644
--- a/fs/xfs/scrub/fscounters_repair.c
+++ b/fs/xfs/scrub/fscounters_repair.c
@@ -68,15 +68,16 @@ xrep_fscounters(
/*
* Online repair is only supported on v5 file systems, which require
- * lazy sb counters and thus no update of sb_fdblocks here. But as of
- * now we don't support lazy counting sb_frextents yet, and thus need
- * to also update it directly here. And for that we need to keep
+ * lazy sb counters and thus no update of sb_fdblocks here. But
+ * sb_frextents only uses a lazy counter with rtgroups, and thus needs
+ * to be updated directly here otherwise. And for that we need to keep
* track of the delalloc reservations separately, as they are are
* subtracted from m_frextents, but not included in sb_frextents.
*/
percpu_counter_set(&mp->m_frextents,
fsc->frextents - fsc->frextents_delayed);
- mp->m_sb.sb_frextents = fsc->frextents;
+ if (!xfs_has_rtgroups(mp))
+ mp->m_sb.sb_frextents = fsc->frextents;
return 0;
}
diff --git a/fs/xfs/scrub/health.c b/fs/xfs/scrub/health.c
index b8b92ff3a573..ce86bdad37fa 100644
--- a/fs/xfs/scrub/health.c
+++ b/fs/xfs/scrub/health.c
@@ -12,6 +12,7 @@
#include "xfs_btree.h"
#include "xfs_ag.h"
#include "xfs_health.h"
+#include "xfs_rtgroup.h"
#include "scrub/scrub.h"
#include "scrub/health.h"
#include "scrub/common.h"
@@ -71,9 +72,9 @@
enum xchk_health_group {
XHG_FS = 1,
- XHG_RT,
XHG_AG,
XHG_INO,
+ XHG_RTGROUP,
};
struct xchk_health_map {
@@ -100,8 +101,8 @@ static const struct xchk_health_map type_to_health_flag[XFS_SCRUB_TYPE_NR] = {
[XFS_SCRUB_TYPE_XATTR] = { XHG_INO, XFS_SICK_INO_XATTR },
[XFS_SCRUB_TYPE_SYMLINK] = { XHG_INO, XFS_SICK_INO_SYMLINK },
[XFS_SCRUB_TYPE_PARENT] = { XHG_INO, XFS_SICK_INO_PARENT },
- [XFS_SCRUB_TYPE_RTBITMAP] = { XHG_RT, XFS_SICK_RT_BITMAP },
- [XFS_SCRUB_TYPE_RTSUM] = { XHG_RT, XFS_SICK_RT_SUMMARY },
+ [XFS_SCRUB_TYPE_RTBITMAP] = { XHG_RTGROUP, XFS_SICK_RG_BITMAP },
+ [XFS_SCRUB_TYPE_RTSUM] = { XHG_RTGROUP, XFS_SICK_RG_SUMMARY },
[XFS_SCRUB_TYPE_UQUOTA] = { XHG_FS, XFS_SICK_FS_UQUOTA },
[XFS_SCRUB_TYPE_GQUOTA] = { XHG_FS, XFS_SICK_FS_GQUOTA },
[XFS_SCRUB_TYPE_PQUOTA] = { XHG_FS, XFS_SICK_FS_PQUOTA },
@@ -110,6 +111,7 @@ static const struct xchk_health_map type_to_health_flag[XFS_SCRUB_TYPE_NR] = {
[XFS_SCRUB_TYPE_NLINKS] = { XHG_FS, XFS_SICK_FS_NLINKS },
[XFS_SCRUB_TYPE_DIRTREE] = { XHG_INO, XFS_SICK_INO_DIRTREE },
[XFS_SCRUB_TYPE_METAPATH] = { XHG_FS, XFS_SICK_FS_METAPATH },
+ [XFS_SCRUB_TYPE_RGSUPER] = { XHG_RTGROUP, XFS_SICK_RG_SUPER },
};
/* Return the health status mask for this scrub type. */
@@ -162,11 +164,13 @@ xchk_mark_all_healthy(
struct xfs_mount *mp)
{
struct xfs_perag *pag = NULL;
+ struct xfs_rtgroup *rtg = NULL;
xfs_fs_mark_healthy(mp, XFS_SICK_FS_INDIRECT);
- xfs_rt_mark_healthy(mp, XFS_SICK_RT_INDIRECT);
while ((pag = xfs_perag_next(mp, pag)))
xfs_group_mark_healthy(pag_group(pag), XFS_SICK_AG_INDIRECT);
+ while ((rtg = xfs_rtgroup_next(mp, rtg)))
+ xfs_group_mark_healthy(rtg_group(rtg), XFS_SICK_RG_INDIRECT);
}
/*
@@ -184,6 +188,7 @@ xchk_update_health(
struct xfs_scrub *sc)
{
struct xfs_perag *pag;
+ struct xfs_rtgroup *rtg;
bool bad;
/*
@@ -236,11 +241,13 @@ xchk_update_health(
else
xfs_fs_mark_healthy(sc->mp, sc->sick_mask);
break;
- case XHG_RT:
+ case XHG_RTGROUP:
+ rtg = xfs_rtgroup_get(sc->mp, sc->sm->sm_agno);
if (bad)
- xfs_rt_mark_corrupt(sc->mp, sc->sick_mask);
+ xfs_group_mark_corrupt(rtg_group(rtg), sc->sick_mask);
else
- xfs_rt_mark_healthy(sc->mp, sc->sick_mask);
+ xfs_group_mark_healthy(rtg_group(rtg), sc->sick_mask);
+ xfs_rtgroup_put(rtg);
break;
default:
ASSERT(0);
@@ -295,6 +302,7 @@ xchk_health_record(
{
struct xfs_mount *mp = sc->mp;
struct xfs_perag *pag = NULL;
+ struct xfs_rtgroup *rtg = NULL;
unsigned int sick;
unsigned int checked;
@@ -302,15 +310,17 @@ xchk_health_record(
if (sick & XFS_SICK_FS_PRIMARY)
xchk_set_corrupt(sc);
- xfs_rt_measure_sickness(mp, &sick, &checked);
- if (sick & XFS_SICK_RT_PRIMARY)
- xchk_set_corrupt(sc);
-
while ((pag = xfs_perag_next(mp, pag))) {
xfs_group_measure_sickness(pag_group(pag), &sick, &checked);
if (sick & XFS_SICK_AG_PRIMARY)
xchk_set_corrupt(sc);
}
+ while ((rtg = xfs_rtgroup_next(mp, rtg))) {
+ xfs_group_measure_sickness(rtg_group(rtg), &sick, &checked);
+ if (sick & XFS_SICK_RG_PRIMARY)
+ xchk_set_corrupt(sc);
+ }
+
return 0;
}
diff --git a/fs/xfs/scrub/metapath.c b/fs/xfs/scrub/metapath.c
index edc1a395c401..b8e427fd7fa7 100644
--- a/fs/xfs/scrub/metapath.c
+++ b/fs/xfs/scrub/metapath.c
@@ -20,6 +20,7 @@
#include "xfs_bmap_btree.h"
#include "xfs_trans_space.h"
#include "xfs_attr.h"
+#include "xfs_rtgroup.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/trace.h"
@@ -79,6 +80,91 @@ xchk_metapath_cleanup(
kfree(mpath->path);
}
+/* Set up a metadir path scan. @path must be dynamically allocated. */
+static inline int
+xchk_setup_metapath_scan(
+ struct xfs_scrub *sc,
+ struct xfs_inode *dp,
+ const char *path,
+ struct xfs_inode *ip)
+{
+ struct xchk_metapath *mpath;
+ int error;
+
+ if (!path)
+ return -ENOMEM;
+
+ error = xchk_install_live_inode(sc, ip);
+ if (error) {
+ kfree(path);
+ return error;
+ }
+
+ mpath = kzalloc(sizeof(struct xchk_metapath), XCHK_GFP_FLAGS);
+ if (!mpath) {
+ kfree(path);
+ return -ENOMEM;
+ }
+
+ mpath->sc = sc;
+ sc->buf = mpath;
+ sc->buf_cleanup = xchk_metapath_cleanup;
+
+ mpath->dp = dp;
+ mpath->path = path; /* path is now owned by mpath */
+
+ mpath->xname.name = mpath->path;
+ mpath->xname.len = strlen(mpath->path);
+ mpath->xname.type = xfs_mode_to_ftype(VFS_I(ip)->i_mode);
+
+ return 0;
+}
+
+#ifdef CONFIG_XFS_RT
+/* Scan the /rtgroups directory itself. */
+static int
+xchk_setup_metapath_rtdir(
+ struct xfs_scrub *sc)
+{
+ if (!sc->mp->m_rtdirip)
+ return -ENOENT;
+
+ return xchk_setup_metapath_scan(sc, sc->mp->m_metadirip,
+ kasprintf(GFP_KERNEL, "rtgroups"), sc->mp->m_rtdirip);
+}
+
+/* Scan a rtgroup inode under the /rtgroups directory. */
+static int
+xchk_setup_metapath_rtginode(
+ struct xfs_scrub *sc,
+ enum xfs_rtg_inodes type)
+{
+ struct xfs_rtgroup *rtg;
+ struct xfs_inode *ip;
+ int error;
+
+ rtg = xfs_rtgroup_get(sc->mp, sc->sm->sm_agno);
+ if (!rtg)
+ return -ENOENT;
+
+ ip = rtg->rtg_inodes[type];
+ if (!ip) {
+ error = -ENOENT;
+ goto out_put_rtg;
+ }
+
+ error = xchk_setup_metapath_scan(sc, sc->mp->m_rtdirip,
+ xfs_rtginode_path(rtg_rgno(rtg), type), ip);
+
+out_put_rtg:
+ xfs_rtgroup_put(rtg);
+ return error;
+}
+#else
+# define xchk_setup_metapath_rtdir(...) (-ENOENT)
+# define xchk_setup_metapath_rtginode(...) (-ENOENT)
+#endif /* CONFIG_XFS_RT */
+
int
xchk_setup_metapath(
struct xfs_scrub *sc)
@@ -94,6 +180,12 @@ xchk_setup_metapath(
if (sc->sm->sm_agno)
return -EINVAL;
return 0;
+ case XFS_SCRUB_METAPATH_RTDIR:
+ return xchk_setup_metapath_rtdir(sc);
+ case XFS_SCRUB_METAPATH_RTBITMAP:
+ return xchk_setup_metapath_rtginode(sc, XFS_RTGI_BITMAP);
+ case XFS_SCRUB_METAPATH_RTSUMMARY:
+ return xchk_setup_metapath_rtginode(sc, XFS_RTGI_SUMMARY);
default:
return -ENOENT;
}
diff --git a/fs/xfs/scrub/repair.c b/fs/xfs/scrub/repair.c
index 3fa009126170..91c8bc055a4f 100644
--- a/fs/xfs/scrub/repair.c
+++ b/fs/xfs/scrub/repair.c
@@ -306,7 +306,7 @@ xrep_calc_ag_resblks(
/* Now grab the block counters from the AGF. */
error = xfs_alloc_read_agf(pag, NULL, 0, &bp);
if (error) {
- aglen = pag->block_count;
+ aglen = pag_group(pag)->xg_block_count;
freelen = aglen;
usedlen = aglen;
} else {
@@ -326,9 +326,9 @@ xrep_calc_ag_resblks(
/* If the block counts are impossible, make worst-case assumptions. */
if (aglen == NULLAGBLOCK ||
- aglen != pag->block_count ||
+ aglen != pag_group(pag)->xg_block_count ||
freelen >= aglen) {
- aglen = pag->block_count;
+ aglen = pag_group(pag)->xg_block_count;
freelen = aglen;
usedlen = aglen;
}
diff --git a/fs/xfs/scrub/repair.h b/fs/xfs/scrub/repair.h
index 405218574391..b649da1a93eb 100644
--- a/fs/xfs/scrub/repair.h
+++ b/fs/xfs/scrub/repair.h
@@ -146,9 +146,11 @@ int xrep_metapath(struct xfs_scrub *sc);
#ifdef CONFIG_XFS_RT
int xrep_rtbitmap(struct xfs_scrub *sc);
int xrep_rtsummary(struct xfs_scrub *sc);
+int xrep_rgsuperblock(struct xfs_scrub *sc);
#else
# define xrep_rtbitmap xrep_notsupported
# define xrep_rtsummary xrep_notsupported
+# define xrep_rgsuperblock xrep_notsupported
#endif /* CONFIG_XFS_RT */
#ifdef CONFIG_XFS_QUOTA
@@ -253,6 +255,7 @@ static inline int xrep_setup_symlink(struct xfs_scrub *sc, unsigned int *x)
#define xrep_symlink xrep_notsupported
#define xrep_dirtree xrep_notsupported
#define xrep_metapath xrep_notsupported
+#define xrep_rgsuperblock xrep_notsupported
#endif /* CONFIG_XFS_ONLINE_REPAIR */
diff --git a/fs/xfs/scrub/rgsuper.c b/fs/xfs/scrub/rgsuper.c
new file mode 100644
index 000000000000..463b3573bb76
--- /dev/null
+++ b/fs/xfs/scrub/rgsuper.c
@@ -0,0 +1,84 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (c) 2022-2024 Oracle. All Rights Reserved.
+ * Author: Darrick J. Wong <djwong@kernel.org>
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_mount.h"
+#include "xfs_rtgroup.h"
+#include "xfs_log_format.h"
+#include "xfs_trans.h"
+#include "xfs_sb.h"
+#include "scrub/scrub.h"
+#include "scrub/common.h"
+#include "scrub/repair.h"
+
+/* Set us up with a transaction and an empty context. */
+int
+xchk_setup_rgsuperblock(
+ struct xfs_scrub *sc)
+{
+ return xchk_trans_alloc(sc, 0);
+}
+
+/* Cross-reference with the other rt metadata. */
+STATIC void
+xchk_rgsuperblock_xref(
+ struct xfs_scrub *sc)
+{
+ if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
+ return;
+
+ xchk_xref_is_used_rt_space(sc, xfs_rgbno_to_rtb(sc->sr.rtg, 0), 1);
+}
+
+int
+xchk_rgsuperblock(
+ struct xfs_scrub *sc)
+{
+ xfs_rgnumber_t rgno = sc->sm->sm_agno;
+ int error;
+
+ /*
+ * Only rtgroup 0 has a superblock. We may someday want to use higher
+ * rgno for other functions, similar to what we do with the primary
+ * super scrub function.
+ */
+ if (rgno != 0)
+ return -ENOENT;
+
+ /*
+ * Grab an active reference to the rtgroup structure. If we can't get
+ * it, we're racing with something that's tearing down the group, so
+ * signal that the group no longer exists. Take the rtbitmap in shared
+ * mode so that the group can't change while we're doing things.
+ */
+ error = xchk_rtgroup_init_existing(sc, rgno, &sc->sr);
+ if (!xchk_xref_process_error(sc, 0, 0, &error))
+ return error;
+
+ xchk_rtgroup_lock(&sc->sr, XFS_RTGLOCK_BITMAP_SHARED);
+
+ /*
+ * Since we already validated the rt superblock at mount time, we don't
+ * need to check its contents again. All we need is to cross-reference.
+ */
+ xchk_rgsuperblock_xref(sc);
+ return 0;
+}
+
+#ifdef CONFIG_XFS_ONLINE_REPAIR
+int
+xrep_rgsuperblock(
+ struct xfs_scrub *sc)
+{
+ ASSERT(rtg_rgno(sc->sr.rtg) == 0);
+
+ xfs_log_sb(sc->tp);
+ return 0;
+}
+#endif /* CONFIG_XFS_ONLINE_REPAIR */
diff --git a/fs/xfs/scrub/rtsummary.c b/fs/xfs/scrub/rtsummary.c
index 8f3f69b26cad..49fc6250bafc 100644
--- a/fs/xfs/scrub/rtsummary.c
+++ b/fs/xfs/scrub/rtsummary.c
@@ -151,6 +151,11 @@ xchk_rtsum_inc(
struct xfs_mount *mp,
union xfs_suminfo_raw *v)
{
+ if (xfs_has_rtgroups(mp)) {
+ be32_add_cpu(&v->rtg, 1);
+ return be32_to_cpu(v->rtg);
+ }
+
v->old += 1;
return v->old;
}
diff --git a/fs/xfs/scrub/rtsummary_repair.c b/fs/xfs/scrub/rtsummary_repair.c
index 168838098800..8198ea84ad70 100644
--- a/fs/xfs/scrub/rtsummary_repair.c
+++ b/fs/xfs/scrub/rtsummary_repair.c
@@ -83,12 +83,23 @@ xrep_rtsummary_prep_buf(
ondisk = xfs_rsumblock_infoptr(&rts->args, 0);
rts->args.sumbp = NULL;
- bp->b_ops = &xfs_rtbuf_ops;
-
error = xfsum_copyout(sc, rts->prep_wordoff, ondisk, mp->m_blockwsize);
if (error)
return error;
+ if (xfs_has_rtgroups(sc->mp)) {
+ struct xfs_rtbuf_blkinfo *hdr = bp->b_addr;
+
+ hdr->rt_magic = cpu_to_be32(XFS_RTSUMMARY_MAGIC);
+ hdr->rt_owner = cpu_to_be64(sc->ip->i_ino);
+ hdr->rt_blkno = cpu_to_be64(xfs_buf_daddr(bp));
+ hdr->rt_lsn = 0;
+ uuid_copy(&hdr->rt_uuid, &sc->mp->m_sb.sb_meta_uuid);
+ bp->b_ops = &xfs_rtsummary_buf_ops;
+ } else {
+ bp->b_ops = &xfs_rtbuf_ops;
+ }
+
rts->prep_wordoff += mp->m_blockwsize;
xfs_trans_buf_set_type(sc->tp, bp, XFS_BLFT_RTSUMMARY_BUF);
return 0;
diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c
index 8cd7e36c0999..950f5a58dcd9 100644
--- a/fs/xfs/scrub/scrub.c
+++ b/fs/xfs/scrub/scrub.c
@@ -451,6 +451,13 @@ static const struct xchk_meta_ops meta_scrub_ops[] = {
.has = xfs_has_metadir,
.repair = xrep_metapath,
},
+ [XFS_SCRUB_TYPE_RGSUPER] = { /* realtime group superblock */
+ .type = ST_RTGROUP,
+ .setup = xchk_setup_rgsuperblock,
+ .scrub = xchk_rgsuperblock,
+ .has = xfs_has_rtsb,
+ .repair = xrep_rgsuperblock,
+ },
};
static int
diff --git a/fs/xfs/scrub/scrub.h b/fs/xfs/scrub/scrub.h
index f73c6d0d90a1..a7fda3e2b013 100644
--- a/fs/xfs/scrub/scrub.h
+++ b/fs/xfs/scrub/scrub.h
@@ -273,9 +273,11 @@ int xchk_metapath(struct xfs_scrub *sc);
#ifdef CONFIG_XFS_RT
int xchk_rtbitmap(struct xfs_scrub *sc);
int xchk_rtsummary(struct xfs_scrub *sc);
+int xchk_rgsuperblock(struct xfs_scrub *sc);
#else
# define xchk_rtbitmap xchk_nothing
# define xchk_rtsummary xchk_nothing
+# define xchk_rgsuperblock xchk_nothing
#endif
#ifdef CONFIG_XFS_QUOTA
int xchk_quota(struct xfs_scrub *sc);
diff --git a/fs/xfs/scrub/stats.c b/fs/xfs/scrub/stats.c
index edcd02dc2e62..a476c7b2ab75 100644
--- a/fs/xfs/scrub/stats.c
+++ b/fs/xfs/scrub/stats.c
@@ -81,6 +81,7 @@ static const char *name_map[XFS_SCRUB_TYPE_NR] = {
[XFS_SCRUB_TYPE_NLINKS] = "nlinks",
[XFS_SCRUB_TYPE_DIRTREE] = "dirtree",
[XFS_SCRUB_TYPE_METAPATH] = "metapath",
+ [XFS_SCRUB_TYPE_RGSUPER] = "rgsuper",
};
/* Format the scrub stats into a text buffer, similar to pcp style. */
diff --git a/fs/xfs/scrub/trace.h b/fs/xfs/scrub/trace.h
index b6c8d0944fa4..9b38f5ad1eaf 100644
--- a/fs/xfs/scrub/trace.h
+++ b/fs/xfs/scrub/trace.h
@@ -71,6 +71,7 @@ TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_HEALTHY);
TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_DIRTREE);
TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_BARRIER);
TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_METAPATH);
+TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_RGSUPER);
#define XFS_SCRUB_TYPE_STRINGS \
{ XFS_SCRUB_TYPE_PROBE, "probe" }, \
@@ -103,7 +104,8 @@ TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_METAPATH);
{ XFS_SCRUB_TYPE_HEALTHY, "healthy" }, \
{ XFS_SCRUB_TYPE_DIRTREE, "dirtree" }, \
{ XFS_SCRUB_TYPE_BARRIER, "barrier" }, \
- { XFS_SCRUB_TYPE_METAPATH, "metapath" }
+ { XFS_SCRUB_TYPE_METAPATH, "metapath" }, \
+ { XFS_SCRUB_TYPE_RGSUPER, "rgsuper" }
#define XFS_SCRUB_FLAG_STRINGS \
{ XFS_SCRUB_IFLAG_REPAIR, "repair" }, \
diff --git a/fs/xfs/xfs_bmap_item.c b/fs/xfs/xfs_bmap_item.c
index 37dab184c2df..3d52e9d7ad57 100644
--- a/fs/xfs/xfs_bmap_item.c
+++ b/fs/xfs/xfs_bmap_item.c
@@ -318,14 +318,16 @@ xfs_bmap_update_create_done(
return &budp->bud_item;
}
-/* Take a passive ref to the AG containing the space we're mapping. */
+/* Take a passive ref to the group containing the space we're mapping. */
static inline void
xfs_bmap_update_get_group(
struct xfs_mount *mp,
struct xfs_bmap_intent *bi)
{
+ enum xfs_group_type type = XG_TYPE_AG;
+
if (xfs_ifork_is_realtime(bi->bi_owner, bi->bi_whichfork))
- return;
+ type = XG_TYPE_RTG;
/*
* Bump the intent count on behalf of the deferred rmap and refcount
@@ -335,7 +337,7 @@ xfs_bmap_update_get_group(
* remains nonzero across the transaction roll.
*/
bi->bi_group = xfs_group_intent_get(mp, bi->bi_bmap.br_startblock,
- XG_TYPE_AG);
+ type);
}
/* Add this deferred BUI to the transaction. */
@@ -344,8 +346,6 @@ xfs_bmap_defer_add(
struct xfs_trans *tp,
struct xfs_bmap_intent *bi)
{
- trace_xfs_bmap_defer(bi);
-
xfs_bmap_update_get_group(tp->t_mountp, bi);
/*
@@ -358,18 +358,9 @@ xfs_bmap_defer_add(
*/
if (bi->bi_type == XFS_BMAP_MAP)
bi->bi_owner->i_delayed_blks += bi->bi_bmap.br_blockcount;
- xfs_defer_add(tp, &bi->bi_list, &xfs_bmap_update_defer_type);
-}
-
-/* Release a passive AG ref after finishing mapping work. */
-static inline void
-xfs_bmap_update_put_group(
- struct xfs_bmap_intent *bi)
-{
- if (xfs_ifork_is_realtime(bi->bi_owner, bi->bi_whichfork))
- return;
- xfs_group_intent_put(bi->bi_group);
+ trace_xfs_bmap_defer(bi);
+ xfs_defer_add(tp, &bi->bi_list, &xfs_bmap_update_defer_type);
}
/* Cancel a deferred bmap update. */
@@ -382,7 +373,7 @@ xfs_bmap_update_cancel_item(
if (bi->bi_type == XFS_BMAP_MAP)
bi->bi_owner->i_delayed_blks -= bi->bi_bmap.br_blockcount;
- xfs_bmap_update_put_group(bi);
+ xfs_group_intent_put(bi->bi_group);
kmem_cache_free(xfs_bmap_intent_cache, bi);
}
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index 2c3b3ff6d7be..a59bbe767a7d 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -532,7 +532,7 @@ xfs_can_free_eofblocks(
*/
end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_ISIZE(ip));
if (xfs_inode_has_bigrtalloc(ip))
- end_fsb = xfs_rtb_roundup_rtx(mp, end_fsb);
+ end_fsb = xfs_fileoff_roundup_rtx(mp, end_fsb);
last_fsb = XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes);
if (last_fsb <= end_fsb)
return false;
@@ -854,8 +854,8 @@ xfs_free_file_space(
/* We can only free complete realtime extents. */
if (xfs_inode_has_bigrtalloc(ip)) {
- startoffset_fsb = xfs_rtb_roundup_rtx(mp, startoffset_fsb);
- endoffset_fsb = xfs_rtb_rounddown_rtx(mp, endoffset_fsb);
+ startoffset_fsb = xfs_fileoff_roundup_rtx(mp, startoffset_fsb);
+ endoffset_fsb = xfs_fileoff_rounddown_rtx(mp, endoffset_fsb);
}
/*
@@ -1523,6 +1523,18 @@ xfs_swap_extents(
goto out_unlock;
}
+ /*
+ * The rmapbt implementation is unable to resume a swapext operation
+ * after a crash if the allocation unit size is larger than a block.
+ * This (deprecated) interface will not be upgraded to handle this
+ * situation. Defragmentation must be performed with the commit range
+ * ioctl.
+ */
+ if (XFS_IS_REALTIME_INODE(ip) && xfs_has_rtgroups(ip->i_mount)) {
+ error = -EOPNOTSUPP;
+ goto out_unlock;
+ }
+
error = xfs_qm_dqattach(ip);
if (error)
goto out_unlock;
diff --git a/fs/xfs/xfs_buf_item_recover.c b/fs/xfs/xfs_buf_item_recover.c
index c8259ee482fd..3d0c6402cb36 100644
--- a/fs/xfs/xfs_buf_item_recover.c
+++ b/fs/xfs/xfs_buf_item_recover.c
@@ -26,6 +26,7 @@
#include "xfs_ag.h"
#include "xfs_sb.h"
#include "xfs_rtgroup.h"
+#include "xfs_rtbitmap.h"
/*
* This is the number of entries in the l_buf_cancel_table used during
@@ -394,9 +395,18 @@ xlog_recover_validate_buf_type(
break;
#ifdef CONFIG_XFS_RT
case XFS_BLFT_RTBITMAP_BUF:
+ if (xfs_has_rtgroups(mp) && magic32 != XFS_RTBITMAP_MAGIC) {
+ warnmsg = "Bad rtbitmap magic!";
+ break;
+ }
+ bp->b_ops = xfs_rtblock_ops(mp, XFS_RTGI_BITMAP);
+ break;
case XFS_BLFT_RTSUMMARY_BUF:
- /* no magic numbers for verification of RT buffers */
- bp->b_ops = &xfs_rtbuf_ops;
+ if (xfs_has_rtgroups(mp) && magic32 != XFS_RTSUMMARY_MAGIC) {
+ warnmsg = "Bad rtsummary magic!";
+ break;
+ }
+ bp->b_ops = xfs_rtblock_ops(mp, XFS_RTGI_SUMMARY);
break;
#endif /* CONFIG_XFS_RT */
default:
@@ -815,11 +825,20 @@ xlog_recover_get_buf_lsn(
* UUIDs, so we must recover them immediately.
*/
blft = xfs_blft_from_flags(buf_f);
- if (blft == XFS_BLFT_RTBITMAP_BUF || blft == XFS_BLFT_RTSUMMARY_BUF)
+ if (!xfs_has_rtgroups(mp) && (blft == XFS_BLFT_RTBITMAP_BUF ||
+ blft == XFS_BLFT_RTSUMMARY_BUF))
goto recover_immediately;
magic32 = be32_to_cpu(*(__be32 *)blk);
switch (magic32) {
+ case XFS_RTSUMMARY_MAGIC:
+ case XFS_RTBITMAP_MAGIC: {
+ struct xfs_rtbuf_blkinfo *hdr = blk;
+
+ lsn = be64_to_cpu(hdr->rt_lsn);
+ uuid = &hdr->rt_uuid;
+ break;
+ }
case XFS_ABTB_CRC_MAGIC:
case XFS_ABTC_CRC_MAGIC:
case XFS_ABTB_MAGIC:
@@ -1061,6 +1080,18 @@ xlog_recover_buf_commit_pass2(
current_lsn);
if (error)
goto out_release;
+
+ /* Update the rt superblock if we have one. */
+ if (xfs_has_rtsb(mp) && mp->m_rtsb_bp) {
+ struct xfs_buf *rtsb_bp = mp->m_rtsb_bp;
+
+ xfs_buf_lock(rtsb_bp);
+ xfs_buf_hold(rtsb_bp);
+ xfs_update_rtsb(rtsb_bp, bp);
+ rtsb_bp->b_flags |= _XBF_LOGRECOVERY;
+ xfs_buf_delwri_queue(rtsb_bp, buffer_list);
+ xfs_buf_relse(rtsb_bp);
+ }
} else {
xlog_recover_do_reg_buffer(mp, item, bp, buf_f, current_lsn);
}
diff --git a/fs/xfs/xfs_discard.c b/fs/xfs/xfs_discard.c
index 42b8b5e0e931..c4bd145f5ec1 100644
--- a/fs/xfs/xfs_discard.c
+++ b/fs/xfs/xfs_discard.c
@@ -73,6 +73,8 @@
* extent search so that it overlaps in flight discard IO.
*/
+#define XFS_DISCARD_MAX_EXAMINE (100)
+
struct workqueue_struct *xfs_discard_wq;
static void
@@ -101,6 +103,24 @@ xfs_discard_endio(
bio_put(bio);
}
+static inline struct block_device *
+xfs_group_bdev(
+ const struct xfs_group *xg)
+{
+ struct xfs_mount *mp = xg->xg_mount;
+
+ switch (xg->xg_type) {
+ case XG_TYPE_AG:
+ return mp->m_ddev_targp->bt_bdev;
+ case XG_TYPE_RTG:
+ return mp->m_rtdev_targp->bt_bdev;
+ default:
+ ASSERT(0);
+ break;
+ }
+ return NULL;
+}
+
/*
* Walk the discard list and issue discards on all the busy extents in the
* list. We plug and chain the bios so that we only need a single completion
@@ -118,12 +138,11 @@ xfs_discard_extents(
blk_start_plug(&plug);
list_for_each_entry(busyp, &extents->extent_list, list) {
- struct xfs_perag *pag = to_perag(busyp->group);
-
- trace_xfs_discard_extent(pag, busyp->bno, busyp->length);
+ trace_xfs_discard_extent(busyp->group, busyp->bno,
+ busyp->length);
- error = __blkdev_issue_discard(mp->m_ddev_targp->bt_bdev,
- xfs_agbno_to_daddr(pag, busyp->bno),
+ error = __blkdev_issue_discard(xfs_group_bdev(busyp->group),
+ xfs_gbno_to_daddr(busyp->group, busyp->bno),
XFS_FSB_TO_BB(mp, busyp->length),
GFP_KERNEL, &bio);
if (error && error != -EOPNOTSUPP) {
@@ -168,7 +187,7 @@ xfs_trim_gather_extents(
struct xfs_buf *agbp;
int error;
int i;
- int batch = 100;
+ int batch = XFS_DISCARD_MAX_EXAMINE;
/*
* Force out the log. This means any transactions that might have freed
@@ -241,11 +260,11 @@ xfs_trim_gather_extents(
* overlapping ranges for now.
*/
if (fbno + flen < tcur->start) {
- trace_xfs_discard_exclude(pag, fbno, flen);
+ trace_xfs_discard_exclude(pag_group(pag), fbno, flen);
goto next_extent;
}
if (fbno > tcur->end) {
- trace_xfs_discard_exclude(pag, fbno, flen);
+ trace_xfs_discard_exclude(pag_group(pag), fbno, flen);
if (tcur->by_bno) {
tcur->count = 0;
break;
@@ -263,7 +282,7 @@ xfs_trim_gather_extents(
/* Too small? Give up. */
if (flen < tcur->minlen) {
- trace_xfs_discard_toosmall(pag, fbno, flen);
+ trace_xfs_discard_toosmall(pag_group(pag), fbno, flen);
if (tcur->by_bno)
goto next_extent;
tcur->count = 0;
@@ -275,7 +294,7 @@ xfs_trim_gather_extents(
* discard and try again the next time.
*/
if (xfs_extent_busy_search(pag_group(pag), fbno, flen)) {
- trace_xfs_discard_busy(pag, fbno, flen);
+ trace_xfs_discard_busy(pag_group(pag), fbno, flen);
goto next_extent;
}
@@ -337,7 +356,7 @@ xfs_trim_perag_extents(
};
int error = 0;
- if (start != 0 || end != pag->block_count)
+ if (start != 0 || end != pag_group(pag)->xg_block_count)
tcur.by_bno = true;
do {
@@ -403,7 +422,7 @@ xfs_trim_datadev_extents(
end_agbno = xfs_daddr_to_agbno(mp, ddev_end);
while ((pag = xfs_perag_next_range(mp, pag, start_agno, end_agno))) {
- xfs_agblock_t agend = pag->block_count;
+ xfs_agblock_t agend = pag_group(pag)->xg_block_count;
if (pag_agno(pag) == end_agno)
agend = end_agbno;
@@ -548,6 +567,7 @@ xfs_trim_gather_rtextent(
return 0;
}
+/* Trim extents on an !rtgroups realtime device */
static int
xfs_trim_rtextents(
struct xfs_rtgroup *rtg,
@@ -572,7 +592,7 @@ xfs_trim_rtextents(
* trims the extents returned.
*/
do {
- tr.stop_rtx = low + (mp->m_sb.sb_blocksize * NBBY);
+ tr.stop_rtx = low + xfs_rtbitmap_rtx_per_rbmblock(mp);
xfs_rtgroup_lock(rtg, XFS_RTGLOCK_BITMAP_SHARED);
error = xfs_rtalloc_query_range(rtg, tp, low, high,
xfs_trim_gather_rtextent, &tr);
@@ -602,6 +622,140 @@ xfs_trim_rtextents(
return error;
}
+struct xfs_trim_rtgroup {
+ /* list of rtgroup extents to free */
+ struct xfs_busy_extents *extents;
+
+ /* minimum length that caller allows us to trim */
+ xfs_rtblock_t minlen_fsb;
+
+ /* restart point for the rtbitmap walk */
+ xfs_rtxnum_t restart_rtx;
+
+ /* number of extents to examine before stopping to issue discard ios */
+ int batch;
+
+ /* number of extents queued for discard */
+ int queued;
+};
+
+static int
+xfs_trim_gather_rtgroup_extent(
+ struct xfs_rtgroup *rtg,
+ struct xfs_trans *tp,
+ const struct xfs_rtalloc_rec *rec,
+ void *priv)
+{
+ struct xfs_trim_rtgroup *tr = priv;
+ xfs_rgblock_t rgbno;
+ xfs_extlen_t len;
+
+ if (--tr->batch <= 0) {
+ /*
+ * If we've checked a large number of extents, update the
+ * cursor to point at this extent so we restart the next batch
+ * from this extent.
+ */
+ tr->restart_rtx = rec->ar_startext;
+ return -ECANCELED;
+ }
+
+ rgbno = xfs_rtx_to_rgbno(rtg, rec->ar_startext);
+ len = xfs_rtxlen_to_extlen(rtg_mount(rtg), rec->ar_extcount);
+
+ /* Ignore too small. */
+ if (len < tr->minlen_fsb) {
+ trace_xfs_discard_toosmall(rtg_group(rtg), rgbno, len);
+ return 0;
+ }
+
+ /*
+ * If any blocks in the range are still busy, skip the discard and try
+ * again the next time.
+ */
+ if (xfs_extent_busy_search(rtg_group(rtg), rgbno, len)) {
+ trace_xfs_discard_busy(rtg_group(rtg), rgbno, len);
+ return 0;
+ }
+
+ xfs_extent_busy_insert_discard(rtg_group(rtg), rgbno, len,
+ &tr->extents->extent_list);
+
+ tr->queued++;
+ tr->restart_rtx = rec->ar_startext + rec->ar_extcount;
+ return 0;
+}
+
+/* Trim extents in this rtgroup using the busy extent machinery. */
+static int
+xfs_trim_rtgroup_extents(
+ struct xfs_rtgroup *rtg,
+ xfs_rtxnum_t low,
+ xfs_rtxnum_t high,
+ xfs_daddr_t minlen)
+{
+ struct xfs_mount *mp = rtg_mount(rtg);
+ struct xfs_trim_rtgroup tr = {
+ .minlen_fsb = XFS_BB_TO_FSB(mp, minlen),
+ };
+ struct xfs_trans *tp;
+ int error;
+
+ error = xfs_trans_alloc_empty(mp, &tp);
+ if (error)
+ return error;
+
+ /*
+ * Walk the free ranges between low and high. The query_range function
+ * trims the extents returned.
+ */
+ do {
+ tr.extents = kzalloc(sizeof(*tr.extents), GFP_KERNEL);
+ if (!tr.extents) {
+ error = -ENOMEM;
+ break;
+ }
+
+ tr.queued = 0;
+ tr.batch = XFS_DISCARD_MAX_EXAMINE;
+ tr.extents->owner = tr.extents;
+ INIT_LIST_HEAD(&tr.extents->extent_list);
+
+ xfs_rtgroup_lock(rtg, XFS_RTGLOCK_BITMAP_SHARED);
+ error = xfs_rtalloc_query_range(rtg, tp, low, high,
+ xfs_trim_gather_rtgroup_extent, &tr);
+ xfs_rtgroup_unlock(rtg, XFS_RTGLOCK_BITMAP_SHARED);
+ if (error == -ECANCELED)
+ error = 0;
+ if (error) {
+ kfree(tr.extents);
+ break;
+ }
+
+ if (!tr.queued)
+ break;
+
+ /*
+ * We hand the extent list to the discard function here so the
+ * discarded extents can be removed from the busy extent list.
+ * This allows the discards to run asynchronously with
+ * gathering the next round of extents to discard.
+ *
+ * However, we must ensure that we do not reference the extent
+ * list after this function call, as it may have been freed by
+ * the time control returns to us.
+ */
+ error = xfs_discard_extents(rtg_mount(rtg), tr.extents);
+ if (error)
+ break;
+
+ low = tr.restart_rtx;
+ } while (!xfs_trim_should_stop() && low <= high);
+
+ xfs_trans_cancel(tp);
+ return error;
+}
+
static int
xfs_trim_rtdev_extents(
struct xfs_mount *mp,
@@ -640,7 +794,12 @@ xfs_trim_rtdev_extents(
if (rtg_rgno(rtg) == end_rgno)
rtg_end = min(rtg_end, end_rtx);
- error = xfs_trim_rtextents(rtg, start_rtx, rtg_end, minlen);
+ if (xfs_has_rtgroups(mp))
+ error = xfs_trim_rtgroup_extents(rtg, start_rtx,
+ rtg_end, minlen);
+ else
+ error = xfs_trim_rtextents(rtg, start_rtx, rtg_end,
+ minlen);
if (error)
last_error = error;
diff --git a/fs/xfs/xfs_exchrange.c b/fs/xfs/xfs_exchrange.c
index 75cb53f090d1..f644c4cc77fa 100644
--- a/fs/xfs/xfs_exchrange.c
+++ b/fs/xfs/xfs_exchrange.c
@@ -217,7 +217,7 @@ xfs_exchrange_mappings(
* length in @fxr are safe to round up.
*/
if (xfs_inode_has_bigrtalloc(ip2))
- req.blockcount = xfs_rtb_roundup_rtx(mp, req.blockcount);
+ req.blockcount = xfs_blen_roundup_rtx(mp, req.blockcount);
error = xfs_exchrange_estimate(&req);
if (error)
diff --git a/fs/xfs/xfs_extent_busy.c b/fs/xfs/xfs_extent_busy.c
index 457a27ab8375..ea43c9a6e54c 100644
--- a/fs/xfs/xfs_extent_busy.c
+++ b/fs/xfs/xfs_extent_busy.c
@@ -18,6 +18,7 @@
#include "xfs_trans.h"
#include "xfs_log.h"
#include "xfs_ag.h"
+#include "xfs_rtgroup.h"
struct xfs_extent_busy_tree {
spinlock_t eb_lock;
@@ -665,9 +666,14 @@ xfs_extent_busy_wait_all(
struct xfs_mount *mp)
{
struct xfs_perag *pag = NULL;
+ struct xfs_rtgroup *rtg = NULL;
while ((pag = xfs_perag_next(mp, pag)))
xfs_extent_busy_wait_group(pag_group(pag));
+
+ if (xfs_has_rtgroups(mp))
+ while ((rtg = xfs_rtgroup_next(mp, rtg)))
+ xfs_extent_busy_wait_group(rtg_group(rtg));
}
/*
diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c
index e469510986e8..a25c713ff888 100644
--- a/fs/xfs/xfs_extfree_item.c
+++ b/fs/xfs/xfs_extfree_item.c
@@ -25,6 +25,10 @@
#include "xfs_error.h"
#include "xfs_log_priv.h"
#include "xfs_log_recover.h"
+#include "xfs_rtalloc.h"
+#include "xfs_inode.h"
+#include "xfs_rtbitmap.h"
+#include "xfs_rtgroup.h"
struct kmem_cache *xfs_efi_cache;
struct kmem_cache *xfs_efd_cache;
@@ -95,16 +99,15 @@ xfs_efi_item_format(
ASSERT(atomic_read(&efip->efi_next_extent) ==
efip->efi_format.efi_nextents);
+ ASSERT(lip->li_type == XFS_LI_EFI || lip->li_type == XFS_LI_EFI_RT);
- efip->efi_format.efi_type = XFS_LI_EFI;
+ efip->efi_format.efi_type = lip->li_type;
efip->efi_format.efi_size = 1;
- xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_EFI_FORMAT,
- &efip->efi_format,
+ xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_EFI_FORMAT, &efip->efi_format,
xfs_efi_log_format_sizeof(efip->efi_format.efi_nextents));
}
-
/*
* The unpin operation is the last place an EFI is manipulated in the log. It is
* either inserted in the AIL or aborted in the event of a log I/O error. In
@@ -140,12 +143,14 @@ xfs_efi_item_release(
STATIC struct xfs_efi_log_item *
xfs_efi_init(
struct xfs_mount *mp,
+ unsigned short item_type,
uint nextents)
-
{
struct xfs_efi_log_item *efip;
+ ASSERT(item_type == XFS_LI_EFI || item_type == XFS_LI_EFI_RT);
ASSERT(nextents > 0);
+
if (nextents > XFS_EFI_MAX_FAST_EXTENTS) {
efip = kzalloc(xfs_efi_log_item_sizeof(nextents),
GFP_KERNEL | __GFP_NOFAIL);
@@ -154,7 +159,7 @@ xfs_efi_init(
GFP_KERNEL | __GFP_NOFAIL);
}
- xfs_log_item_init(mp, &efip->efi_item, XFS_LI_EFI, &xfs_efi_item_ops);
+ xfs_log_item_init(mp, &efip->efi_item, item_type, &xfs_efi_item_ops);
efip->efi_format.efi_nextents = nextents;
efip->efi_format.efi_id = (uintptr_t)(void *)efip;
atomic_set(&efip->efi_next_extent, 0);
@@ -264,12 +269,12 @@ xfs_efd_item_format(
struct xfs_log_iovec *vecp = NULL;
ASSERT(efdp->efd_next_extent == efdp->efd_format.efd_nextents);
+ ASSERT(lip->li_type == XFS_LI_EFD || lip->li_type == XFS_LI_EFD_RT);
- efdp->efd_format.efd_type = XFS_LI_EFD;
+ efdp->efd_format.efd_type = lip->li_type;
efdp->efd_format.efd_size = 1;
- xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_EFD_FORMAT,
- &efdp->efd_format,
+ xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_EFD_FORMAT, &efdp->efd_format,
xfs_efd_log_format_sizeof(efdp->efd_format.efd_nextents));
}
@@ -308,6 +313,14 @@ static inline struct xfs_extent_free_item *xefi_entry(const struct list_head *e)
return list_entry(e, struct xfs_extent_free_item, xefi_list);
}
+static inline bool
+xfs_efi_item_isrt(const struct xfs_log_item *lip)
+{
+ ASSERT(lip->li_type == XFS_LI_EFI || lip->li_type == XFS_LI_EFI_RT);
+
+ return lip->li_type == XFS_LI_EFI_RT;
+}
+
/*
* Fill the EFD with all extents from the EFI when we need to roll the
* transaction and continue with a new EFI.
@@ -388,18 +401,20 @@ xfs_extent_free_log_item(
}
static struct xfs_log_item *
-xfs_extent_free_create_intent(
+__xfs_extent_free_create_intent(
struct xfs_trans *tp,
struct list_head *items,
unsigned int count,
- bool sort)
+ bool sort,
+ unsigned short item_type)
{
struct xfs_mount *mp = tp->t_mountp;
- struct xfs_efi_log_item *efip = xfs_efi_init(mp, count);
+ struct xfs_efi_log_item *efip;
struct xfs_extent_free_item *xefi;
ASSERT(count > 0);
+ efip = xfs_efi_init(mp, item_type, count);
if (sort)
list_sort(mp, items, xfs_extent_free_diff_items);
list_for_each_entry(xefi, items, xefi_list)
@@ -407,6 +422,23 @@ xfs_extent_free_create_intent(
return &efip->efi_item;
}
+static struct xfs_log_item *
+xfs_extent_free_create_intent(
+ struct xfs_trans *tp,
+ struct list_head *items,
+ unsigned int count,
+ bool sort)
+{
+ return __xfs_extent_free_create_intent(tp, items, count, sort,
+ XFS_LI_EFI);
+}
+
+static inline unsigned short
+xfs_efd_type_from_efi(const struct xfs_efi_log_item *efip)
+{
+ return xfs_efi_item_isrt(&efip->efi_item) ? XFS_LI_EFD_RT : XFS_LI_EFD;
+}
+
/* Get an EFD so we can process all the free extents. */
static struct xfs_log_item *
xfs_extent_free_create_done(
@@ -427,8 +459,8 @@ xfs_extent_free_create_done(
GFP_KERNEL | __GFP_NOFAIL);
}
- xfs_log_item_init(tp->t_mountp, &efdp->efd_item, XFS_LI_EFD,
- &xfs_efd_item_ops);
+ xfs_log_item_init(tp->t_mountp, &efdp->efd_item,
+ xfs_efd_type_from_efi(efip), &xfs_efd_item_ops);
efdp->efd_efip = efip;
efdp->efd_format.efd_nextents = count;
efdp->efd_format.efd_efi_id = efip->efi_format.efi_id;
@@ -436,6 +468,17 @@ xfs_extent_free_create_done(
return &efdp->efd_item;
}
+static inline const struct xfs_defer_op_type *
+xefi_ops(
+ struct xfs_extent_free_item *xefi)
+{
+ if (xfs_efi_is_realtime(xefi))
+ return &xfs_rtextent_free_defer_type;
+ if (xefi->xefi_agresv == XFS_AG_RESV_AGFL)
+ return &xfs_agfl_free_defer_type;
+ return &xfs_extent_free_defer_type;
+}
+
/* Add this deferred EFI to the transaction. */
void
xfs_extent_free_defer_add(
@@ -445,16 +488,11 @@ xfs_extent_free_defer_add(
{
struct xfs_mount *mp = tp->t_mountp;
- trace_xfs_extent_free_defer(mp, xefi);
-
xefi->xefi_group = xfs_group_intent_get(mp, xefi->xefi_startblock,
- XG_TYPE_AG);
- if (xefi->xefi_agresv == XFS_AG_RESV_AGFL)
- *dfpp = xfs_defer_add(tp, &xefi->xefi_list,
- &xfs_agfl_free_defer_type);
- else
- *dfpp = xfs_defer_add(tp, &xefi->xefi_list,
- &xfs_extent_free_defer_type);
+ xfs_efi_is_realtime(xefi) ? XG_TYPE_RTG : XG_TYPE_AG);
+
+ trace_xfs_extent_free_defer(mp, xefi);
+ *dfpp = xfs_defer_add(tp, &xefi->xefi_list, xefi_ops(xefi));
}
/* Cancel a free extent. */
@@ -560,8 +598,12 @@ xfs_agfl_free_finish_item(
static inline bool
xfs_efi_validate_ext(
struct xfs_mount *mp,
+ bool isrt,
struct xfs_extent *extp)
{
+ if (isrt)
+ return xfs_verify_rtbext(mp, extp->ext_start, extp->ext_len);
+
return xfs_verify_fsbext(mp, extp->ext_start, extp->ext_len);
}
@@ -569,6 +611,7 @@ static inline void
xfs_efi_recover_work(
struct xfs_mount *mp,
struct xfs_defer_pending *dfp,
+ bool isrt,
struct xfs_extent *extp)
{
struct xfs_extent_free_item *xefi;
@@ -580,7 +623,9 @@ xfs_efi_recover_work(
xefi->xefi_agresv = XFS_AG_RESV_NONE;
xefi->xefi_owner = XFS_RMAP_OWN_UNKNOWN;
xefi->xefi_group = xfs_group_intent_get(mp, extp->ext_start,
- XG_TYPE_AG);
+ isrt ? XG_TYPE_RTG : XG_TYPE_AG);
+ if (isrt)
+ xefi->xefi_flags |= XFS_EFI_REALTIME;
xfs_defer_add_item(dfp, &xefi->xefi_list);
}
@@ -601,14 +646,15 @@ xfs_extent_free_recover_work(
struct xfs_trans *tp;
int i;
int error = 0;
+ bool isrt = xfs_efi_item_isrt(lip);
/*
- * First check the validity of the extents described by the
- * EFI. If any are bad, then assume that all are bad and
- * just toss the EFI.
+ * First check the validity of the extents described by the EFI. If
+ * any are bad, then assume that all are bad and just toss the EFI.
+ * Mixing RT and non-RT extents in the same EFI item is not allowed.
*/
for (i = 0; i < efip->efi_format.efi_nextents; i++) {
- if (!xfs_efi_validate_ext(mp,
+ if (!xfs_efi_validate_ext(mp, isrt,
&efip->efi_format.efi_extents[i])) {
XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
&efip->efi_format,
@@ -616,7 +662,8 @@ xfs_extent_free_recover_work(
return -EFSCORRUPTED;
}
- xfs_efi_recover_work(mp, dfp, &efip->efi_format.efi_extents[i]);
+ xfs_efi_recover_work(mp, dfp, isrt,
+ &efip->efi_format.efi_extents[i]);
}
resv = xlog_recover_resv(&M_RES(mp)->tr_itruncate);
@@ -654,10 +701,12 @@ xfs_extent_free_relog_intent(
count = EFI_ITEM(intent)->efi_format.efi_nextents;
extp = EFI_ITEM(intent)->efi_format.efi_extents;
+ ASSERT(intent->li_type == XFS_LI_EFI || intent->li_type == XFS_LI_EFI_RT);
+
efdp->efd_next_extent = count;
memcpy(efdp->efd_format.efd_extents, extp, count * sizeof(*extp));
- efip = xfs_efi_init(tp->t_mountp, count);
+ efip = xfs_efi_init(tp->t_mountp, intent->li_type, count);
memcpy(efip->efi_format.efi_extents, extp, count * sizeof(*extp));
atomic_set(&efip->efi_next_extent, count);
@@ -689,6 +738,72 @@ const struct xfs_defer_op_type xfs_agfl_free_defer_type = {
.relog_intent = xfs_extent_free_relog_intent,
};
+#ifdef CONFIG_XFS_RT
+/* Create a realtime extent freeing */
+static struct xfs_log_item *
+xfs_rtextent_free_create_intent(
+ struct xfs_trans *tp,
+ struct list_head *items,
+ unsigned int count,
+ bool sort)
+{
+ return __xfs_extent_free_create_intent(tp, items, count, sort,
+ XFS_LI_EFI_RT);
+}
+
+/* Process a free realtime extent. */
+STATIC int
+xfs_rtextent_free_finish_item(
+ struct xfs_trans *tp,
+ struct xfs_log_item *done,
+ struct list_head *item,
+ struct xfs_btree_cur **state)
+{
+ struct xfs_mount *mp = tp->t_mountp;
+ struct xfs_extent_free_item *xefi = xefi_entry(item);
+ struct xfs_efd_log_item *efdp = EFD_ITEM(done);
+ struct xfs_rtgroup **rtgp = (struct xfs_rtgroup **)state;
+ int error = 0;
+
+ trace_xfs_extent_free_deferred(mp, xefi);
+
+ if (!(xefi->xefi_flags & XFS_EFI_CANCELLED)) {
+ if (*rtgp != to_rtg(xefi->xefi_group)) {
+ *rtgp = to_rtg(xefi->xefi_group);
+ xfs_rtgroup_lock(*rtgp, XFS_RTGLOCK_BITMAP);
+ xfs_rtgroup_trans_join(tp, *rtgp,
+ XFS_RTGLOCK_BITMAP);
+ }
+ error = xfs_rtfree_blocks(tp, *rtgp,
+ xefi->xefi_startblock, xefi->xefi_blockcount);
+ }
+ if (error == -EAGAIN) {
+ xfs_efd_from_efi(efdp);
+ return error;
+ }
+
+ xfs_efd_add_extent(efdp, xefi);
+ xfs_extent_free_cancel_item(item);
+ return error;
+}
+
+const struct xfs_defer_op_type xfs_rtextent_free_defer_type = {
+ .name = "rtextent_free",
+ .max_items = XFS_EFI_MAX_FAST_EXTENTS,
+ .create_intent = xfs_rtextent_free_create_intent,
+ .abort_intent = xfs_extent_free_abort_intent,
+ .create_done = xfs_extent_free_create_done,
+ .finish_item = xfs_rtextent_free_finish_item,
+ .cancel_item = xfs_extent_free_cancel_item,
+ .recover_work = xfs_extent_free_recover_work,
+ .relog_intent = xfs_extent_free_relog_intent,
+};
+#else
+const struct xfs_defer_op_type xfs_rtextent_free_defer_type = {
+ .name = "rtextent_free",
+};
+#endif /* CONFIG_XFS_RT */
+
STATIC bool
xfs_efi_item_match(
struct xfs_log_item *lip,
@@ -733,7 +848,7 @@ xlog_recover_efi_commit_pass2(
return -EFSCORRUPTED;
}
- efip = xfs_efi_init(mp, efi_formatp->efi_nextents);
+ efip = xfs_efi_init(mp, ITEM_TYPE(item), efi_formatp->efi_nextents);
error = xfs_efi_copy_format(&item->ri_buf[0], &efip->efi_format);
if (error) {
xfs_efi_item_free(efip);
@@ -751,6 +866,58 @@ const struct xlog_recover_item_ops xlog_efi_item_ops = {
.commit_pass2 = xlog_recover_efi_commit_pass2,
};
+#ifdef CONFIG_XFS_RT
+STATIC int
+xlog_recover_rtefi_commit_pass2(
+ struct xlog *log,
+ struct list_head *buffer_list,
+ struct xlog_recover_item *item,
+ xfs_lsn_t lsn)
+{
+ struct xfs_mount *mp = log->l_mp;
+ struct xfs_efi_log_item *efip;
+ struct xfs_efi_log_format *efi_formatp;
+ int error;
+
+ efi_formatp = item->ri_buf[0].i_addr;
+
+ if (item->ri_buf[0].i_len < xfs_efi_log_format_sizeof(0)) {
+ XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
+ item->ri_buf[0].i_addr, item->ri_buf[0].i_len);
+ return -EFSCORRUPTED;
+ }
+
+ efip = xfs_efi_init(mp, ITEM_TYPE(item), efi_formatp->efi_nextents);
+ error = xfs_efi_copy_format(&item->ri_buf[0], &efip->efi_format);
+ if (error) {
+ xfs_efi_item_free(efip);
+ return error;
+ }
+ atomic_set(&efip->efi_next_extent, efi_formatp->efi_nextents);
+
+ xlog_recover_intent_item(log, &efip->efi_item, lsn,
+ &xfs_rtextent_free_defer_type);
+ return 0;
+}
+#else
+STATIC int
+xlog_recover_rtefi_commit_pass2(
+ struct xlog *log,
+ struct list_head *buffer_list,
+ struct xlog_recover_item *item,
+ xfs_lsn_t lsn)
+{
+ XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, log->l_mp,
+ item->ri_buf[0].i_addr, item->ri_buf[0].i_len);
+ return -EFSCORRUPTED;
+}
+#endif
+
+const struct xlog_recover_item_ops xlog_rtefi_item_ops = {
+ .item_type = XFS_LI_EFI_RT,
+ .commit_pass2 = xlog_recover_rtefi_commit_pass2,
+};
+
/*
* This routine is called when an EFD format structure is found in a committed
* transaction in the log. Its purpose is to cancel the corresponding EFI if it
@@ -793,3 +960,44 @@ const struct xlog_recover_item_ops xlog_efd_item_ops = {
.item_type = XFS_LI_EFD,
.commit_pass2 = xlog_recover_efd_commit_pass2,
};
+
+#ifdef CONFIG_XFS_RT
+STATIC int
+xlog_recover_rtefd_commit_pass2(
+ struct xlog *log,
+ struct list_head *buffer_list,
+ struct xlog_recover_item *item,
+ xfs_lsn_t lsn)
+{
+ struct xfs_efd_log_format *efd_formatp;
+ int buflen = item->ri_buf[0].i_len;
+
+ efd_formatp = item->ri_buf[0].i_addr;
+
+ if (buflen < sizeof(struct xfs_efd_log_format)) {
+ XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, log->l_mp,
+ efd_formatp, buflen);
+ return -EFSCORRUPTED;
+ }
+
+ if (item->ri_buf[0].i_len != xfs_efd_log_format32_sizeof(
+ efd_formatp->efd_nextents) &&
+ item->ri_buf[0].i_len != xfs_efd_log_format64_sizeof(
+ efd_formatp->efd_nextents)) {
+ XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, log->l_mp,
+ efd_formatp, buflen);
+ return -EFSCORRUPTED;
+ }
+
+ xlog_recover_release_intent(log, XFS_LI_EFI_RT,
+ efd_formatp->efd_efi_id);
+ return 0;
+}
+#else
+# define xlog_recover_rtefd_commit_pass2 xlog_recover_rtefi_commit_pass2
+#endif
+
+const struct xlog_recover_item_ops xlog_rtefd_item_ops = {
+ .item_type = XFS_LI_EFD_RT,
+ .commit_pass2 = xlog_recover_rtefd_commit_pass2,
+};
diff --git a/fs/xfs/xfs_health.c b/fs/xfs/xfs_health.c
index e5663e2ac9b8..c7c2e6561998 100644
--- a/fs/xfs/xfs_health.c
+++ b/fs/xfs/xfs_health.c
@@ -18,6 +18,22 @@
#include "xfs_da_format.h"
#include "xfs_da_btree.h"
#include "xfs_quota_defs.h"
+#include "xfs_rtgroup.h"
+
+static void
+xfs_health_unmount_group(
+ struct xfs_group *xg,
+ bool *warn)
+{
+ unsigned int sick = 0;
+ unsigned int checked = 0;
+
+ xfs_group_measure_sickness(xg, &sick, &checked);
+ if (sick) {
+ trace_xfs_group_unfixed_corruption(xg, sick);
+ *warn = true;
+ }
+}
/*
* Warn about metadata corruption that we detected but haven't fixed, and
@@ -29,6 +45,7 @@ xfs_health_unmount(
struct xfs_mount *mp)
{
struct xfs_perag *pag = NULL;
+ struct xfs_rtgroup *rtg = NULL;
unsigned int sick = 0;
unsigned int checked = 0;
bool warn = false;
@@ -37,21 +54,12 @@ xfs_health_unmount(
return;
/* Measure AG corruption levels. */
- while ((pag = xfs_perag_next(mp, pag))) {
- xfs_group_measure_sickness(pag_group(pag), &sick, &checked);
- if (sick) {
- trace_xfs_group_unfixed_corruption(pag_group(pag),
- sick);
- warn = true;
- }
- }
+ while ((pag = xfs_perag_next(mp, pag)))
+ xfs_health_unmount_group(pag_group(pag), &warn);
- /* Measure realtime volume corruption levels. */
- xfs_rt_measure_sickness(mp, &sick, &checked);
- if (sick) {
- trace_xfs_rt_unfixed_corruption(mp, sick);
- warn = true;
- }
+ /* Measure realtime group corruption levels. */
+ while ((rtg = xfs_rtgroup_next(mp, rtg)))
+ xfs_health_unmount_group(rtg_group(rtg), &warn);
/*
* Measure fs corruption and keep the sample around for the warning.
@@ -150,65 +158,6 @@ xfs_fs_measure_sickness(
spin_unlock(&mp->m_sb_lock);
}
-/* Mark unhealthy realtime metadata. */
-void
-xfs_rt_mark_sick(
- struct xfs_mount *mp,
- unsigned int mask)
-{
- ASSERT(!(mask & ~XFS_SICK_RT_ALL));
- trace_xfs_rt_mark_sick(mp, mask);
-
- spin_lock(&mp->m_sb_lock);
- mp->m_rt_sick |= mask;
- spin_unlock(&mp->m_sb_lock);
-}
-
-/* Mark realtime metadata as having been checked and found unhealthy by fsck. */
-void
-xfs_rt_mark_corrupt(
- struct xfs_mount *mp,
- unsigned int mask)
-{
- ASSERT(!(mask & ~XFS_SICK_RT_ALL));
- trace_xfs_rt_mark_corrupt(mp, mask);
-
- spin_lock(&mp->m_sb_lock);
- mp->m_rt_sick |= mask;
- mp->m_rt_checked |= mask;
- spin_unlock(&mp->m_sb_lock);
-}
-
-/* Mark a realtime metadata healed. */
-void
-xfs_rt_mark_healthy(
- struct xfs_mount *mp,
- unsigned int mask)
-{
- ASSERT(!(mask & ~XFS_SICK_RT_ALL));
- trace_xfs_rt_mark_healthy(mp, mask);
-
- spin_lock(&mp->m_sb_lock);
- mp->m_rt_sick &= ~mask;
- if (!(mp->m_rt_sick & XFS_SICK_RT_PRIMARY))
- mp->m_rt_sick &= ~XFS_SICK_RT_SECONDARY;
- mp->m_rt_checked |= mask;
- spin_unlock(&mp->m_sb_lock);
-}
-
-/* Sample which realtime metadata are unhealthy. */
-void
-xfs_rt_measure_sickness(
- struct xfs_mount *mp,
- unsigned int *sick,
- unsigned int *checked)
-{
- spin_lock(&mp->m_sb_lock);
- *sick = mp->m_rt_sick;
- *checked = mp->m_rt_checked;
- spin_unlock(&mp->m_sb_lock);
-}
-
/* Mark unhealthy per-ag metadata given a raw AG number. */
void
xfs_agno_mark_sick(
@@ -226,13 +175,24 @@ xfs_agno_mark_sick(
xfs_perag_put(pag);
}
+static inline void
+xfs_group_check_mask(
+ struct xfs_group *xg,
+ unsigned int mask)
+{
+ if (xg->xg_type == XG_TYPE_AG)
+ ASSERT(!(mask & ~XFS_SICK_AG_ALL));
+ else
+ ASSERT(!(mask & ~XFS_SICK_RG_ALL));
+}
+
/* Mark unhealthy per-ag metadata. */
void
xfs_group_mark_sick(
struct xfs_group *xg,
unsigned int mask)
{
- ASSERT(!(mask & ~XFS_SICK_AG_ALL));
+ xfs_group_check_mask(xg, mask);
trace_xfs_group_mark_sick(xg, mask);
spin_lock(&xg->xg_state_lock);
@@ -248,7 +208,7 @@ xfs_group_mark_corrupt(
struct xfs_group *xg,
unsigned int mask)
{
- ASSERT(!(mask & ~XFS_SICK_AG_ALL));
+ xfs_group_check_mask(xg, mask);
trace_xfs_group_mark_corrupt(xg, mask);
spin_lock(&xg->xg_state_lock);
@@ -265,7 +225,7 @@ xfs_group_mark_healthy(
struct xfs_group *xg,
unsigned int mask)
{
- ASSERT(!(mask & ~XFS_SICK_AG_ALL));
+ xfs_group_check_mask(xg, mask);
trace_xfs_group_mark_healthy(xg, mask);
spin_lock(&xg->xg_state_lock);
@@ -289,6 +249,23 @@ xfs_group_measure_sickness(
spin_unlock(&xg->xg_state_lock);
}
+/* Mark unhealthy per-rtgroup metadata given a raw rt group number. */
+void
+xfs_rgno_mark_sick(
+ struct xfs_mount *mp,
+ xfs_rgnumber_t rgno,
+ unsigned int mask)
+{
+ struct xfs_rtgroup *rtg = xfs_rtgroup_get(mp, rgno);
+
+ /* per-rtgroup structure not set up yet? */
+ if (!rtg)
+ return;
+
+ xfs_group_mark_sick(rtg_group(rtg), mask);
+ xfs_rtgroup_put(rtg);
+}
+
/* Mark the unhealthy parts of an inode. */
void
xfs_inode_mark_sick(
@@ -373,6 +350,9 @@ struct ioctl_sick_map {
unsigned int ioctl_mask;
};
+#define for_each_sick_map(map, m) \
+ for ((m) = (map); (m) < (map) + ARRAY_SIZE(map); (m)++)
+
static const struct ioctl_sick_map fs_map[] = {
{ XFS_SICK_FS_COUNTERS, XFS_FSOP_GEOM_SICK_COUNTERS},
{ XFS_SICK_FS_UQUOTA, XFS_FSOP_GEOM_SICK_UQUOTA },
@@ -382,13 +362,11 @@ static const struct ioctl_sick_map fs_map[] = {
{ XFS_SICK_FS_NLINKS, XFS_FSOP_GEOM_SICK_NLINKS },
{ XFS_SICK_FS_METADIR, XFS_FSOP_GEOM_SICK_METADIR },
{ XFS_SICK_FS_METAPATH, XFS_FSOP_GEOM_SICK_METAPATH },
- { 0, 0 },
};
static const struct ioctl_sick_map rt_map[] = {
- { XFS_SICK_RT_BITMAP, XFS_FSOP_GEOM_SICK_RT_BITMAP },
- { XFS_SICK_RT_SUMMARY, XFS_FSOP_GEOM_SICK_RT_SUMMARY },
- { 0, 0 },
+ { XFS_SICK_RG_BITMAP, XFS_FSOP_GEOM_SICK_RT_BITMAP },
+ { XFS_SICK_RG_SUMMARY, XFS_FSOP_GEOM_SICK_RT_SUMMARY },
};
static inline void
@@ -410,6 +388,7 @@ xfs_fsop_geom_health(
struct xfs_mount *mp,
struct xfs_fsop_geom *geo)
{
+ struct xfs_rtgroup *rtg = NULL;
const struct ioctl_sick_map *m;
unsigned int sick;
unsigned int checked;
@@ -418,12 +397,14 @@ xfs_fsop_geom_health(
geo->checked = 0;
xfs_fs_measure_sickness(mp, &sick, &checked);
- for (m = fs_map; m->sick_mask; m++)
+ for_each_sick_map(fs_map, m)
xfgeo_health_tick(geo, sick, checked, m);
- xfs_rt_measure_sickness(mp, &sick, &checked);
- for (m = rt_map; m->sick_mask; m++)
- xfgeo_health_tick(geo, sick, checked, m);
+ while ((rtg = xfs_rtgroup_next(mp, rtg))) {
+ xfs_group_measure_sickness(rtg_group(rtg), &sick, &checked);
+ for_each_sick_map(rt_map, m)
+ xfgeo_health_tick(geo, sick, checked, m);
+ }
}
static const struct ioctl_sick_map ag_map[] = {
@@ -438,7 +419,6 @@ static const struct ioctl_sick_map ag_map[] = {
{ XFS_SICK_AG_RMAPBT, XFS_AG_GEOM_SICK_RMAPBT },
{ XFS_SICK_AG_REFCNTBT, XFS_AG_GEOM_SICK_REFCNTBT },
{ XFS_SICK_AG_INODES, XFS_AG_GEOM_SICK_INODES },
- { 0, 0 },
};
/* Fill out ag geometry health info. */
@@ -455,7 +435,7 @@ xfs_ag_geom_health(
ageo->ag_checked = 0;
xfs_group_measure_sickness(pag_group(pag), &sick, &checked);
- for (m = ag_map; m->sick_mask; m++) {
+ for_each_sick_map(ag_map, m) {
if (checked & m->sick_mask)
ageo->ag_checked |= m->ioctl_mask;
if (sick & m->sick_mask)
@@ -463,6 +443,34 @@ xfs_ag_geom_health(
}
}
+static const struct ioctl_sick_map rtgroup_map[] = {
+ { XFS_SICK_RG_SUPER, XFS_RTGROUP_GEOM_SICK_SUPER },
+ { XFS_SICK_RG_BITMAP, XFS_RTGROUP_GEOM_SICK_BITMAP },
+ { XFS_SICK_RG_SUMMARY, XFS_RTGROUP_GEOM_SICK_SUMMARY },
+};
+
+/* Fill out rtgroup geometry health info. */
+void
+xfs_rtgroup_geom_health(
+ struct xfs_rtgroup *rtg,
+ struct xfs_rtgroup_geometry *rgeo)
+{
+ const struct ioctl_sick_map *m;
+ unsigned int sick;
+ unsigned int checked;
+
+ rgeo->rg_sick = 0;
+ rgeo->rg_checked = 0;
+
+ xfs_group_measure_sickness(rtg_group(rtg), &sick, &checked);
+ for_each_sick_map(rtgroup_map, m) {
+ if (checked & m->sick_mask)
+ rgeo->rg_checked |= m->ioctl_mask;
+ if (sick & m->sick_mask)
+ rgeo->rg_sick |= m->ioctl_mask;
+ }
+}
+
static const struct ioctl_sick_map ino_map[] = {
{ XFS_SICK_INO_CORE, XFS_BS_SICK_INODE },
{ XFS_SICK_INO_BMBTD, XFS_BS_SICK_BMBTD },
@@ -477,7 +485,6 @@ static const struct ioctl_sick_map ino_map[] = {
{ XFS_SICK_INO_DIR_ZAPPED, XFS_BS_SICK_DIR },
{ XFS_SICK_INO_SYMLINK_ZAPPED, XFS_BS_SICK_SYMLINK },
{ XFS_SICK_INO_DIRTREE, XFS_BS_SICK_DIRTREE },
- { 0, 0 },
};
/* Fill out bulkstat health info. */
@@ -494,7 +501,7 @@ xfs_bulkstat_health(
bs->bs_checked = 0;
xfs_inode_measure_sickness(ip, &sick, &checked);
- for (m = ino_map; m->sick_mask; m++) {
+ for_each_sick_map(ino_map, m) {
if (checked & m->sick_mask)
bs->bs_checked |= m->ioctl_mask;
if (sick & m->sick_mask)
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index f36fd8db388c..64cf51c8a15d 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -40,6 +40,7 @@
#include "xfs_file.h"
#include "xfs_exchrange.h"
#include "xfs_handle.h"
+#include "xfs_rtgroup.h"
#include <linux/mount.h>
#include <linux/fileattr.h>
@@ -403,6 +404,38 @@ xfs_ioc_ag_geometry(
return 0;
}
+STATIC int
+xfs_ioc_rtgroup_geometry(
+ struct xfs_mount *mp,
+ void __user *arg)
+{
+ struct xfs_rtgroup *rtg;
+ struct xfs_rtgroup_geometry rgeo;
+ int error;
+
+ if (copy_from_user(&rgeo, arg, sizeof(rgeo)))
+ return -EFAULT;
+ if (rgeo.rg_flags)
+ return -EINVAL;
+ if (memchr_inv(&rgeo.rg_reserved, 0, sizeof(rgeo.rg_reserved)))
+ return -EINVAL;
+ if (!xfs_has_rtgroups(mp))
+ return -EINVAL;
+
+ rtg = xfs_rtgroup_get(mp, rgeo.rg_number);
+ if (!rtg)
+ return -EINVAL;
+
+ error = xfs_rtgroup_get_geometry(rtg, &rgeo);
+ xfs_rtgroup_put(rtg);
+ if (error)
+ return error;
+
+ if (copy_to_user(arg, &rgeo, sizeof(rgeo)))
+ return -EFAULT;
+ return 0;
+}
+
/*
* Linux extended inode flags interface.
*/
@@ -1028,7 +1061,7 @@ xfs_ioc_setlabel(
* buffered reads from userspace (i.e. from blkid) are invalidated,
* and userspace will see the newly-written label.
*/
- error = xfs_sync_sb_buf(mp);
+ error = xfs_sync_sb_buf(mp, true);
if (error)
goto out;
/*
@@ -1039,6 +1072,8 @@ xfs_ioc_setlabel(
mutex_unlock(&mp->m_growlock);
invalidate_bdev(mp->m_ddev_targp->bt_bdev);
+ if (xfs_has_rtsb(mp) && mp->m_rtdev_targp)
+ invalidate_bdev(mp->m_rtdev_targp->bt_bdev);
out:
mnt_drop_write_file(filp);
@@ -1223,6 +1258,8 @@ xfs_file_ioctl(
case XFS_IOC_AG_GEOMETRY:
return xfs_ioc_ag_geometry(mp, arg);
+ case XFS_IOC_RTGROUP_GEOMETRY:
+ return xfs_ioc_rtgroup_geometry(mp, arg);
case XFS_IOC_GETVERSION:
return put_user(inode->i_generation, (int __user *)arg);
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index ba6584a6f101..f31855b6bf2c 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -24,6 +24,7 @@
#include "xfs_iomap.h"
#include "xfs_trace.h"
#include "xfs_quota.h"
+#include "xfs_rtgroup.h"
#include "xfs_dquot_item.h"
#include "xfs_dquot.h"
#include "xfs_reflink.h"
@@ -115,7 +116,9 @@ xfs_bmbt_to_iomap(
iomap->addr = IOMAP_NULL_ADDR;
iomap->type = IOMAP_DELALLOC;
} else {
- iomap->addr = BBTOB(xfs_fsb_to_db(ip, imap->br_startblock));
+ xfs_daddr_t daddr = xfs_fsb_to_db(ip, imap->br_startblock);
+
+ iomap->addr = BBTOB(daddr);
if (mapping_flags & IOMAP_DAX)
iomap->addr += target->bt_dax_part_off;
@@ -124,6 +127,14 @@ xfs_bmbt_to_iomap(
else
iomap->type = IOMAP_MAPPED;
+ /*
+ * Mark iomaps starting at the first sector of a RTG as merge
+ * boundary so that each I/O completions is contained to a
+ * single RTG.
+ */
+ if (XFS_IS_REALTIME_INODE(ip) && xfs_has_rtgroups(mp) &&
+ xfs_rtbno_is_group_start(mp, imap->br_startblock))
+ iomap->flags |= IOMAP_F_BOUNDARY;
}
iomap->offset = XFS_FSB_TO_B(mp, imap->br_startoff);
iomap->length = XFS_FSB_TO_B(mp, imap->br_blockcount);
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 55e412a82148..0af3d477197b 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -1818,6 +1818,8 @@ static const struct xlog_recover_item_ops *xlog_recover_item_ops[] = {
&xlog_attrd_item_ops,
&xlog_xmi_item_ops,
&xlog_xmd_item_ops,
+ &xlog_rtefi_item_ops,
+ &xlog_rtefd_item_ops,
};
static const struct xlog_recover_item_ops *
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index b3554893f388..ee1c3eb53d9f 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -119,6 +119,7 @@ typedef struct xfs_mount {
struct super_block *m_super;
struct xfs_ail *m_ail; /* fs active log item list */
struct xfs_buf *m_sb_bp; /* buffer for superblock */
+ struct xfs_buf *m_rtsb_bp; /* realtime superblock */
char *m_rtname; /* realtime device name */
char *m_logname; /* external log device name */
struct xfs_da_geometry *m_dir_geo; /* directory block geometry */
@@ -146,10 +147,11 @@ typedef struct xfs_mount {
uint8_t m_agno_log; /* log #ag's */
uint8_t m_sectbb_log; /* sectlog - BBSHIFT */
int8_t m_rtxblklog; /* log2 of rextsize, if possible */
- int8_t m_rgblklog; /* log2 of rt group sz if possible */
+
uint m_blockmask; /* sb_blocksize-1 */
uint m_blockwsize; /* sb_blocksize in words */
- uint m_blockwmask; /* blockwsize-1 */
+ /* number of rt extents per rt bitmap block if rtgroups enabled */
+ unsigned int m_rtx_per_rbmblock;
uint m_alloc_mxr[2]; /* max alloc btree records */
uint m_alloc_mnr[2]; /* min alloc btree records */
uint m_bmap_dmxr[2]; /* max bmap btree records */
@@ -175,14 +177,12 @@ typedef struct xfs_mount {
int m_logbsize; /* size of each log buffer */
unsigned int m_rsumlevels; /* rt summary levels */
xfs_filblks_t m_rsumblocks; /* size of rt summary, FSBs */
- uint32_t m_rgblocks; /* size of rtgroup in rtblocks */
int m_fixedfsid[2]; /* unchanged for life of FS */
uint m_qflags; /* quota status flags */
uint64_t m_features; /* active filesystem features */
uint64_t m_low_space[XFS_LOWSP_MAX];
uint64_t m_low_rtexts[XFS_LOWSP_MAX];
uint64_t m_rtxblkmask; /* rt extent block mask */
- uint64_t m_rgblkmask; /* rt group block mask */
struct xfs_ino_geometry m_ino_geo; /* inode geometry */
struct xfs_trans_resv m_resv; /* precomputed res values */
/* low free space thresholds */
@@ -253,6 +253,7 @@ typedef struct xfs_mount {
#endif
xfs_agnumber_t m_agfrotor; /* last ag where space found */
atomic_t m_agirotor; /* last ag dir inode alloced */
+ atomic_t m_rtgrotor; /* last rtgroup rtpicked */
/* Memory shrinker to throttle and reprioritize inodegc */
struct shrinker *m_inodegc_shrinker;
@@ -387,12 +388,14 @@ __XFS_HAS_FEAT(metadir, METADIR)
static inline bool xfs_has_rtgroups(struct xfs_mount *mp)
{
- return false;
+ /* all metadir file systems also allow rtgroups */
+ return xfs_has_metadir(mp);
}
static inline bool xfs_has_rtsb(struct xfs_mount *mp)
{
- return false;
+ /* all rtgroups filesystems with an rt section have an rtsb */
+ return xfs_has_rtgroups(mp) && xfs_has_realtime(mp);
}
/*
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index d9fb5e9c0aaf..7ecea7623a15 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -29,6 +29,7 @@
#include "xfs_metafile.h"
#include "xfs_rtgroup.h"
#include "xfs_error.h"
+#include "xfs_trace.h"
/*
* Return whether there are any free extents in the size range given
@@ -745,12 +746,16 @@ xfs_growfs_rt_alloc_fake_mount(
nmp = kmemdup(mp, sizeof(*mp), GFP_KERNEL);
if (!nmp)
return NULL;
- nmp->m_sb.sb_rextsize = rextsize;
- xfs_mount_sb_set_rextsize(nmp, &nmp->m_sb);
+ xfs_mount_sb_set_rextsize(nmp, &nmp->m_sb, rextsize);
nmp->m_sb.sb_rblocks = rblocks;
nmp->m_sb.sb_rextents = xfs_blen_to_rtbxlen(nmp, nmp->m_sb.sb_rblocks);
nmp->m_sb.sb_rbmblocks = xfs_rtbitmap_blockcount(nmp);
nmp->m_sb.sb_rextslog = xfs_compute_rextslog(nmp->m_sb.sb_rextents);
+ if (xfs_has_rtgroups(nmp))
+ nmp->m_sb.sb_rgcount = howmany_64(nmp->m_sb.sb_rextents,
+ nmp->m_sb.sb_rgextents);
+ else
+ nmp->m_sb.sb_rgcount = 1;
nmp->m_rsumblocks = xfs_rtsummary_blockcount(nmp, &nmp->m_rsumlevels);
if (rblocks > 0)
@@ -761,6 +766,88 @@ xfs_growfs_rt_alloc_fake_mount(
return nmp;
}
+/* Free all the new space and return the number of extents actually freed. */
+static int
+xfs_growfs_rt_free_new(
+ struct xfs_rtgroup *rtg,
+ struct xfs_rtalloc_args *nargs,
+ xfs_rtbxlen_t *freed_rtx)
+{
+ struct xfs_mount *mp = rtg_mount(rtg);
+ xfs_rgnumber_t rgno = rtg_rgno(rtg);
+ xfs_rtxnum_t start_rtx = 0, end_rtx;
+
+ if (rgno < mp->m_sb.sb_rgcount)
+ start_rtx = xfs_rtgroup_extents(mp, rgno);
+ end_rtx = xfs_rtgroup_extents(nargs->mp, rgno);
+
+ /*
+ * Compute the first new extent that we want to free, being careful to
+ * skip past a realtime superblock at the start of the realtime volume.
+ */
+ if (xfs_has_rtsb(nargs->mp) && rgno == 0 && start_rtx == 0)
+ start_rtx++;
+ *freed_rtx = end_rtx - start_rtx;
+ return xfs_rtfree_range(nargs, start_rtx, *freed_rtx);
+}
+
+static xfs_rfsblock_t
+xfs_growfs_rt_nrblocks(
+ struct xfs_rtgroup *rtg,
+ xfs_rfsblock_t nrblocks,
+ xfs_agblock_t rextsize,
+ xfs_fileoff_t bmbno)
+{
+ struct xfs_mount *mp = rtg_mount(rtg);
+ xfs_rfsblock_t step;
+
+ step = (bmbno + 1) * mp->m_rtx_per_rbmblock * rextsize;
+ if (xfs_has_rtgroups(mp)) {
+ xfs_rfsblock_t rgblocks = mp->m_sb.sb_rgextents * rextsize;
+
+ step = min(rgblocks, step) + rgblocks * rtg_rgno(rtg);
+ }
+
+ return min(nrblocks, step);
+}
+
+/*
+ * If the post-grow filesystem will have an rtsb; we're initializing the first
+ * rtgroup; and the filesystem didn't have a realtime section, write the rtsb
+ * now, and attach the rtsb buffer to the real mount.
+ */
+static int
+xfs_growfs_rt_init_rtsb(
+ const struct xfs_rtalloc_args *nargs,
+ const struct xfs_rtgroup *rtg,
+ const struct xfs_rtalloc_args *args)
+{
+ struct xfs_mount *mp = args->mp;
+ struct xfs_buf *rtsb_bp;
+ int error;
+
+ if (!xfs_has_rtsb(nargs->mp))
+ return 0;
+ if (rtg_rgno(rtg) > 0)
+ return 0;
+ if (mp->m_sb.sb_rblocks)
+ return 0;
+
+ error = xfs_buf_get_uncached(mp->m_rtdev_targp, XFS_FSB_TO_BB(mp, 1),
+ 0, &rtsb_bp);
+ if (error)
+ return error;
+
+ rtsb_bp->b_maps[0].bm_bn = XFS_RTSB_DADDR;
+ rtsb_bp->b_ops = &xfs_rtsb_buf_ops;
+
+ xfs_update_rtsb(rtsb_bp, mp->m_sb_bp);
+ mp->m_rtsb_bp = rtsb_bp;
+ error = xfs_bwrite(rtsb_bp);
+ xfs_buf_unlock(rtsb_bp);
+ return error;
+}
+
static int
xfs_growfs_rt_bmblock(
struct xfs_rtgroup *rtg,
@@ -779,20 +866,21 @@ xfs_growfs_rt_bmblock(
.rtg = rtg,
};
struct xfs_mount *nmp;
- xfs_rfsblock_t nrblocks_step;
xfs_rtbxlen_t freed_rtx;
int error;
/*
- * Calculate new sb and mount fields for this round.
+ * Calculate new sb and mount fields for this round. Also ensure the
+ * rtg_extents value is uptodate as the rtbitmap code relies on it.
*/
- nrblocks_step = (bmbno + 1) * NBBY * mp->m_sb.sb_blocksize * rextsize;
nmp = nargs.mp = xfs_growfs_rt_alloc_fake_mount(mp,
- min(nrblocks, nrblocks_step), rextsize);
+ xfs_growfs_rt_nrblocks(rtg, nrblocks, rextsize, bmbno),
+ rextsize);
if (!nmp)
return -ENOMEM;
- rtg->rtg_extents = xfs_rtgroup_extents(nmp, rtg_rgno(rtg));
+ xfs_rtgroup_calc_geometry(nmp, rtg, rtg_rgno(rtg),
+ nmp->m_sb.sb_rgcount, nmp->m_sb.sb_rextents);
/*
* Recompute the growfsrt reservation from the new rsumsize, so that the
@@ -837,6 +925,10 @@ xfs_growfs_rt_bmblock(
goto out_cancel;
}
+ error = xfs_growfs_rt_init_rtsb(&nargs, rtg, &args);
+ if (error)
+ goto out_cancel;
+
/*
* Update superblock fields.
*/
@@ -855,12 +947,14 @@ xfs_growfs_rt_bmblock(
if (nmp->m_sb.sb_rextslog != mp->m_sb.sb_rextslog)
xfs_trans_mod_sb(args.tp, XFS_TRANS_SB_REXTSLOG,
nmp->m_sb.sb_rextslog - mp->m_sb.sb_rextslog);
+ if (nmp->m_sb.sb_rgcount != mp->m_sb.sb_rgcount)
+ xfs_trans_mod_sb(args.tp, XFS_TRANS_SB_RGCOUNT,
+ nmp->m_sb.sb_rgcount - mp->m_sb.sb_rgcount);
/*
* Free the new extent.
*/
- freed_rtx = nmp->m_sb.sb_rextents - mp->m_sb.sb_rextents;
- error = xfs_rtfree_range(&nargs, mp->m_sb.sb_rextents, freed_rtx);
+ error = xfs_growfs_rt_free_new(rtg, &nargs, &freed_rtx);
xfs_rtbuf_cache_relse(&nargs);
if (error)
goto out_cancel;
@@ -875,7 +969,6 @@ xfs_growfs_rt_bmblock(
*/
mp->m_rsumlevels = nmp->m_rsumlevels;
mp->m_rsumblocks = nmp->m_rsumblocks;
- xfs_mount_sb_set_rextsize(mp, &mp->m_sb);
/*
* Recompute the growfsrt reservation from the new rsumsize.
@@ -901,6 +994,15 @@ out_free:
return error;
}
+static xfs_rtxnum_t
+xfs_last_rtgroup_extents(
+ struct xfs_mount *mp)
+{
+ return mp->m_sb.sb_rextents -
+ ((xfs_rtxnum_t)(mp->m_sb.sb_rgcount - 1) *
+ mp->m_sb.sb_rgextents);
+}
+
/*
* Calculate the last rbmblock currently used.
*
@@ -911,11 +1013,20 @@ xfs_last_rt_bmblock(
struct xfs_rtgroup *rtg)
{
struct xfs_mount *mp = rtg_mount(rtg);
- xfs_fileoff_t bmbno = mp->m_sb.sb_rbmblocks;
+ xfs_rgnumber_t rgno = rtg_rgno(rtg);
+ xfs_fileoff_t bmbno = 0;
+
+ ASSERT(!mp->m_sb.sb_rgcount || rgno >= mp->m_sb.sb_rgcount - 1);
+
+ if (mp->m_sb.sb_rgcount && rgno == mp->m_sb.sb_rgcount - 1) {
+ xfs_rtxnum_t nrext = xfs_last_rtgroup_extents(mp);
+
+ /* Also fill up the previous block if not entirely full. */
+ bmbno = xfs_rtbitmap_blockcount_len(mp, nrext);
+ if (xfs_rtx_to_rbmword(mp, nrext) != 0)
+ bmbno--;
+ }
- /* Skip the current block if it is exactly full. */
- if (xfs_rtx_to_rbmword(mp, mp->m_sb.sb_rextents) != 0)
- bmbno--;
return bmbno;
}
@@ -932,38 +1043,56 @@ xfs_growfs_rt_alloc_blocks(
struct xfs_mount *mp = rtg_mount(rtg);
struct xfs_inode *rbmip = rtg->rtg_inodes[XFS_RTGI_BITMAP];
struct xfs_inode *rsumip = rtg->rtg_inodes[XFS_RTGI_SUMMARY];
- xfs_extlen_t orbmblocks;
- xfs_extlen_t orsumblocks;
- xfs_extlen_t nrsumblocks;
+ xfs_extlen_t orbmblocks = 0;
+ xfs_extlen_t orsumblocks = 0;
struct xfs_mount *nmp;
- int error;
-
- /*
- * Get the old block counts for bitmap and summary inodes.
- * These can't change since other growfs callers are locked out.
- */
- orbmblocks = XFS_B_TO_FSB(mp, rbmip->i_disk_size);
- orsumblocks = XFS_B_TO_FSB(mp, rsumip->i_disk_size);
+ int error = 0;
nmp = xfs_growfs_rt_alloc_fake_mount(mp, nrblocks, rextsize);
if (!nmp)
return -ENOMEM;
-
*nrbmblocks = nmp->m_sb.sb_rbmblocks;
- nrsumblocks = nmp->m_rsumblocks;
- kfree(nmp);
+
+ if (xfs_has_rtgroups(mp)) {
+ /*
+ * For file systems with the rtgroups feature, the RT bitmap and
+ * summary are always fully allocated, which means that we never
+ * need to grow the existing files.
+ *
+ * But we have to be careful to only fill the bitmap until the
+ * end of the actually used range.
+ */
+ if (rtg_rgno(rtg) == nmp->m_sb.sb_rgcount - 1)
+ *nrbmblocks = xfs_rtbitmap_blockcount_len(nmp,
+ xfs_last_rtgroup_extents(nmp));
+
+ if (mp->m_sb.sb_rgcount &&
+ rtg_rgno(rtg) == mp->m_sb.sb_rgcount - 1)
+ goto out_free;
+ } else {
+ /*
+ * Get the old block counts for bitmap and summary inodes.
+ * These can't change since other growfs callers are locked out.
+ */
+ orbmblocks = XFS_B_TO_FSB(mp, rbmip->i_disk_size);
+ orsumblocks = XFS_B_TO_FSB(mp, rsumip->i_disk_size);
+ }
error = xfs_rtfile_initialize_blocks(rtg, XFS_RTGI_BITMAP, orbmblocks,
- *nrbmblocks, NULL);
+ nmp->m_sb.sb_rbmblocks, NULL);
if (error)
- return error;
- return xfs_rtfile_initialize_blocks(rtg, XFS_RTGI_SUMMARY, orsumblocks,
- nrsumblocks, NULL);
+ goto out_free;
+ error = xfs_rtfile_initialize_blocks(rtg, XFS_RTGI_SUMMARY, orsumblocks,
+ nmp->m_rsumblocks, NULL);
+out_free:
+ kfree(nmp);
+ return error;
}
static int
xfs_growfs_rtg(
struct xfs_mount *mp,
+ xfs_rgnumber_t rgno,
xfs_rfsblock_t nrblocks,
xfs_agblock_t rextsize)
{
@@ -974,7 +1103,7 @@ xfs_growfs_rtg(
unsigned int i;
int error;
- rtg = xfs_rtgroup_grab(mp, 0);
+ rtg = xfs_rtgroup_grab(mp, rgno);
if (!rtg)
return -EINVAL;
@@ -1010,7 +1139,8 @@ out_error:
/*
* Reset rtg_extents to the old value if adding more blocks failed.
*/
- rtg->rtg_extents = xfs_rtgroup_extents(rtg_mount(rtg), rtg_rgno(rtg));
+ xfs_rtgroup_calc_geometry(mp, rtg, rtg_rgno(rtg), mp->m_sb.sb_rgcount,
+ mp->m_sb.sb_rextents);
if (old_rsum_cache) {
kvfree(rtg->rtg_rsum_cache);
rtg->rtg_rsum_cache = old_rsum_cache;
@@ -1046,13 +1176,66 @@ xfs_growfs_check_rtgeom(
}
/*
+ * Compute the new number of rt groups and ensure that /rtgroups exists.
+ *
+ * Changing the rtgroup size is not allowed (even if the rt volume hasn't yet
+ * been initialized) because the userspace ABI doesn't support it.
+ */
+static int
+xfs_growfs_rt_prep_groups(
+ struct xfs_mount *mp,
+ xfs_rfsblock_t rblocks,
+ xfs_extlen_t rextsize,
+ xfs_rgnumber_t *new_rgcount)
+{
+ int error;
+
+ *new_rgcount = howmany_64(rblocks, mp->m_sb.sb_rgextents * rextsize);
+ if (*new_rgcount > XFS_MAX_RGNUMBER)
+ return -EINVAL;
+
+ /* Make sure the /rtgroups dir has been created */
+ if (!mp->m_rtdirip) {
+ struct xfs_trans *tp;
+
+ error = xfs_trans_alloc_empty(mp, &tp);
+ if (error)
+ return error;
+ error = xfs_rtginode_load_parent(tp);
+ xfs_trans_cancel(tp);
+
+ if (error == -ENOENT)
+ error = xfs_rtginode_mkdir_parent(mp);
+ if (error)
+ return error;
+ }
+
+ return 0;
+}
+
+static bool
+xfs_grow_last_rtg(
+ struct xfs_mount *mp)
+{
+ if (!xfs_has_rtgroups(mp))
+ return true;
+ if (mp->m_sb.sb_rgcount == 0)
+ return false;
+ return xfs_rtgroup_extents(mp, mp->m_sb.sb_rgcount - 1) <=
+ mp->m_sb.sb_rgextents;
+}
+
+/*
* Grow the realtime area of the filesystem.
*/
int
xfs_growfs_rt(
- xfs_mount_t *mp, /* mount point for filesystem */
- xfs_growfs_rt_t *in) /* growfs rt input struct */
+ struct xfs_mount *mp,
+ struct xfs_growfs_rt *in)
{
+ xfs_rgnumber_t old_rgcount = mp->m_sb.sb_rgcount;
+ xfs_rgnumber_t new_rgcount = 1;
+ xfs_rgnumber_t rgno;
struct xfs_buf *bp;
xfs_agblock_t old_rextsize = mp->m_sb.sb_rextsize;
int error;
@@ -1110,24 +1293,112 @@ xfs_growfs_rt(
if (error)
goto out_unlock;
- error = xfs_growfs_rtg(mp, in->newblocks, in->extsize);
- if (error)
- goto out_unlock;
+ if (xfs_has_rtgroups(mp)) {
+ error = xfs_growfs_rt_prep_groups(mp, in->newblocks,
+ in->extsize, &new_rgcount);
+ if (error)
+ goto out_unlock;
+ }
- if (old_rextsize != in->extsize) {
- error = xfs_growfs_rt_fixup_extsize(mp);
+ if (xfs_grow_last_rtg(mp)) {
+ error = xfs_growfs_rtg(mp, old_rgcount - 1, in->newblocks,
+ in->extsize);
if (error)
goto out_unlock;
}
- /* Update secondary superblocks now the physical grow has completed */
- error = xfs_update_secondary_sbs(mp);
+ for (rgno = old_rgcount; rgno < new_rgcount; rgno++) {
+ xfs_rtbxlen_t rextents = div_u64(in->newblocks, in->extsize);
+
+ error = xfs_rtgroup_alloc(mp, rgno, new_rgcount, rextents);
+ if (error)
+ goto out_unlock;
+
+ error = xfs_growfs_rtg(mp, rgno, in->newblocks, in->extsize);
+ if (error) {
+ struct xfs_rtgroup *rtg;
+
+ rtg = xfs_rtgroup_grab(mp, rgno);
+ if (!WARN_ON_ONCE(!rtg)) {
+ xfs_rtunmount_rtg(rtg);
+ xfs_rtgroup_rele(rtg);
+ xfs_rtgroup_free(mp, rgno);
+ }
+ break;
+ }
+ }
+
+ if (!error && old_rextsize != in->extsize)
+ error = xfs_growfs_rt_fixup_extsize(mp);
+
+ /*
+ * Update secondary superblocks now the physical grow has completed.
+ *
+ * Also do this in case of an error as we might have already
+ * successfully updated one or more RTGs and incremented sb_rgcount.
+ */
+ if (!xfs_is_shutdown(mp)) {
+ int error2 = xfs_update_secondary_sbs(mp);
+
+ if (!error)
+ error = error2;
+ }
out_unlock:
mutex_unlock(&mp->m_growlock);
return error;
}
+/* Read the realtime superblock and attach it to the mount. */
+int
+xfs_rtmount_readsb(
+ struct xfs_mount *mp)
+{
+ struct xfs_buf *bp;
+ int error;
+
+ if (!xfs_has_rtsb(mp))
+ return 0;
+ if (mp->m_sb.sb_rblocks == 0)
+ return 0;
+ if (mp->m_rtdev_targp == NULL) {
+ xfs_warn(mp,
+ "Filesystem has a realtime volume, use rtdev=device option");
+ return -ENODEV;
+ }
+
+ /* m_blkbb_log is not set up yet */
+ error = xfs_buf_read_uncached(mp->m_rtdev_targp, XFS_RTSB_DADDR,
+ mp->m_sb.sb_blocksize >> BBSHIFT, XBF_NO_IOACCT, &bp,
+ &xfs_rtsb_buf_ops);
+ if (error) {
+ xfs_warn(mp, "rt sb validate failed with error %d.", error);
+ /* bad CRC means corrupted metadata */
+ if (error == -EFSBADCRC)
+ error = -EFSCORRUPTED;
+ return error;
+ }
+
+ mp->m_rtsb_bp = bp;
+ xfs_buf_unlock(bp);
+ return 0;
+}
+
+/* Detach the realtime superblock from the mount and free it. */
+void
+xfs_rtmount_freesb(
+ struct xfs_mount *mp)
+{
+ struct xfs_buf *bp = mp->m_rtsb_bp;
+
+ if (!bp)
+ return;
+
+ xfs_buf_lock(bp);
+ mp->m_rtsb_bp = NULL;
+ xfs_buf_relse(bp);
+}
+
/*
* Initialize realtime fields in the mount structure.
*/
@@ -1251,8 +1522,6 @@ xfs_rtmount_rtg(
{
int error, i;
- rtg->rtg_extents = xfs_rtgroup_extents(mp, rtg_rgno(rtg));
-
for (i = 0; i < XFS_RTGI_MAX; i++) {
error = xfs_rtginode_load(rtg, i, tp);
if (error)
@@ -1391,9 +1660,118 @@ xfs_rtalloc_align_minmax(
*raminlen = newminlen;
}
+/* Given a free extent, find any part of it that isn't busy, if possible. */
+STATIC bool
+xfs_rtalloc_check_busy(
+ struct xfs_rtalloc_args *args,
+ xfs_rtxnum_t start,
+ xfs_rtxlen_t minlen_rtx,
+ xfs_rtxlen_t maxlen_rtx,
+ xfs_rtxlen_t len_rtx,
+ xfs_rtxlen_t prod,
+ xfs_rtxnum_t rtx,
+ xfs_rtxlen_t *reslen,
+ xfs_rtxnum_t *resrtx,
+ unsigned *busy_gen)
+{
+ struct xfs_rtgroup *rtg = args->rtg;
+ struct xfs_mount *mp = rtg_mount(rtg);
+ xfs_agblock_t rgbno = xfs_rtx_to_rgbno(rtg, rtx);
+ xfs_rgblock_t min_rgbno = xfs_rtx_to_rgbno(rtg, start);
+ xfs_extlen_t minlen = xfs_rtxlen_to_extlen(mp, minlen_rtx);
+ xfs_extlen_t len = xfs_rtxlen_to_extlen(mp, len_rtx);
+ xfs_extlen_t diff;
+ bool busy;
+
+ busy = xfs_extent_busy_trim(rtg_group(rtg), minlen,
+ xfs_rtxlen_to_extlen(mp, maxlen_rtx), &rgbno, &len,
+ busy_gen);
+
+ /*
+ * If we have a largish extent that happens to start before min_rgbno,
+ * see if we can shift it into range...
+ */
+ if (rgbno < min_rgbno && rgbno + len > min_rgbno) {
+ diff = min_rgbno - rgbno;
+ if (len > diff) {
+ rgbno += diff;
+ len -= diff;
+ }
+ }
+
+ if (prod > 1 && len >= minlen) {
+ xfs_rgblock_t aligned_rgbno = roundup(rgbno, prod);
+
+ diff = aligned_rgbno - rgbno;
+
+ *resrtx = xfs_rgbno_to_rtx(mp, aligned_rgbno);
+ *reslen = xfs_extlen_to_rtxlen(mp,
+ diff >= len ? 0 : len - diff);
+ } else {
+ *resrtx = xfs_rgbno_to_rtx(mp, rgbno);
+ *reslen = xfs_extlen_to_rtxlen(mp, len);
+ }
+
+ return busy;
+}
+
+/*
+ * Adjust the given free extent so that it isn't busy, or flush the log and
+ * wait for the space to become unbusy. Only needed for rtgroups.
+ */
+STATIC int
+xfs_rtallocate_adjust_for_busy(
+ struct xfs_rtalloc_args *args,
+ xfs_rtxnum_t start,
+ xfs_rtxlen_t minlen,
+ xfs_rtxlen_t maxlen,
+ xfs_rtxlen_t *len,
+ xfs_rtxlen_t prod,
+ xfs_rtxnum_t *rtx)
+{
+ xfs_rtxnum_t resrtx;
+ xfs_rtxlen_t reslen;
+ unsigned busy_gen;
+ bool busy;
+ int error;
+
+again:
+ busy = xfs_rtalloc_check_busy(args, start, minlen, maxlen, *len, prod,
+ *rtx, &reslen, &resrtx, &busy_gen);
+ if (!busy)
+ return 0;
+
+ if (reslen < minlen || (start != 0 && resrtx != *rtx)) {
+ /*
+ * Enough of the extent was busy that we cannot satisfy the
+ * allocation, or this is a near allocation and the start of
+ * the extent is busy. Flush the log and wait for the busy
+ * situation to resolve.
+ */
+ trace_xfs_rtalloc_extent_busy(args->rtg, start, minlen, maxlen,
+ *len, prod, *rtx, busy_gen);
+
+ error = xfs_extent_busy_flush(args->tp, rtg_group(args->rtg),
+ busy_gen, 0);
+ if (error)
+ return error;
+
+ goto again;
+ }
+
+ /* Some of the free space wasn't busy, hand that back to the caller. */
+ trace_xfs_rtalloc_extent_busy_trim(args->rtg, *rtx, *len, resrtx,
+ reslen);
+ *len = reslen;
+ *rtx = resrtx;
+
+ return 0;
+}
+
static int
-xfs_rtallocate(
+xfs_rtallocate_rtg(
struct xfs_trans *tp,
+ xfs_rgnumber_t rgno,
xfs_rtblock_t bno_hint,
xfs_rtxlen_t minlen,
xfs_rtxlen_t maxlen,
@@ -1413,16 +1791,33 @@ xfs_rtallocate(
xfs_rtxlen_t len = 0;
int error = 0;
- args.rtg = xfs_rtgroup_grab(args.mp, 0);
+ args.rtg = xfs_rtgroup_grab(args.mp, rgno);
if (!args.rtg)
return -ENOSPC;
/*
- * Lock out modifications to both the RT bitmap and summary inodes.
+ * We need to lock out modifications to both the RT bitmap and summary
+ * inodes for finding free space in xfs_rtallocate_extent_{near,size}
+ * and join the bitmap and summary inodes for the actual allocation
+ * down in xfs_rtallocate_range.
+ *
+ * For RTG-enabled file system we don't want to join the inodes to the
+ * transaction until we are committed to allocate to allocate from this
+ * RTG so that only one inode of each type is locked at a time.
+ *
+ * But for pre-RTG file systems we need to already to join the bitmap
+ * inode to the transaction for xfs_rtpick_extent, which bumps the
+ * sequence number in it, so we'll have to join the inode to the
+ * transaction early here.
+ *
+ * This is all a bit messy, but at least the mess is contained in
+ * this function.
*/
if (!*rtlocked) {
xfs_rtgroup_lock(args.rtg, XFS_RTGLOCK_BITMAP);
- xfs_rtgroup_trans_join(tp, args.rtg, XFS_RTGLOCK_BITMAP);
+ if (!xfs_has_rtgroups(args.mp))
+ xfs_rtgroup_trans_join(tp, args.rtg,
+ XFS_RTGLOCK_BITMAP);
*rtlocked = true;
}
@@ -1432,7 +1827,7 @@ xfs_rtallocate(
*/
if (bno_hint)
start = xfs_rtb_to_rtx(args.mp, bno_hint);
- else if (initial_user_data)
+ else if (!xfs_has_rtgroups(args.mp) && initial_user_data)
start = xfs_rtpick_extent(args.rtg, tp, maxlen);
if (start) {
@@ -1453,8 +1848,20 @@ xfs_rtallocate(
prod, &rtx);
}
- if (error)
+ if (error) {
+ if (xfs_has_rtgroups(args.mp))
+ goto out_unlock;
goto out_release;
+ }
+
+ if (xfs_has_rtgroups(args.mp)) {
+ error = xfs_rtallocate_adjust_for_busy(&args, start, minlen,
+ maxlen, &len, prod, &rtx);
+ if (error)
+ goto out_unlock;
+
+ xfs_rtgroup_trans_join(tp, args.rtg, XFS_RTGLOCK_BITMAP);
+ }
error = xfs_rtallocate_range(&args, rtx, len);
if (error)
@@ -1470,6 +1877,57 @@ out_release:
xfs_rtgroup_rele(args.rtg);
xfs_rtbuf_cache_relse(&args);
return error;
+out_unlock:
+ xfs_rtgroup_unlock(args.rtg, XFS_RTGLOCK_BITMAP);
+ *rtlocked = false;
+ goto out_release;
+}
+
+static int
+xfs_rtallocate_rtgs(
+ struct xfs_trans *tp,
+ xfs_fsblock_t bno_hint,
+ xfs_rtxlen_t minlen,
+ xfs_rtxlen_t maxlen,
+ xfs_rtxlen_t prod,
+ bool wasdel,
+ bool initial_user_data,
+ xfs_rtblock_t *bno,
+ xfs_extlen_t *blen)
+{
+ struct xfs_mount *mp = tp->t_mountp;
+ xfs_rgnumber_t start_rgno, rgno;
+ int error;
+
+ /*
+ * For now this just blindly iterates over the RTGs for an initial
+ * allocation. We could try to keep an in-memory rtg_longest member
+ * to avoid the locking when just looking for big enough free space,
+ * but for now this keeps things simple.
+ */
+ if (bno_hint != NULLFSBLOCK)
+ start_rgno = xfs_rtb_to_rgno(mp, bno_hint);
+ else
+ start_rgno = (atomic_inc_return(&mp->m_rtgrotor) - 1) %
+ mp->m_sb.sb_rgcount;
+
+ rgno = start_rgno;
+ do {
+ bool rtlocked = false;
+
+ error = xfs_rtallocate_rtg(tp, rgno, bno_hint, minlen, maxlen,
+ prod, wasdel, initial_user_data, &rtlocked,
+ bno, blen);
+ if (error != -ENOSPC)
+ return error;
+ ASSERT(!rtlocked);
+
+ if (++rgno == mp->m_sb.sb_rgcount)
+ rgno = 0;
+ bno_hint = NULLFSBLOCK;
+ } while (rgno != start_rgno);
+
+ return -ENOSPC;
}
static int
@@ -1566,9 +2024,16 @@ retry:
if (xfs_bmap_adjacent(ap))
bno_hint = ap->blkno;
- error = xfs_rtallocate(ap->tp, bno_hint, raminlen, ralen, prod,
- ap->wasdel, initial_user_data, &rtlocked,
- &ap->blkno, &ap->length);
+ if (xfs_has_rtgroups(ap->ip->i_mount)) {
+ error = xfs_rtallocate_rtgs(ap->tp, bno_hint, raminlen, ralen,
+ prod, ap->wasdel, initial_user_data,
+ &ap->blkno, &ap->length);
+ } else {
+ error = xfs_rtallocate_rtg(ap->tp, 0, bno_hint, raminlen, ralen,
+ prod, ap->wasdel, initial_user_data,
+ &rtlocked, &ap->blkno, &ap->length);
+ }
+
if (error == -ENOSPC) {
if (!noalign) {
/*
diff --git a/fs/xfs/xfs_rtalloc.h b/fs/xfs/xfs_rtalloc.h
index a6836da9bebe..8e2a07b8174b 100644
--- a/fs/xfs/xfs_rtalloc.h
+++ b/fs/xfs/xfs_rtalloc.h
@@ -12,6 +12,10 @@ struct xfs_mount;
struct xfs_trans;
#ifdef CONFIG_XFS_RT
+/* rtgroup superblock initialization */
+int xfs_rtmount_readsb(struct xfs_mount *mp);
+void xfs_rtmount_freesb(struct xfs_mount *mp);
+
/*
* Initialize realtime fields in the mount structure.
*/
@@ -42,6 +46,8 @@ int xfs_rtalloc_reinit_frextents(struct xfs_mount *mp);
#else
# define xfs_growfs_rt(mp,in) (-ENOSYS)
# define xfs_rtalloc_reinit_frextents(m) (0)
+# define xfs_rtmount_readsb(mp) (0)
+# define xfs_rtmount_freesb(mp) ((void)0)
static inline int /* error */
xfs_rtmount_init(
xfs_mount_t *mp) /* file system mount structure */
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 9ae352dfdd6c..3afeab684468 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -45,6 +45,7 @@
#include "xfs_rtbitmap.h"
#include "xfs_exchmaps_item.h"
#include "xfs_parent.h"
+#include "xfs_rtalloc.h"
#include "scrub/stats.h"
#include "scrub/rcbag_btree.h"
@@ -1145,6 +1146,7 @@ xfs_fs_put_super(
xfs_filestream_unmount(mp);
xfs_unmountfs(mp);
+ xfs_rtmount_freesb(mp);
xfs_freesb(mp);
xchk_mount_stats_free(mp);
free_percpu(mp->m_stats.xs_stats);
@@ -1690,10 +1692,14 @@ xfs_fs_fill_super(
goto out_free_sb;
}
- error = xfs_filestream_mount(mp);
+ error = xfs_rtmount_readsb(mp);
if (error)
goto out_free_sb;
+ error = xfs_filestream_mount(mp);
+ if (error)
+ goto out_free_rtsb;
+
/*
* we must configure the block size in the superblock before we run the
* full mount process as the mount process can lookup and cache inodes.
@@ -1781,6 +1787,8 @@ xfs_fs_fill_super(
out_filestream_unmount:
xfs_filestream_unmount(mp);
+ out_free_rtsb:
+ xfs_rtmount_freesb(mp);
out_free_sb:
xfs_freesb(mp);
out_free_scrub_stats:
@@ -1800,7 +1808,7 @@ xfs_fs_fill_super(
out_unmount:
xfs_filestream_unmount(mp);
xfs_unmountfs(mp);
- goto out_free_sb;
+ goto out_free_rtsb;
}
static int
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index caa02caac2cd..7b16cdd72e9d 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -97,6 +97,7 @@ struct xfs_extent_free_item;
struct xfs_rmap_intent;
struct xfs_refcount_intent;
struct xfs_metadir_update;
+struct xfs_rtgroup;
#define XFS_ATTR_FILTER_FLAGS \
{ XFS_ATTR_ROOT, "ROOT" }, \
@@ -1748,6 +1749,80 @@ TRACE_EVENT(xfs_extent_busy_trim,
__entry->tlen)
);
+#ifdef CONFIG_XFS_RT
+TRACE_EVENT(xfs_rtalloc_extent_busy,
+ TP_PROTO(struct xfs_rtgroup *rtg, xfs_rtxnum_t start,
+ xfs_rtxlen_t minlen, xfs_rtxlen_t maxlen,
+ xfs_rtxlen_t len, xfs_rtxlen_t prod, xfs_rtxnum_t rtx,
+ unsigned busy_gen),
+ TP_ARGS(rtg, start, minlen, maxlen, len, prod, rtx, busy_gen),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(xfs_rgnumber_t, rgno)
+ __field(xfs_rtxnum_t, start)
+ __field(xfs_rtxlen_t, minlen)
+ __field(xfs_rtxlen_t, maxlen)
+ __field(xfs_rtxlen_t, mod)
+ __field(xfs_rtxlen_t, prod)
+ __field(xfs_rtxlen_t, len)
+ __field(xfs_rtxnum_t, rtx)
+ __field(unsigned, busy_gen)
+ ),
+ TP_fast_assign(
+ __entry->dev = rtg_mount(rtg)->m_super->s_dev;
+ __entry->rgno = rtg_rgno(rtg);
+ __entry->start = start;
+ __entry->minlen = minlen;
+ __entry->maxlen = maxlen;
+ __entry->prod = prod;
+ __entry->len = len;
+ __entry->rtx = rtx;
+ __entry->busy_gen = busy_gen;
+ ),
+ TP_printk("dev %d:%d rgno 0x%x startrtx 0x%llx minlen %u maxlen %u "
+ "prod %u len %u rtx 0%llx busy_gen 0x%x",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->rgno,
+ __entry->start,
+ __entry->minlen,
+ __entry->maxlen,
+ __entry->prod,
+ __entry->len,
+ __entry->rtx,
+ __entry->busy_gen)
+)
+
+TRACE_EVENT(xfs_rtalloc_extent_busy_trim,
+ TP_PROTO(struct xfs_rtgroup *rtg, xfs_rtxnum_t old_rtx,
+ xfs_rtxlen_t old_len, xfs_rtxnum_t new_rtx,
+ xfs_rtxlen_t new_len),
+ TP_ARGS(rtg, old_rtx, old_len, new_rtx, new_len),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(xfs_rgnumber_t, rgno)
+ __field(xfs_rtxnum_t, old_rtx)
+ __field(xfs_rtxnum_t, new_rtx)
+ __field(xfs_rtxlen_t, old_len)
+ __field(xfs_rtxlen_t, new_len)
+ ),
+ TP_fast_assign(
+ __entry->dev = rtg_mount(rtg)->m_super->s_dev;
+ __entry->rgno = rtg_rgno(rtg);
+ __entry->old_rtx = old_rtx;
+ __entry->old_len = old_len;
+ __entry->new_rtx = new_rtx;
+ __entry->new_len = new_len;
+ ),
+ TP_printk("dev %d:%d rgno 0x%x rtx 0x%llx rtxcount 0x%x -> rtx 0x%llx rtxcount 0x%x",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->rgno,
+ __entry->old_rtx,
+ __entry->old_len,
+ __entry->new_rtx,
+ __entry->new_len)
+);
+#endif /* CONFIG_XFS_RT */
+
DECLARE_EVENT_CLASS(xfs_agf_class,
TP_PROTO(struct xfs_mount *mp, struct xfs_agf *agf, int flags,
unsigned long caller_ip),
@@ -2479,23 +2554,26 @@ DEFINE_LOG_RECOVER_ICREATE_ITEM(xfs_log_recover_icreate_cancel);
DEFINE_LOG_RECOVER_ICREATE_ITEM(xfs_log_recover_icreate_recover);
DECLARE_EVENT_CLASS(xfs_discard_class,
- TP_PROTO(const struct xfs_perag *pag, xfs_agblock_t agbno,
+ TP_PROTO(const struct xfs_group *xg, xfs_agblock_t agbno,
xfs_extlen_t len),
- TP_ARGS(pag, agbno, len),
+ TP_ARGS(xg, agbno, len),
TP_STRUCT__entry(
__field(dev_t, dev)
+ __field(enum xfs_group_type, type)
__field(xfs_agnumber_t, agno)
__field(xfs_agblock_t, agbno)
__field(xfs_extlen_t, len)
),
TP_fast_assign(
- __entry->dev = pag_mount(pag)->m_super->s_dev;
- __entry->agno = pag_agno(pag);
+ __entry->dev = xg->xg_mount->m_super->s_dev;
+ __entry->type = xg->xg_type;
+ __entry->agno = xg->xg_gno;
__entry->agbno = agbno;
__entry->len = len;
),
- TP_printk("dev %d:%d agno 0x%x agbno 0x%x fsbcount 0x%x",
+ TP_printk("dev %d:%d %sno 0x%x gbno 0x%x fsbcount 0x%x",
MAJOR(__entry->dev), MINOR(__entry->dev),
+ __print_symbolic(__entry->type, XG_TYPE_STRINGS),
__entry->agno,
__entry->agbno,
__entry->len)
@@ -2503,9 +2581,9 @@ DECLARE_EVENT_CLASS(xfs_discard_class,
#define DEFINE_DISCARD_EVENT(name) \
DEFINE_EVENT(xfs_discard_class, name, \
- TP_PROTO(const struct xfs_perag *pag, xfs_agblock_t agbno, \
- xfs_extlen_t len), \
- TP_ARGS(pag, agbno, len))
+ TP_PROTO(const struct xfs_group *xg, xfs_agblock_t agbno, \
+ xfs_extlen_t len), \
+ TP_ARGS(xg, agbno, len))
DEFINE_DISCARD_EVENT(xfs_discard_extent);
DEFINE_DISCARD_EVENT(xfs_discard_toosmall);
DEFINE_DISCARD_EVENT(xfs_discard_exclude);
@@ -2765,6 +2843,7 @@ DECLARE_EVENT_CLASS(xfs_free_extent_deferred_class,
TP_ARGS(mp, free),
TP_STRUCT__entry(
__field(dev_t, dev)
+ __field(enum xfs_group_type, type)
__field(xfs_agnumber_t, agno)
__field(xfs_agblock_t, agbno)
__field(xfs_extlen_t, len)
@@ -2772,13 +2851,16 @@ DECLARE_EVENT_CLASS(xfs_free_extent_deferred_class,
),
TP_fast_assign(
__entry->dev = mp->m_super->s_dev;
- __entry->agno = XFS_FSB_TO_AGNO(mp, free->xefi_startblock);
- __entry->agbno = XFS_FSB_TO_AGBNO(mp, free->xefi_startblock);
+ __entry->type = free->xefi_group->xg_type;
+ __entry->agno = free->xefi_group->xg_gno;
+ __entry->agbno = xfs_fsb_to_gbno(mp, free->xefi_startblock,
+ free->xefi_group->xg_type);
__entry->len = free->xefi_blockcount;
__entry->flags = free->xefi_flags;
),
- TP_printk("dev %d:%d agno 0x%x agbno 0x%x fsbcount 0x%x flags 0x%x",
+ TP_printk("dev %d:%d %sno 0x%x gbno 0x%x fsbcount 0x%x flags 0x%x",
MAJOR(__entry->dev), MINOR(__entry->dev),
+ __print_symbolic(__entry->type, XG_TYPE_STRINGS),
__entry->agno,
__entry->agbno,
__entry->len,
@@ -2788,7 +2870,6 @@ DECLARE_EVENT_CLASS(xfs_free_extent_deferred_class,
DEFINE_EVENT(xfs_free_extent_deferred_class, name, \
TP_PROTO(struct xfs_mount *mp, struct xfs_extent_free_item *free), \
TP_ARGS(mp, free))
-DEFINE_FREE_EXTENT_DEFERRED_EVENT(xfs_agfl_free_defer);
DEFINE_FREE_EXTENT_DEFERRED_EVENT(xfs_agfl_free_deferred);
DEFINE_FREE_EXTENT_DEFERRED_EVENT(xfs_extent_free_defer);
DEFINE_FREE_EXTENT_DEFERRED_EVENT(xfs_extent_free_deferred);
@@ -3085,11 +3166,10 @@ DECLARE_EVENT_CLASS(xfs_bmap_deferred_class,
TP_ARGS(bi),
TP_STRUCT__entry(
__field(dev_t, dev)
- __field(dev_t, opdev)
+ __field(enum xfs_group_type, type)
__field(xfs_agnumber_t, agno)
__field(xfs_ino_t, ino)
- __field(xfs_agblock_t, agbno)
- __field(xfs_fsblock_t, rtbno)
+ __field(unsigned long long, gbno)
__field(int, whichfork)
__field(xfs_fileoff_t, l_loff)
__field(xfs_filblks_t, l_len)
@@ -3098,20 +3178,25 @@ DECLARE_EVENT_CLASS(xfs_bmap_deferred_class,
),
TP_fast_assign(
struct xfs_inode *ip = bi->bi_owner;
+ struct xfs_mount *mp = ip->i_mount;
- __entry->dev = ip->i_mount->m_super->s_dev;
- if (xfs_ifork_is_realtime(ip, bi->bi_whichfork)) {
- __entry->agno = 0;
- __entry->agbno = 0;
- __entry->rtbno = bi->bi_bmap.br_startblock;
- __entry->opdev = ip->i_mount->m_rtdev_targp->bt_dev;
+ __entry->dev = mp->m_super->s_dev;
+ __entry->type = bi->bi_group->xg_type;
+ __entry->agno = bi->bi_group->xg_gno;
+ if (bi->bi_group->xg_type == XG_TYPE_RTG &&
+ !xfs_has_rtgroups(mp)) {
+ /*
+ * Legacy rt filesystems do not have allocation groups
+ * ondisk. We emulate this incore with one gigantic
+ * rtgroup whose size can exceed a 32-bit block number.
+ * For this tracepoint, we report group 0 and a 64-bit
+ * group block number.
+ */
+ __entry->gbno = bi->bi_bmap.br_startblock;
} else {
- __entry->agno = XFS_FSB_TO_AGNO(ip->i_mount,
- bi->bi_bmap.br_startblock);
- __entry->agbno = XFS_FSB_TO_AGBNO(ip->i_mount,
- bi->bi_bmap.br_startblock);
- __entry->rtbno = 0;
- __entry->opdev = __entry->dev;
+ __entry->gbno = xfs_fsb_to_gbno(mp,
+ bi->bi_bmap.br_startblock,
+ bi->bi_group->xg_type);
}
__entry->ino = ip->i_ino;
__entry->whichfork = bi->bi_whichfork;
@@ -3120,14 +3205,13 @@ DECLARE_EVENT_CLASS(xfs_bmap_deferred_class,
__entry->l_state = bi->bi_bmap.br_state;
__entry->op = bi->bi_type;
),
- TP_printk("dev %d:%d op %s opdev %d:%d ino 0x%llx agno 0x%x agbno 0x%x rtbno 0x%llx %s fileoff 0x%llx fsbcount 0x%llx state %d",
+ TP_printk("dev %d:%d op %s ino 0x%llx %sno 0x%x gbno 0x%llx %s fileoff 0x%llx fsbcount 0x%llx state %d",
MAJOR(__entry->dev), MINOR(__entry->dev),
__print_symbolic(__entry->op, XFS_BMAP_INTENT_STRINGS),
- MAJOR(__entry->opdev), MINOR(__entry->opdev),
__entry->ino,
+ __print_symbolic(__entry->type, XG_TYPE_STRINGS),
__entry->agno,
- __entry->agbno,
- __entry->rtbno,
+ __entry->gbno,
__print_symbolic(__entry->whichfork, XFS_WHICHFORK_STRINGS),
__entry->l_loff,
__entry->l_len,
@@ -4259,10 +4343,6 @@ DEFINE_FS_CORRUPT_EVENT(xfs_fs_mark_sick);
DEFINE_FS_CORRUPT_EVENT(xfs_fs_mark_corrupt);
DEFINE_FS_CORRUPT_EVENT(xfs_fs_mark_healthy);
DEFINE_FS_CORRUPT_EVENT(xfs_fs_unfixed_corruption);
-DEFINE_FS_CORRUPT_EVENT(xfs_rt_mark_sick);
-DEFINE_FS_CORRUPT_EVENT(xfs_rt_mark_corrupt);
-DEFINE_FS_CORRUPT_EVENT(xfs_rt_mark_healthy);
-DEFINE_FS_CORRUPT_EVENT(xfs_rt_unfixed_corruption);
DECLARE_EVENT_CLASS(xfs_group_corrupt_class,
TP_PROTO(const struct xfs_group *xg, unsigned int flags),
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index cee7f0564409..4db022c189e1 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -25,6 +25,8 @@
#include "xfs_dquot.h"
#include "xfs_icache.h"
#include "xfs_rtbitmap.h"
+#include "xfs_rtgroup.h"
+#include "xfs_sb.h"
struct kmem_cache *xfs_trans_cache;
@@ -420,6 +422,8 @@ xfs_trans_mod_sb(
ASSERT(tp->t_rtx_res_used <= tp->t_rtx_res);
}
tp->t_frextents_delta += delta;
+ if (xfs_has_rtgroups(mp))
+ flags &= ~XFS_TRANS_SB_DIRTY;
break;
case XFS_TRANS_SB_RES_FREXTENTS:
/*
@@ -429,6 +433,8 @@ xfs_trans_mod_sb(
*/
ASSERT(delta < 0);
tp->t_res_frextents_delta += delta;
+ if (xfs_has_rtgroups(mp))
+ flags &= ~XFS_TRANS_SB_DIRTY;
break;
case XFS_TRANS_SB_DBLOCKS:
tp->t_dblocks_delta += delta;
@@ -455,6 +461,10 @@ xfs_trans_mod_sb(
case XFS_TRANS_SB_REXTSLOG:
tp->t_rextslog_delta += delta;
break;
+ case XFS_TRANS_SB_RGCOUNT:
+ ASSERT(delta > 0);
+ tp->t_rgcount_delta += delta;
+ break;
default:
ASSERT(0);
return;
@@ -497,20 +507,22 @@ xfs_trans_apply_sb_deltas(
}
/*
- * Updating frextents requires careful handling because it does not
- * behave like the lazysb counters because we cannot rely on log
- * recovery in older kenels to recompute the value from the rtbitmap.
- * This means that the ondisk frextents must be consistent with the
- * rtbitmap.
+ * sb_frextents was added to the lazy sb counters when the rt groups
+ * feature was introduced. This is possible because we know that all
+ * kernels supporting rtgroups will also recompute frextents from the
+ * realtime bitmap.
+ *
+ * For older file systems, updating frextents requires careful handling
+ * because we cannot rely on log recovery in older kernels to recompute
+ * the value from the rtbitmap. This means that the ondisk frextents
+ * must be consistent with the rtbitmap.
*
* Therefore, log the frextents change to the ondisk superblock and
* update the incore superblock so that future calls to xfs_log_sb
* write the correct value ondisk.
- *
- * Don't touch m_frextents because it includes incore reservations,
- * and those are handled by the unreserve function.
*/
- if (tp->t_frextents_delta || tp->t_res_frextents_delta) {
+ if ((tp->t_frextents_delta || tp->t_res_frextents_delta) &&
+ !xfs_has_rtgroups(tp->t_mountp)) {
struct xfs_mount *mp = tp->t_mountp;
int64_t rtxdelta;
@@ -536,6 +548,18 @@ xfs_trans_apply_sb_deltas(
}
if (tp->t_rextsize_delta) {
be32_add_cpu(&sbp->sb_rextsize, tp->t_rextsize_delta);
+
+ /*
+ * Because the ondisk sb records rtgroup size in units of rt
+ * extents, any time we update the rt extent size we have to
+ * recompute the ondisk rtgroup block log. The incore values
+ * will be recomputed in xfs_trans_unreserve_and_mod_sb.
+ */
+ if (xfs_has_rtgroups(tp->t_mountp)) {
+ sbp->sb_rgblklog = xfs_compute_rgblklog(
+ be32_to_cpu(sbp->sb_rgextents),
+ be32_to_cpu(sbp->sb_rextsize));
+ }
whole = 1;
}
if (tp->t_rbmblocks_delta) {
@@ -554,6 +578,10 @@ xfs_trans_apply_sb_deltas(
sbp->sb_rextslog += tp->t_rextslog_delta;
whole = 1;
}
+ if (tp->t_rgcount_delta) {
+ be32_add_cpu(&sbp->sb_rgcount, tp->t_rgcount_delta);
+ whole = 1;
+ }
xfs_trans_buf_set_type(tp, bp, XFS_BLFT_SB_BUF);
if (whole)
@@ -618,7 +646,7 @@ xfs_trans_unreserve_and_mod_sb(
}
ASSERT(tp->t_rtx_res || tp->t_frextents_delta >= 0);
- if (tp->t_flags & XFS_TRANS_SB_DIRTY) {
+ if (xfs_has_rtgroups(mp) || (tp->t_flags & XFS_TRANS_SB_DIRTY)) {
rtxdelta += tp->t_frextents_delta;
ASSERT(rtxdelta >= 0);
}
@@ -651,23 +679,21 @@ xfs_trans_unreserve_and_mod_sb(
mp->m_sb.sb_icount += idelta;
mp->m_sb.sb_ifree += ifreedelta;
/*
- * Do not touch sb_frextents here because we are dealing with incore
- * reservation. sb_frextents is not part of the lazy sb counters so it
- * must be consistent with the ondisk rtbitmap and must never include
- * incore reservations.
+ * Do not touch sb_frextents here because it is handled in
+ * xfs_trans_apply_sb_deltas for file systems where it isn't a lazy
+ * counter anyway.
*/
mp->m_sb.sb_dblocks += tp->t_dblocks_delta;
mp->m_sb.sb_agcount += tp->t_agcount_delta;
mp->m_sb.sb_imax_pct += tp->t_imaxpct_delta;
- mp->m_sb.sb_rextsize += tp->t_rextsize_delta;
- if (tp->t_rextsize_delta) {
- mp->m_rtxblklog = log2_if_power2(mp->m_sb.sb_rextsize);
- mp->m_rtxblkmask = mask64_if_power2(mp->m_sb.sb_rextsize);
- }
+ if (tp->t_rextsize_delta)
+ xfs_mount_sb_set_rextsize(mp, &mp->m_sb,
+ mp->m_sb.sb_rextsize + tp->t_rextsize_delta);
mp->m_sb.sb_rbmblocks += tp->t_rbmblocks_delta;
mp->m_sb.sb_rblocks += tp->t_rblocks_delta;
mp->m_sb.sb_rextents += tp->t_rextents_delta;
mp->m_sb.sb_rextslog += tp->t_rextslog_delta;
+ mp->m_sb.sb_rgcount += tp->t_rgcount_delta;
spin_unlock(&mp->m_sb_lock);
/*
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index f06cc0f41665..71c2e82e4dad 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -148,6 +148,7 @@ typedef struct xfs_trans {
int64_t t_rblocks_delta;/* superblock rblocks change */
int64_t t_rextents_delta;/* superblocks rextents chg */
int64_t t_rextslog_delta;/* superblocks rextslog chg */
+ int64_t t_rgcount_delta; /* realtime group count */
struct list_head t_items; /* log item descriptors */
struct list_head t_busy; /* list of busy extents */
struct list_head t_dfops; /* deferred operations */
@@ -214,6 +215,7 @@ xfs_trans_read_buf(
}
struct xfs_buf *xfs_trans_getsb(struct xfs_trans *);
+struct xfs_buf *xfs_trans_getrtsb(struct xfs_trans *tp);
void xfs_trans_brelse(xfs_trans_t *, struct xfs_buf *);
void xfs_trans_bjoin(xfs_trans_t *, struct xfs_buf *);
diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c
index e28ab74af4f0..8e886ecfd69a 100644
--- a/fs/xfs/xfs_trans_buf.c
+++ b/fs/xfs/xfs_trans_buf.c
@@ -168,12 +168,11 @@ xfs_trans_get_buf_map(
/*
* Get and lock the superblock buffer for the given transaction.
*/
-struct xfs_buf *
-xfs_trans_getsb(
- struct xfs_trans *tp)
+static struct xfs_buf *
+__xfs_trans_getsb(
+ struct xfs_trans *tp,
+ struct xfs_buf *bp)
{
- struct xfs_buf *bp = tp->t_mountp->m_sb_bp;
-
/*
* Just increment the lock recursion count if the buffer is already
* attached to this transaction.
@@ -197,6 +196,22 @@ xfs_trans_getsb(
return bp;
}
+struct xfs_buf *
+xfs_trans_getsb(
+ struct xfs_trans *tp)
+{
+ return __xfs_trans_getsb(tp, tp->t_mountp->m_sb_bp);
+}
+
+struct xfs_buf *
+xfs_trans_getrtsb(
+ struct xfs_trans *tp)
+{
+ if (!tp->t_mountp->m_rtsb_bp)
+ return NULL;
+ return __xfs_trans_getsb(tp, tp->t_mountp->m_rtsb_bp);
+}
+
/*
* Get and lock the buffer for the caller if it is not already
* locked within the given transaction. If it has not yet been