summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2018-04-04 12:44:02 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2018-04-04 12:44:02 -0700
commit547c43d777968228b1060b6f1b152b96215eb7b2 (patch)
tree3e256530397ec1e751d06ed23230bfe1daf4886c
parent2e08edc5c50a01dc52c005fd939c24476eaf55ef (diff)
parentdc1baa715bbfbb1902da942d06497e79b40e7bc7 (diff)
Merge tag 'xfs-4.17-merge-1' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux
Pull xfs updates from Darrick Wong: "Here's the first round of fixes for XFS for 4.17. The biggest new features this time around are the addition of lazytime support, further enhancement of the on-disk inode metadata verifiers, and a patch to smooth over some of the AGFL padding problems that have intermittently plagued users since 4.5. I forsee sending a second pull request next week with further bug fixes and speedups in the online scrub code and elsewhere. This series has been run through a full xfstests run over the weekend and through a quick xfstests run against this morning's master, with no major failures reported. Summary of changes for this release: - Various cleanups and code fixes - Implement lazytime as a mount option - Convert various on-disk metadata checks from asserts to -EFSCORRUPTED - Fix accounting problems with the rmap per-ag reservations - Refactorings and cleanups for xfs_log_force - Various bugfixes for the reflink code - Work around v5 AGFL padding problems to prevent fs shutdowns - Establish inode fork verifiers to inspect on-disk metadata correctness - Various online scrub fixes - Fix v5 swapext blowing up on deleted inodes" * tag 'xfs-4.17-merge-1' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux: (49 commits) xfs: do not log/recover swapext extent owner changes for deleted inodes xfs: clean up xfs_mount allocation and dynamic initializers xfs: remove dead inode version setting code xfs: catch inode allocation state mismatch corruption xfs: xfs_scrub_iallocbt_xref_rmap_inodes should use xref_set_corrupt xfs: flag inode corruption if parent ptr doesn't get us a real inode xfs: don't accept inode buffers with suspicious unlinked chains xfs: move inode extent size hint validation to libxfs xfs: record inode buf errors as a xref error in inobt scrubber xfs: remove xfs_buf parameter from inode scrub methods xfs: inode scrubber shouldn't bother with raw checks xfs: bmap scrubber should do rmap xref with bmap for sparse files xfs: refactor inode buffer verifier error logging xfs: refactor inode verifier error logging xfs: refactor bmap record validation xfs: sanity-check the unused space before trying to use it xfs: detect agfl count corruption and reset agfl xfs: unwind the try_again loop in xfs_log_force xfs: refactor xfs_log_force_lsn xfs: minor cleanup for xfs_reflink_end_cow ...
-rw-r--r--fs/inode.c12
-rw-r--r--fs/sync.c6
-rw-r--r--fs/xfs/kmem.c6
-rw-r--r--fs/xfs/kmem.h8
-rw-r--r--fs/xfs/libxfs/xfs_ag_resv.c39
-rw-r--r--fs/xfs/libxfs/xfs_ag_resv.h31
-rw-r--r--fs/xfs/libxfs/xfs_alloc.c139
-rw-r--r--fs/xfs/libxfs/xfs_alloc.h2
-rw-r--r--fs/xfs/libxfs/xfs_alloc_btree.c8
-rw-r--r--fs/xfs/libxfs/xfs_bmap.c51
-rw-r--r--fs/xfs/libxfs/xfs_bmap.h3
-rw-r--r--fs/xfs/libxfs/xfs_bmap_btree.c4
-rw-r--r--fs/xfs/libxfs/xfs_bmap_btree.h14
-rw-r--r--fs/xfs/libxfs/xfs_btree.c125
-rw-r--r--fs/xfs/libxfs/xfs_btree.h19
-rw-r--r--fs/xfs/libxfs/xfs_dir2.h2
-rw-r--r--fs/xfs/libxfs/xfs_dir2_block.c59
-rw-r--r--fs/xfs/libxfs/xfs_dir2_data.c78
-rw-r--r--fs/xfs/libxfs/xfs_dir2_leaf.c13
-rw-r--r--fs/xfs/libxfs/xfs_dir2_node.c16
-rw-r--r--fs/xfs/libxfs/xfs_format.h13
-rw-r--r--fs/xfs/libxfs/xfs_ialloc_btree.c9
-rw-r--r--fs/xfs/libxfs/xfs_inode_buf.c124
-rw-r--r--fs/xfs/libxfs/xfs_inode_buf.h5
-rw-r--r--fs/xfs/libxfs/xfs_inode_fork.c27
-rw-r--r--fs/xfs/libxfs/xfs_refcount_btree.c5
-rw-r--r--fs/xfs/libxfs/xfs_rmap_btree.c12
-rw-r--r--fs/xfs/libxfs/xfs_sb.c1
-rw-r--r--fs/xfs/scrub/agheader.c6
-rw-r--r--fs/xfs/scrub/attr.c2
-rw-r--r--fs/xfs/scrub/bmap.c174
-rw-r--r--fs/xfs/scrub/common.c24
-rw-r--r--fs/xfs/scrub/common.h13
-rw-r--r--fs/xfs/scrub/dir.c2
-rw-r--r--fs/xfs/scrub/ialloc.c5
-rw-r--r--fs/xfs/scrub/inode.c298
-rw-r--r--fs/xfs/scrub/parent.c12
-rw-r--r--fs/xfs/scrub/quota.c2
-rw-r--r--fs/xfs/scrub/rtbitmap.c3
-rw-r--r--fs/xfs/scrub/trace.h31
-rw-r--r--fs/xfs/xfs_aops.c20
-rw-r--r--fs/xfs/xfs_bmap_util.c44
-rw-r--r--fs/xfs/xfs_buf.c2
-rw-r--r--fs/xfs/xfs_buf_item.c10
-rw-r--r--fs/xfs/xfs_dquot.c6
-rw-r--r--fs/xfs/xfs_dquot_item.c11
-rw-r--r--fs/xfs/xfs_error.c29
-rw-r--r--fs/xfs/xfs_error.h3
-rw-r--r--fs/xfs/xfs_export.c2
-rw-r--r--fs/xfs/xfs_extent_busy.c5
-rw-r--r--fs/xfs/xfs_file.c52
-rw-r--r--fs/xfs/xfs_fsops.c2
-rw-r--r--fs/xfs/xfs_icache.c23
-rw-r--r--fs/xfs/xfs_inode.c11
-rw-r--r--fs/xfs/xfs_inode.h4
-rw-r--r--fs/xfs/xfs_inode_item.c29
-rw-r--r--fs/xfs/xfs_iops.c17
-rw-r--r--fs/xfs/xfs_log.c376
-rw-r--r--fs/xfs/xfs_log.h15
-rw-r--r--fs/xfs/xfs_log_cil.c2
-rw-r--r--fs/xfs/xfs_log_recover.c100
-rw-r--r--fs/xfs/xfs_mount.c4
-rw-r--r--fs/xfs/xfs_mount.h13
-rw-r--r--fs/xfs/xfs_reflink.c25
-rw-r--r--fs/xfs/xfs_super.c71
-rw-r--r--fs/xfs/xfs_trace.h9
-rw-r--r--fs/xfs/xfs_trans.c32
-rw-r--r--fs/xfs/xfs_trans_ail.c152
-rw-r--r--fs/xfs/xfs_trans_buf.c4
-rw-r--r--fs/xfs/xfs_trans_inode.c14
-rw-r--r--fs/xfs/xfs_trans_priv.h42
71 files changed, 1365 insertions, 1167 deletions
diff --git a/fs/inode.c b/fs/inode.c
index ef362364d396..b153aeaa61ea 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -346,9 +346,8 @@ void inc_nlink(struct inode *inode)
}
EXPORT_SYMBOL(inc_nlink);
-void address_space_init_once(struct address_space *mapping)
+static void __address_space_init_once(struct address_space *mapping)
{
- memset(mapping, 0, sizeof(*mapping));
INIT_RADIX_TREE(&mapping->page_tree, GFP_ATOMIC | __GFP_ACCOUNT);
spin_lock_init(&mapping->tree_lock);
init_rwsem(&mapping->i_mmap_rwsem);
@@ -356,6 +355,12 @@ void address_space_init_once(struct address_space *mapping)
spin_lock_init(&mapping->private_lock);
mapping->i_mmap = RB_ROOT_CACHED;
}
+
+void address_space_init_once(struct address_space *mapping)
+{
+ memset(mapping, 0, sizeof(*mapping));
+ __address_space_init_once(mapping);
+}
EXPORT_SYMBOL(address_space_init_once);
/*
@@ -371,7 +376,7 @@ void inode_init_once(struct inode *inode)
INIT_LIST_HEAD(&inode->i_io_list);
INIT_LIST_HEAD(&inode->i_wb_list);
INIT_LIST_HEAD(&inode->i_lru);
- address_space_init_once(&inode->i_data);
+ __address_space_init_once(&inode->i_data);
i_size_ordered_init(inode);
}
EXPORT_SYMBOL(inode_init_once);
@@ -1533,7 +1538,6 @@ retry:
if (atomic_dec_and_lock(&inode->i_count, &inode->i_lock)) {
if (inode->i_nlink && (inode->i_state & I_DIRTY_TIME)) {
atomic_inc(&inode->i_count);
- inode->i_state &= ~I_DIRTY_TIME;
spin_unlock(&inode->i_lock);
trace_writeback_lazytime_iput(inode);
mark_inode_dirty_sync(inode);
diff --git a/fs/sync.c b/fs/sync.c
index 9908a114d506..b54e0541ad89 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -192,12 +192,8 @@ int vfs_fsync_range(struct file *file, loff_t start, loff_t end, int datasync)
if (!file->f_op->fsync)
return -EINVAL;
- if (!datasync && (inode->i_state & I_DIRTY_TIME)) {
- spin_lock(&inode->i_lock);
- inode->i_state &= ~I_DIRTY_TIME;
- spin_unlock(&inode->i_lock);
+ if (!datasync && (inode->i_state & I_DIRTY_TIME))
mark_inode_dirty_sync(inode);
- }
return file->f_op->fsync(file, start, end, datasync);
}
EXPORT_SYMBOL(vfs_fsync_range);
diff --git a/fs/xfs/kmem.c b/fs/xfs/kmem.c
index 393b6849aeb3..7bace03dc9dc 100644
--- a/fs/xfs/kmem.c
+++ b/fs/xfs/kmem.c
@@ -46,13 +46,13 @@ kmem_alloc(size_t size, xfs_km_flags_t flags)
}
void *
-kmem_zalloc_large(size_t size, xfs_km_flags_t flags)
+kmem_alloc_large(size_t size, xfs_km_flags_t flags)
{
unsigned nofs_flag = 0;
void *ptr;
gfp_t lflags;
- ptr = kmem_zalloc(size, flags | KM_MAYFAIL);
+ ptr = kmem_alloc(size, flags | KM_MAYFAIL);
if (ptr)
return ptr;
@@ -67,7 +67,7 @@ kmem_zalloc_large(size_t size, xfs_km_flags_t flags)
nofs_flag = memalloc_nofs_save();
lflags = kmem_flags_convert(flags);
- ptr = __vmalloc(size, lflags | __GFP_ZERO, PAGE_KERNEL);
+ ptr = __vmalloc(size, lflags, PAGE_KERNEL);
if (flags & KM_NOFS)
memalloc_nofs_restore(nofs_flag);
diff --git a/fs/xfs/kmem.h b/fs/xfs/kmem.h
index 4b87472f35bc..6023b594ead7 100644
--- a/fs/xfs/kmem.h
+++ b/fs/xfs/kmem.h
@@ -71,7 +71,7 @@ kmem_flags_convert(xfs_km_flags_t flags)
}
extern void *kmem_alloc(size_t, xfs_km_flags_t);
-extern void *kmem_zalloc_large(size_t size, xfs_km_flags_t);
+extern void *kmem_alloc_large(size_t size, xfs_km_flags_t);
extern void *kmem_realloc(const void *, size_t, xfs_km_flags_t);
static inline void kmem_free(const void *ptr)
{
@@ -85,6 +85,12 @@ kmem_zalloc(size_t size, xfs_km_flags_t flags)
return kmem_alloc(size, flags | KM_ZERO);
}
+static inline void *
+kmem_zalloc_large(size_t size, xfs_km_flags_t flags)
+{
+ return kmem_alloc_large(size, flags | KM_ZERO);
+}
+
/*
* Zone interfaces
*/
diff --git a/fs/xfs/libxfs/xfs_ag_resv.c b/fs/xfs/libxfs/xfs_ag_resv.c
index 2291f4224e24..03885a968de8 100644
--- a/fs/xfs/libxfs/xfs_ag_resv.c
+++ b/fs/xfs/libxfs/xfs_ag_resv.c
@@ -95,13 +95,13 @@ xfs_ag_resv_critical(
switch (type) {
case XFS_AG_RESV_METADATA:
- avail = pag->pagf_freeblks - pag->pag_agfl_resv.ar_reserved;
+ avail = pag->pagf_freeblks - pag->pag_rmapbt_resv.ar_reserved;
orig = pag->pag_meta_resv.ar_asked;
break;
- case XFS_AG_RESV_AGFL:
+ case XFS_AG_RESV_RMAPBT:
avail = pag->pagf_freeblks + pag->pagf_flcount -
pag->pag_meta_resv.ar_reserved;
- orig = pag->pag_agfl_resv.ar_asked;
+ orig = pag->pag_rmapbt_resv.ar_asked;
break;
default:
ASSERT(0);
@@ -126,10 +126,10 @@ xfs_ag_resv_needed(
{
xfs_extlen_t len;
- len = pag->pag_meta_resv.ar_reserved + pag->pag_agfl_resv.ar_reserved;
+ len = pag->pag_meta_resv.ar_reserved + pag->pag_rmapbt_resv.ar_reserved;
switch (type) {
case XFS_AG_RESV_METADATA:
- case XFS_AG_RESV_AGFL:
+ case XFS_AG_RESV_RMAPBT:
len -= xfs_perag_resv(pag, type)->ar_reserved;
break;
case XFS_AG_RESV_NONE:
@@ -160,10 +160,11 @@ __xfs_ag_resv_free(
if (pag->pag_agno == 0)
pag->pag_mount->m_ag_max_usable += resv->ar_asked;
/*
- * AGFL blocks are always considered "free", so whatever
- * was reserved at mount time must be given back at umount.
+ * RMAPBT blocks come from the AGFL and AGFL blocks are always
+ * considered "free", so whatever was reserved at mount time must be
+ * given back at umount.
*/
- if (type == XFS_AG_RESV_AGFL)
+ if (type == XFS_AG_RESV_RMAPBT)
oldresv = resv->ar_orig_reserved;
else
oldresv = resv->ar_reserved;
@@ -185,7 +186,7 @@ xfs_ag_resv_free(
int error;
int err2;
- error = __xfs_ag_resv_free(pag, XFS_AG_RESV_AGFL);
+ error = __xfs_ag_resv_free(pag, XFS_AG_RESV_RMAPBT);
err2 = __xfs_ag_resv_free(pag, XFS_AG_RESV_METADATA);
if (err2 && !error)
error = err2;
@@ -284,15 +285,15 @@ xfs_ag_resv_init(
}
}
- /* Create the AGFL metadata reservation */
- if (pag->pag_agfl_resv.ar_asked == 0) {
+ /* Create the RMAPBT metadata reservation */
+ if (pag->pag_rmapbt_resv.ar_asked == 0) {
ask = used = 0;
error = xfs_rmapbt_calc_reserves(mp, agno, &ask, &used);
if (error)
goto out;
- error = __xfs_ag_resv_init(pag, XFS_AG_RESV_AGFL, ask, used);
+ error = __xfs_ag_resv_init(pag, XFS_AG_RESV_RMAPBT, ask, used);
if (error)
goto out;
}
@@ -304,7 +305,7 @@ xfs_ag_resv_init(
return error;
ASSERT(xfs_perag_resv(pag, XFS_AG_RESV_METADATA)->ar_reserved +
- xfs_perag_resv(pag, XFS_AG_RESV_AGFL)->ar_reserved <=
+ xfs_perag_resv(pag, XFS_AG_RESV_RMAPBT)->ar_reserved <=
pag->pagf_freeblks + pag->pagf_flcount);
#endif
out:
@@ -325,8 +326,10 @@ xfs_ag_resv_alloc_extent(
trace_xfs_ag_resv_alloc_extent(pag, type, args->len);
switch (type) {
- case XFS_AG_RESV_METADATA:
case XFS_AG_RESV_AGFL:
+ return;
+ case XFS_AG_RESV_METADATA:
+ case XFS_AG_RESV_RMAPBT:
resv = xfs_perag_resv(pag, type);
break;
default:
@@ -341,7 +344,7 @@ xfs_ag_resv_alloc_extent(
len = min_t(xfs_extlen_t, args->len, resv->ar_reserved);
resv->ar_reserved -= len;
- if (type == XFS_AG_RESV_AGFL)
+ if (type == XFS_AG_RESV_RMAPBT)
return;
/* Allocations of reserved blocks only need on-disk sb updates... */
xfs_trans_mod_sb(args->tp, XFS_TRANS_SB_RES_FDBLOCKS, -(int64_t)len);
@@ -365,8 +368,10 @@ xfs_ag_resv_free_extent(
trace_xfs_ag_resv_free_extent(pag, type, len);
switch (type) {
- case XFS_AG_RESV_METADATA:
case XFS_AG_RESV_AGFL:
+ return;
+ case XFS_AG_RESV_METADATA:
+ case XFS_AG_RESV_RMAPBT:
resv = xfs_perag_resv(pag, type);
break;
default:
@@ -379,7 +384,7 @@ xfs_ag_resv_free_extent(
leftover = min_t(xfs_extlen_t, len, resv->ar_asked - resv->ar_reserved);
resv->ar_reserved += leftover;
- if (type == XFS_AG_RESV_AGFL)
+ if (type == XFS_AG_RESV_RMAPBT)
return;
/* Freeing into the reserved pool only requires on-disk update... */
xfs_trans_mod_sb(tp, XFS_TRANS_SB_RES_FDBLOCKS, len);
diff --git a/fs/xfs/libxfs/xfs_ag_resv.h b/fs/xfs/libxfs/xfs_ag_resv.h
index 8d6c687deef3..938f2f96c5e8 100644
--- a/fs/xfs/libxfs/xfs_ag_resv.h
+++ b/fs/xfs/libxfs/xfs_ag_resv.h
@@ -32,4 +32,35 @@ void xfs_ag_resv_alloc_extent(struct xfs_perag *pag, enum xfs_ag_resv_type type,
void xfs_ag_resv_free_extent(struct xfs_perag *pag, enum xfs_ag_resv_type type,
struct xfs_trans *tp, xfs_extlen_t len);
+/*
+ * RMAPBT reservation accounting wrappers. Since rmapbt blocks are sourced from
+ * the AGFL, they are allocated one at a time and the reservation updates don't
+ * require a transaction.
+ */
+static inline void
+xfs_ag_resv_rmapbt_alloc(
+ struct xfs_mount *mp,
+ xfs_agnumber_t agno)
+{
+ struct xfs_alloc_arg args = {0};
+ struct xfs_perag *pag;
+
+ args.len = 1;
+ pag = xfs_perag_get(mp, agno);
+ xfs_ag_resv_alloc_extent(pag, XFS_AG_RESV_RMAPBT, &args);
+ xfs_perag_put(pag);
+}
+
+static inline void
+xfs_ag_resv_rmapbt_free(
+ struct xfs_mount *mp,
+ xfs_agnumber_t agno)
+{
+ struct xfs_perag *pag;
+
+ pag = xfs_perag_get(mp, agno);
+ xfs_ag_resv_free_extent(pag, XFS_AG_RESV_RMAPBT, NULL, 1);
+ xfs_perag_put(pag);
+}
+
#endif /* __XFS_AG_RESV_H__ */
diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c
index c02781a4c091..39387bdd225d 100644
--- a/fs/xfs/libxfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -53,6 +53,23 @@ STATIC int xfs_alloc_ag_vextent_size(xfs_alloc_arg_t *);
STATIC int xfs_alloc_ag_vextent_small(xfs_alloc_arg_t *,
xfs_btree_cur_t *, xfs_agblock_t *, xfs_extlen_t *, int *);
+/*
+ * Size of the AGFL. For CRC-enabled filesystes we steal a couple of slots in
+ * the beginning of the block for a proper header with the location information
+ * and CRC.
+ */
+unsigned int
+xfs_agfl_size(
+ struct xfs_mount *mp)
+{
+ unsigned int size = mp->m_sb.sb_sectsize;
+
+ if (xfs_sb_version_hascrc(&mp->m_sb))
+ size -= sizeof(struct xfs_agfl);
+
+ return size / sizeof(xfs_agblock_t);
+}
+
unsigned int
xfs_refc_block(
struct xfs_mount *mp)
@@ -550,7 +567,7 @@ xfs_agfl_verify(
if (bp->b_pag && be32_to_cpu(agfl->agfl_seqno) != bp->b_pag->pag_agno)
return __this_address;
- for (i = 0; i < XFS_AGFL_SIZE(mp); i++) {
+ for (i = 0; i < xfs_agfl_size(mp); i++) {
if (be32_to_cpu(agfl->agfl_bno[i]) != NULLAGBLOCK &&
be32_to_cpu(agfl->agfl_bno[i]) >= mp->m_sb.sb_agblocks)
return __this_address;
@@ -1564,7 +1581,6 @@ xfs_alloc_ag_vextent_small(
int *stat) /* status: 0-freelist, 1-normal/none */
{
struct xfs_owner_info oinfo;
- struct xfs_perag *pag;
int error;
xfs_agblock_t fbno;
xfs_extlen_t flen;
@@ -1616,18 +1632,13 @@ xfs_alloc_ag_vextent_small(
/*
* If we're feeding an AGFL block to something that
* doesn't live in the free space, we need to clear
- * out the OWN_AG rmap and add the block back to
- * the AGFL per-AG reservation.
+ * out the OWN_AG rmap.
*/
xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_AG);
error = xfs_rmap_free(args->tp, args->agbp, args->agno,
fbno, 1, &oinfo);
if (error)
goto error0;
- pag = xfs_perag_get(args->mp, args->agno);
- xfs_ag_resv_free_extent(pag, XFS_AG_RESV_AGFL,
- args->tp, 1);
- xfs_perag_put(pag);
*stat = 0;
return 0;
@@ -1911,14 +1922,12 @@ xfs_free_ag_extent(
XFS_STATS_INC(mp, xs_freex);
XFS_STATS_ADD(mp, xs_freeb, len);
- trace_xfs_free_extent(mp, agno, bno, len, type == XFS_AG_RESV_AGFL,
- haveleft, haveright);
+ trace_xfs_free_extent(mp, agno, bno, len, type, haveleft, haveright);
return 0;
error0:
- trace_xfs_free_extent(mp, agno, bno, len, type == XFS_AG_RESV_AGFL,
- -1, -1);
+ trace_xfs_free_extent(mp, agno, bno, len, type, -1, -1);
if (bno_cur)
xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
if (cnt_cur)
@@ -2054,6 +2063,93 @@ xfs_alloc_space_available(
}
/*
+ * Check the agfl fields of the agf for inconsistency or corruption. The purpose
+ * is to detect an agfl header padding mismatch between current and early v5
+ * kernels. This problem manifests as a 1-slot size difference between the
+ * on-disk flcount and the active [first, last] range of a wrapped agfl. This
+ * may also catch variants of agfl count corruption unrelated to padding. Either
+ * way, we'll reset the agfl and warn the user.
+ *
+ * Return true if a reset is required before the agfl can be used, false
+ * otherwise.
+ */
+static bool
+xfs_agfl_needs_reset(
+ struct xfs_mount *mp,
+ struct xfs_agf *agf)
+{
+ uint32_t f = be32_to_cpu(agf->agf_flfirst);
+ uint32_t l = be32_to_cpu(agf->agf_fllast);
+ uint32_t c = be32_to_cpu(agf->agf_flcount);
+ int agfl_size = xfs_agfl_size(mp);
+ int active;
+
+ /* no agfl header on v4 supers */
+ if (!xfs_sb_version_hascrc(&mp->m_sb))
+ return false;
+
+ /*
+ * The agf read verifier catches severe corruption of these fields.
+ * Repeat some sanity checks to cover a packed -> unpacked mismatch if
+ * the verifier allows it.
+ */
+ if (f >= agfl_size || l >= agfl_size)
+ return true;
+ if (c > agfl_size)
+ return true;
+
+ /*
+ * Check consistency between the on-disk count and the active range. An
+ * agfl padding mismatch manifests as an inconsistent flcount.
+ */
+ if (c && l >= f)
+ active = l - f + 1;
+ else if (c)
+ active = agfl_size - f + l + 1;
+ else
+ active = 0;
+
+ return active != c;
+}
+
+/*
+ * Reset the agfl to an empty state. Ignore/drop any existing blocks since the
+ * agfl content cannot be trusted. Warn the user that a repair is required to
+ * recover leaked blocks.
+ *
+ * The purpose of this mechanism is to handle filesystems affected by the agfl
+ * header padding mismatch problem. A reset keeps the filesystem online with a
+ * relatively minor free space accounting inconsistency rather than suffer the
+ * inevitable crash from use of an invalid agfl block.
+ */
+static void
+xfs_agfl_reset(
+ struct xfs_trans *tp,
+ struct xfs_buf *agbp,
+ struct xfs_perag *pag)
+{
+ struct xfs_mount *mp = tp->t_mountp;
+ struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp);
+
+ ASSERT(pag->pagf_agflreset);
+ trace_xfs_agfl_reset(mp, agf, 0, _RET_IP_);
+
+ xfs_warn(mp,
+ "WARNING: Reset corrupted AGFL on AG %u. %d blocks leaked. "
+ "Please unmount and run xfs_repair.",
+ pag->pag_agno, pag->pagf_flcount);
+
+ agf->agf_flfirst = 0;
+ agf->agf_fllast = cpu_to_be32(xfs_agfl_size(mp) - 1);
+ agf->agf_flcount = 0;
+ xfs_alloc_log_agf(tp, agbp, XFS_AGF_FLFIRST | XFS_AGF_FLLAST |
+ XFS_AGF_FLCOUNT);
+
+ pag->pagf_flcount = 0;
+ pag->pagf_agflreset = false;
+}
+
+/*
* Decide whether to use this allocation group for this allocation.
* If so, fix up the btree freelist's size.
*/
@@ -2114,6 +2210,10 @@ xfs_alloc_fix_freelist(
}
}
+ /* reset a padding mismatched agfl before final free space check */
+ if (pag->pagf_agflreset)
+ xfs_agfl_reset(tp, agbp, pag);
+
/* If there isn't enough total space or single-extent, reject it. */
need = xfs_alloc_min_freelist(mp, pag);
if (!xfs_alloc_space_available(args, need, flags))
@@ -2266,10 +2366,11 @@ xfs_alloc_get_freelist(
bno = be32_to_cpu(agfl_bno[be32_to_cpu(agf->agf_flfirst)]);
be32_add_cpu(&agf->agf_flfirst, 1);
xfs_trans_brelse(tp, agflbp);
- if (be32_to_cpu(agf->agf_flfirst) == XFS_AGFL_SIZE(mp))
+ if (be32_to_cpu(agf->agf_flfirst) == xfs_agfl_size(mp))
agf->agf_flfirst = 0;
pag = xfs_perag_get(mp, be32_to_cpu(agf->agf_seqno));
+ ASSERT(!pag->pagf_agflreset);
be32_add_cpu(&agf->agf_flcount, -1);
xfs_trans_agflist_delta(tp, -1);
pag->pagf_flcount--;
@@ -2377,10 +2478,11 @@ xfs_alloc_put_freelist(
be32_to_cpu(agf->agf_seqno), &agflbp)))
return error;
be32_add_cpu(&agf->agf_fllast, 1);
- if (be32_to_cpu(agf->agf_fllast) == XFS_AGFL_SIZE(mp))
+ if (be32_to_cpu(agf->agf_fllast) == xfs_agfl_size(mp))
agf->agf_fllast = 0;
pag = xfs_perag_get(mp, be32_to_cpu(agf->agf_seqno));
+ ASSERT(!pag->pagf_agflreset);
be32_add_cpu(&agf->agf_flcount, 1);
xfs_trans_agflist_delta(tp, 1);
pag->pagf_flcount++;
@@ -2395,7 +2497,7 @@ xfs_alloc_put_freelist(
xfs_alloc_log_agf(tp, agbp, logflags);
- ASSERT(be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp));
+ ASSERT(be32_to_cpu(agf->agf_flcount) <= xfs_agfl_size(mp));
agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, agflbp);
blockp = &agfl_bno[be32_to_cpu(agf->agf_fllast)];
@@ -2428,9 +2530,9 @@ xfs_agf_verify(
if (!(agf->agf_magicnum == cpu_to_be32(XFS_AGF_MAGIC) &&
XFS_AGF_GOOD_VERSION(be32_to_cpu(agf->agf_versionnum)) &&
be32_to_cpu(agf->agf_freeblks) <= be32_to_cpu(agf->agf_length) &&
- be32_to_cpu(agf->agf_flfirst) < XFS_AGFL_SIZE(mp) &&
- be32_to_cpu(agf->agf_fllast) < XFS_AGFL_SIZE(mp) &&
- be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp)))
+ be32_to_cpu(agf->agf_flfirst) < xfs_agfl_size(mp) &&
+ be32_to_cpu(agf->agf_fllast) < xfs_agfl_size(mp) &&
+ be32_to_cpu(agf->agf_flcount) <= xfs_agfl_size(mp)))
return __this_address;
if (be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) < 1 ||
@@ -2588,6 +2690,7 @@ xfs_alloc_read_agf(
pag->pagb_count = 0;
pag->pagb_tree = RB_ROOT;
pag->pagf_init = 1;
+ pag->pagf_agflreset = xfs_agfl_needs_reset(mp, agf);
}
#ifdef DEBUG
else if (!XFS_FORCED_SHUTDOWN(mp)) {
diff --git a/fs/xfs/libxfs/xfs_alloc.h b/fs/xfs/libxfs/xfs_alloc.h
index 65a0cafe06e4..a311a2414a6b 100644
--- a/fs/xfs/libxfs/xfs_alloc.h
+++ b/fs/xfs/libxfs/xfs_alloc.h
@@ -26,6 +26,8 @@ struct xfs_trans;
extern struct workqueue_struct *xfs_alloc_wq;
+unsigned int xfs_agfl_size(struct xfs_mount *mp);
+
/*
* Freespace allocation types. Argument to xfs_alloc_[v]extent.
*/
diff --git a/fs/xfs/libxfs/xfs_alloc_btree.c b/fs/xfs/libxfs/xfs_alloc_btree.c
index 6840b588187e..b451649ba176 100644
--- a/fs/xfs/libxfs/xfs_alloc_btree.c
+++ b/fs/xfs/libxfs/xfs_alloc_btree.c
@@ -74,18 +74,13 @@ xfs_allocbt_alloc_block(
int error;
xfs_agblock_t bno;
- XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
-
/* Allocate the new block from the freelist. If we can't, give up. */
error = xfs_alloc_get_freelist(cur->bc_tp, cur->bc_private.a.agbp,
&bno, 1);
- if (error) {
- XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
+ if (error)
return error;
- }
if (bno == NULLAGBLOCK) {
- XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
*stat = 0;
return 0;
}
@@ -95,7 +90,6 @@ xfs_allocbt_alloc_block(
xfs_trans_agbtree_delta(cur->bc_tp, 1);
new->s = cpu_to_be32(bno);
- XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
*stat = 1;
return 0;
}
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index daae00ed30c5..3b03d886df66 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -1244,8 +1244,9 @@ xfs_iread_extents(
xfs_warn(ip->i_mount,
"corrupt dinode %Lu, (btree extents).",
(unsigned long long) ip->i_ino);
- XFS_CORRUPTION_ERROR(__func__,
- XFS_ERRLEVEL_LOW, ip->i_mount, block);
+ xfs_inode_verifier_error(ip, -EFSCORRUPTED,
+ __func__, block, sizeof(*block),
+ __this_address);
error = -EFSCORRUPTED;
goto out_brelse;
}
@@ -1261,11 +1262,15 @@ xfs_iread_extents(
*/
frp = XFS_BMBT_REC_ADDR(mp, block, 1);
for (j = 0; j < num_recs; j++, frp++, i++) {
+ xfs_failaddr_t fa;
+
xfs_bmbt_disk_get_all(frp, &new);
- if (!xfs_bmbt_validate_extent(mp, whichfork, &new)) {
- XFS_ERROR_REPORT("xfs_bmap_read_extents(2)",
- XFS_ERRLEVEL_LOW, mp);
+ fa = xfs_bmap_validate_extent(ip, whichfork, &new);
+ if (fa) {
error = -EFSCORRUPTED;
+ xfs_inode_verifier_error(ip, error,
+ "xfs_iread_extents(2)",
+ frp, sizeof(*frp), fa);
goto out_brelse;
}
xfs_iext_insert(ip, &icur, &new, state);
@@ -6154,3 +6159,39 @@ xfs_bmap_finish_one(
return error;
}
+
+/* Check that an inode's extent does not have invalid flags or bad ranges. */
+xfs_failaddr_t
+xfs_bmap_validate_extent(
+ struct xfs_inode *ip,
+ int whichfork,
+ struct xfs_bmbt_irec *irec)
+{
+ struct xfs_mount *mp = ip->i_mount;
+ xfs_fsblock_t endfsb;
+ bool isrt;
+
+ isrt = XFS_IS_REALTIME_INODE(ip);
+ endfsb = irec->br_startblock + irec->br_blockcount - 1;
+ if (isrt) {
+ if (!xfs_verify_rtbno(mp, irec->br_startblock))
+ return __this_address;
+ if (!xfs_verify_rtbno(mp, endfsb))
+ return __this_address;
+ } else {
+ if (!xfs_verify_fsbno(mp, irec->br_startblock))
+ return __this_address;
+ if (!xfs_verify_fsbno(mp, endfsb))
+ return __this_address;
+ if (XFS_FSB_TO_AGNO(mp, irec->br_startblock) !=
+ XFS_FSB_TO_AGNO(mp, endfsb))
+ return __this_address;
+ }
+ if (irec->br_state != XFS_EXT_NORM) {
+ if (whichfork != XFS_DATA_FORK)
+ return __this_address;
+ if (!xfs_sb_version_hasextflgbit(&mp->m_sb))
+ return __this_address;
+ }
+ return NULL;
+}
diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h
index e36d75799cd5..f3be6416260b 100644
--- a/fs/xfs/libxfs/xfs_bmap.h
+++ b/fs/xfs/libxfs/xfs_bmap.h
@@ -274,4 +274,7 @@ static inline int xfs_bmap_fork_to_state(int whichfork)
}
}
+xfs_failaddr_t xfs_bmap_validate_extent(struct xfs_inode *ip, int whichfork,
+ struct xfs_bmbt_irec *irec);
+
#endif /* __XFS_BMAP_H__ */
diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c
index 9faf479aba49..d89d06bea6e3 100644
--- a/fs/xfs/libxfs/xfs_bmap_btree.c
+++ b/fs/xfs/libxfs/xfs_bmap_btree.c
@@ -272,10 +272,10 @@ xfs_bmbt_alloc_block(
cur->bc_private.b.dfops->dop_low = true;
}
if (WARN_ON_ONCE(args.fsbno == NULLFSBLOCK)) {
- XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
*stat = 0;
return 0;
}
+
ASSERT(args.len == 1);
cur->bc_private.b.firstblock = args.fsbno;
cur->bc_private.b.allocated++;
@@ -286,12 +286,10 @@ xfs_bmbt_alloc_block(
new->l = cpu_to_be64(args.fsbno);
- XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
*stat = 1;
return 0;
error0:
- XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
return error;
}
diff --git a/fs/xfs/libxfs/xfs_bmap_btree.h b/fs/xfs/libxfs/xfs_bmap_btree.h
index 135b8c56d23e..e4505746ccaa 100644
--- a/fs/xfs/libxfs/xfs_bmap_btree.h
+++ b/fs/xfs/libxfs/xfs_bmap_btree.h
@@ -118,18 +118,4 @@ extern int xfs_bmbt_change_owner(struct xfs_trans *tp, struct xfs_inode *ip,
extern struct xfs_btree_cur *xfs_bmbt_init_cursor(struct xfs_mount *,
struct xfs_trans *, struct xfs_inode *, int);
-/*
- * Check that the extent does not contain an invalid unwritten extent flag.
- */
-static inline bool xfs_bmbt_validate_extent(struct xfs_mount *mp, int whichfork,
- struct xfs_bmbt_irec *irec)
-{
- if (irec->br_state == XFS_EXT_NORM)
- return true;
- if (whichfork == XFS_DATA_FORK &&
- xfs_sb_version_hasextflgbit(&mp->m_sb))
- return true;
- return false;
-}
-
#endif /* __XFS_BMAP_BTREE_H__ */
diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c
index 79ee4a1951d1..edc0193358a5 100644
--- a/fs/xfs/libxfs/xfs_btree.c
+++ b/fs/xfs/libxfs/xfs_btree.c
@@ -1438,8 +1438,6 @@ xfs_btree_log_keys(
int first,
int last)
{
- XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
- XFS_BTREE_TRACE_ARGBII(cur, bp, first, last);
if (bp) {
xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF);
@@ -1450,8 +1448,6 @@ xfs_btree_log_keys(
xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip,
xfs_ilog_fbroot(cur->bc_private.b.whichfork));
}
-
- XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
}
/*
@@ -1464,15 +1460,12 @@ xfs_btree_log_recs(
int first,
int last)
{
- XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
- XFS_BTREE_TRACE_ARGBII(cur, bp, first, last);
xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF);
xfs_trans_log_buf(cur->bc_tp, bp,
xfs_btree_rec_offset(cur, first),
xfs_btree_rec_offset(cur, last + 1) - 1);
- XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
}
/*
@@ -1485,8 +1478,6 @@ xfs_btree_log_ptrs(
int first, /* index of first pointer to log */
int last) /* index of last pointer to log */
{
- XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
- XFS_BTREE_TRACE_ARGBII(cur, bp, first, last);
if (bp) {
struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
@@ -1501,7 +1492,6 @@ xfs_btree_log_ptrs(
xfs_ilog_fbroot(cur->bc_private.b.whichfork));
}
- XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
}
/*
@@ -1543,9 +1533,6 @@ xfs_btree_log_block(
XFS_BTREE_LBLOCK_CRC_LEN
};
- XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
- XFS_BTREE_TRACE_ARGBI(cur, bp, fields);
-
if (bp) {
int nbits;
@@ -1573,8 +1560,6 @@ xfs_btree_log_block(
xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip,
xfs_ilog_fbroot(cur->bc_private.b.whichfork));
}
-
- XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
}
/*
@@ -1593,9 +1578,6 @@ xfs_btree_increment(
int error; /* error return value */
int lev;
- XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
- XFS_BTREE_TRACE_ARGI(cur, level);
-
ASSERT(level < cur->bc_nlevels);
/* Read-ahead to the right at this level. */
@@ -1671,17 +1653,14 @@ xfs_btree_increment(
cur->bc_ptrs[lev] = 1;
}
out1:
- XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
*stat = 1;
return 0;
out0:
- XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
*stat = 0;
return 0;
error0:
- XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
return error;
}
@@ -1701,9 +1680,6 @@ xfs_btree_decrement(
int lev;
union xfs_btree_ptr ptr;
- XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
- XFS_BTREE_TRACE_ARGI(cur, level);
-
ASSERT(level < cur->bc_nlevels);
/* Read-ahead to the left at this level. */
@@ -1769,17 +1745,14 @@ xfs_btree_decrement(
cur->bc_ptrs[lev] = xfs_btree_get_numrecs(block);
}
out1:
- XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
*stat = 1;
return 0;
out0:
- XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
*stat = 0;
return 0;
error0:
- XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
return error;
}
@@ -1881,9 +1854,6 @@ xfs_btree_lookup(
union xfs_btree_ptr *pp; /* ptr to btree block */
union xfs_btree_ptr ptr; /* ptr to btree block */
- XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
- XFS_BTREE_TRACE_ARGI(cur, dir);
-
XFS_BTREE_STATS_INC(cur, lookup);
/* No such thing as a zero-level tree. */
@@ -1929,7 +1899,6 @@ xfs_btree_lookup(
ASSERT(level == 0 && cur->bc_nlevels == 1);
cur->bc_ptrs[0] = dir != XFS_LOOKUP_LE;
- XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
*stat = 0;
return 0;
}
@@ -2004,7 +1973,6 @@ xfs_btree_lookup(
if (error)
goto error0;
XFS_WANT_CORRUPTED_RETURN(cur->bc_mp, i == 1);
- XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
*stat = 1;
return 0;
}
@@ -2019,11 +1987,9 @@ xfs_btree_lookup(
*stat = 1;
else
*stat = 0;
- XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
return 0;
error0:
- XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
return error;
}
@@ -2169,10 +2135,8 @@ __xfs_btree_updkeys(
trace_xfs_btree_updkeys(cur, level, bp);
#ifdef DEBUG
error = xfs_btree_check_block(cur, block, level, bp);
- if (error) {
- XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
+ if (error)
return error;
- }
#endif
ptr = cur->bc_ptrs[level];
nlkey = xfs_btree_key_addr(cur, ptr, block);
@@ -2224,9 +2188,6 @@ xfs_btree_update_keys(
if (cur->bc_flags & XFS_BTREE_OVERLAPPING)
return __xfs_btree_updkeys(cur, level, block, bp, false);
- XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
- XFS_BTREE_TRACE_ARGIK(cur, level, keyp);
-
/*
* Go up the tree from this level toward the root.
* At each level, update the key value to the value input.
@@ -2241,10 +2202,8 @@ xfs_btree_update_keys(
block = xfs_btree_get_block(cur, level, &bp);
#ifdef DEBUG
error = xfs_btree_check_block(cur, block, level, bp);
- if (error) {
- XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
+ if (error)
return error;
- }
#endif
ptr = cur->bc_ptrs[level];
kp = xfs_btree_key_addr(cur, ptr, block);
@@ -2252,7 +2211,6 @@ xfs_btree_update_keys(
xfs_btree_log_keys(cur, bp, ptr, ptr);
}
- XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
return 0;
}
@@ -2272,9 +2230,6 @@ xfs_btree_update(
int ptr;
union xfs_btree_rec *rp;
- XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
- XFS_BTREE_TRACE_ARGR(cur, rec);
-
/* Pick up the current block. */
block = xfs_btree_get_block(cur, 0, &bp);
@@ -2307,11 +2262,9 @@ xfs_btree_update(
goto error0;
}
- XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
return 0;
error0:
- XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
return error;
}
@@ -2339,9 +2292,6 @@ xfs_btree_lshift(
int error; /* error return value */
int i;
- XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
- XFS_BTREE_TRACE_ARGI(cur, level);
-
if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
level == cur->bc_nlevels - 1)
goto out0;
@@ -2500,21 +2450,17 @@ xfs_btree_lshift(
/* Slide the cursor value left one. */
cur->bc_ptrs[level]--;
- XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
*stat = 1;
return 0;
out0:
- XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
*stat = 0;
return 0;
error0:
- XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
return error;
error1:
- XFS_BTREE_TRACE_CURSOR(tcur, XBT_ERROR);
xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
return error;
}
@@ -2541,9 +2487,6 @@ xfs_btree_rshift(
int error; /* error return value */
int i; /* loop counter */
- XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
- XFS_BTREE_TRACE_ARGI(cur, level);
-
if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
(level == cur->bc_nlevels - 1))
goto out0;
@@ -2676,21 +2619,17 @@ xfs_btree_rshift(
xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
- XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
*stat = 1;
return 0;
out0:
- XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
*stat = 0;
return 0;
error0:
- XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
return error;
error1:
- XFS_BTREE_TRACE_CURSOR(tcur, XBT_ERROR);
xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
return error;
}
@@ -2726,9 +2665,6 @@ __xfs_btree_split(
int i;
#endif
- XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
- XFS_BTREE_TRACE_ARGIPK(cur, level, *ptrp, key);
-
XFS_BTREE_STATS_INC(cur, split);
/* Set up left block (current one). */
@@ -2878,16 +2814,13 @@ __xfs_btree_split(
(*curp)->bc_ptrs[level + 1]++;
}
*ptrp = rptr;
- XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
*stat = 1;
return 0;
out0:
- XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
*stat = 0;
return 0;
error0:
- XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
return error;
}
@@ -2994,7 +2927,6 @@ xfs_btree_new_iroot(
int i; /* loop counter */
#endif
- XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
XFS_BTREE_STATS_INC(cur, newroot);
ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE);
@@ -3008,10 +2940,9 @@ xfs_btree_new_iroot(
error = cur->bc_ops->alloc_block(cur, pp, &nptr, stat);
if (error)
goto error0;
- if (*stat == 0) {
- XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
+ if (*stat == 0)
return 0;
- }
+
XFS_BTREE_STATS_INC(cur, alloc);
/* Copy the root into a real block. */
@@ -3074,10 +3005,8 @@ xfs_btree_new_iroot(
*logflags |=
XFS_ILOG_CORE | xfs_ilog_fbroot(cur->bc_private.b.whichfork);
*stat = 1;
- XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
return 0;
error0:
- XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
return error;
}
@@ -3102,7 +3031,6 @@ xfs_btree_new_root(
union xfs_btree_ptr rptr;
union xfs_btree_ptr lptr;
- XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
XFS_BTREE_STATS_INC(cur, newroot);
/* initialise our start point from the cursor */
@@ -3202,14 +3130,11 @@ xfs_btree_new_root(
xfs_btree_setbuf(cur, cur->bc_nlevels, nbp);
cur->bc_ptrs[cur->bc_nlevels] = nptr;
cur->bc_nlevels++;
- XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
*stat = 1;
return 0;
error0:
- XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
return error;
out0:
- XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
*stat = 0;
return 0;
}
@@ -3230,7 +3155,7 @@ xfs_btree_make_block_unfull(
if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
level == cur->bc_nlevels - 1) {
- struct xfs_inode *ip = cur->bc_private.b.ip;
+ struct xfs_inode *ip = cur->bc_private.b.ip;
if (numrecs < cur->bc_ops->get_dmaxrecs(cur, level)) {
/* A root block that can be made bigger. */
@@ -3309,9 +3234,6 @@ xfs_btree_insrec(
#endif
xfs_daddr_t old_bn;
- XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
- XFS_BTREE_TRACE_ARGIPR(cur, level, *ptrp, &rec);
-
ncur = NULL;
lkey = &nkey;
@@ -3324,14 +3246,12 @@ xfs_btree_insrec(
error = xfs_btree_new_root(cur, stat);
xfs_btree_set_ptr_null(cur, ptrp);
- XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
return error;
}
/* If we're off the left edge, return failure. */
ptr = cur->bc_ptrs[level];
if (ptr == 0) {
- XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
*stat = 0;
return 0;
}
@@ -3489,12 +3409,10 @@ xfs_btree_insrec(
*curp = ncur;
}
- XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
*stat = 1;
return 0;
error0:
- XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
return error;
}
@@ -3572,11 +3490,9 @@ xfs_btree_insert(
}
} while (!xfs_btree_ptr_is_null(cur, &nptr));
- XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
*stat = i;
return 0;
error0:
- XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
return error;
}
@@ -3611,8 +3527,6 @@ xfs_btree_kill_iroot(
int i;
#endif
- XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
-
ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE);
ASSERT(cur->bc_nlevels > 1);
@@ -3670,19 +3584,15 @@ xfs_btree_kill_iroot(
#ifdef DEBUG
for (i = 0; i < numrecs; i++) {
error = xfs_btree_check_ptr(cur, cpp, i, level - 1);
- if (error) {
- XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
+ if (error)
return error;
- }
}
#endif
xfs_btree_copy_ptrs(cur, pp, cpp, numrecs);
error = xfs_btree_free_block(cur, cbp);
- if (error) {
- XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
+ if (error)
return error;
- }
cur->bc_bufs[level - 1] = NULL;
be16_add_cpu(&block->bb_level, -1);
@@ -3690,7 +3600,6 @@ xfs_btree_kill_iroot(
XFS_ILOG_CORE | xfs_ilog_fbroot(cur->bc_private.b.whichfork));
cur->bc_nlevels--;
out0:
- XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
return 0;
}
@@ -3706,7 +3615,6 @@ xfs_btree_kill_root(
{
int error;
- XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
XFS_BTREE_STATS_INC(cur, killroot);
/*
@@ -3716,16 +3624,13 @@ xfs_btree_kill_root(
cur->bc_ops->set_root(cur, newroot, -1);
error = xfs_btree_free_block(cur, bp);
- if (error) {
- XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
+ if (error)
return error;
- }
cur->bc_bufs[level] = NULL;
cur->bc_ra[level] = 0;
cur->bc_nlevels--;
- XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
return 0;
}
@@ -3744,7 +3649,6 @@ xfs_btree_dec_cursor(
return error;
}
- XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
*stat = 1;
return 0;
}
@@ -3780,15 +3684,11 @@ xfs_btree_delrec(
struct xfs_btree_cur *tcur; /* temporary btree cursor */
int numrecs; /* temporary numrec count */
- XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
- XFS_BTREE_TRACE_ARGI(cur, level);
-
tcur = NULL;
/* Get the index of the entry being deleted, check for nothing there. */
ptr = cur->bc_ptrs[level];
if (ptr == 0) {
- XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
*stat = 0;
return 0;
}
@@ -3805,7 +3705,6 @@ xfs_btree_delrec(
/* Fail if we're off the end of the block. */
if (ptr > numrecs) {
- XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
*stat = 0;
return 0;
}
@@ -4080,7 +3979,7 @@ xfs_btree_delrec(
tcur = NULL;
if (level == 0)
cur->bc_ptrs[0]++;
- XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
+
*stat = 1;
return 0;
}
@@ -4250,13 +4149,11 @@ xfs_btree_delrec(
* call updkeys directly.
*/
- XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
/* Return value means the next level up has something to do. */
*stat = 2;
return 0;
error0:
- XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
if (tcur)
xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
return error;
@@ -4277,8 +4174,6 @@ xfs_btree_delete(
int i;
bool joined = false;
- XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
-
/*
* Go up the tree, starting at leaf level.
*
@@ -4314,11 +4209,9 @@ xfs_btree_delete(
}
}
- XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
*stat = i;
return 0;
error0:
- XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
return error;
}
diff --git a/fs/xfs/libxfs/xfs_btree.h b/fs/xfs/libxfs/xfs_btree.h
index 50440b5618e8..58e30c0975c3 100644
--- a/fs/xfs/libxfs/xfs_btree.h
+++ b/fs/xfs/libxfs/xfs_btree.h
@@ -473,25 +473,6 @@ static inline int xfs_btree_get_level(struct xfs_btree_block *block)
#define XFS_FILBLKS_MIN(a,b) min_t(xfs_filblks_t, (a), (b))
#define XFS_FILBLKS_MAX(a,b) max_t(xfs_filblks_t, (a), (b))
-/*
- * Trace hooks. Currently not implemented as they need to be ported
- * over to the generic tracing functionality, which is some effort.
- *
- * i,j = integer (32 bit)
- * b = btree block buffer (xfs_buf_t)
- * p = btree ptr
- * r = btree record
- * k = btree key
- */
-#define XFS_BTREE_TRACE_ARGBI(c, b, i)
-#define XFS_BTREE_TRACE_ARGBII(c, b, i, j)
-#define XFS_BTREE_TRACE_ARGI(c, i)
-#define XFS_BTREE_TRACE_ARGIPK(c, i, p, s)
-#define XFS_BTREE_TRACE_ARGIPR(c, i, p, r)
-#define XFS_BTREE_TRACE_ARGIK(c, i, k)
-#define XFS_BTREE_TRACE_ARGR(c, r)
-#define XFS_BTREE_TRACE_CURSOR(c, t)
-
xfs_failaddr_t xfs_btree_sblock_v5hdr_verify(struct xfs_buf *bp);
xfs_failaddr_t xfs_btree_sblock_verify(struct xfs_buf *bp,
unsigned int max_recs);
diff --git a/fs/xfs/libxfs/xfs_dir2.h b/fs/xfs/libxfs/xfs_dir2.h
index 388d67c5c903..989e95a53db2 100644
--- a/fs/xfs/libxfs/xfs_dir2.h
+++ b/fs/xfs/libxfs/xfs_dir2.h
@@ -173,7 +173,7 @@ extern void xfs_dir2_data_log_unused(struct xfs_da_args *args,
extern void xfs_dir2_data_make_free(struct xfs_da_args *args,
struct xfs_buf *bp, xfs_dir2_data_aoff_t offset,
xfs_dir2_data_aoff_t len, int *needlogp, int *needscanp);
-extern void xfs_dir2_data_use_free(struct xfs_da_args *args,
+extern int xfs_dir2_data_use_free(struct xfs_da_args *args,
struct xfs_buf *bp, struct xfs_dir2_data_unused *dup,
xfs_dir2_data_aoff_t offset, xfs_dir2_data_aoff_t len,
int *needlogp, int *needscanp);
diff --git a/fs/xfs/libxfs/xfs_dir2_block.c b/fs/xfs/libxfs/xfs_dir2_block.c
index 2da86a394bcf..875893ded514 100644
--- a/fs/xfs/libxfs/xfs_dir2_block.c
+++ b/fs/xfs/libxfs/xfs_dir2_block.c
@@ -451,15 +451,19 @@ xfs_dir2_block_addname(
* No stale entries, will use enddup space to hold new leaf.
*/
if (!btp->stale) {
+ xfs_dir2_data_aoff_t aoff;
+
/*
* Mark the space needed for the new leaf entry, now in use.
*/
- xfs_dir2_data_use_free(args, bp, enddup,
- (xfs_dir2_data_aoff_t)
- ((char *)enddup - (char *)hdr + be16_to_cpu(enddup->length) -
- sizeof(*blp)),
- (xfs_dir2_data_aoff_t)sizeof(*blp),
- &needlog, &needscan);
+ aoff = (xfs_dir2_data_aoff_t)((char *)enddup - (char *)hdr +
+ be16_to_cpu(enddup->length) - sizeof(*blp));
+ error = xfs_dir2_data_use_free(args, bp, enddup, aoff,
+ (xfs_dir2_data_aoff_t)sizeof(*blp), &needlog,
+ &needscan);
+ if (error)
+ return error;
+
/*
* Update the tail (entry count).
*/
@@ -541,9 +545,11 @@ xfs_dir2_block_addname(
/*
* Mark space for the data entry used.
*/
- xfs_dir2_data_use_free(args, bp, dup,
- (xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr),
- (xfs_dir2_data_aoff_t)len, &needlog, &needscan);
+ error = xfs_dir2_data_use_free(args, bp, dup,
+ (xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr),
+ (xfs_dir2_data_aoff_t)len, &needlog, &needscan);
+ if (error)
+ return error;
/*
* Create the new data entry.
*/
@@ -997,8 +1003,10 @@ xfs_dir2_leaf_to_block(
/*
* Use up the space at the end of the block (blp/btp).
*/
- xfs_dir2_data_use_free(args, dbp, dup, args->geo->blksize - size, size,
- &needlog, &needscan);
+ error = xfs_dir2_data_use_free(args, dbp, dup,
+ args->geo->blksize - size, size, &needlog, &needscan);
+ if (error)
+ return error;
/*
* Initialize the block tail.
*/
@@ -1110,18 +1118,14 @@ xfs_dir2_sf_to_block(
* Add block 0 to the inode.
*/
error = xfs_dir2_grow_inode(args, XFS_DIR2_DATA_SPACE, &blkno);
- if (error) {
- kmem_free(sfp);
- return error;
- }
+ if (error)
+ goto out_free;
/*
* Initialize the data block, then convert it to block format.
*/
error = xfs_dir3_data_init(args, blkno, &bp);
- if (error) {
- kmem_free(sfp);
- return error;
- }
+ if (error)
+ goto out_free;
xfs_dir3_block_init(mp, tp, bp, dp);
hdr = bp->b_addr;
@@ -1136,8 +1140,10 @@ xfs_dir2_sf_to_block(
*/
dup = dp->d_ops->data_unused_p(hdr);
needlog = needscan = 0;
- xfs_dir2_data_use_free(args, bp, dup, args->geo->blksize - i,
- i, &needlog, &needscan);
+ error = xfs_dir2_data_use_free(args, bp, dup, args->geo->blksize - i,
+ i, &needlog, &needscan);
+ if (error)
+ goto out_free;
ASSERT(needscan == 0);
/*
* Fill in the tail.
@@ -1150,9 +1156,11 @@ xfs_dir2_sf_to_block(
/*
* Remove the freespace, we'll manage it.
*/
- xfs_dir2_data_use_free(args, bp, dup,
- (xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr),
- be16_to_cpu(dup->length), &needlog, &needscan);
+ error = xfs_dir2_data_use_free(args, bp, dup,
+ (xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr),
+ be16_to_cpu(dup->length), &needlog, &needscan);
+ if (error)
+ goto out_free;
/*
* Create entry for .
*/
@@ -1256,4 +1264,7 @@ xfs_dir2_sf_to_block(
xfs_dir2_block_log_tail(tp, bp);
xfs_dir3_data_check(dp, bp);
return 0;
+out_free:
+ kmem_free(sfp);
+ return error;
}
diff --git a/fs/xfs/libxfs/xfs_dir2_data.c b/fs/xfs/libxfs/xfs_dir2_data.c
index 920279485275..cb67ec730b9b 100644
--- a/fs/xfs/libxfs/xfs_dir2_data.c
+++ b/fs/xfs/libxfs/xfs_dir2_data.c
@@ -932,10 +932,51 @@ xfs_dir2_data_make_free(
*needscanp = needscan;
}
+/* Check our free data for obvious signs of corruption. */
+static inline xfs_failaddr_t
+xfs_dir2_data_check_free(
+ struct xfs_dir2_data_hdr *hdr,
+ struct xfs_dir2_data_unused *dup,
+ xfs_dir2_data_aoff_t offset,
+ xfs_dir2_data_aoff_t len)
+{
+ if (hdr->magic != cpu_to_be32(XFS_DIR2_DATA_MAGIC) &&
+ hdr->magic != cpu_to_be32(XFS_DIR3_DATA_MAGIC) &&
+ hdr->magic != cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) &&
+ hdr->magic != cpu_to_be32(XFS_DIR3_BLOCK_MAGIC))
+ return __this_address;
+ if (be16_to_cpu(dup->freetag) != XFS_DIR2_DATA_FREE_TAG)
+ return __this_address;
+ if (offset < (char *)dup - (char *)hdr)
+ return __this_address;
+ if (offset + len > (char *)dup + be16_to_cpu(dup->length) - (char *)hdr)
+ return __this_address;
+ if ((char *)dup - (char *)hdr !=
+ be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup)))
+ return __this_address;
+ return NULL;
+}
+
+/* Sanity-check a new bestfree entry. */
+static inline xfs_failaddr_t
+xfs_dir2_data_check_new_free(
+ struct xfs_dir2_data_hdr *hdr,
+ struct xfs_dir2_data_free *dfp,
+ struct xfs_dir2_data_unused *newdup)
+{
+ if (dfp == NULL)
+ return __this_address;
+ if (dfp->length != newdup->length)
+ return __this_address;
+ if (be16_to_cpu(dfp->offset) != (char *)newdup - (char *)hdr)
+ return __this_address;
+ return NULL;
+}
+
/*
* Take a byte range out of an existing unused space and make it un-free.
*/
-void
+int
xfs_dir2_data_use_free(
struct xfs_da_args *args,
struct xfs_buf *bp,
@@ -947,23 +988,19 @@ xfs_dir2_data_use_free(
{
xfs_dir2_data_hdr_t *hdr; /* data block header */
xfs_dir2_data_free_t *dfp; /* bestfree pointer */
+ xfs_dir2_data_unused_t *newdup; /* new unused entry */
+ xfs_dir2_data_unused_t *newdup2; /* another new unused entry */
+ struct xfs_dir2_data_free *bf;
+ xfs_failaddr_t fa;
int matchback; /* matches end of freespace */
int matchfront; /* matches start of freespace */
int needscan; /* need to regen bestfree */
- xfs_dir2_data_unused_t *newdup; /* new unused entry */
- xfs_dir2_data_unused_t *newdup2; /* another new unused entry */
int oldlen; /* old unused entry's length */
- struct xfs_dir2_data_free *bf;
hdr = bp->b_addr;
- ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
- hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) ||
- hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) ||
- hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC));
- ASSERT(be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG);
- ASSERT(offset >= (char *)dup - (char *)hdr);
- ASSERT(offset + len <= (char *)dup + be16_to_cpu(dup->length) - (char *)hdr);
- ASSERT((char *)dup - (char *)hdr == be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup)));
+ fa = xfs_dir2_data_check_free(hdr, dup, offset, len);
+ if (fa)
+ goto corrupt;
/*
* Look up the entry in the bestfree table.
*/
@@ -1008,9 +1045,9 @@ xfs_dir2_data_use_free(
xfs_dir2_data_freeremove(hdr, bf, dfp, needlogp);
dfp = xfs_dir2_data_freeinsert(hdr, bf, newdup,
needlogp);
- ASSERT(dfp != NULL);
- ASSERT(dfp->length == newdup->length);
- ASSERT(be16_to_cpu(dfp->offset) == (char *)newdup - (char *)hdr);
+ fa = xfs_dir2_data_check_new_free(hdr, dfp, newdup);
+ if (fa)
+ goto corrupt;
/*
* If we got inserted at the last slot,
* that means we don't know if there was a better
@@ -1036,9 +1073,9 @@ xfs_dir2_data_use_free(
xfs_dir2_data_freeremove(hdr, bf, dfp, needlogp);
dfp = xfs_dir2_data_freeinsert(hdr, bf, newdup,
needlogp);
- ASSERT(dfp != NULL);
- ASSERT(dfp->length == newdup->length);
- ASSERT(be16_to_cpu(dfp->offset) == (char *)newdup - (char *)hdr);
+ fa = xfs_dir2_data_check_new_free(hdr, dfp, newdup);
+ if (fa)
+ goto corrupt;
/*
* If we got inserted at the last slot,
* that means we don't know if there was a better
@@ -1084,6 +1121,11 @@ xfs_dir2_data_use_free(
}
}
*needscanp = needscan;
+ return 0;
+corrupt:
+ xfs_corruption_error(__func__, XFS_ERRLEVEL_LOW, args->dp->i_mount,
+ hdr, __FILE__, __LINE__, fa);
+ return -EFSCORRUPTED;
}
/* Find the end of the entry data in a data/block format dir block. */
diff --git a/fs/xfs/libxfs/xfs_dir2_leaf.c b/fs/xfs/libxfs/xfs_dir2_leaf.c
index d7e630f41f9c..50fc9c0c5e2b 100644
--- a/fs/xfs/libxfs/xfs_dir2_leaf.c
+++ b/fs/xfs/libxfs/xfs_dir2_leaf.c
@@ -877,9 +877,13 @@ xfs_dir2_leaf_addname(
/*
* Mark the initial part of our freespace in use for the new entry.
*/
- xfs_dir2_data_use_free(args, dbp, dup,
- (xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr), length,
- &needlog, &needscan);
+ error = xfs_dir2_data_use_free(args, dbp, dup,
+ (xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr),
+ length, &needlog, &needscan);
+ if (error) {
+ xfs_trans_brelse(tp, lbp);
+ return error;
+ }
/*
* Initialize our new entry (at last).
*/
@@ -1415,7 +1419,8 @@ xfs_dir2_leaf_removename(
oldbest = be16_to_cpu(bf[0].length);
ltp = xfs_dir2_leaf_tail_p(args->geo, leaf);
bestsp = xfs_dir2_leaf_bests_p(ltp);
- ASSERT(be16_to_cpu(bestsp[db]) == oldbest);
+ if (be16_to_cpu(bestsp[db]) != oldbest)
+ return -EFSCORRUPTED;
/*
* Mark the former data entry unused.
*/
diff --git a/fs/xfs/libxfs/xfs_dir2_node.c b/fs/xfs/libxfs/xfs_dir2_node.c
index 239d97a64296..9df096cc3c37 100644
--- a/fs/xfs/libxfs/xfs_dir2_node.c
+++ b/fs/xfs/libxfs/xfs_dir2_node.c
@@ -387,8 +387,9 @@ xfs_dir2_leaf_to_node(
dp->d_ops->free_hdr_from_disk(&freehdr, free);
leaf = lbp->b_addr;
ltp = xfs_dir2_leaf_tail_p(args->geo, leaf);
- ASSERT(be32_to_cpu(ltp->bestcount) <=
- (uint)dp->i_d.di_size / args->geo->blksize);
+ if (be32_to_cpu(ltp->bestcount) >
+ (uint)dp->i_d.di_size / args->geo->blksize)
+ return -EFSCORRUPTED;
/*
* Copy freespace entries from the leaf block to the new block.
@@ -1728,6 +1729,7 @@ xfs_dir2_node_addname_int(
__be16 *bests;
struct xfs_dir3_icfree_hdr freehdr;
struct xfs_dir2_data_free *bf;
+ xfs_dir2_data_aoff_t aoff;
dp = args->dp;
mp = dp->i_mount;
@@ -2022,9 +2024,13 @@ xfs_dir2_node_addname_int(
/*
* Mark the first part of the unused space, inuse for us.
*/
- xfs_dir2_data_use_free(args, dbp, dup,
- (xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr), length,
- &needlog, &needscan);
+ aoff = (xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr);
+ error = xfs_dir2_data_use_free(args, dbp, dup, aoff, length,
+ &needlog, &needscan);
+ if (error) {
+ xfs_trans_brelse(tp, dbp);
+ return error;
+ }
/*
* Fill in the new entry and log it.
*/
diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h
index 1acb584fc5f7..42956d8d95ed 100644
--- a/fs/xfs/libxfs/xfs_format.h
+++ b/fs/xfs/libxfs/xfs_format.h
@@ -803,24 +803,13 @@ typedef struct xfs_agi {
&(XFS_BUF_TO_AGFL(bp)->agfl_bno[0]) : \
(__be32 *)(bp)->b_addr)
-/*
- * Size of the AGFL. For CRC-enabled filesystes we steal a couple of
- * slots in the beginning of the block for a proper header with the
- * location information and CRC.
- */
-#define XFS_AGFL_SIZE(mp) \
- (((mp)->m_sb.sb_sectsize - \
- (xfs_sb_version_hascrc(&((mp)->m_sb)) ? \
- sizeof(struct xfs_agfl) : 0)) / \
- sizeof(xfs_agblock_t))
-
typedef struct xfs_agfl {
__be32 agfl_magicnum;
__be32 agfl_seqno;
uuid_t agfl_uuid;
__be64 agfl_lsn;
__be32 agfl_crc;
- __be32 agfl_bno[]; /* actually XFS_AGFL_SIZE(mp) */
+ __be32 agfl_bno[]; /* actually xfs_agfl_size(mp) */
} __attribute__((packed)) xfs_agfl_t;
#define XFS_AGFL_CRC_OFF offsetof(struct xfs_agfl, agfl_crc)
diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.c b/fs/xfs/libxfs/xfs_ialloc_btree.c
index af197a5f3a82..a2dd7f4a2719 100644
--- a/fs/xfs/libxfs/xfs_ialloc_btree.c
+++ b/fs/xfs/libxfs/xfs_ialloc_btree.c
@@ -93,8 +93,6 @@ __xfs_inobt_alloc_block(
int error; /* error return value */
xfs_agblock_t sbno = be32_to_cpu(start->s);
- XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
-
memset(&args, 0, sizeof(args));
args.tp = cur->bc_tp;
args.mp = cur->bc_mp;
@@ -107,17 +105,14 @@ __xfs_inobt_alloc_block(
args.resv = resv;
error = xfs_alloc_vextent(&args);
- if (error) {
- XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
+ if (error)
return error;
- }
+
if (args.fsbno == NULLFSBLOCK) {
- XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
*stat = 0;
return 0;
}
ASSERT(args.len == 1);
- XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
new->s = cpu_to_be32(XFS_FSB_TO_AGBNO(args.mp, args.fsbno));
*stat = 1;
diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c
index 4fe17b368316..ef68b1de006a 100644
--- a/fs/xfs/libxfs/xfs_inode_buf.c
+++ b/fs/xfs/libxfs/xfs_inode_buf.c
@@ -93,20 +93,26 @@ xfs_inode_buf_verify(
bool readahead)
{
struct xfs_mount *mp = bp->b_target->bt_mount;
+ xfs_agnumber_t agno;
int i;
int ni;
/*
* Validate the magic number and version of every inode in the buffer
*/
+ agno = xfs_daddr_to_agno(mp, XFS_BUF_ADDR(bp));
ni = XFS_BB_TO_FSB(mp, bp->b_length) * mp->m_sb.sb_inopblock;
for (i = 0; i < ni; i++) {
int di_ok;
xfs_dinode_t *dip;
+ xfs_agino_t unlinked_ino;
dip = xfs_buf_offset(bp, (i << mp->m_sb.sb_inodelog));
+ unlinked_ino = be32_to_cpu(dip->di_next_unlinked);
di_ok = dip->di_magic == cpu_to_be16(XFS_DINODE_MAGIC) &&
- xfs_dinode_good_version(mp, dip->di_version);
+ xfs_dinode_good_version(mp, dip->di_version) &&
+ (unlinked_ino == NULLAGINO ||
+ xfs_verify_agino(mp, agno, unlinked_ino));
if (unlikely(XFS_TEST_ERROR(!di_ok, mp,
XFS_ERRTAG_ITOBP_INOTOBP))) {
if (readahead) {
@@ -115,16 +121,18 @@ xfs_inode_buf_verify(
return;
}
- xfs_verifier_error(bp, -EFSCORRUPTED, __this_address);
#ifdef DEBUG
xfs_alert(mp,
"bad inode magic/vsn daddr %lld #%d (magic=%x)",
(unsigned long long)bp->b_bn, i,
be16_to_cpu(dip->di_magic));
#endif
+ xfs_buf_verifier_error(bp, -EFSCORRUPTED,
+ __func__, dip, sizeof(*dip),
+ NULL);
+ return;
}
}
- xfs_inobp_check(mp, bp);
}
@@ -564,10 +572,7 @@ xfs_iread(
/* initialise the on-disk inode core */
memset(&ip->i_d, 0, sizeof(ip->i_d));
VFS_I(ip)->i_generation = prandom_u32();
- if (xfs_sb_version_hascrc(&mp->m_sb))
- ip->i_d.di_version = 3;
- else
- ip->i_d.di_version = 2;
+ ip->i_d.di_version = 3;
return 0;
}
@@ -649,3 +654,108 @@ xfs_iread(
xfs_trans_brelse(tp, bp);
return error;
}
+
+/*
+ * Validate di_extsize hint.
+ *
+ * The rules are documented at xfs_ioctl_setattr_check_extsize().
+ * These functions must be kept in sync with each other.
+ */
+xfs_failaddr_t
+xfs_inode_validate_extsize(
+ struct xfs_mount *mp,
+ uint32_t extsize,
+ uint16_t mode,
+ uint16_t flags)
+{
+ bool rt_flag;
+ bool hint_flag;
+ bool inherit_flag;
+ uint32_t extsize_bytes;
+ uint32_t blocksize_bytes;
+
+ rt_flag = (flags & XFS_DIFLAG_REALTIME);
+ hint_flag = (flags & XFS_DIFLAG_EXTSIZE);
+ inherit_flag = (flags & XFS_DIFLAG_EXTSZINHERIT);
+ extsize_bytes = XFS_FSB_TO_B(mp, extsize);
+
+ if (rt_flag)
+ blocksize_bytes = mp->m_sb.sb_rextsize << mp->m_sb.sb_blocklog;
+ else
+ blocksize_bytes = mp->m_sb.sb_blocksize;
+
+ if ((hint_flag || inherit_flag) && !(S_ISDIR(mode) || S_ISREG(mode)))
+ return __this_address;
+
+ if (hint_flag && !S_ISREG(mode))
+ return __this_address;
+
+ if (inherit_flag && !S_ISDIR(mode))
+ return __this_address;
+
+ if ((hint_flag || inherit_flag) && extsize == 0)
+ return __this_address;
+
+ if (!(hint_flag || inherit_flag) && extsize != 0)
+ return __this_address;
+
+ if (extsize_bytes % blocksize_bytes)
+ return __this_address;
+
+ if (extsize > MAXEXTLEN)
+ return __this_address;
+
+ if (!rt_flag && extsize > mp->m_sb.sb_agblocks / 2)
+ return __this_address;
+
+ return NULL;
+}
+
+/*
+ * Validate di_cowextsize hint.
+ *
+ * The rules are documented at xfs_ioctl_setattr_check_cowextsize().
+ * These functions must be kept in sync with each other.
+ */
+xfs_failaddr_t
+xfs_inode_validate_cowextsize(
+ struct xfs_mount *mp,
+ uint32_t cowextsize,
+ uint16_t mode,
+ uint16_t flags,
+ uint64_t flags2)
+{
+ bool rt_flag;
+ bool hint_flag;
+ uint32_t cowextsize_bytes;
+
+ rt_flag = (flags & XFS_DIFLAG_REALTIME);
+ hint_flag = (flags2 & XFS_DIFLAG2_COWEXTSIZE);
+ cowextsize_bytes = XFS_FSB_TO_B(mp, cowextsize);
+
+ if (hint_flag && !xfs_sb_version_hasreflink(&mp->m_sb))
+ return __this_address;
+
+ if (hint_flag && !(S_ISDIR(mode) || S_ISREG(mode)))
+ return __this_address;
+
+ if (hint_flag && cowextsize == 0)
+ return __this_address;
+
+ if (!hint_flag && cowextsize != 0)
+ return __this_address;
+
+ if (hint_flag && rt_flag)
+ return __this_address;
+
+ if (cowextsize_bytes % mp->m_sb.sb_blocksize)
+ return __this_address;
+
+ if (cowextsize > MAXEXTLEN)
+ return __this_address;
+
+ if (cowextsize > mp->m_sb.sb_agblocks / 2)
+ return __this_address;
+
+ return NULL;
+}
diff --git a/fs/xfs/libxfs/xfs_inode_buf.h b/fs/xfs/libxfs/xfs_inode_buf.h
index 8a5e1da52d74..d9a376a78ee2 100644
--- a/fs/xfs/libxfs/xfs_inode_buf.h
+++ b/fs/xfs/libxfs/xfs_inode_buf.h
@@ -84,5 +84,10 @@ void xfs_inobp_check(struct xfs_mount *, struct xfs_buf *);
xfs_failaddr_t xfs_dinode_verify(struct xfs_mount *mp, xfs_ino_t ino,
struct xfs_dinode *dip);
+xfs_failaddr_t xfs_inode_validate_extsize(struct xfs_mount *mp,
+ uint32_t extsize, uint16_t mode, uint16_t flags);
+xfs_failaddr_t xfs_inode_validate_cowextsize(struct xfs_mount *mp,
+ uint32_t cowextsize, uint16_t mode, uint16_t flags,
+ uint64_t flags2);
#endif /* __XFS_INODE_BUF_H__ */
diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c
index 866d2861c625..701c42a28d05 100644
--- a/fs/xfs/libxfs/xfs_inode_fork.c
+++ b/fs/xfs/libxfs/xfs_inode_fork.c
@@ -195,8 +195,9 @@ xfs_iformat_local(
"corrupt inode %Lu (bad size %d for local fork, size = %d).",
(unsigned long long) ip->i_ino, size,
XFS_DFORK_SIZE(dip, ip->i_mount, whichfork));
- XFS_CORRUPTION_ERROR("xfs_iformat_local", XFS_ERRLEVEL_LOW,
- ip->i_mount, dip);
+ xfs_inode_verifier_error(ip, -EFSCORRUPTED,
+ "xfs_iformat_local", dip, sizeof(*dip),
+ __this_address);
return -EFSCORRUPTED;
}
@@ -231,8 +232,9 @@ xfs_iformat_extents(
if (unlikely(size < 0 || size > XFS_DFORK_SIZE(dip, mp, whichfork))) {
xfs_warn(ip->i_mount, "corrupt inode %Lu ((a)extents = %d).",
(unsigned long long) ip->i_ino, nex);
- XFS_CORRUPTION_ERROR("xfs_iformat_extents(1)", XFS_ERRLEVEL_LOW,
- mp, dip);
+ xfs_inode_verifier_error(ip, -EFSCORRUPTED,
+ "xfs_iformat_extents(1)", dip, sizeof(*dip),
+ __this_address);
return -EFSCORRUPTED;
}
@@ -245,10 +247,14 @@ xfs_iformat_extents(
xfs_iext_first(ifp, &icur);
for (i = 0; i < nex; i++, dp++) {
+ xfs_failaddr_t fa;
+
xfs_bmbt_disk_get_all(dp, &new);
- if (!xfs_bmbt_validate_extent(mp, whichfork, &new)) {
- XFS_ERROR_REPORT("xfs_iformat_extents(2)",
- XFS_ERRLEVEL_LOW, mp);
+ fa = xfs_bmap_validate_extent(ip, whichfork, &new);
+ if (fa) {
+ xfs_inode_verifier_error(ip, -EFSCORRUPTED,
+ "xfs_iformat_extents(2)",
+ dp, sizeof(*dp), fa);
return -EFSCORRUPTED;
}
@@ -305,8 +311,9 @@ xfs_iformat_btree(
level == 0 || level > XFS_BTREE_MAXLEVELS) {
xfs_warn(mp, "corrupt inode %Lu (btree).",
(unsigned long long) ip->i_ino);
- XFS_CORRUPTION_ERROR("xfs_iformat_btree", XFS_ERRLEVEL_LOW,
- mp, dip);
+ xfs_inode_verifier_error(ip, -EFSCORRUPTED,
+ "xfs_iformat_btree", dfp, size,
+ __this_address);
return -EFSCORRUPTED;
}
@@ -595,7 +602,7 @@ xfs_iextents_copy(
for_each_xfs_iext(ifp, &icur, &rec) {
if (isnullstartblock(rec.br_startblock))
continue;
- ASSERT(xfs_bmbt_validate_extent(ip->i_mount, whichfork, &rec));
+ ASSERT(xfs_bmap_validate_extent(ip, whichfork, &rec) == NULL);
xfs_bmbt_disk_set_all(dp, &rec);
trace_xfs_write_extent(ip, &icur, state, _RET_IP_);
copied += sizeof(struct xfs_bmbt_rec);
diff --git a/fs/xfs/libxfs/xfs_refcount_btree.c b/fs/xfs/libxfs/xfs_refcount_btree.c
index 8479769e470d..265fdcefcbae 100644
--- a/fs/xfs/libxfs/xfs_refcount_btree.c
+++ b/fs/xfs/libxfs/xfs_refcount_btree.c
@@ -79,8 +79,6 @@ xfs_refcountbt_alloc_block(
struct xfs_alloc_arg args; /* block allocation args */
int error; /* error return value */
- XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
-
memset(&args, 0, sizeof(args));
args.tp = cur->bc_tp;
args.mp = cur->bc_mp;
@@ -98,7 +96,6 @@ xfs_refcountbt_alloc_block(
trace_xfs_refcountbt_alloc_block(cur->bc_mp, cur->bc_private.a.agno,
args.agbno, 1);
if (args.fsbno == NULLFSBLOCK) {
- XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
*stat = 0;
return 0;
}
@@ -109,12 +106,10 @@ xfs_refcountbt_alloc_block(
be32_add_cpu(&agf->agf_refcount_blocks, 1);
xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_REFCOUNT_BLOCKS);
- XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
*stat = 1;
return 0;
out_error:
- XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
return error;
}
diff --git a/fs/xfs/libxfs/xfs_rmap_btree.c b/fs/xfs/libxfs/xfs_rmap_btree.c
index e829c3e489ea..8b0d0de1cd11 100644
--- a/fs/xfs/libxfs/xfs_rmap_btree.c
+++ b/fs/xfs/libxfs/xfs_rmap_btree.c
@@ -104,20 +104,15 @@ xfs_rmapbt_alloc_block(
int error;
xfs_agblock_t bno;
- XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
-
/* Allocate the new block from the freelist. If we can't, give up. */
error = xfs_alloc_get_freelist(cur->bc_tp, cur->bc_private.a.agbp,
&bno, 1);
- if (error) {
- XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
+ if (error)
return error;
- }
trace_xfs_rmapbt_alloc_block(cur->bc_mp, cur->bc_private.a.agno,
bno, 1);
if (bno == NULLAGBLOCK) {
- XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
*stat = 0;
return 0;
}
@@ -130,7 +125,8 @@ xfs_rmapbt_alloc_block(
be32_add_cpu(&agf->agf_rmap_blocks, 1);
xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_RMAP_BLOCKS);
- XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
+ xfs_ag_resv_rmapbt_alloc(cur->bc_mp, cur->bc_private.a.agno);
+
*stat = 1;
return 0;
}
@@ -158,6 +154,8 @@ xfs_rmapbt_free_block(
XFS_EXTENT_BUSY_SKIP_DISCARD);
xfs_trans_agbtree_delta(cur->bc_tp, -1);
+ xfs_ag_resv_rmapbt_free(cur->bc_mp, cur->bc_private.a.agno);
+
return 0;
}
diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c
index a55f7a45fa78..53433cc024fd 100644
--- a/fs/xfs/libxfs/xfs_sb.c
+++ b/fs/xfs/libxfs/xfs_sb.c
@@ -731,7 +731,6 @@ xfs_sb_mount_common(
struct xfs_sb *sbp)
{
mp->m_agfrotor = mp->m_agirotor = 0;
- spin_lock_init(&mp->m_agirotor_lock);
mp->m_maxagi = mp->m_sb.sb_agcount;
mp->m_blkbit_log = sbp->sb_blocklog + XFS_NBBYLOG;
mp->m_blkbb_log = sbp->sb_blocklog - BBSHIFT;
diff --git a/fs/xfs/scrub/agheader.c b/fs/xfs/scrub/agheader.c
index 05c66e05ae20..018aabbd9394 100644
--- a/fs/xfs/scrub/agheader.c
+++ b/fs/xfs/scrub/agheader.c
@@ -80,7 +80,7 @@ xfs_scrub_walk_agfl(
}
/* first to the end */
- for (i = flfirst; i < XFS_AGFL_SIZE(mp); i++) {
+ for (i = flfirst; i < xfs_agfl_size(mp); i++) {
error = fn(sc, be32_to_cpu(agfl_bno[i]), priv);
if (error)
return error;
@@ -664,7 +664,7 @@ xfs_scrub_agf(
if (agfl_last > agfl_first)
fl_count = agfl_last - agfl_first + 1;
else
- fl_count = XFS_AGFL_SIZE(mp) - agfl_first + agfl_last + 1;
+ fl_count = xfs_agfl_size(mp) - agfl_first + agfl_last + 1;
if (agfl_count != 0 && fl_count != agfl_count)
xfs_scrub_block_set_corrupt(sc, sc->sa.agf_bp);
@@ -791,7 +791,7 @@ xfs_scrub_agfl(
/* Allocate buffer to ensure uniqueness of AGFL entries. */
agf = XFS_BUF_TO_AGF(sc->sa.agf_bp);
agflcount = be32_to_cpu(agf->agf_flcount);
- if (agflcount > XFS_AGFL_SIZE(sc->mp)) {
+ if (agflcount > xfs_agfl_size(sc->mp)) {
xfs_scrub_block_set_corrupt(sc, sc->sa.agf_bp);
goto out;
}
diff --git a/fs/xfs/scrub/attr.c b/fs/xfs/scrub/attr.c
index 4ed80474f545..127575f0abfb 100644
--- a/fs/xfs/scrub/attr.c
+++ b/fs/xfs/scrub/attr.c
@@ -98,7 +98,7 @@ xfs_scrub_xattr_listent(
if (flags & XFS_ATTR_INCOMPLETE) {
/* Incomplete attr key, just mark the inode for preening. */
- xfs_scrub_ino_set_preen(sx->sc, context->dp->i_ino, NULL);
+ xfs_scrub_ino_set_preen(sx->sc, context->dp->i_ino);
return;
}
diff --git a/fs/xfs/scrub/bmap.c b/fs/xfs/scrub/bmap.c
index d00282130492..639d14b51e90 100644
--- a/fs/xfs/scrub/bmap.c
+++ b/fs/xfs/scrub/bmap.c
@@ -37,6 +37,7 @@
#include "xfs_bmap_util.h"
#include "xfs_bmap_btree.h"
#include "xfs_rmap.h"
+#include "xfs_rmap_btree.h"
#include "xfs_refcount.h"
#include "scrub/xfs_scrub.h"
#include "scrub/scrub.h"
@@ -423,6 +424,169 @@ xfs_scrub_bmap_btree(
return error;
}
+struct xfs_scrub_bmap_check_rmap_info {
+ struct xfs_scrub_context *sc;
+ int whichfork;
+ struct xfs_iext_cursor icur;
+};
+
+/* Can we find bmaps that fit this rmap? */
+STATIC int
+xfs_scrub_bmap_check_rmap(
+ struct xfs_btree_cur *cur,
+ struct xfs_rmap_irec *rec,
+ void *priv)
+{
+ struct xfs_bmbt_irec irec;
+ struct xfs_scrub_bmap_check_rmap_info *sbcri = priv;
+ struct xfs_ifork *ifp;
+ struct xfs_scrub_context *sc = sbcri->sc;
+ bool have_map;
+
+ /* Is this even the right fork? */
+ if (rec->rm_owner != sc->ip->i_ino)
+ return 0;
+ if ((sbcri->whichfork == XFS_ATTR_FORK) ^
+ !!(rec->rm_flags & XFS_RMAP_ATTR_FORK))
+ return 0;
+ if (rec->rm_flags & XFS_RMAP_BMBT_BLOCK)
+ return 0;
+
+ /* Now look up the bmbt record. */
+ ifp = XFS_IFORK_PTR(sc->ip, sbcri->whichfork);
+ if (!ifp) {
+ xfs_scrub_fblock_set_corrupt(sc, sbcri->whichfork,
+ rec->rm_offset);
+ goto out;
+ }
+ have_map = xfs_iext_lookup_extent(sc->ip, ifp, rec->rm_offset,
+ &sbcri->icur, &irec);
+ if (!have_map)
+ xfs_scrub_fblock_set_corrupt(sc, sbcri->whichfork,
+ rec->rm_offset);
+ /*
+ * bmap extent record lengths are constrained to 2^21 blocks in length
+ * because of space constraints in the on-disk metadata structure.
+ * However, rmap extent record lengths are constrained only by AG
+ * length, so we have to loop through the bmbt to make sure that the
+ * entire rmap is covered by bmbt records.
+ */
+ while (have_map) {
+ if (irec.br_startoff != rec->rm_offset)
+ xfs_scrub_fblock_set_corrupt(sc, sbcri->whichfork,
+ rec->rm_offset);
+ if (irec.br_startblock != XFS_AGB_TO_FSB(sc->mp,
+ cur->bc_private.a.agno, rec->rm_startblock))
+ xfs_scrub_fblock_set_corrupt(sc, sbcri->whichfork,
+ rec->rm_offset);
+ if (irec.br_blockcount > rec->rm_blockcount)
+ xfs_scrub_fblock_set_corrupt(sc, sbcri->whichfork,
+ rec->rm_offset);
+ if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
+ break;
+ rec->rm_startblock += irec.br_blockcount;
+ rec->rm_offset += irec.br_blockcount;
+ rec->rm_blockcount -= irec.br_blockcount;
+ if (rec->rm_blockcount == 0)
+ break;
+ have_map = xfs_iext_next_extent(ifp, &sbcri->icur, &irec);
+ if (!have_map)
+ xfs_scrub_fblock_set_corrupt(sc, sbcri->whichfork,
+ rec->rm_offset);
+ }
+
+out:
+ if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
+ return XFS_BTREE_QUERY_RANGE_ABORT;
+ return 0;
+}
+
+/* Make sure each rmap has a corresponding bmbt entry. */
+STATIC int
+xfs_scrub_bmap_check_ag_rmaps(
+ struct xfs_scrub_context *sc,
+ int whichfork,
+ xfs_agnumber_t agno)
+{
+ struct xfs_scrub_bmap_check_rmap_info sbcri;
+ struct xfs_btree_cur *cur;
+ struct xfs_buf *agf;
+ int error;
+
+ error = xfs_alloc_read_agf(sc->mp, sc->tp, agno, 0, &agf);
+ if (error)
+ return error;
+
+ cur = xfs_rmapbt_init_cursor(sc->mp, sc->tp, agf, agno);
+ if (!cur) {
+ error = -ENOMEM;
+ goto out_agf;
+ }
+
+ sbcri.sc = sc;
+ sbcri.whichfork = whichfork;
+ error = xfs_rmap_query_all(cur, xfs_scrub_bmap_check_rmap, &sbcri);
+ if (error == XFS_BTREE_QUERY_RANGE_ABORT)
+ error = 0;
+
+ xfs_btree_del_cursor(cur, error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
+out_agf:
+ xfs_trans_brelse(sc->tp, agf);
+ return error;
+}
+
+/* Make sure each rmap has a corresponding bmbt entry. */
+STATIC int
+xfs_scrub_bmap_check_rmaps(
+ struct xfs_scrub_context *sc,
+ int whichfork)
+{
+ loff_t size;
+ xfs_agnumber_t agno;
+ int error;
+
+ if (!xfs_sb_version_hasrmapbt(&sc->mp->m_sb) ||
+ whichfork == XFS_COW_FORK ||
+ (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT))
+ return 0;
+
+ /* Don't support realtime rmap checks yet. */
+ if (XFS_IS_REALTIME_INODE(sc->ip) && whichfork == XFS_DATA_FORK)
+ return 0;
+
+ /*
+ * Only do this for complex maps that are in btree format, or for
+ * situations where we would seem to have a size but zero extents.
+ * The inode repair code can zap broken iforks, which means we have
+ * to flag this bmap as corrupt if there are rmaps that need to be
+ * reattached.
+ */
+ switch (whichfork) {
+ case XFS_DATA_FORK:
+ size = i_size_read(VFS_I(sc->ip));
+ break;
+ case XFS_ATTR_FORK:
+ size = XFS_IFORK_Q(sc->ip);
+ break;
+ default:
+ size = 0;
+ break;
+ }
+ if (XFS_IFORK_FORMAT(sc->ip, whichfork) != XFS_DINODE_FMT_BTREE &&
+ (size == 0 || XFS_IFORK_NEXTENTS(sc->ip, whichfork) > 0))
+ return 0;
+
+ for (agno = 0; agno < sc->mp->m_sb.sb_agcount; agno++) {
+ error = xfs_scrub_bmap_check_ag_rmaps(sc, whichfork, agno);
+ if (error)
+ return error;
+ if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
+ break;
+ }
+
+ return 0;
+}
+
/*
* Scrub an inode fork's block mappings.
*
@@ -457,16 +621,16 @@ xfs_scrub_bmap(
goto out;
/* No CoW forks on non-reflink inodes/filesystems. */
if (!xfs_is_reflink_inode(ip)) {
- xfs_scrub_ino_set_corrupt(sc, sc->ip->i_ino, NULL);
+ xfs_scrub_ino_set_corrupt(sc, sc->ip->i_ino);
goto out;
}
break;
case XFS_ATTR_FORK:
if (!ifp)
- goto out;
+ goto out_check_rmap;
if (!xfs_sb_version_hasattr(&mp->m_sb) &&
!xfs_sb_version_hasattr2(&mp->m_sb))
- xfs_scrub_ino_set_corrupt(sc, sc->ip->i_ino, NULL);
+ xfs_scrub_ino_set_corrupt(sc, sc->ip->i_ino);
break;
default:
ASSERT(whichfork == XFS_DATA_FORK);
@@ -534,6 +698,10 @@ xfs_scrub_bmap(
goto out;
}
+out_check_rmap:
+ error = xfs_scrub_bmap_check_rmaps(sc, whichfork);
+ if (!xfs_scrub_fblock_xref_process_error(sc, whichfork, 0, &error))
+ goto out;
out:
return error;
}
diff --git a/fs/xfs/scrub/common.c b/fs/xfs/scrub/common.c
index 8033ab9d8f47..8ed91d5c868d 100644
--- a/fs/xfs/scrub/common.c
+++ b/fs/xfs/scrub/common.c
@@ -213,12 +213,10 @@ xfs_scrub_block_set_preen(
void
xfs_scrub_ino_set_preen(
struct xfs_scrub_context *sc,
- xfs_ino_t ino,
- struct xfs_buf *bp)
+ xfs_ino_t ino)
{
sc->sm->sm_flags |= XFS_SCRUB_OFLAG_PREEN;
- trace_xfs_scrub_ino_preen(sc, ino, bp ? bp->b_bn : 0,
- __return_address);
+ trace_xfs_scrub_ino_preen(sc, ino, __return_address);
}
/* Record a corrupt block. */
@@ -249,22 +247,20 @@ xfs_scrub_block_xref_set_corrupt(
void
xfs_scrub_ino_set_corrupt(
struct xfs_scrub_context *sc,
- xfs_ino_t ino,
- struct xfs_buf *bp)
+ xfs_ino_t ino)
{
sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
- trace_xfs_scrub_ino_error(sc, ino, bp ? bp->b_bn : 0, __return_address);
+ trace_xfs_scrub_ino_error(sc, ino, __return_address);
}
/* Record a corruption while cross-referencing with an inode. */
void
xfs_scrub_ino_xref_set_corrupt(
struct xfs_scrub_context *sc,
- xfs_ino_t ino,
- struct xfs_buf *bp)
+ xfs_ino_t ino)
{
sc->sm->sm_flags |= XFS_SCRUB_OFLAG_XCORRUPT;
- trace_xfs_scrub_ino_error(sc, ino, bp ? bp->b_bn : 0, __return_address);
+ trace_xfs_scrub_ino_error(sc, ino, __return_address);
}
/* Record corruption in a block indexed by a file fork. */
@@ -296,12 +292,10 @@ xfs_scrub_fblock_xref_set_corrupt(
void
xfs_scrub_ino_set_warning(
struct xfs_scrub_context *sc,
- xfs_ino_t ino,
- struct xfs_buf *bp)
+ xfs_ino_t ino)
{
sc->sm->sm_flags |= XFS_SCRUB_OFLAG_WARNING;
- trace_xfs_scrub_ino_warning(sc, ino, bp ? bp->b_bn : 0,
- __return_address);
+ trace_xfs_scrub_ino_warning(sc, ino, __return_address);
}
/* Warn about a block indexed by a file fork that needs review. */
@@ -619,7 +613,7 @@ xfs_scrub_checkpoint_log(
{
int error;
- error = _xfs_log_force(mp, XFS_LOG_SYNC, NULL);
+ error = xfs_log_force(mp, XFS_LOG_SYNC);
if (error)
return error;
xfs_ail_push_all_sync(mp->m_ail);
diff --git a/fs/xfs/scrub/common.h b/fs/xfs/scrub/common.h
index ddb65d22c76a..deaf60400981 100644
--- a/fs/xfs/scrub/common.h
+++ b/fs/xfs/scrub/common.h
@@ -63,25 +63,22 @@ bool xfs_scrub_fblock_xref_process_error(struct xfs_scrub_context *sc,
void xfs_scrub_block_set_preen(struct xfs_scrub_context *sc,
struct xfs_buf *bp);
-void xfs_scrub_ino_set_preen(struct xfs_scrub_context *sc, xfs_ino_t ino,
- struct xfs_buf *bp);
+void xfs_scrub_ino_set_preen(struct xfs_scrub_context *sc, xfs_ino_t ino);
void xfs_scrub_block_set_corrupt(struct xfs_scrub_context *sc,
struct xfs_buf *bp);
-void xfs_scrub_ino_set_corrupt(struct xfs_scrub_context *sc, xfs_ino_t ino,
- struct xfs_buf *bp);
+void xfs_scrub_ino_set_corrupt(struct xfs_scrub_context *sc, xfs_ino_t ino);
void xfs_scrub_fblock_set_corrupt(struct xfs_scrub_context *sc, int whichfork,
xfs_fileoff_t offset);
void xfs_scrub_block_xref_set_corrupt(struct xfs_scrub_context *sc,
struct xfs_buf *bp);
-void xfs_scrub_ino_xref_set_corrupt(struct xfs_scrub_context *sc, xfs_ino_t ino,
- struct xfs_buf *bp);
+void xfs_scrub_ino_xref_set_corrupt(struct xfs_scrub_context *sc,
+ xfs_ino_t ino);
void xfs_scrub_fblock_xref_set_corrupt(struct xfs_scrub_context *sc,
int whichfork, xfs_fileoff_t offset);
-void xfs_scrub_ino_set_warning(struct xfs_scrub_context *sc, xfs_ino_t ino,
- struct xfs_buf *bp);
+void xfs_scrub_ino_set_warning(struct xfs_scrub_context *sc, xfs_ino_t ino);
void xfs_scrub_fblock_set_warning(struct xfs_scrub_context *sc, int whichfork,
xfs_fileoff_t offset);
diff --git a/fs/xfs/scrub/dir.c b/fs/xfs/scrub/dir.c
index 50b6a26b0299..38f29806eb54 100644
--- a/fs/xfs/scrub/dir.c
+++ b/fs/xfs/scrub/dir.c
@@ -781,7 +781,7 @@ xfs_scrub_directory(
/* Plausible size? */
if (sc->ip->i_d.di_size < xfs_dir2_sf_hdr_size(0)) {
- xfs_scrub_ino_set_corrupt(sc, sc->ip->i_ino, NULL);
+ xfs_scrub_ino_set_corrupt(sc, sc->ip->i_ino);
goto out;
}
diff --git a/fs/xfs/scrub/ialloc.c b/fs/xfs/scrub/ialloc.c
index 63ab3f98430d..106ca4bd753f 100644
--- a/fs/xfs/scrub/ialloc.c
+++ b/fs/xfs/scrub/ialloc.c
@@ -259,7 +259,8 @@ xfs_scrub_iallocbt_check_freemask(
error = xfs_imap_to_bp(mp, bs->cur->bc_tp, &imap,
&dip, &bp, 0, 0);
- if (!xfs_scrub_btree_process_error(bs->sc, bs->cur, 0, &error))
+ if (!xfs_scrub_btree_xref_process_error(bs->sc, bs->cur, 0,
+ &error))
continue;
/* Which inodes are free? */
@@ -433,7 +434,7 @@ xfs_scrub_iallocbt_xref_rmap_inodes(
if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.rmap_cur))
return;
if (blocks != inode_blocks)
- xfs_scrub_btree_set_corrupt(sc, sc->sa.ino_cur, 0);
+ xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.rmap_cur, 0);
}
/* Scrub the inode btrees for some AG. */
diff --git a/fs/xfs/scrub/inode.c b/fs/xfs/scrub/inode.c
index 21297bef8df1..df14930e4fc5 100644
--- a/fs/xfs/scrub/inode.c
+++ b/fs/xfs/scrub/inode.c
@@ -89,67 +89,21 @@ out:
/* Inode core */
-/*
- * Validate di_extsize hint.
- *
- * The rules are documented at xfs_ioctl_setattr_check_extsize().
- * These functions must be kept in sync with each other.
- */
+/* Validate di_extsize hint. */
STATIC void
xfs_scrub_inode_extsize(
struct xfs_scrub_context *sc,
- struct xfs_buf *bp,
struct xfs_dinode *dip,
xfs_ino_t ino,
uint16_t mode,
uint16_t flags)
{
- struct xfs_mount *mp = sc->mp;
- bool rt_flag;
- bool hint_flag;
- bool inherit_flag;
- uint32_t extsize;
- uint32_t extsize_bytes;
- uint32_t blocksize_bytes;
-
- rt_flag = (flags & XFS_DIFLAG_REALTIME);
- hint_flag = (flags & XFS_DIFLAG_EXTSIZE);
- inherit_flag = (flags & XFS_DIFLAG_EXTSZINHERIT);
- extsize = be32_to_cpu(dip->di_extsize);
- extsize_bytes = XFS_FSB_TO_B(sc->mp, extsize);
-
- if (rt_flag)
- blocksize_bytes = mp->m_sb.sb_rextsize << mp->m_sb.sb_blocklog;
- else
- blocksize_bytes = mp->m_sb.sb_blocksize;
-
- if ((hint_flag || inherit_flag) && !(S_ISDIR(mode) || S_ISREG(mode)))
- goto bad;
-
- if (hint_flag && !S_ISREG(mode))
- goto bad;
-
- if (inherit_flag && !S_ISDIR(mode))
- goto bad;
-
- if ((hint_flag || inherit_flag) && extsize == 0)
- goto bad;
-
- if (!(hint_flag || inherit_flag) && extsize != 0)
- goto bad;
-
- if (extsize_bytes % blocksize_bytes)
- goto bad;
-
- if (extsize > MAXEXTLEN)
- goto bad;
+ xfs_failaddr_t fa;
- if (!rt_flag && extsize > mp->m_sb.sb_agblocks / 2)
- goto bad;
-
- return;
-bad:
- xfs_scrub_ino_set_corrupt(sc, ino, bp);
+ fa = xfs_inode_validate_extsize(sc->mp, be32_to_cpu(dip->di_extsize),
+ mode, flags);
+ if (fa)
+ xfs_scrub_ino_set_corrupt(sc, ino);
}
/*
@@ -161,58 +115,25 @@ bad:
STATIC void
xfs_scrub_inode_cowextsize(
struct xfs_scrub_context *sc,
- struct xfs_buf *bp,
struct xfs_dinode *dip,
xfs_ino_t ino,
uint16_t mode,
uint16_t flags,
uint64_t flags2)
{
- struct xfs_mount *mp = sc->mp;
- bool rt_flag;
- bool hint_flag;
- uint32_t extsize;
- uint32_t extsize_bytes;
-
- rt_flag = (flags & XFS_DIFLAG_REALTIME);
- hint_flag = (flags2 & XFS_DIFLAG2_COWEXTSIZE);
- extsize = be32_to_cpu(dip->di_cowextsize);
- extsize_bytes = XFS_FSB_TO_B(sc->mp, extsize);
-
- if (hint_flag && !xfs_sb_version_hasreflink(&mp->m_sb))
- goto bad;
-
- if (hint_flag && !(S_ISDIR(mode) || S_ISREG(mode)))
- goto bad;
-
- if (hint_flag && extsize == 0)
- goto bad;
-
- if (!hint_flag && extsize != 0)
- goto bad;
-
- if (hint_flag && rt_flag)
- goto bad;
-
- if (extsize_bytes % mp->m_sb.sb_blocksize)
- goto bad;
-
- if (extsize > MAXEXTLEN)
- goto bad;
-
- if (extsize > mp->m_sb.sb_agblocks / 2)
- goto bad;
+ xfs_failaddr_t fa;
- return;
-bad:
- xfs_scrub_ino_set_corrupt(sc, ino, bp);
+ fa = xfs_inode_validate_cowextsize(sc->mp,
+ be32_to_cpu(dip->di_cowextsize), mode, flags,
+ flags2);
+ if (fa)
+ xfs_scrub_ino_set_corrupt(sc, ino);
}
/* Make sure the di_flags make sense for the inode. */
STATIC void
xfs_scrub_inode_flags(
struct xfs_scrub_context *sc,
- struct xfs_buf *bp,
struct xfs_dinode *dip,
xfs_ino_t ino,
uint16_t mode,
@@ -251,14 +172,13 @@ xfs_scrub_inode_flags(
return;
bad:
- xfs_scrub_ino_set_corrupt(sc, ino, bp);
+ xfs_scrub_ino_set_corrupt(sc, ino);
}
/* Make sure the di_flags2 make sense for the inode. */
STATIC void
xfs_scrub_inode_flags2(
struct xfs_scrub_context *sc,
- struct xfs_buf *bp,
struct xfs_dinode *dip,
xfs_ino_t ino,
uint16_t mode,
@@ -295,14 +215,13 @@ xfs_scrub_inode_flags2(
return;
bad:
- xfs_scrub_ino_set_corrupt(sc, ino, bp);
+ xfs_scrub_ino_set_corrupt(sc, ino);
}
/* Scrub all the ondisk inode fields. */
STATIC void
xfs_scrub_dinode(
struct xfs_scrub_context *sc,
- struct xfs_buf *bp,
struct xfs_dinode *dip,
xfs_ino_t ino)
{
@@ -333,7 +252,7 @@ xfs_scrub_dinode(
/* mode is recognized */
break;
default:
- xfs_scrub_ino_set_corrupt(sc, ino, bp);
+ xfs_scrub_ino_set_corrupt(sc, ino);
break;
}
@@ -344,22 +263,22 @@ xfs_scrub_dinode(
* We autoconvert v1 inodes into v2 inodes on writeout,
* so just mark this inode for preening.
*/
- xfs_scrub_ino_set_preen(sc, ino, bp);
+ xfs_scrub_ino_set_preen(sc, ino);
break;
case 2:
case 3:
if (dip->di_onlink != 0)
- xfs_scrub_ino_set_corrupt(sc, ino, bp);
+ xfs_scrub_ino_set_corrupt(sc, ino);
if (dip->di_mode == 0 && sc->ip)
- xfs_scrub_ino_set_corrupt(sc, ino, bp);
+ xfs_scrub_ino_set_corrupt(sc, ino);
if (dip->di_projid_hi != 0 &&
!xfs_sb_version_hasprojid32bit(&mp->m_sb))
- xfs_scrub_ino_set_corrupt(sc, ino, bp);
+ xfs_scrub_ino_set_corrupt(sc, ino);
break;
default:
- xfs_scrub_ino_set_corrupt(sc, ino, bp);
+ xfs_scrub_ino_set_corrupt(sc, ino);
return;
}
@@ -369,40 +288,40 @@ xfs_scrub_dinode(
*/
if (dip->di_uid == cpu_to_be32(-1U) ||
dip->di_gid == cpu_to_be32(-1U))
- xfs_scrub_ino_set_warning(sc, ino, bp);
+ xfs_scrub_ino_set_warning(sc, ino);
/* di_format */
switch (dip->di_format) {
case XFS_DINODE_FMT_DEV:
if (!S_ISCHR(mode) && !S_ISBLK(mode) &&
!S_ISFIFO(mode) && !S_ISSOCK(mode))
- xfs_scrub_ino_set_corrupt(sc, ino, bp);
+ xfs_scrub_ino_set_corrupt(sc, ino);
break;
case XFS_DINODE_FMT_LOCAL:
if (!S_ISDIR(mode) && !S_ISLNK(mode))
- xfs_scrub_ino_set_corrupt(sc, ino, bp);
+ xfs_scrub_ino_set_corrupt(sc, ino);
break;
case XFS_DINODE_FMT_EXTENTS:
if (!S_ISREG(mode) && !S_ISDIR(mode) && !S_ISLNK(mode))
- xfs_scrub_ino_set_corrupt(sc, ino, bp);
+ xfs_scrub_ino_set_corrupt(sc, ino);
break;
case XFS_DINODE_FMT_BTREE:
if (!S_ISREG(mode) && !S_ISDIR(mode))
- xfs_scrub_ino_set_corrupt(sc, ino, bp);
+ xfs_scrub_ino_set_corrupt(sc, ino);
break;
case XFS_DINODE_FMT_UUID:
default:
- xfs_scrub_ino_set_corrupt(sc, ino, bp);
+ xfs_scrub_ino_set_corrupt(sc, ino);
break;
}
/* di_[amc]time.nsec */
if (be32_to_cpu(dip->di_atime.t_nsec) >= NSEC_PER_SEC)
- xfs_scrub_ino_set_corrupt(sc, ino, bp);
+ xfs_scrub_ino_set_corrupt(sc, ino);
if (be32_to_cpu(dip->di_mtime.t_nsec) >= NSEC_PER_SEC)
- xfs_scrub_ino_set_corrupt(sc, ino, bp);
+ xfs_scrub_ino_set_corrupt(sc, ino);
if (be32_to_cpu(dip->di_ctime.t_nsec) >= NSEC_PER_SEC)
- xfs_scrub_ino_set_corrupt(sc, ino, bp);
+ xfs_scrub_ino_set_corrupt(sc, ino);
/*
* di_size. xfs_dinode_verify checks for things that screw up
@@ -411,19 +330,19 @@ xfs_scrub_dinode(
*/
isize = be64_to_cpu(dip->di_size);
if (isize & (1ULL << 63))
- xfs_scrub_ino_set_corrupt(sc, ino, bp);
+ xfs_scrub_ino_set_corrupt(sc, ino);
/* Devices, fifos, and sockets must have zero size */
if (!S_ISDIR(mode) && !S_ISREG(mode) && !S_ISLNK(mode) && isize != 0)
- xfs_scrub_ino_set_corrupt(sc, ino, bp);
+ xfs_scrub_ino_set_corrupt(sc, ino);
/* Directories can't be larger than the data section size (32G) */
if (S_ISDIR(mode) && (isize == 0 || isize >= XFS_DIR2_SPACE_SIZE))
- xfs_scrub_ino_set_corrupt(sc, ino, bp);
+ xfs_scrub_ino_set_corrupt(sc, ino);
/* Symlinks can't be larger than SYMLINK_MAXLEN */
if (S_ISLNK(mode) && (isize == 0 || isize >= XFS_SYMLINK_MAXLEN))
- xfs_scrub_ino_set_corrupt(sc, ino, bp);
+ xfs_scrub_ino_set_corrupt(sc, ino);
/*
* Warn if the running kernel can't handle the kinds of offsets
@@ -432,7 +351,7 @@ xfs_scrub_dinode(
* overly large offsets, flag the inode for admin review.
*/
if (isize >= mp->m_super->s_maxbytes)
- xfs_scrub_ino_set_warning(sc, ino, bp);
+ xfs_scrub_ino_set_warning(sc, ino);
/* di_nblocks */
if (flags2 & XFS_DIFLAG2_REFLINK) {
@@ -447,15 +366,15 @@ xfs_scrub_dinode(
*/
if (be64_to_cpu(dip->di_nblocks) >=
mp->m_sb.sb_dblocks + mp->m_sb.sb_rblocks)
- xfs_scrub_ino_set_corrupt(sc, ino, bp);
+ xfs_scrub_ino_set_corrupt(sc, ino);
} else {
if (be64_to_cpu(dip->di_nblocks) >= mp->m_sb.sb_dblocks)
- xfs_scrub_ino_set_corrupt(sc, ino, bp);
+ xfs_scrub_ino_set_corrupt(sc, ino);
}
- xfs_scrub_inode_flags(sc, bp, dip, ino, mode, flags);
+ xfs_scrub_inode_flags(sc, dip, ino, mode, flags);
- xfs_scrub_inode_extsize(sc, bp, dip, ino, mode, flags);
+ xfs_scrub_inode_extsize(sc, dip, ino, mode, flags);
/* di_nextents */
nextents = be32_to_cpu(dip->di_nextents);
@@ -463,31 +382,31 @@ xfs_scrub_dinode(
switch (dip->di_format) {
case XFS_DINODE_FMT_EXTENTS:
if (nextents > fork_recs)
- xfs_scrub_ino_set_corrupt(sc, ino, bp);
+ xfs_scrub_ino_set_corrupt(sc, ino);
break;
case XFS_DINODE_FMT_BTREE:
if (nextents <= fork_recs)
- xfs_scrub_ino_set_corrupt(sc, ino, bp);
+ xfs_scrub_ino_set_corrupt(sc, ino);
break;
default:
if (nextents != 0)
- xfs_scrub_ino_set_corrupt(sc, ino, bp);
+ xfs_scrub_ino_set_corrupt(sc, ino);
break;
}
/* di_forkoff */
if (XFS_DFORK_APTR(dip) >= (char *)dip + mp->m_sb.sb_inodesize)
- xfs_scrub_ino_set_corrupt(sc, ino, bp);
+ xfs_scrub_ino_set_corrupt(sc, ino);
if (dip->di_anextents != 0 && dip->di_forkoff == 0)
- xfs_scrub_ino_set_corrupt(sc, ino, bp);
+ xfs_scrub_ino_set_corrupt(sc, ino);
if (dip->di_forkoff == 0 && dip->di_aformat != XFS_DINODE_FMT_EXTENTS)
- xfs_scrub_ino_set_corrupt(sc, ino, bp);
+ xfs_scrub_ino_set_corrupt(sc, ino);
/* di_aformat */
if (dip->di_aformat != XFS_DINODE_FMT_LOCAL &&
dip->di_aformat != XFS_DINODE_FMT_EXTENTS &&
dip->di_aformat != XFS_DINODE_FMT_BTREE)
- xfs_scrub_ino_set_corrupt(sc, ino, bp);
+ xfs_scrub_ino_set_corrupt(sc, ino);
/* di_anextents */
nextents = be16_to_cpu(dip->di_anextents);
@@ -495,92 +414,26 @@ xfs_scrub_dinode(
switch (dip->di_aformat) {
case XFS_DINODE_FMT_EXTENTS:
if (nextents > fork_recs)
- xfs_scrub_ino_set_corrupt(sc, ino, bp);
+ xfs_scrub_ino_set_corrupt(sc, ino);
break;
case XFS_DINODE_FMT_BTREE:
if (nextents <= fork_recs)
- xfs_scrub_ino_set_corrupt(sc, ino, bp);
+ xfs_scrub_ino_set_corrupt(sc, ino);
break;
default:
if (nextents != 0)
- xfs_scrub_ino_set_corrupt(sc, ino, bp);
+ xfs_scrub_ino_set_corrupt(sc, ino);
}
if (dip->di_version >= 3) {
if (be32_to_cpu(dip->di_crtime.t_nsec) >= NSEC_PER_SEC)
- xfs_scrub_ino_set_corrupt(sc, ino, bp);
- xfs_scrub_inode_flags2(sc, bp, dip, ino, mode, flags, flags2);
- xfs_scrub_inode_cowextsize(sc, bp, dip, ino, mode, flags,
+ xfs_scrub_ino_set_corrupt(sc, ino);
+ xfs_scrub_inode_flags2(sc, dip, ino, mode, flags, flags2);
+ xfs_scrub_inode_cowextsize(sc, dip, ino, mode, flags,
flags2);
}
}
-/* Map and read a raw inode. */
-STATIC int
-xfs_scrub_inode_map_raw(
- struct xfs_scrub_context *sc,
- xfs_ino_t ino,
- struct xfs_buf **bpp,
- struct xfs_dinode **dipp)
-{
- struct xfs_imap imap;
- struct xfs_mount *mp = sc->mp;
- struct xfs_buf *bp = NULL;
- struct xfs_dinode *dip;
- int error;
-
- error = xfs_imap(mp, sc->tp, ino, &imap, XFS_IGET_UNTRUSTED);
- if (error == -EINVAL) {
- /*
- * Inode could have gotten deleted out from under us;
- * just forget about it.
- */
- error = -ENOENT;
- goto out;
- }
- if (!xfs_scrub_process_error(sc, XFS_INO_TO_AGNO(mp, ino),
- XFS_INO_TO_AGBNO(mp, ino), &error))
- goto out;
-
- error = xfs_trans_read_buf(mp, sc->tp, mp->m_ddev_targp,
- imap.im_blkno, imap.im_len, XBF_UNMAPPED, &bp,
- NULL);
- if (!xfs_scrub_process_error(sc, XFS_INO_TO_AGNO(mp, ino),
- XFS_INO_TO_AGBNO(mp, ino), &error))
- goto out;
-
- /*
- * Is this really an inode? We disabled verifiers in the above
- * xfs_trans_read_buf call because the inode buffer verifier
- * fails on /any/ inode record in the inode cluster with a bad
- * magic or version number, not just the one that we're
- * checking. Therefore, grab the buffer unconditionally, attach
- * the inode verifiers by hand, and run the inode verifier only
- * on the one inode we want.
- */
- bp->b_ops = &xfs_inode_buf_ops;
- dip = xfs_buf_offset(bp, imap.im_boffset);
- if (xfs_dinode_verify(mp, ino, dip) != NULL ||
- !xfs_dinode_good_version(mp, dip->di_version)) {
- xfs_scrub_ino_set_corrupt(sc, ino, bp);
- goto out_buf;
- }
-
- /* ...and is it the one we asked for? */
- if (be32_to_cpu(dip->di_gen) != sc->sm->sm_gen) {
- error = -ENOENT;
- goto out_buf;
- }
-
- *dipp = dip;
- *bpp = bp;
-out:
- return error;
-out_buf:
- xfs_trans_brelse(sc->tp, bp);
- return error;
-}
-
/*
* Make sure the finobt doesn't think this inode is free.
* We don't have to check the inobt ourselves because we got the inode via
@@ -645,18 +498,18 @@ xfs_scrub_inode_xref_bmap(
if (!xfs_scrub_should_check_xref(sc, &error, NULL))
return;
if (nextents < be32_to_cpu(dip->di_nextents))
- xfs_scrub_ino_xref_set_corrupt(sc, sc->ip->i_ino, NULL);
+ xfs_scrub_ino_xref_set_corrupt(sc, sc->ip->i_ino);
error = xfs_bmap_count_blocks(sc->tp, sc->ip, XFS_ATTR_FORK,
&nextents, &acount);
if (!xfs_scrub_should_check_xref(sc, &error, NULL))
return;
if (nextents != be16_to_cpu(dip->di_anextents))
- xfs_scrub_ino_xref_set_corrupt(sc, sc->ip->i_ino, NULL);
+ xfs_scrub_ino_xref_set_corrupt(sc, sc->ip->i_ino);
/* Check nblocks against the inode. */
if (count + acount != be64_to_cpu(dip->di_nblocks))
- xfs_scrub_ino_xref_set_corrupt(sc, sc->ip->i_ino, NULL);
+ xfs_scrub_ino_xref_set_corrupt(sc, sc->ip->i_ino);
}
/* Cross-reference with the other btrees. */
@@ -700,8 +553,7 @@ xfs_scrub_inode_xref(
static void
xfs_scrub_inode_check_reflink_iflag(
struct xfs_scrub_context *sc,
- xfs_ino_t ino,
- struct xfs_buf *bp)
+ xfs_ino_t ino)
{
struct xfs_mount *mp = sc->mp;
bool has_shared;
@@ -716,9 +568,9 @@ xfs_scrub_inode_check_reflink_iflag(
XFS_INO_TO_AGBNO(mp, ino), &error))
return;
if (xfs_is_reflink_inode(sc->ip) && !has_shared)
- xfs_scrub_ino_set_preen(sc, ino, bp);
+ xfs_scrub_ino_set_preen(sc, ino);
else if (!xfs_is_reflink_inode(sc->ip) && has_shared)
- xfs_scrub_ino_set_corrupt(sc, ino, bp);
+ xfs_scrub_ino_set_corrupt(sc, ino);
}
/* Scrub an inode. */
@@ -727,43 +579,33 @@ xfs_scrub_inode(
struct xfs_scrub_context *sc)
{
struct xfs_dinode di;
- struct xfs_buf *bp = NULL;
- struct xfs_dinode *dip;
- xfs_ino_t ino;
int error = 0;
- /* Did we get the in-core inode, or are we doing this manually? */
- if (sc->ip) {
- ino = sc->ip->i_ino;
- xfs_inode_to_disk(sc->ip, &di, 0);
- dip = &di;
- } else {
- /* Map & read inode. */
- ino = sc->sm->sm_ino;
- error = xfs_scrub_inode_map_raw(sc, ino, &bp, &dip);
- if (error || !bp)
- goto out;
+ /*
+ * If sc->ip is NULL, that means that the setup function called
+ * xfs_iget to look up the inode. xfs_iget returned a EFSCORRUPTED
+ * and a NULL inode, so flag the corruption error and return.
+ */
+ if (!sc->ip) {
+ xfs_scrub_ino_set_corrupt(sc, sc->sm->sm_ino);
+ return 0;
}
- xfs_scrub_dinode(sc, bp, dip, ino);
+ /* Scrub the inode core. */
+ xfs_inode_to_disk(sc->ip, &di, 0);
+ xfs_scrub_dinode(sc, &di, sc->ip->i_ino);
if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
goto out;
- /* Now let's do the things that require a live inode. */
- if (!sc->ip)
- goto out;
-
/*
* Look for discrepancies between file's data blocks and the reflink
* iflag. We already checked the iflag against the file mode when
* we scrubbed the dinode.
*/
if (S_ISREG(VFS_I(sc->ip)->i_mode))
- xfs_scrub_inode_check_reflink_iflag(sc, ino, bp);
+ xfs_scrub_inode_check_reflink_iflag(sc, sc->ip->i_ino);
- xfs_scrub_inode_xref(sc, ino, dip);
+ xfs_scrub_inode_xref(sc, sc->ip->i_ino, &di);
out:
- if (bp)
- xfs_trans_brelse(sc->tp, bp);
return error;
}
diff --git a/fs/xfs/scrub/parent.c b/fs/xfs/scrub/parent.c
index 0d3851410c74..1fb88c18d455 100644
--- a/fs/xfs/scrub/parent.c
+++ b/fs/xfs/scrub/parent.c
@@ -167,8 +167,18 @@ xfs_scrub_parent_validate(
* if the parent pointer erroneously points to a file, we
* can't use DONTCACHE here because DONTCACHE inodes can trigger
* immediate inactive cleanup of the inode.
+ *
+ * If _iget returns -EINVAL then the parent inode number is garbage
+ * and the directory is corrupt. If the _iget returns -EFSCORRUPTED
+ * or -EFSBADCRC then the parent is corrupt which is a cross
+ * referencing error. Any other error is an operational error.
*/
- error = xfs_iget(mp, sc->tp, dnum, 0, 0, &dp);
+ error = xfs_iget(mp, sc->tp, dnum, XFS_IGET_UNTRUSTED, 0, &dp);
+ if (error == -EINVAL) {
+ error = -EFSCORRUPTED;
+ xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, 0, &error);
+ goto out;
+ }
if (!xfs_scrub_fblock_xref_process_error(sc, XFS_DATA_FORK, 0, &error))
goto out;
if (dp == sc->ip || !S_ISDIR(VFS_I(dp)->i_mode)) {
diff --git a/fs/xfs/scrub/quota.c b/fs/xfs/scrub/quota.c
index 51daa4ae2627..6ba465e6c885 100644
--- a/fs/xfs/scrub/quota.c
+++ b/fs/xfs/scrub/quota.c
@@ -219,7 +219,7 @@ xfs_scrub_quota(
/* Look for problem extents. */
xfs_ilock(ip, XFS_ILOCK_EXCL);
if (ip->i_d.di_flags & XFS_DIFLAG_REALTIME) {
- xfs_scrub_ino_set_corrupt(sc, sc->ip->i_ino, NULL);
+ xfs_scrub_ino_set_corrupt(sc, sc->ip->i_ino);
goto out_unlock_inode;
}
max_dqid_off = ((xfs_dqid_t)-1) / qi->qi_dqperchunk;
diff --git a/fs/xfs/scrub/rtbitmap.c b/fs/xfs/scrub/rtbitmap.c
index 26390991369a..39c41dfe08ee 100644
--- a/fs/xfs/scrub/rtbitmap.c
+++ b/fs/xfs/scrub/rtbitmap.c
@@ -116,8 +116,7 @@ xfs_scrub_xref_is_used_rt_space(
if (!xfs_scrub_should_check_xref(sc, &error, NULL))
goto out_unlock;
if (is_free)
- xfs_scrub_ino_xref_set_corrupt(sc, sc->mp->m_rbmip->i_ino,
- NULL);
+ xfs_scrub_ino_xref_set_corrupt(sc, sc->mp->m_rbmip->i_ino);
out_unlock:
xfs_iunlock(sc->mp->m_rbmip, XFS_ILOCK_SHARED | XFS_ILOCK_RTBITMAP);
}
diff --git a/fs/xfs/scrub/trace.h b/fs/xfs/scrub/trace.h
index 4dc896852bf0..5d2b1c241be5 100644
--- a/fs/xfs/scrub/trace.h
+++ b/fs/xfs/scrub/trace.h
@@ -174,53 +174,32 @@ DEFINE_SCRUB_BLOCK_ERROR_EVENT(xfs_scrub_block_error);
DEFINE_SCRUB_BLOCK_ERROR_EVENT(xfs_scrub_block_preen);
DECLARE_EVENT_CLASS(xfs_scrub_ino_error_class,
- TP_PROTO(struct xfs_scrub_context *sc, xfs_ino_t ino, xfs_daddr_t daddr,
- void *ret_ip),
- TP_ARGS(sc, ino, daddr, ret_ip),
+ TP_PROTO(struct xfs_scrub_context *sc, xfs_ino_t ino, void *ret_ip),
+ TP_ARGS(sc, ino, ret_ip),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(xfs_ino_t, ino)
__field(unsigned int, type)
- __field(xfs_agnumber_t, agno)
- __field(xfs_agblock_t, bno)
__field(void *, ret_ip)
),
TP_fast_assign(
- xfs_fsblock_t fsbno;
- xfs_agnumber_t agno;
- xfs_agblock_t bno;
-
- if (daddr) {
- fsbno = XFS_DADDR_TO_FSB(sc->mp, daddr);
- agno = XFS_FSB_TO_AGNO(sc->mp, fsbno);
- bno = XFS_FSB_TO_AGBNO(sc->mp, fsbno);
- } else {
- agno = XFS_INO_TO_AGNO(sc->mp, ino);
- bno = XFS_AGINO_TO_AGBNO(sc->mp,
- XFS_INO_TO_AGINO(sc->mp, ino));
- }
-
__entry->dev = sc->mp->m_super->s_dev;
__entry->ino = ino;
__entry->type = sc->sm->sm_type;
- __entry->agno = agno;
- __entry->bno = bno;
__entry->ret_ip = ret_ip;
),
- TP_printk("dev %d:%d ino 0x%llx type %u agno %u agbno %u ret_ip %pS",
+ TP_printk("dev %d:%d ino 0x%llx type %u ret_ip %pS",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->ino,
__entry->type,
- __entry->agno,
- __entry->bno,
__entry->ret_ip)
)
#define DEFINE_SCRUB_INO_ERROR_EVENT(name) \
DEFINE_EVENT(xfs_scrub_ino_error_class, name, \
TP_PROTO(struct xfs_scrub_context *sc, xfs_ino_t ino, \
- xfs_daddr_t daddr, void *ret_ip), \
- TP_ARGS(sc, ino, daddr, ret_ip))
+ void *ret_ip), \
+ TP_ARGS(sc, ino, ret_ip))
DEFINE_SCRUB_INO_ERROR_EVENT(xfs_scrub_ino_error);
DEFINE_SCRUB_INO_ERROR_EVENT(xfs_scrub_ino_preen);
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 9c6a830da0ee..19eadc807056 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -209,7 +209,8 @@ xfs_setfilesize_trans_alloc(
struct xfs_trans *tp;
int error;
- error = xfs_trans_alloc(mp, &M_RES(mp)->tr_fsyncts, 0, 0, 0, &tp);
+ error = xfs_trans_alloc(mp, &M_RES(mp)->tr_fsyncts, 0, 0,
+ XFS_TRANS_NOFS, &tp);
if (error)
return error;
@@ -1330,21 +1331,20 @@ xfs_get_blocks(
end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + size);
offset_fsb = XFS_B_TO_FSBT(mp, offset);
- error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb,
- &imap, &nimaps, XFS_BMAPI_ENTIRE);
+ error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap,
+ &nimaps, 0);
if (error)
goto out_unlock;
-
- if (nimaps) {
- trace_xfs_get_blocks_found(ip, offset, size,
- imap.br_state == XFS_EXT_UNWRITTEN ?
- XFS_IO_UNWRITTEN : XFS_IO_OVERWRITE, &imap);
- xfs_iunlock(ip, lockmode);
- } else {
+ if (!nimaps) {
trace_xfs_get_blocks_notfound(ip, offset, size);
goto out_unlock;
}
+ trace_xfs_get_blocks_found(ip, offset, size,
+ imap.br_state == XFS_EXT_UNWRITTEN ?
+ XFS_IO_UNWRITTEN : XFS_IO_OVERWRITE, &imap);
+ xfs_iunlock(ip, lockmode);
+
/* trim mapping down to size requested */
xfs_map_trim_size(inode, iblock, bh_result, &imap, offset, size);
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index c83f549dc17b..05dee8fdd895 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -1208,18 +1208,15 @@ xfs_free_file_space(
/*
* Now that we've unmap all full blocks we'll have to zero out any
- * partial block at the beginning and/or end. xfs_zero_range is
- * smart enough to skip any holes, including those we just created,
- * but we must take care not to zero beyond EOF and enlarge i_size.
+ * partial block at the beginning and/or end. iomap_zero_range is smart
+ * enough to skip any holes, including those we just created, but we
+ * must take care not to zero beyond EOF and enlarge i_size.
*/
-
if (offset >= XFS_ISIZE(ip))
return 0;
-
if (offset + len > XFS_ISIZE(ip))
len = XFS_ISIZE(ip) - offset;
-
- return xfs_zero_range(ip, offset, len, NULL);
+ return iomap_zero_range(VFS_I(ip), offset, len, NULL, &xfs_iomap_ops);
}
/*
@@ -1899,17 +1896,28 @@ xfs_swap_extents(
* performed with log redo items!
*/
if (xfs_sb_version_hasrmapbt(&mp->m_sb)) {
+ int w = XFS_DATA_FORK;
+ uint32_t ipnext = XFS_IFORK_NEXTENTS(ip, w);
+ uint32_t tipnext = XFS_IFORK_NEXTENTS(tip, w);
+
+ /*
+ * Conceptually this shouldn't affect the shape of either bmbt,
+ * but since we atomically move extents one by one, we reserve
+ * enough space to rebuild both trees.
+ */
+ resblks = XFS_SWAP_RMAP_SPACE_RES(mp, ipnext, w);
+ resblks += XFS_SWAP_RMAP_SPACE_RES(mp, tipnext, w);
+
/*
- * Conceptually this shouldn't affect the shape of either
- * bmbt, but since we atomically move extents one by one,
- * we reserve enough space to rebuild both trees.
+ * Handle the corner case where either inode might straddle the
+ * btree format boundary. If so, the inode could bounce between
+ * btree <-> extent format on unmap -> remap cycles, freeing and
+ * allocating a bmapbt block each time.
*/
- resblks = XFS_SWAP_RMAP_SPACE_RES(mp,
- XFS_IFORK_NEXTENTS(ip, XFS_DATA_FORK),
- XFS_DATA_FORK) +
- XFS_SWAP_RMAP_SPACE_RES(mp,
- XFS_IFORK_NEXTENTS(tip, XFS_DATA_FORK),
- XFS_DATA_FORK);
+ if (ipnext == (XFS_IFORK_MAXEXT(ip, w) + 1))
+ resblks += XFS_IFORK_MAXEXT(ip, w);
+ if (tipnext == (XFS_IFORK_MAXEXT(tip, w) + 1))
+ resblks += XFS_IFORK_MAXEXT(tip, w);
}
error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0, 0, &tp);
if (error)
@@ -2003,11 +2011,11 @@ xfs_swap_extents(
ip->i_cowfp = tip->i_cowfp;
tip->i_cowfp = cowfp;
- if (ip->i_cowfp && ip->i_cnextents)
+ if (ip->i_cowfp && ip->i_cowfp->if_bytes)
xfs_inode_set_cowblocks_tag(ip);
else
xfs_inode_clear_cowblocks_tag(ip);
- if (tip->i_cowfp && tip->i_cnextents)
+ if (tip->i_cowfp && tip->i_cowfp->if_bytes)
xfs_inode_set_cowblocks_tag(tip);
else
xfs_inode_clear_cowblocks_tag(tip);
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index d1da2ee9e6db..ac669a10c62f 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -1708,7 +1708,7 @@ xfs_buftarg_isolate(
* zero. If the value is already zero, we need to reclaim the
* buffer, otherwise it gets another trip through the LRU.
*/
- if (!atomic_add_unless(&bp->b_lru_ref, -1, 0)) {
+ if (atomic_add_unless(&bp->b_lru_ref, -1, 0)) {
spin_unlock(&bp->b_lock);
return LRU_ROTATE;
}
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index 270ddb4d2313..82ad270e390e 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -460,7 +460,7 @@ xfs_buf_item_unpin(
list_del_init(&bp->b_li_list);
bp->b_iodone = NULL;
} else {
- spin_lock(&ailp->xa_lock);
+ spin_lock(&ailp->ail_lock);
xfs_trans_ail_delete(ailp, lip, SHUTDOWN_LOG_IO_ERROR);
xfs_buf_item_relse(bp);
ASSERT(bp->b_log_item == NULL);
@@ -1057,12 +1057,12 @@ xfs_buf_do_callbacks_fail(
lip = list_first_entry(&bp->b_li_list, struct xfs_log_item,
li_bio_list);
ailp = lip->li_ailp;
- spin_lock(&ailp->xa_lock);
+ spin_lock(&ailp->ail_lock);
list_for_each_entry(lip, &bp->b_li_list, li_bio_list) {
if (lip->li_ops->iop_error)
lip->li_ops->iop_error(lip, bp);
}
- spin_unlock(&ailp->xa_lock);
+ spin_unlock(&ailp->ail_lock);
}
static bool
@@ -1226,7 +1226,7 @@ xfs_buf_iodone(
*
* Either way, AIL is useless if we're forcing a shutdown.
*/
- spin_lock(&ailp->xa_lock);
+ spin_lock(&ailp->ail_lock);
xfs_trans_ail_delete(ailp, lip, SHUTDOWN_CORRUPT_INCORE);
xfs_buf_item_free(BUF_ITEM(lip));
}
@@ -1246,7 +1246,7 @@ xfs_buf_resubmit_failed_buffers(
/*
* Clear XFS_LI_FAILED flag from all items before resubmit
*
- * XFS_LI_FAILED set/clear is protected by xa_lock, caller this
+ * XFS_LI_FAILED set/clear is protected by ail_lock, caller this
* function already have it acquired
*/
list_for_each_entry(lip, &bp->b_li_list, li_bio_list)
diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c
index 43572f8a1b8e..a7daef9e16bf 100644
--- a/fs/xfs/xfs_dquot.c
+++ b/fs/xfs/xfs_dquot.c
@@ -394,8 +394,6 @@ xfs_qm_dqalloc(
error1:
xfs_defer_cancel(&dfops);
error0:
- xfs_iunlock(quotip, XFS_ILOCK_EXCL);
-
return error;
}
@@ -920,7 +918,7 @@ xfs_qm_dqflush_done(
(lip->li_flags & XFS_LI_FAILED))) {
/* xfs_trans_ail_delete() drops the AIL lock. */
- spin_lock(&ailp->xa_lock);
+ spin_lock(&ailp->ail_lock);
if (lip->li_lsn == qip->qli_flush_lsn) {
xfs_trans_ail_delete(ailp, lip, SHUTDOWN_CORRUPT_INCORE);
} else {
@@ -930,7 +928,7 @@ xfs_qm_dqflush_done(
*/
if (lip->li_flags & XFS_LI_FAILED)
xfs_clear_li_failed(lip);
- spin_unlock(&ailp->xa_lock);
+ spin_unlock(&ailp->ail_lock);
}
}
diff --git a/fs/xfs/xfs_dquot_item.c b/fs/xfs/xfs_dquot_item.c
index 96eaa6933709..4b331e354da7 100644
--- a/fs/xfs/xfs_dquot_item.c
+++ b/fs/xfs/xfs_dquot_item.c
@@ -157,8 +157,9 @@ xfs_dquot_item_error(
STATIC uint
xfs_qm_dquot_logitem_push(
struct xfs_log_item *lip,
- struct list_head *buffer_list) __releases(&lip->li_ailp->xa_lock)
- __acquires(&lip->li_ailp->xa_lock)
+ struct list_head *buffer_list)
+ __releases(&lip->li_ailp->ail_lock)
+ __acquires(&lip->li_ailp->ail_lock)
{
struct xfs_dquot *dqp = DQUOT_ITEM(lip)->qli_dquot;
struct xfs_buf *bp = lip->li_buf;
@@ -205,7 +206,7 @@ xfs_qm_dquot_logitem_push(
goto out_unlock;
}
- spin_unlock(&lip->li_ailp->xa_lock);
+ spin_unlock(&lip->li_ailp->ail_lock);
error = xfs_qm_dqflush(dqp, &bp);
if (error) {
@@ -217,7 +218,7 @@ xfs_qm_dquot_logitem_push(
xfs_buf_relse(bp);
}
- spin_lock(&lip->li_ailp->xa_lock);
+ spin_lock(&lip->li_ailp->ail_lock);
out_unlock:
xfs_dqunlock(dqp);
return rval;
@@ -400,7 +401,7 @@ xfs_qm_qoffend_logitem_committed(
* Delete the qoff-start logitem from the AIL.
* xfs_trans_ail_delete() drops the AIL lock.
*/
- spin_lock(&ailp->xa_lock);
+ spin_lock(&ailp->ail_lock);
xfs_trans_ail_delete(ailp, &qfs->qql_item, SHUTDOWN_LOG_IO_ERROR);
kmem_free(qfs->qql_item.li_lv_shadow);
diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c
index ccf520f0b00d..a63f5083f497 100644
--- a/fs/xfs/xfs_error.c
+++ b/fs/xfs/xfs_error.c
@@ -347,27 +347,32 @@ xfs_corruption_error(
* values, and omit the stack trace unless the error level is tuned high.
*/
void
-xfs_verifier_error(
+xfs_buf_verifier_error(
struct xfs_buf *bp,
int error,
+ const char *name,
+ void *buf,
+ size_t bufsz,
xfs_failaddr_t failaddr)
{
struct xfs_mount *mp = bp->b_target->bt_mount;
xfs_failaddr_t fa;
+ int sz;
fa = failaddr ? failaddr : __return_address;
__xfs_buf_ioerror(bp, error, fa);
- xfs_alert(mp, "Metadata %s detected at %pS, %s block 0x%llx",
+ xfs_alert(mp, "Metadata %s detected at %pS, %s block 0x%llx %s",
bp->b_error == -EFSBADCRC ? "CRC error" : "corruption",
- fa, bp->b_ops->name, bp->b_bn);
+ fa, bp->b_ops->name, bp->b_bn, name);
xfs_alert(mp, "Unmount and run xfs_repair");
if (xfs_error_level >= XFS_ERRLEVEL_LOW) {
+ sz = min_t(size_t, XFS_CORRUPTION_DUMP_LEN, bufsz);
xfs_alert(mp, "First %d bytes of corrupted metadata buffer:",
- XFS_CORRUPTION_DUMP_LEN);
- xfs_hex_dump(xfs_buf_offset(bp, 0), XFS_CORRUPTION_DUMP_LEN);
+ sz);
+ xfs_hex_dump(buf, sz);
}
if (xfs_error_level >= XFS_ERRLEVEL_HIGH)
@@ -375,6 +380,20 @@ xfs_verifier_error(
}
/*
+ * Warnings specifically for verifier errors. Differentiate CRC vs. invalid
+ * values, and omit the stack trace unless the error level is tuned high.
+ */
+void
+xfs_verifier_error(
+ struct xfs_buf *bp,
+ int error,
+ xfs_failaddr_t failaddr)
+{
+ return xfs_buf_verifier_error(bp, error, "", xfs_buf_offset(bp, 0),
+ XFS_CORRUPTION_DUMP_LEN, failaddr);
+}
+
+/*
* Warnings for inode corruption problems. Don't bother with the stack
* trace unless the error level is turned up high.
*/
diff --git a/fs/xfs/xfs_error.h b/fs/xfs/xfs_error.h
index 7e728c5a46b8..ce391349e78b 100644
--- a/fs/xfs/xfs_error.h
+++ b/fs/xfs/xfs_error.h
@@ -26,6 +26,9 @@ extern void xfs_error_report(const char *tag, int level, struct xfs_mount *mp,
extern void xfs_corruption_error(const char *tag, int level,
struct xfs_mount *mp, void *p, const char *filename,
int linenum, xfs_failaddr_t failaddr);
+extern void xfs_buf_verifier_error(struct xfs_buf *bp, int error,
+ const char *name, void *buf, size_t bufsz,
+ xfs_failaddr_t failaddr);
extern void xfs_verifier_error(struct xfs_buf *bp, int error,
xfs_failaddr_t failaddr);
extern void xfs_inode_verifier_error(struct xfs_inode *ip, int error,
diff --git a/fs/xfs/xfs_export.c b/fs/xfs/xfs_export.c
index fe1bfee35898..761f3189eff2 100644
--- a/fs/xfs/xfs_export.c
+++ b/fs/xfs/xfs_export.c
@@ -237,7 +237,7 @@ xfs_fs_nfs_commit_metadata(
if (!lsn)
return 0;
- return _xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, NULL);
+ return xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, NULL);
}
const struct export_operations xfs_export_operations = {
diff --git a/fs/xfs/xfs_extent_busy.c b/fs/xfs/xfs_extent_busy.c
index 77760dbf0242..13e3d1a69e76 100644
--- a/fs/xfs/xfs_extent_busy.c
+++ b/fs/xfs/xfs_extent_busy.c
@@ -611,10 +611,9 @@ xfs_extent_busy_flush(
unsigned busy_gen)
{
DEFINE_WAIT (wait);
- int log_flushed = 0, error;
+ int error;
- trace_xfs_log_force(mp, 0, _THIS_IP_);
- error = _xfs_log_force(mp, XFS_LOG_SYNC, &log_flushed);
+ error = xfs_log_force(mp, XFS_LOG_SYNC);
if (error)
return;
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 9ea08326f876..299aee4b7b0b 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -48,20 +48,6 @@
static const struct vm_operations_struct xfs_file_vm_ops;
-/*
- * Clear the specified ranges to zero through either the pagecache or DAX.
- * Holes and unwritten extents will be left as-is as they already are zeroed.
- */
-int
-xfs_zero_range(
- struct xfs_inode *ip,
- xfs_off_t pos,
- xfs_off_t count,
- bool *did_zero)
-{
- return iomap_zero_range(VFS_I(ip), pos, count, did_zero, &xfs_iomap_ops);
-}
-
int
xfs_update_prealloc_flags(
struct xfs_inode *ip,
@@ -122,7 +108,7 @@ xfs_dir_fsync(
if (!lsn)
return 0;
- return _xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, NULL);
+ return xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, NULL);
}
STATIC int
@@ -182,7 +168,7 @@ xfs_file_fsync(
}
if (lsn) {
- error = _xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, &log_flushed);
+ error = xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, &log_flushed);
ip->i_itemp->ili_fsync_fields = 0;
}
xfs_iunlock(ip, XFS_ILOCK_SHARED);
@@ -301,31 +287,6 @@ xfs_file_read_iter(
}
/*
- * Zero any on disk space between the current EOF and the new, larger EOF.
- *
- * This handles the normal case of zeroing the remainder of the last block in
- * the file and the unusual case of zeroing blocks out beyond the size of the
- * file. This second case only happens with fixed size extents and when the
- * system crashes before the inode size was updated but after blocks were
- * allocated.
- *
- * Expects the iolock to be held exclusive, and will take the ilock internally.
- */
-int /* error (positive) */
-xfs_zero_eof(
- struct xfs_inode *ip,
- xfs_off_t offset, /* starting I/O offset */
- xfs_fsize_t isize, /* current inode size */
- bool *did_zeroing)
-{
- ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
- ASSERT(offset > isize);
-
- trace_xfs_zero_eof(ip, isize, offset - isize);
- return xfs_zero_range(ip, isize, offset - isize, did_zeroing);
-}
-
-/*
* Common pre-write limit and setup checks.
*
* Called with the iolocked held either shared and exclusive according to
@@ -344,6 +305,7 @@ xfs_file_aio_write_checks(
ssize_t error = 0;
size_t count = iov_iter_count(from);
bool drained_dio = false;
+ loff_t isize;
restart:
error = generic_write_checks(iocb, from);
@@ -380,7 +342,8 @@ restart:
* and hence be able to correctly determine if we need to run zeroing.
*/
spin_lock(&ip->i_flags_lock);
- if (iocb->ki_pos > i_size_read(inode)) {
+ isize = i_size_read(inode);
+ if (iocb->ki_pos > isize) {
spin_unlock(&ip->i_flags_lock);
if (!drained_dio) {
if (*iolock == XFS_IOLOCK_SHARED) {
@@ -401,7 +364,10 @@ restart:
drained_dio = true;
goto restart;
}
- error = xfs_zero_eof(ip, iocb->ki_pos, i_size_read(inode), NULL);
+
+ trace_xfs_zero_eof(ip, isize, iocb->ki_pos - isize);
+ error = iomap_zero_range(inode, isize, iocb->ki_pos - isize,
+ NULL, &xfs_iomap_ops);
if (error)
return error;
} else
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index 8b4545623e25..523792768080 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -217,7 +217,7 @@ xfs_growfs_data_private(
}
agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, bp);
- for (bucket = 0; bucket < XFS_AGFL_SIZE(mp); bucket++)
+ for (bucket = 0; bucket < xfs_agfl_size(mp); bucket++)
agfl_bno[bucket] = cpu_to_be32(NULLAGBLOCK);
error = xfs_bwrite(bp);
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index d53a316162d6..9a18f69f6e96 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -483,7 +483,28 @@ xfs_iget_cache_miss(
trace_xfs_iget_miss(ip);
- if ((VFS_I(ip)->i_mode == 0) && !(flags & XFS_IGET_CREATE)) {
+
+ /*
+ * If we are allocating a new inode, then check what was returned is
+ * actually a free, empty inode. If we are not allocating an inode,
+ * the check we didn't find a free inode.
+ */
+ if (flags & XFS_IGET_CREATE) {
+ if (VFS_I(ip)->i_mode != 0) {
+ xfs_warn(mp,
+"Corruption detected! Free inode 0x%llx not marked free on disk",
+ ino);
+ error = -EFSCORRUPTED;
+ goto out_destroy;
+ }
+ if (ip->i_d.di_nblocks != 0) {
+ xfs_warn(mp,
+"Corruption detected! Free inode 0x%llx has blocks allocated!",
+ ino);
+ error = -EFSCORRUPTED;
+ goto out_destroy;
+ }
+ } else if (VFS_I(ip)->i_mode == 0) {
error = -ENOENT;
goto out_destroy;
}
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 604ee384a00a..3e3aab3888fa 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1872,6 +1872,7 @@ xfs_inactive(
xfs_inode_t *ip)
{
struct xfs_mount *mp;
+ struct xfs_ifork *cow_ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
int error;
int truncate = 0;
@@ -1892,6 +1893,10 @@ xfs_inactive(
if (mp->m_flags & XFS_MOUNT_RDONLY)
return;
+ /* Try to clean out the cow blocks if there are any. */
+ if (xfs_is_reflink_inode(ip) && cow_ifp->if_bytes > 0)
+ xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF, true);
+
if (VFS_I(ip)->i_nlink != 0) {
/*
* force is true because we are evicting an inode from the
@@ -2470,6 +2475,10 @@ xfs_ifree(
ip->i_d.di_forkoff = 0; /* mark the attr fork not in use */
ip->i_d.di_format = XFS_DINODE_FMT_EXTENTS;
ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS;
+
+ /* Don't attempt to replay owner changes for a deleted inode */
+ ip->i_itemp->ili_fields &= ~(XFS_ILOG_AOWNER|XFS_ILOG_DOWNER);
+
/*
* Bump the generation count so no one will be confused
* by reincarnations of this inode.
@@ -2497,7 +2506,7 @@ xfs_iunpin(
trace_xfs_inode_unpin_nowait(ip, _RET_IP_);
/* Give the log a push to start the unpinning I/O */
- xfs_log_force_lsn(ip->i_mount, ip->i_itemp->ili_last_lsn, 0);
+ xfs_log_force_lsn(ip->i_mount, ip->i_itemp->ili_last_lsn, 0, NULL);
}
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 3e8dc990d41c..132d8aa2afc4 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -443,10 +443,6 @@ enum xfs_prealloc_flags {
int xfs_update_prealloc_flags(struct xfs_inode *ip,
enum xfs_prealloc_flags flags);
-int xfs_zero_eof(struct xfs_inode *ip, xfs_off_t offset,
- xfs_fsize_t isize, bool *did_zeroing);
-int xfs_zero_range(struct xfs_inode *ip, xfs_off_t pos, xfs_off_t count,
- bool *did_zero);
/* from xfs_iops.c */
extern void xfs_setup_inode(struct xfs_inode *ip);
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index d5037f060d6f..34b91b789702 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -502,8 +502,8 @@ STATIC uint
xfs_inode_item_push(
struct xfs_log_item *lip,
struct list_head *buffer_list)
- __releases(&lip->li_ailp->xa_lock)
- __acquires(&lip->li_ailp->xa_lock)
+ __releases(&lip->li_ailp->ail_lock)
+ __acquires(&lip->li_ailp->ail_lock)
{
struct xfs_inode_log_item *iip = INODE_ITEM(lip);
struct xfs_inode *ip = iip->ili_inode;
@@ -562,7 +562,7 @@ xfs_inode_item_push(
ASSERT(iip->ili_fields != 0 || XFS_FORCED_SHUTDOWN(ip->i_mount));
ASSERT(iip->ili_logged == 0 || XFS_FORCED_SHUTDOWN(ip->i_mount));
- spin_unlock(&lip->li_ailp->xa_lock);
+ spin_unlock(&lip->li_ailp->ail_lock);
error = xfs_iflush(ip, &bp);
if (!error) {
@@ -571,7 +571,7 @@ xfs_inode_item_push(
xfs_buf_relse(bp);
}
- spin_lock(&lip->li_ailp->xa_lock);
+ spin_lock(&lip->li_ailp->ail_lock);
out_unlock:
xfs_iunlock(ip, XFS_ILOCK_SHARED);
return rval;
@@ -579,9 +579,6 @@ out_unlock:
/*
* Unlock the inode associated with the inode log item.
- * Clear the fields of the inode and inode log item that
- * are specific to the current transaction. If the
- * hold flags is set, do not unlock the inode.
*/
STATIC void
xfs_inode_item_unlock(
@@ -637,10 +634,6 @@ xfs_inode_item_committed(
return lsn;
}
-/*
- * XXX rcc - this one really has to do something. Probably needs
- * to stamp in a new field in the incore inode.
- */
STATIC void
xfs_inode_item_committing(
struct xfs_log_item *lip,
@@ -759,7 +752,7 @@ xfs_iflush_done(
bool mlip_changed = false;
/* this is an opencoded batch version of xfs_trans_ail_delete */
- spin_lock(&ailp->xa_lock);
+ spin_lock(&ailp->ail_lock);
list_for_each_entry(blip, &tmp, li_bio_list) {
if (INODE_ITEM(blip)->ili_logged &&
blip->li_lsn == INODE_ITEM(blip)->ili_flush_lsn)
@@ -770,15 +763,15 @@ xfs_iflush_done(
}
if (mlip_changed) {
- if (!XFS_FORCED_SHUTDOWN(ailp->xa_mount))
- xlog_assign_tail_lsn_locked(ailp->xa_mount);
- if (list_empty(&ailp->xa_ail))
- wake_up_all(&ailp->xa_empty);
+ if (!XFS_FORCED_SHUTDOWN(ailp->ail_mount))
+ xlog_assign_tail_lsn_locked(ailp->ail_mount);
+ if (list_empty(&ailp->ail_head))
+ wake_up_all(&ailp->ail_empty);
}
- spin_unlock(&ailp->xa_lock);
+ spin_unlock(&ailp->ail_lock);
if (mlip_changed)
- xfs_log_space_wake(ailp->xa_mount);
+ xfs_log_space_wake(ailp->ail_mount);
}
/*
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index 56475fcd76f2..e0307fbff911 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -46,6 +46,7 @@
#include <linux/security.h>
#include <linux/iomap.h>
#include <linux/slab.h>
+#include <linux/iversion.h>
/*
* Directories have different lock order w.r.t. mmap_sem compared to regular
@@ -874,7 +875,9 @@ xfs_setattr_size(
* truncate.
*/
if (newsize > oldsize) {
- error = xfs_zero_eof(ip, newsize, oldsize, &did_zeroing);
+ trace_xfs_zero_eof(ip, oldsize, newsize - oldsize);
+ error = iomap_zero_range(inode, oldsize, newsize - oldsize,
+ &did_zeroing, &xfs_iomap_ops);
} else {
error = iomap_truncate_page(inode, newsize, &did_zeroing,
&xfs_iomap_ops);
@@ -1052,11 +1055,21 @@ xfs_vn_update_time(
{
struct xfs_inode *ip = XFS_I(inode);
struct xfs_mount *mp = ip->i_mount;
+ int log_flags = XFS_ILOG_TIMESTAMP;
struct xfs_trans *tp;
int error;
trace_xfs_update_time(ip);
+ if (inode->i_sb->s_flags & SB_LAZYTIME) {
+ if (!((flags & S_VERSION) &&
+ inode_maybe_inc_iversion(inode, false)))
+ return generic_update_time(inode, now, flags);
+
+ /* Capture the iversion update that just occurred */
+ log_flags |= XFS_ILOG_CORE;
+ }
+
error = xfs_trans_alloc(mp, &M_RES(mp)->tr_fsyncts, 0, 0, 0, &tp);
if (error)
return error;
@@ -1070,7 +1083,7 @@ xfs_vn_update_time(
inode->i_atime = *now;
xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
- xfs_trans_log_inode(tp, ip, XFS_ILOG_TIMESTAMP);
+ xfs_trans_log_inode(tp, ip, log_flags);
return xfs_trans_commit(tp);
}
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 3e5ba1ecc080..b9c9c848146b 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -869,7 +869,7 @@ xfs_log_unmount_write(xfs_mount_t *mp)
return 0;
}
- error = _xfs_log_force(mp, XFS_LOG_SYNC, NULL);
+ error = xfs_log_force(mp, XFS_LOG_SYNC);
ASSERT(error || !(XLOG_FORCED_SHUTDOWN(log)));
#ifdef DEBUG
@@ -1149,7 +1149,7 @@ xlog_assign_tail_lsn_locked(
struct xfs_log_item *lip;
xfs_lsn_t tail_lsn;
- assert_spin_locked(&mp->m_ail->xa_lock);
+ assert_spin_locked(&mp->m_ail->ail_lock);
/*
* To make sure we always have a valid LSN for the log tail we keep
@@ -1172,9 +1172,9 @@ xlog_assign_tail_lsn(
{
xfs_lsn_t tail_lsn;
- spin_lock(&mp->m_ail->xa_lock);
+ spin_lock(&mp->m_ail->ail_lock);
tail_lsn = xlog_assign_tail_lsn_locked(mp);
- spin_unlock(&mp->m_ail->xa_lock);
+ spin_unlock(&mp->m_ail->ail_lock);
return tail_lsn;
}
@@ -3304,269 +3304,215 @@ xlog_state_switch_iclogs(
* not in the active nor dirty state.
*/
int
-_xfs_log_force(
+xfs_log_force(
struct xfs_mount *mp,
- uint flags,
- int *log_flushed)
+ uint flags)
{
struct xlog *log = mp->m_log;
struct xlog_in_core *iclog;
xfs_lsn_t lsn;
XFS_STATS_INC(mp, xs_log_force);
+ trace_xfs_log_force(mp, 0, _RET_IP_);
xlog_cil_force(log);
spin_lock(&log->l_icloglock);
-
iclog = log->l_iclog;
- if (iclog->ic_state & XLOG_STATE_IOERROR) {
- spin_unlock(&log->l_icloglock);
- return -EIO;
- }
+ if (iclog->ic_state & XLOG_STATE_IOERROR)
+ goto out_error;
- /* If the head iclog is not active nor dirty, we just attach
- * ourselves to the head and go to sleep.
- */
- if (iclog->ic_state == XLOG_STATE_ACTIVE ||
- iclog->ic_state == XLOG_STATE_DIRTY) {
+ if (iclog->ic_state == XLOG_STATE_DIRTY ||
+ (iclog->ic_state == XLOG_STATE_ACTIVE &&
+ atomic_read(&iclog->ic_refcnt) == 0 && iclog->ic_offset == 0)) {
/*
- * If the head is dirty or (active and empty), then
- * we need to look at the previous iclog. If the previous
- * iclog is active or dirty we are done. There is nothing
- * to sync out. Otherwise, we attach ourselves to the
+ * If the head is dirty or (active and empty), then we need to
+ * look at the previous iclog.
+ *
+ * If the previous iclog is active or dirty we are done. There
+ * is nothing to sync out. Otherwise, we attach ourselves to the
* previous iclog and go to sleep.
*/
- if (iclog->ic_state == XLOG_STATE_DIRTY ||
- (atomic_read(&iclog->ic_refcnt) == 0
- && iclog->ic_offset == 0)) {
- iclog = iclog->ic_prev;
- if (iclog->ic_state == XLOG_STATE_ACTIVE ||
- iclog->ic_state == XLOG_STATE_DIRTY)
- goto no_sleep;
- else
- goto maybe_sleep;
- } else {
- if (atomic_read(&iclog->ic_refcnt) == 0) {
- /* We are the only one with access to this
- * iclog. Flush it out now. There should
- * be a roundoff of zero to show that someone
- * has already taken care of the roundoff from
- * the previous sync.
- */
- atomic_inc(&iclog->ic_refcnt);
- lsn = be64_to_cpu(iclog->ic_header.h_lsn);
- xlog_state_switch_iclogs(log, iclog, 0);
- spin_unlock(&log->l_icloglock);
+ iclog = iclog->ic_prev;
+ if (iclog->ic_state == XLOG_STATE_ACTIVE ||
+ iclog->ic_state == XLOG_STATE_DIRTY)
+ goto out_unlock;
+ } else if (iclog->ic_state == XLOG_STATE_ACTIVE) {
+ if (atomic_read(&iclog->ic_refcnt) == 0) {
+ /*
+ * We are the only one with access to this iclog.
+ *
+ * Flush it out now. There should be a roundoff of zero
+ * to show that someone has already taken care of the
+ * roundoff from the previous sync.
+ */
+ atomic_inc(&iclog->ic_refcnt);
+ lsn = be64_to_cpu(iclog->ic_header.h_lsn);
+ xlog_state_switch_iclogs(log, iclog, 0);
+ spin_unlock(&log->l_icloglock);
- if (xlog_state_release_iclog(log, iclog))
- return -EIO;
-
- if (log_flushed)
- *log_flushed = 1;
- spin_lock(&log->l_icloglock);
- if (be64_to_cpu(iclog->ic_header.h_lsn) == lsn &&
- iclog->ic_state != XLOG_STATE_DIRTY)
- goto maybe_sleep;
- else
- goto no_sleep;
- } else {
- /* Someone else is writing to this iclog.
- * Use its call to flush out the data. However,
- * the other thread may not force out this LR,
- * so we mark it WANT_SYNC.
- */
- xlog_state_switch_iclogs(log, iclog, 0);
- goto maybe_sleep;
- }
- }
- }
+ if (xlog_state_release_iclog(log, iclog))
+ return -EIO;
- /* By the time we come around again, the iclog could've been filled
- * which would give it another lsn. If we have a new lsn, just
- * return because the relevant data has been flushed.
- */
-maybe_sleep:
- if (flags & XFS_LOG_SYNC) {
- /*
- * We must check if we're shutting down here, before
- * we wait, while we're holding the l_icloglock.
- * Then we check again after waking up, in case our
- * sleep was disturbed by a bad news.
- */
- if (iclog->ic_state & XLOG_STATE_IOERROR) {
- spin_unlock(&log->l_icloglock);
- return -EIO;
+ spin_lock(&log->l_icloglock);
+ if (be64_to_cpu(iclog->ic_header.h_lsn) != lsn ||
+ iclog->ic_state == XLOG_STATE_DIRTY)
+ goto out_unlock;
+ } else {
+ /*
+ * Someone else is writing to this iclog.
+ *
+ * Use its call to flush out the data. However, the
+ * other thread may not force out this LR, so we mark
+ * it WANT_SYNC.
+ */
+ xlog_state_switch_iclogs(log, iclog, 0);
}
- XFS_STATS_INC(mp, xs_log_force_sleep);
- xlog_wait(&iclog->ic_force_wait, &log->l_icloglock);
+ } else {
/*
- * No need to grab the log lock here since we're
- * only deciding whether or not to return EIO
- * and the memory read should be atomic.
+ * If the head iclog is not active nor dirty, we just attach
+ * ourselves to the head and go to sleep if necessary.
*/
- if (iclog->ic_state & XLOG_STATE_IOERROR)
- return -EIO;
- } else {
-
-no_sleep:
- spin_unlock(&log->l_icloglock);
+ ;
}
+
+ if (!(flags & XFS_LOG_SYNC))
+ goto out_unlock;
+
+ if (iclog->ic_state & XLOG_STATE_IOERROR)
+ goto out_error;
+ XFS_STATS_INC(mp, xs_log_force_sleep);
+ xlog_wait(&iclog->ic_force_wait, &log->l_icloglock);
+ if (iclog->ic_state & XLOG_STATE_IOERROR)
+ return -EIO;
return 0;
-}
-/*
- * Wrapper for _xfs_log_force(), to be used when caller doesn't care
- * about errors or whether the log was flushed or not. This is the normal
- * interface to use when trying to unpin items or move the log forward.
- */
-void
-xfs_log_force(
- xfs_mount_t *mp,
- uint flags)
-{
- trace_xfs_log_force(mp, 0, _RET_IP_);
- _xfs_log_force(mp, flags, NULL);
+out_unlock:
+ spin_unlock(&log->l_icloglock);
+ return 0;
+out_error:
+ spin_unlock(&log->l_icloglock);
+ return -EIO;
}
-/*
- * Force the in-core log to disk for a specific LSN.
- *
- * Find in-core log with lsn.
- * If it is in the DIRTY state, just return.
- * If it is in the ACTIVE state, move the in-core log into the WANT_SYNC
- * state and go to sleep or return.
- * If it is in any other state, go to sleep or return.
- *
- * Synchronous forces are implemented with a signal variable. All callers
- * to force a given lsn to disk will wait on a the sv attached to the
- * specific in-core log. When given in-core log finally completes its
- * write to disk, that thread will wake up all threads waiting on the
- * sv.
- */
-int
-_xfs_log_force_lsn(
+static int
+__xfs_log_force_lsn(
struct xfs_mount *mp,
xfs_lsn_t lsn,
uint flags,
- int *log_flushed)
+ int *log_flushed,
+ bool already_slept)
{
struct xlog *log = mp->m_log;
struct xlog_in_core *iclog;
- int already_slept = 0;
- ASSERT(lsn != 0);
-
- XFS_STATS_INC(mp, xs_log_force);
-
- lsn = xlog_cil_force_lsn(log, lsn);
- if (lsn == NULLCOMMITLSN)
- return 0;
-
-try_again:
spin_lock(&log->l_icloglock);
iclog = log->l_iclog;
- if (iclog->ic_state & XLOG_STATE_IOERROR) {
- spin_unlock(&log->l_icloglock);
- return -EIO;
- }
+ if (iclog->ic_state & XLOG_STATE_IOERROR)
+ goto out_error;
- do {
- if (be64_to_cpu(iclog->ic_header.h_lsn) != lsn) {
- iclog = iclog->ic_next;
- continue;
- }
+ while (be64_to_cpu(iclog->ic_header.h_lsn) != lsn) {
+ iclog = iclog->ic_next;
+ if (iclog == log->l_iclog)
+ goto out_unlock;
+ }
- if (iclog->ic_state == XLOG_STATE_DIRTY) {
- spin_unlock(&log->l_icloglock);
- return 0;
- }
+ if (iclog->ic_state == XLOG_STATE_DIRTY)
+ goto out_unlock;
- if (iclog->ic_state == XLOG_STATE_ACTIVE) {
- /*
- * We sleep here if we haven't already slept (e.g.
- * this is the first time we've looked at the correct
- * iclog buf) and the buffer before us is going to
- * be sync'ed. The reason for this is that if we
- * are doing sync transactions here, by waiting for
- * the previous I/O to complete, we can allow a few
- * more transactions into this iclog before we close
- * it down.
- *
- * Otherwise, we mark the buffer WANT_SYNC, and bump
- * up the refcnt so we can release the log (which
- * drops the ref count). The state switch keeps new
- * transaction commits from using this buffer. When
- * the current commits finish writing into the buffer,
- * the refcount will drop to zero and the buffer will
- * go out then.
- */
- if (!already_slept &&
- (iclog->ic_prev->ic_state &
- (XLOG_STATE_WANT_SYNC | XLOG_STATE_SYNCING))) {
- ASSERT(!(iclog->ic_state & XLOG_STATE_IOERROR));
+ if (iclog->ic_state == XLOG_STATE_ACTIVE) {
+ /*
+ * We sleep here if we haven't already slept (e.g. this is the
+ * first time we've looked at the correct iclog buf) and the
+ * buffer before us is going to be sync'ed. The reason for this
+ * is that if we are doing sync transactions here, by waiting
+ * for the previous I/O to complete, we can allow a few more
+ * transactions into this iclog before we close it down.
+ *
+ * Otherwise, we mark the buffer WANT_SYNC, and bump up the
+ * refcnt so we can release the log (which drops the ref count).
+ * The state switch keeps new transaction commits from using
+ * this buffer. When the current commits finish writing into
+ * the buffer, the refcount will drop to zero and the buffer
+ * will go out then.
+ */
+ if (!already_slept &&
+ (iclog->ic_prev->ic_state &
+ (XLOG_STATE_WANT_SYNC | XLOG_STATE_SYNCING))) {
+ ASSERT(!(iclog->ic_state & XLOG_STATE_IOERROR));
- XFS_STATS_INC(mp, xs_log_force_sleep);
+ XFS_STATS_INC(mp, xs_log_force_sleep);
- xlog_wait(&iclog->ic_prev->ic_write_wait,
- &log->l_icloglock);
- already_slept = 1;
- goto try_again;
- }
- atomic_inc(&iclog->ic_refcnt);
- xlog_state_switch_iclogs(log, iclog, 0);
- spin_unlock(&log->l_icloglock);
- if (xlog_state_release_iclog(log, iclog))
- return -EIO;
- if (log_flushed)
- *log_flushed = 1;
- spin_lock(&log->l_icloglock);
+ xlog_wait(&iclog->ic_prev->ic_write_wait,
+ &log->l_icloglock);
+ return -EAGAIN;
}
+ atomic_inc(&iclog->ic_refcnt);
+ xlog_state_switch_iclogs(log, iclog, 0);
+ spin_unlock(&log->l_icloglock);
+ if (xlog_state_release_iclog(log, iclog))
+ return -EIO;
+ if (log_flushed)
+ *log_flushed = 1;
+ spin_lock(&log->l_icloglock);
+ }
- if ((flags & XFS_LOG_SYNC) && /* sleep */
- !(iclog->ic_state &
- (XLOG_STATE_ACTIVE | XLOG_STATE_DIRTY))) {
- /*
- * Don't wait on completion if we know that we've
- * gotten a log write error.
- */
- if (iclog->ic_state & XLOG_STATE_IOERROR) {
- spin_unlock(&log->l_icloglock);
- return -EIO;
- }
- XFS_STATS_INC(mp, xs_log_force_sleep);
- xlog_wait(&iclog->ic_force_wait, &log->l_icloglock);
- /*
- * No need to grab the log lock here since we're
- * only deciding whether or not to return EIO
- * and the memory read should be atomic.
- */
- if (iclog->ic_state & XLOG_STATE_IOERROR)
- return -EIO;
- } else { /* just return */
- spin_unlock(&log->l_icloglock);
- }
+ if (!(flags & XFS_LOG_SYNC) ||
+ (iclog->ic_state & (XLOG_STATE_ACTIVE | XLOG_STATE_DIRTY)))
+ goto out_unlock;
- return 0;
- } while (iclog != log->l_iclog);
+ if (iclog->ic_state & XLOG_STATE_IOERROR)
+ goto out_error;
+
+ XFS_STATS_INC(mp, xs_log_force_sleep);
+ xlog_wait(&iclog->ic_force_wait, &log->l_icloglock);
+ if (iclog->ic_state & XLOG_STATE_IOERROR)
+ return -EIO;
+ return 0;
+out_unlock:
spin_unlock(&log->l_icloglock);
return 0;
+out_error:
+ spin_unlock(&log->l_icloglock);
+ return -EIO;
}
/*
- * Wrapper for _xfs_log_force_lsn(), to be used when caller doesn't care
- * about errors or whether the log was flushed or not. This is the normal
- * interface to use when trying to unpin items or move the log forward.
+ * Force the in-core log to disk for a specific LSN.
+ *
+ * Find in-core log with lsn.
+ * If it is in the DIRTY state, just return.
+ * If it is in the ACTIVE state, move the in-core log into the WANT_SYNC
+ * state and go to sleep or return.
+ * If it is in any other state, go to sleep or return.
+ *
+ * Synchronous forces are implemented with a wait queue. All callers trying
+ * to force a given lsn to disk must wait on the queue attached to the
+ * specific in-core log. When given in-core log finally completes its write
+ * to disk, that thread will wake up all threads waiting on the queue.
*/
-void
+int
xfs_log_force_lsn(
- xfs_mount_t *mp,
- xfs_lsn_t lsn,
- uint flags)
+ struct xfs_mount *mp,
+ xfs_lsn_t lsn,
+ uint flags,
+ int *log_flushed)
{
+ int ret;
+ ASSERT(lsn != 0);
+
+ XFS_STATS_INC(mp, xs_log_force);
trace_xfs_log_force(mp, lsn, _RET_IP_);
- _xfs_log_force_lsn(mp, lsn, flags, NULL);
+
+ lsn = xlog_cil_force_lsn(mp->m_log, lsn);
+ if (lsn == NULLCOMMITLSN)
+ return 0;
+
+ ret = __xfs_log_force_lsn(mp, lsn, flags, log_flushed, false);
+ if (ret == -EAGAIN)
+ ret = __xfs_log_force_lsn(mp, lsn, flags, log_flushed, true);
+ return ret;
}
/*
@@ -4035,7 +3981,7 @@ xfs_log_force_umount(
* to guarantee this.
*/
if (!logerror)
- _xfs_log_force(mp, XFS_LOG_SYNC, NULL);
+ xfs_log_force(mp, XFS_LOG_SYNC);
/*
* mark the filesystem and the as in a shutdown state and wake
diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h
index bf212772595c..7e2d62922a16 100644
--- a/fs/xfs/xfs_log.h
+++ b/fs/xfs/xfs_log.h
@@ -129,18 +129,9 @@ xfs_lsn_t xfs_log_done(struct xfs_mount *mp,
struct xlog_ticket *ticket,
struct xlog_in_core **iclog,
bool regrant);
-int _xfs_log_force(struct xfs_mount *mp,
- uint flags,
- int *log_forced);
-void xfs_log_force(struct xfs_mount *mp,
- uint flags);
-int _xfs_log_force_lsn(struct xfs_mount *mp,
- xfs_lsn_t lsn,
- uint flags,
- int *log_forced);
-void xfs_log_force_lsn(struct xfs_mount *mp,
- xfs_lsn_t lsn,
- uint flags);
+int xfs_log_force(struct xfs_mount *mp, uint flags);
+int xfs_log_force_lsn(struct xfs_mount *mp, xfs_lsn_t lsn, uint flags,
+ int *log_forced);
int xfs_log_mount(struct xfs_mount *mp,
struct xfs_buftarg *log_target,
xfs_daddr_t start_block,
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c
index 43aa42a3a5d3..cb376ac8a595 100644
--- a/fs/xfs/xfs_log_cil.c
+++ b/fs/xfs/xfs_log_cil.c
@@ -202,7 +202,7 @@ xlog_cil_alloc_shadow_bufs(
*/
kmem_free(lip->li_lv_shadow);
- lv = kmem_alloc(buf_size, KM_SLEEP|KM_NOFS);
+ lv = kmem_alloc_large(buf_size, KM_SLEEP | KM_NOFS);
memset(lv, 0, xlog_cil_iovec_space(niovecs));
lv->lv_item = lip;
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 00240c9ee72e..2b2383f1895e 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -3173,13 +3173,6 @@ xlog_recover_inode_pass2(
/* recover the log dinode inode into the on disk inode */
xfs_log_dinode_to_disk(ldip, dip);
- /* the rest is in on-disk format */
- if (item->ri_buf[1].i_len > isize) {
- memcpy((char *)dip + isize,
- item->ri_buf[1].i_addr + isize,
- item->ri_buf[1].i_len - isize);
- }
-
fields = in_f->ilf_fields;
if (fields & XFS_ILOG_DEV)
xfs_dinode_put_rdev(dip, in_f->ilf_u.ilfu_rdev);
@@ -3252,7 +3245,9 @@ xlog_recover_inode_pass2(
}
out_owner_change:
- if (in_f->ilf_fields & (XFS_ILOG_DOWNER|XFS_ILOG_AOWNER))
+ /* Recover the swapext owner change unless inode has been deleted */
+ if ((in_f->ilf_fields & (XFS_ILOG_DOWNER|XFS_ILOG_AOWNER)) &&
+ (dip->di_mode != 0))
error = xfs_recover_inode_owner_change(mp, dip, in_f,
buffer_list);
/* re-generate the checksum. */
@@ -3434,7 +3429,7 @@ xlog_recover_efi_pass2(
}
atomic_set(&efip->efi_next_extent, efi_formatp->efi_nextents);
- spin_lock(&log->l_ailp->xa_lock);
+ spin_lock(&log->l_ailp->ail_lock);
/*
* The EFI has two references. One for the EFD and one for EFI to ensure
* it makes it into the AIL. Insert the EFI into the AIL directly and
@@ -3477,7 +3472,7 @@ xlog_recover_efd_pass2(
* Search for the EFI with the id in the EFD format structure in the
* AIL.
*/
- spin_lock(&ailp->xa_lock);
+ spin_lock(&ailp->ail_lock);
lip = xfs_trans_ail_cursor_first(ailp, &cur, 0);
while (lip != NULL) {
if (lip->li_type == XFS_LI_EFI) {
@@ -3487,9 +3482,9 @@ xlog_recover_efd_pass2(
* Drop the EFD reference to the EFI. This
* removes the EFI from the AIL and frees it.
*/
- spin_unlock(&ailp->xa_lock);
+ spin_unlock(&ailp->ail_lock);
xfs_efi_release(efip);
- spin_lock(&ailp->xa_lock);
+ spin_lock(&ailp->ail_lock);
break;
}
}
@@ -3497,7 +3492,7 @@ xlog_recover_efd_pass2(
}
xfs_trans_ail_cursor_done(&cur);
- spin_unlock(&ailp->xa_lock);
+ spin_unlock(&ailp->ail_lock);
return 0;
}
@@ -3530,7 +3525,7 @@ xlog_recover_rui_pass2(
}
atomic_set(&ruip->rui_next_extent, rui_formatp->rui_nextents);
- spin_lock(&log->l_ailp->xa_lock);
+ spin_lock(&log->l_ailp->ail_lock);
/*
* The RUI has two references. One for the RUD and one for RUI to ensure
* it makes it into the AIL. Insert the RUI into the AIL directly and
@@ -3570,7 +3565,7 @@ xlog_recover_rud_pass2(
* Search for the RUI with the id in the RUD format structure in the
* AIL.
*/
- spin_lock(&ailp->xa_lock);
+ spin_lock(&ailp->ail_lock);
lip = xfs_trans_ail_cursor_first(ailp, &cur, 0);
while (lip != NULL) {
if (lip->li_type == XFS_LI_RUI) {
@@ -3580,9 +3575,9 @@ xlog_recover_rud_pass2(
* Drop the RUD reference to the RUI. This
* removes the RUI from the AIL and frees it.
*/
- spin_unlock(&ailp->xa_lock);
+ spin_unlock(&ailp->ail_lock);
xfs_rui_release(ruip);
- spin_lock(&ailp->xa_lock);
+ spin_lock(&ailp->ail_lock);
break;
}
}
@@ -3590,7 +3585,7 @@ xlog_recover_rud_pass2(
}
xfs_trans_ail_cursor_done(&cur);
- spin_unlock(&ailp->xa_lock);
+ spin_unlock(&ailp->ail_lock);
return 0;
}
@@ -3646,7 +3641,7 @@ xlog_recover_cui_pass2(
}
atomic_set(&cuip->cui_next_extent, cui_formatp->cui_nextents);
- spin_lock(&log->l_ailp->xa_lock);
+ spin_lock(&log->l_ailp->ail_lock);
/*
* The CUI has two references. One for the CUD and one for CUI to ensure
* it makes it into the AIL. Insert the CUI into the AIL directly and
@@ -3687,7 +3682,7 @@ xlog_recover_cud_pass2(
* Search for the CUI with the id in the CUD format structure in the
* AIL.
*/
- spin_lock(&ailp->xa_lock);
+ spin_lock(&ailp->ail_lock);
lip = xfs_trans_ail_cursor_first(ailp, &cur, 0);
while (lip != NULL) {
if (lip->li_type == XFS_LI_CUI) {
@@ -3697,9 +3692,9 @@ xlog_recover_cud_pass2(
* Drop the CUD reference to the CUI. This
* removes the CUI from the AIL and frees it.
*/
- spin_unlock(&ailp->xa_lock);
+ spin_unlock(&ailp->ail_lock);
xfs_cui_release(cuip);
- spin_lock(&ailp->xa_lock);
+ spin_lock(&ailp->ail_lock);
break;
}
}
@@ -3707,7 +3702,7 @@ xlog_recover_cud_pass2(
}
xfs_trans_ail_cursor_done(&cur);
- spin_unlock(&ailp->xa_lock);
+ spin_unlock(&ailp->ail_lock);
return 0;
}
@@ -3765,7 +3760,7 @@ xlog_recover_bui_pass2(
}
atomic_set(&buip->bui_next_extent, bui_formatp->bui_nextents);
- spin_lock(&log->l_ailp->xa_lock);
+ spin_lock(&log->l_ailp->ail_lock);
/*
* The RUI has two references. One for the RUD and one for RUI to ensure
* it makes it into the AIL. Insert the RUI into the AIL directly and
@@ -3806,7 +3801,7 @@ xlog_recover_bud_pass2(
* Search for the BUI with the id in the BUD format structure in the
* AIL.
*/
- spin_lock(&ailp->xa_lock);
+ spin_lock(&ailp->ail_lock);
lip = xfs_trans_ail_cursor_first(ailp, &cur, 0);
while (lip != NULL) {
if (lip->li_type == XFS_LI_BUI) {
@@ -3816,9 +3811,9 @@ xlog_recover_bud_pass2(
* Drop the BUD reference to the BUI. This
* removes the BUI from the AIL and frees it.
*/
- spin_unlock(&ailp->xa_lock);
+ spin_unlock(&ailp->ail_lock);
xfs_bui_release(buip);
- spin_lock(&ailp->xa_lock);
+ spin_lock(&ailp->ail_lock);
break;
}
}
@@ -3826,7 +3821,7 @@ xlog_recover_bud_pass2(
}
xfs_trans_ail_cursor_done(&cur);
- spin_unlock(&ailp->xa_lock);
+ spin_unlock(&ailp->ail_lock);
return 0;
}
@@ -4659,9 +4654,9 @@ xlog_recover_process_efi(
if (test_bit(XFS_EFI_RECOVERED, &efip->efi_flags))
return 0;
- spin_unlock(&ailp->xa_lock);
+ spin_unlock(&ailp->ail_lock);
error = xfs_efi_recover(mp, efip);
- spin_lock(&ailp->xa_lock);
+ spin_lock(&ailp->ail_lock);
return error;
}
@@ -4677,9 +4672,9 @@ xlog_recover_cancel_efi(
efip = container_of(lip, struct xfs_efi_log_item, efi_item);
- spin_unlock(&ailp->xa_lock);
+ spin_unlock(&ailp->ail_lock);
xfs_efi_release(efip);
- spin_lock(&ailp->xa_lock);
+ spin_lock(&ailp->ail_lock);
}
/* Recover the RUI if necessary. */
@@ -4699,9 +4694,9 @@ xlog_recover_process_rui(
if (test_bit(XFS_RUI_RECOVERED, &ruip->rui_flags))
return 0;
- spin_unlock(&ailp->xa_lock);
+ spin_unlock(&ailp->ail_lock);
error = xfs_rui_recover(mp, ruip);
- spin_lock(&ailp->xa_lock);
+ spin_lock(&ailp->ail_lock);
return error;
}
@@ -4717,9 +4712,9 @@ xlog_recover_cancel_rui(
ruip = container_of(lip, struct xfs_rui_log_item, rui_item);
- spin_unlock(&ailp->xa_lock);
+ spin_unlock(&ailp->ail_lock);
xfs_rui_release(ruip);
- spin_lock(&ailp->xa_lock);
+ spin_lock(&ailp->ail_lock);
}
/* Recover the CUI if necessary. */
@@ -4740,9 +4735,9 @@ xlog_recover_process_cui(
if (test_bit(XFS_CUI_RECOVERED, &cuip->cui_flags))
return 0;
- spin_unlock(&ailp->xa_lock);
+ spin_unlock(&ailp->ail_lock);
error = xfs_cui_recover(mp, cuip, dfops);
- spin_lock(&ailp->xa_lock);
+ spin_lock(&ailp->ail_lock);
return error;
}
@@ -4758,9 +4753,9 @@ xlog_recover_cancel_cui(
cuip = container_of(lip, struct xfs_cui_log_item, cui_item);
- spin_unlock(&ailp->xa_lock);
+ spin_unlock(&ailp->ail_lock);
xfs_cui_release(cuip);
- spin_lock(&ailp->xa_lock);
+ spin_lock(&ailp->ail_lock);
}
/* Recover the BUI if necessary. */
@@ -4781,9 +4776,9 @@ xlog_recover_process_bui(
if (test_bit(XFS_BUI_RECOVERED, &buip->bui_flags))
return 0;
- spin_unlock(&ailp->xa_lock);
+ spin_unlock(&ailp->ail_lock);
error = xfs_bui_recover(mp, buip, dfops);
- spin_lock(&ailp->xa_lock);
+ spin_lock(&ailp->ail_lock);
return error;
}
@@ -4799,9 +4794,9 @@ xlog_recover_cancel_bui(
buip = container_of(lip, struct xfs_bui_log_item, bui_item);
- spin_unlock(&ailp->xa_lock);
+ spin_unlock(&ailp->ail_lock);
xfs_bui_release(buip);
- spin_lock(&ailp->xa_lock);
+ spin_lock(&ailp->ail_lock);
}
/* Is this log item a deferred action intent? */
@@ -4889,7 +4884,7 @@ xlog_recover_process_intents(
#endif
ailp = log->l_ailp;
- spin_lock(&ailp->xa_lock);
+ spin_lock(&ailp->ail_lock);
lip = xfs_trans_ail_cursor_first(ailp, &cur, 0);
#if defined(DEBUG) || defined(XFS_WARN)
last_lsn = xlog_assign_lsn(log->l_curr_cycle, log->l_curr_block);
@@ -4943,7 +4938,7 @@ xlog_recover_process_intents(
}
out:
xfs_trans_ail_cursor_done(&cur);
- spin_unlock(&ailp->xa_lock);
+ spin_unlock(&ailp->ail_lock);
if (error)
xfs_defer_cancel(&dfops);
else
@@ -4966,7 +4961,7 @@ xlog_recover_cancel_intents(
struct xfs_ail *ailp;
ailp = log->l_ailp;
- spin_lock(&ailp->xa_lock);
+ spin_lock(&ailp->ail_lock);
lip = xfs_trans_ail_cursor_first(ailp, &cur, 0);
while (lip != NULL) {
/*
@@ -5000,7 +4995,7 @@ xlog_recover_cancel_intents(
}
xfs_trans_ail_cursor_done(&cur);
- spin_unlock(&ailp->xa_lock);
+ spin_unlock(&ailp->ail_lock);
return error;
}
@@ -5127,16 +5122,9 @@ xlog_recover_process_iunlinks(
xfs_agino_t agino;
int bucket;
int error;
- uint mp_dmevmask;
mp = log->l_mp;
- /*
- * Prevent any DMAPI event from being sent while in this function.
- */
- mp_dmevmask = mp->m_dmevmask;
- mp->m_dmevmask = 0;
-
for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) {
/*
* Find the agi for this ag.
@@ -5172,8 +5160,6 @@ xlog_recover_process_iunlinks(
}
xfs_buf_rele(agibp);
}
-
- mp->m_dmevmask = mp_dmevmask;
}
STATIC int
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 98fd41cbb9e1..a901b86772f8 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -803,8 +803,6 @@ xfs_mountfs(
get_unaligned_be16(&sbp->sb_uuid.b[4]);
mp->m_fixedfsid[1] = get_unaligned_be32(&sbp->sb_uuid.b[0]);
- mp->m_dmevmask = 0; /* not persistent; set after each mount */
-
error = xfs_da_mount(mp);
if (error) {
xfs_warn(mp, "Failed dir/attr init: %d", error);
@@ -819,8 +817,6 @@ xfs_mountfs(
/*
* Allocate and initialize the per-ag data.
*/
- spin_lock_init(&mp->m_perag_lock);
- INIT_RADIX_TREE(&mp->m_perag_tree, GFP_ATOMIC);
error = xfs_initialize_perag(mp, sbp->sb_agcount, &mp->m_maxagi);
if (error) {
xfs_warn(mp, "Failed per-ag init: %d", error);
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index e0792d036be2..10b90bbc5162 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -138,7 +138,6 @@ typedef struct xfs_mount {
spinlock_t m_perag_lock; /* lock for m_perag_tree */
struct mutex m_growlock; /* growfs mutex */
int m_fixedfsid[2]; /* unchanged for life of FS */
- uint m_dmevmask; /* DMI events for this FS */
uint64_t m_flags; /* global mount flags */
bool m_inotbt_nores; /* no per-AG finobt resv. */
int m_ialloc_inos; /* inodes in inode allocation */
@@ -326,8 +325,9 @@ xfs_daddr_to_agbno(struct xfs_mount *mp, xfs_daddr_t d)
/* per-AG block reservation data structures*/
enum xfs_ag_resv_type {
XFS_AG_RESV_NONE = 0,
- XFS_AG_RESV_METADATA,
XFS_AG_RESV_AGFL,
+ XFS_AG_RESV_METADATA,
+ XFS_AG_RESV_RMAPBT,
};
struct xfs_ag_resv {
@@ -353,6 +353,7 @@ typedef struct xfs_perag {
char pagi_inodeok; /* The agi is ok for inodes */
uint8_t pagf_levels[XFS_BTNUM_AGF];
/* # of levels in bno & cnt btree */
+ bool pagf_agflreset; /* agfl requires reset before use */
uint32_t pagf_flcount; /* count of blocks in freelist */
xfs_extlen_t pagf_freeblks; /* total free blocks */
xfs_extlen_t pagf_longest; /* longest free space */
@@ -391,8 +392,8 @@ typedef struct xfs_perag {
/* Blocks reserved for all kinds of metadata. */
struct xfs_ag_resv pag_meta_resv;
- /* Blocks reserved for just AGFL-based metadata. */
- struct xfs_ag_resv pag_agfl_resv;
+ /* Blocks reserved for the reverse mapping btree. */
+ struct xfs_ag_resv pag_rmapbt_resv;
/* reference count */
uint8_t pagf_refcount_level;
@@ -406,8 +407,8 @@ xfs_perag_resv(
switch (type) {
case XFS_AG_RESV_METADATA:
return &pag->pag_meta_resv;
- case XFS_AG_RESV_AGFL:
- return &pag->pag_agfl_resv;
+ case XFS_AG_RESV_RMAPBT:
+ return &pag->pag_rmapbt_resv;
default:
return NULL;
}
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index 270246943a06..cdbd342a5249 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -394,7 +394,7 @@ xfs_reflink_allocate_cow(
retry:
ASSERT(xfs_is_reflink_inode(ip));
- ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL | XFS_ILOCK_SHARED));
+ ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
/*
* Even if the extent is not shared we might have a preallocation for
@@ -668,7 +668,7 @@ xfs_reflink_cancel_cow_range(
/* Start a rolling transaction to remove the mappings */
error = xfs_trans_alloc(ip->i_mount, &M_RES(ip->i_mount)->tr_write,
- 0, 0, 0, &tp);
+ 0, 0, XFS_TRANS_NOFS, &tp);
if (error)
goto out;
@@ -741,7 +741,7 @@ xfs_reflink_end_cow(
(unsigned int)(end_fsb - offset_fsb),
XFS_DATA_FORK);
error = xfs_trans_alloc(ip->i_mount, &M_RES(ip->i_mount)->tr_write,
- resblks, 0, XFS_TRANS_RESERVE, &tp);
+ resblks, 0, XFS_TRANS_RESERVE | XFS_TRANS_NOFS, &tp);
if (error)
goto out;
@@ -762,10 +762,8 @@ xfs_reflink_end_cow(
xfs_trim_extent(&del, offset_fsb, end_fsb - offset_fsb);
/* Extent delete may have bumped ext forward */
- if (!del.br_blockcount) {
- xfs_iext_prev(ifp, &icur);
- goto next_extent;
- }
+ if (!del.br_blockcount)
+ goto prev_extent;
ASSERT(!isnullstartblock(got.br_startblock));
@@ -774,10 +772,8 @@ xfs_reflink_end_cow(
* speculatively preallocated CoW extents that have been
* allocated but have not yet been involved in a write.
*/
- if (got.br_state == XFS_EXT_UNWRITTEN) {
- xfs_iext_prev(ifp, &icur);
- goto next_extent;
- }
+ if (got.br_state == XFS_EXT_UNWRITTEN)
+ goto prev_extent;
/* Unmap the old blocks in the data fork. */
xfs_defer_init(&dfops, &firstfsb);
@@ -816,9 +812,12 @@ xfs_reflink_end_cow(
error = xfs_defer_finish(&tp, &dfops);
if (error)
goto out_defer;
-next_extent:
if (!xfs_iext_get_extent(ifp, &icur, &got))
break;
+ continue;
+prev_extent:
+ if (!xfs_iext_prev_extent(ifp, &icur, &got))
+ break;
}
error = xfs_trans_commit(tp);
@@ -1061,7 +1060,7 @@ xfs_reflink_ag_has_free_space(
return 0;
pag = xfs_perag_get(mp, agno);
- if (xfs_ag_resv_critical(pag, XFS_AG_RESV_AGFL) ||
+ if (xfs_ag_resv_critical(pag, XFS_AG_RESV_RMAPBT) ||
xfs_ag_resv_critical(pag, XFS_AG_RESV_METADATA))
error = -ENOSPC;
xfs_perag_put(pag);
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 93588ea3d3d2..612c1d5348b3 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -972,7 +972,6 @@ xfs_fs_destroy_inode(
struct inode *inode)
{
struct xfs_inode *ip = XFS_I(inode);
- int error;
trace_xfs_destroy_inode(ip);
@@ -980,14 +979,6 @@ xfs_fs_destroy_inode(
XFS_STATS_INC(ip->i_mount, vn_rele);
XFS_STATS_INC(ip->i_mount, vn_remove);
- if (xfs_is_reflink_inode(ip)) {
- error = xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF, true);
- if (error && !XFS_FORCED_SHUTDOWN(ip->i_mount))
- xfs_warn(ip->i_mount,
-"Error %d while evicting CoW blocks for inode %llu.",
- error, ip->i_ino);
- }
-
xfs_inactive(ip);
ASSERT(XFS_FORCED_SHUTDOWN(ip->i_mount) || ip->i_delayed_blks == 0);
@@ -1009,6 +1000,28 @@ xfs_fs_destroy_inode(
xfs_inode_set_reclaim_tag(ip);
}
+static void
+xfs_fs_dirty_inode(
+ struct inode *inode,
+ int flag)
+{
+ struct xfs_inode *ip = XFS_I(inode);
+ struct xfs_mount *mp = ip->i_mount;
+ struct xfs_trans *tp;
+
+ if (!(inode->i_sb->s_flags & SB_LAZYTIME))
+ return;
+ if (flag != I_DIRTY_SYNC || !(inode->i_state & I_DIRTY_TIME))
+ return;
+
+ if (xfs_trans_alloc(mp, &M_RES(mp)->tr_fsyncts, 0, 0, 0, &tp))
+ return;
+ xfs_ilock(ip, XFS_ILOCK_EXCL);
+ xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
+ xfs_trans_log_inode(tp, ip, XFS_ILOG_TIMESTAMP);
+ xfs_trans_commit(tp);
+}
+
/*
* Slab object creation initialisation for the XFS inode.
* This covers only the idempotent fields in the XFS inode;
@@ -1566,29 +1579,48 @@ xfs_destroy_percpu_counters(
percpu_counter_destroy(&mp->m_fdblocks);
}
-STATIC int
-xfs_fs_fill_super(
- struct super_block *sb,
- void *data,
- int silent)
+static struct xfs_mount *
+xfs_mount_alloc(
+ struct super_block *sb)
{
- struct inode *root;
- struct xfs_mount *mp = NULL;
- int flags = 0, error = -ENOMEM;
+ struct xfs_mount *mp;
mp = kzalloc(sizeof(struct xfs_mount), GFP_KERNEL);
if (!mp)
- goto out;
+ return NULL;
+ mp->m_super = sb;
spin_lock_init(&mp->m_sb_lock);
+ spin_lock_init(&mp->m_agirotor_lock);
+ INIT_RADIX_TREE(&mp->m_perag_tree, GFP_ATOMIC);
+ spin_lock_init(&mp->m_perag_lock);
mutex_init(&mp->m_growlock);
atomic_set(&mp->m_active_trans, 0);
INIT_DELAYED_WORK(&mp->m_reclaim_work, xfs_reclaim_worker);
INIT_DELAYED_WORK(&mp->m_eofblocks_work, xfs_eofblocks_worker);
INIT_DELAYED_WORK(&mp->m_cowblocks_work, xfs_cowblocks_worker);
mp->m_kobj.kobject.kset = xfs_kset;
+ return mp;
+}
- mp->m_super = sb;
+
+STATIC int
+xfs_fs_fill_super(
+ struct super_block *sb,
+ void *data,
+ int silent)
+{
+ struct inode *root;
+ struct xfs_mount *mp = NULL;
+ int flags = 0, error = -ENOMEM;
+
+ /*
+ * allocate mp and do all low-level struct initializations before we
+ * attach it to the super
+ */
+ mp = xfs_mount_alloc(sb);
+ if (!mp)
+ goto out;
sb->s_fs_info = mp;
error = xfs_parseargs(mp, (char *)data);
@@ -1789,6 +1821,7 @@ xfs_fs_free_cached_objects(
static const struct super_operations xfs_super_operations = {
.alloc_inode = xfs_fs_alloc_inode,
.destroy_inode = xfs_fs_destroy_inode,
+ .dirty_inode = xfs_fs_dirty_inode,
.drop_inode = xfs_fs_drop_inode,
.put_super = xfs_fs_put_super,
.sync_fs = xfs_fs_sync_fs,
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 945de08af7ba..a982c0b623d0 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -1477,7 +1477,7 @@ TRACE_EVENT(xfs_extent_busy_trim,
__entry->tlen)
);
-TRACE_EVENT(xfs_agf,
+DECLARE_EVENT_CLASS(xfs_agf_class,
TP_PROTO(struct xfs_mount *mp, struct xfs_agf *agf, int flags,
unsigned long caller_ip),
TP_ARGS(mp, agf, flags, caller_ip),
@@ -1533,6 +1533,13 @@ TRACE_EVENT(xfs_agf,
__entry->longest,
(void *)__entry->caller_ip)
);
+#define DEFINE_AGF_EVENT(name) \
+DEFINE_EVENT(xfs_agf_class, name, \
+ TP_PROTO(struct xfs_mount *mp, struct xfs_agf *agf, int flags, \
+ unsigned long caller_ip), \
+ TP_ARGS(mp, agf, flags, caller_ip))
+DEFINE_AGF_EVENT(xfs_agf);
+DEFINE_AGF_EVENT(xfs_agfl_reset);
TRACE_EVENT(xfs_free_extent,
TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_agblock_t agbno,
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index 86f92df32c42..d6d8f9d129a7 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -119,8 +119,11 @@ xfs_trans_dup(
/* We gave our writer reference to the new transaction */
tp->t_flags |= XFS_TRANS_NO_WRITECOUNT;
ntp->t_ticket = xfs_log_ticket_get(tp->t_ticket);
+
+ ASSERT(tp->t_blk_res >= tp->t_blk_res_used);
ntp->t_blk_res = tp->t_blk_res - tp->t_blk_res_used;
tp->t_blk_res = tp->t_blk_res_used;
+
ntp->t_rtx_res = tp->t_rtx_res - tp->t_rtx_res_used;
tp->t_rtx_res = tp->t_rtx_res_used;
ntp->t_pflags = tp->t_pflags;
@@ -344,13 +347,14 @@ xfs_trans_mod_sb(
break;
case XFS_TRANS_SB_FDBLOCKS:
/*
- * Track the number of blocks allocated in the
- * transaction. Make sure it does not exceed the
- * number reserved.
+ * Track the number of blocks allocated in the transaction.
+ * Make sure it does not exceed the number reserved. If so,
+ * shutdown as this can lead to accounting inconsistency.
*/
if (delta < 0) {
tp->t_blk_res_used += (uint)-delta;
- ASSERT(tp->t_blk_res_used <= tp->t_blk_res);
+ if (tp->t_blk_res_used > tp->t_blk_res)
+ xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
}
tp->t_fdblocks_delta += delta;
if (xfs_sb_version_haslazysbcount(&mp->m_sb))
@@ -803,8 +807,8 @@ xfs_log_item_batch_insert(
{
int i;
- spin_lock(&ailp->xa_lock);
- /* xfs_trans_ail_update_bulk drops ailp->xa_lock */
+ spin_lock(&ailp->ail_lock);
+ /* xfs_trans_ail_update_bulk drops ailp->ail_lock */
xfs_trans_ail_update_bulk(ailp, cur, log_items, nr_items, commit_lsn);
for (i = 0; i < nr_items; i++) {
@@ -847,9 +851,9 @@ xfs_trans_committed_bulk(
struct xfs_ail_cursor cur;
int i = 0;
- spin_lock(&ailp->xa_lock);
+ spin_lock(&ailp->ail_lock);
xfs_trans_ail_cursor_last(ailp, &cur, commit_lsn);
- spin_unlock(&ailp->xa_lock);
+ spin_unlock(&ailp->ail_lock);
/* unpin all the log items */
for (lv = log_vector; lv; lv = lv->lv_next ) {
@@ -869,7 +873,7 @@ xfs_trans_committed_bulk(
* object into the AIL as we are in a shutdown situation.
*/
if (aborted) {
- ASSERT(XFS_FORCED_SHUTDOWN(ailp->xa_mount));
+ ASSERT(XFS_FORCED_SHUTDOWN(ailp->ail_mount));
lip->li_ops->iop_unpin(lip, 1);
continue;
}
@@ -883,11 +887,11 @@ xfs_trans_committed_bulk(
* not affect the AIL cursor the bulk insert path is
* using.
*/
- spin_lock(&ailp->xa_lock);
+ spin_lock(&ailp->ail_lock);
if (XFS_LSN_CMP(item_lsn, lip->li_lsn) > 0)
xfs_trans_ail_update(ailp, lip, item_lsn);
else
- spin_unlock(&ailp->xa_lock);
+ spin_unlock(&ailp->ail_lock);
lip->li_ops->iop_unpin(lip, 0);
continue;
}
@@ -905,9 +909,9 @@ xfs_trans_committed_bulk(
if (i)
xfs_log_item_batch_insert(ailp, &cur, log_items, i, commit_lsn);
- spin_lock(&ailp->xa_lock);
+ spin_lock(&ailp->ail_lock);
xfs_trans_ail_cursor_done(&cur);
- spin_unlock(&ailp->xa_lock);
+ spin_unlock(&ailp->ail_lock);
}
/*
@@ -966,7 +970,7 @@ __xfs_trans_commit(
* log out now and wait for it.
*/
if (sync) {
- error = _xfs_log_force_lsn(mp, commit_lsn, XFS_LOG_SYNC, NULL);
+ error = xfs_log_force_lsn(mp, commit_lsn, XFS_LOG_SYNC, NULL);
XFS_STATS_INC(mp, xs_trans_sync);
} else {
XFS_STATS_INC(mp, xs_trans_async);
diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c
index cef89f7127d3..d4a2445215e6 100644
--- a/fs/xfs/xfs_trans_ail.c
+++ b/fs/xfs/xfs_trans_ail.c
@@ -40,7 +40,7 @@ xfs_ail_check(
{
xfs_log_item_t *prev_lip;
- if (list_empty(&ailp->xa_ail))
+ if (list_empty(&ailp->ail_head))
return;
/*
@@ -48,11 +48,11 @@ xfs_ail_check(
*/
ASSERT((lip->li_flags & XFS_LI_IN_AIL) != 0);
prev_lip = list_entry(lip->li_ail.prev, xfs_log_item_t, li_ail);
- if (&prev_lip->li_ail != &ailp->xa_ail)
+ if (&prev_lip->li_ail != &ailp->ail_head)
ASSERT(XFS_LSN_CMP(prev_lip->li_lsn, lip->li_lsn) <= 0);
prev_lip = list_entry(lip->li_ail.next, xfs_log_item_t, li_ail);
- if (&prev_lip->li_ail != &ailp->xa_ail)
+ if (&prev_lip->li_ail != &ailp->ail_head)
ASSERT(XFS_LSN_CMP(prev_lip->li_lsn, lip->li_lsn) >= 0);
@@ -69,10 +69,10 @@ static xfs_log_item_t *
xfs_ail_max(
struct xfs_ail *ailp)
{
- if (list_empty(&ailp->xa_ail))
+ if (list_empty(&ailp->ail_head))
return NULL;
- return list_entry(ailp->xa_ail.prev, xfs_log_item_t, li_ail);
+ return list_entry(ailp->ail_head.prev, xfs_log_item_t, li_ail);
}
/*
@@ -84,7 +84,7 @@ xfs_ail_next(
struct xfs_ail *ailp,
xfs_log_item_t *lip)
{
- if (lip->li_ail.next == &ailp->xa_ail)
+ if (lip->li_ail.next == &ailp->ail_head)
return NULL;
return list_first_entry(&lip->li_ail, xfs_log_item_t, li_ail);
@@ -105,11 +105,11 @@ xfs_ail_min_lsn(
xfs_lsn_t lsn = 0;
xfs_log_item_t *lip;
- spin_lock(&ailp->xa_lock);
+ spin_lock(&ailp->ail_lock);
lip = xfs_ail_min(ailp);
if (lip)
lsn = lip->li_lsn;
- spin_unlock(&ailp->xa_lock);
+ spin_unlock(&ailp->ail_lock);
return lsn;
}
@@ -124,11 +124,11 @@ xfs_ail_max_lsn(
xfs_lsn_t lsn = 0;
xfs_log_item_t *lip;
- spin_lock(&ailp->xa_lock);
+ spin_lock(&ailp->ail_lock);
lip = xfs_ail_max(ailp);
if (lip)
lsn = lip->li_lsn;
- spin_unlock(&ailp->xa_lock);
+ spin_unlock(&ailp->ail_lock);
return lsn;
}
@@ -146,7 +146,7 @@ xfs_trans_ail_cursor_init(
struct xfs_ail_cursor *cur)
{
cur->item = NULL;
- list_add_tail(&cur->list, &ailp->xa_cursors);
+ list_add_tail(&cur->list, &ailp->ail_cursors);
}
/*
@@ -194,7 +194,7 @@ xfs_trans_ail_cursor_clear(
{
struct xfs_ail_cursor *cur;
- list_for_each_entry(cur, &ailp->xa_cursors, list) {
+ list_for_each_entry(cur, &ailp->ail_cursors, list) {
if (cur->item == lip)
cur->item = (struct xfs_log_item *)
((uintptr_t)cur->item | 1);
@@ -222,7 +222,7 @@ xfs_trans_ail_cursor_first(
goto out;
}
- list_for_each_entry(lip, &ailp->xa_ail, li_ail) {
+ list_for_each_entry(lip, &ailp->ail_head, li_ail) {
if (XFS_LSN_CMP(lip->li_lsn, lsn) >= 0)
goto out;
}
@@ -241,7 +241,7 @@ __xfs_trans_ail_cursor_last(
{
xfs_log_item_t *lip;
- list_for_each_entry_reverse(lip, &ailp->xa_ail, li_ail) {
+ list_for_each_entry_reverse(lip, &ailp->ail_head, li_ail) {
if (XFS_LSN_CMP(lip->li_lsn, lsn) <= 0)
return lip;
}
@@ -310,7 +310,7 @@ xfs_ail_splice(
if (lip)
list_splice(list, &lip->li_ail);
else
- list_splice(list, &ailp->xa_ail);
+ list_splice(list, &ailp->ail_head);
}
/*
@@ -335,17 +335,17 @@ xfsaild_push_item(
* If log item pinning is enabled, skip the push and track the item as
* pinned. This can help induce head-behind-tail conditions.
*/
- if (XFS_TEST_ERROR(false, ailp->xa_mount, XFS_ERRTAG_LOG_ITEM_PIN))
+ if (XFS_TEST_ERROR(false, ailp->ail_mount, XFS_ERRTAG_LOG_ITEM_PIN))
return XFS_ITEM_PINNED;
- return lip->li_ops->iop_push(lip, &ailp->xa_buf_list);
+ return lip->li_ops->iop_push(lip, &ailp->ail_buf_list);
}
static long
xfsaild_push(
struct xfs_ail *ailp)
{
- xfs_mount_t *mp = ailp->xa_mount;
+ xfs_mount_t *mp = ailp->ail_mount;
struct xfs_ail_cursor cur;
xfs_log_item_t *lip;
xfs_lsn_t lsn;
@@ -360,30 +360,30 @@ xfsaild_push(
* buffers the last time we ran, force the log first and wait for it
* before pushing again.
*/
- if (ailp->xa_log_flush && ailp->xa_last_pushed_lsn == 0 &&
- (!list_empty_careful(&ailp->xa_buf_list) ||
+ if (ailp->ail_log_flush && ailp->ail_last_pushed_lsn == 0 &&
+ (!list_empty_careful(&ailp->ail_buf_list) ||
xfs_ail_min_lsn(ailp))) {
- ailp->xa_log_flush = 0;
+ ailp->ail_log_flush = 0;
XFS_STATS_INC(mp, xs_push_ail_flush);
xfs_log_force(mp, XFS_LOG_SYNC);
}
- spin_lock(&ailp->xa_lock);
+ spin_lock(&ailp->ail_lock);
- /* barrier matches the xa_target update in xfs_ail_push() */
+ /* barrier matches the ail_target update in xfs_ail_push() */
smp_rmb();
- target = ailp->xa_target;
- ailp->xa_target_prev = target;
+ target = ailp->ail_target;
+ ailp->ail_target_prev = target;
- lip = xfs_trans_ail_cursor_first(ailp, &cur, ailp->xa_last_pushed_lsn);
+ lip = xfs_trans_ail_cursor_first(ailp, &cur, ailp->ail_last_pushed_lsn);
if (!lip) {
/*
* If the AIL is empty or our push has reached the end we are
* done now.
*/
xfs_trans_ail_cursor_done(&cur);
- spin_unlock(&ailp->xa_lock);
+ spin_unlock(&ailp->ail_lock);
goto out_done;
}
@@ -404,7 +404,7 @@ xfsaild_push(
XFS_STATS_INC(mp, xs_push_ail_success);
trace_xfs_ail_push(lip);
- ailp->xa_last_pushed_lsn = lsn;
+ ailp->ail_last_pushed_lsn = lsn;
break;
case XFS_ITEM_FLUSHING:
@@ -423,7 +423,7 @@ xfsaild_push(
trace_xfs_ail_flushing(lip);
flushing++;
- ailp->xa_last_pushed_lsn = lsn;
+ ailp->ail_last_pushed_lsn = lsn;
break;
case XFS_ITEM_PINNED:
@@ -431,7 +431,7 @@ xfsaild_push(
trace_xfs_ail_pinned(lip);
stuck++;
- ailp->xa_log_flush++;
+ ailp->ail_log_flush++;
break;
case XFS_ITEM_LOCKED:
XFS_STATS_INC(mp, xs_push_ail_locked);
@@ -468,10 +468,10 @@ xfsaild_push(
lsn = lip->li_lsn;
}
xfs_trans_ail_cursor_done(&cur);
- spin_unlock(&ailp->xa_lock);
+ spin_unlock(&ailp->ail_lock);
- if (xfs_buf_delwri_submit_nowait(&ailp->xa_buf_list))
- ailp->xa_log_flush++;
+ if (xfs_buf_delwri_submit_nowait(&ailp->ail_buf_list))
+ ailp->ail_log_flush++;
if (!count || XFS_LSN_CMP(lsn, target) >= 0) {
out_done:
@@ -481,7 +481,7 @@ out_done:
* AIL before we start the next scan from the start of the AIL.
*/
tout = 50;
- ailp->xa_last_pushed_lsn = 0;
+ ailp->ail_last_pushed_lsn = 0;
} else if (((stuck + flushing) * 100) / count > 90) {
/*
* Either there is a lot of contention on the AIL or we are
@@ -494,7 +494,7 @@ out_done:
* the restart to issue a log force to unpin the stuck items.
*/
tout = 20;
- ailp->xa_last_pushed_lsn = 0;
+ ailp->ail_last_pushed_lsn = 0;
} else {
/*
* Assume we have more work to do in a short while.
@@ -536,26 +536,26 @@ xfsaild(
break;
}
- spin_lock(&ailp->xa_lock);
+ spin_lock(&ailp->ail_lock);
/*
* Idle if the AIL is empty and we are not racing with a target
* update. We check the AIL after we set the task to a sleep
- * state to guarantee that we either catch an xa_target update
+ * state to guarantee that we either catch an ail_target update
* or that a wake_up resets the state to TASK_RUNNING.
* Otherwise, we run the risk of sleeping indefinitely.
*
- * The barrier matches the xa_target update in xfs_ail_push().
+ * The barrier matches the ail_target update in xfs_ail_push().
*/
smp_rmb();
if (!xfs_ail_min(ailp) &&
- ailp->xa_target == ailp->xa_target_prev) {
- spin_unlock(&ailp->xa_lock);
+ ailp->ail_target == ailp->ail_target_prev) {
+ spin_unlock(&ailp->ail_lock);
freezable_schedule();
tout = 0;
continue;
}
- spin_unlock(&ailp->xa_lock);
+ spin_unlock(&ailp->ail_lock);
if (tout)
freezable_schedule_timeout(msecs_to_jiffies(tout));
@@ -592,8 +592,8 @@ xfs_ail_push(
xfs_log_item_t *lip;
lip = xfs_ail_min(ailp);
- if (!lip || XFS_FORCED_SHUTDOWN(ailp->xa_mount) ||
- XFS_LSN_CMP(threshold_lsn, ailp->xa_target) <= 0)
+ if (!lip || XFS_FORCED_SHUTDOWN(ailp->ail_mount) ||
+ XFS_LSN_CMP(threshold_lsn, ailp->ail_target) <= 0)
return;
/*
@@ -601,10 +601,10 @@ xfs_ail_push(
* the XFS_AIL_PUSHING_BIT.
*/
smp_wmb();
- xfs_trans_ail_copy_lsn(ailp, &ailp->xa_target, &threshold_lsn);
+ xfs_trans_ail_copy_lsn(ailp, &ailp->ail_target, &threshold_lsn);
smp_wmb();
- wake_up_process(ailp->xa_task);
+ wake_up_process(ailp->ail_task);
}
/*
@@ -630,18 +630,18 @@ xfs_ail_push_all_sync(
struct xfs_log_item *lip;
DEFINE_WAIT(wait);
- spin_lock(&ailp->xa_lock);
+ spin_lock(&ailp->ail_lock);
while ((lip = xfs_ail_max(ailp)) != NULL) {
- prepare_to_wait(&ailp->xa_empty, &wait, TASK_UNINTERRUPTIBLE);
- ailp->xa_target = lip->li_lsn;
- wake_up_process(ailp->xa_task);
- spin_unlock(&ailp->xa_lock);
+ prepare_to_wait(&ailp->ail_empty, &wait, TASK_UNINTERRUPTIBLE);
+ ailp->ail_target = lip->li_lsn;
+ wake_up_process(ailp->ail_task);
+ spin_unlock(&ailp->ail_lock);
schedule();
- spin_lock(&ailp->xa_lock);
+ spin_lock(&ailp->ail_lock);
}
- spin_unlock(&ailp->xa_lock);
+ spin_unlock(&ailp->ail_lock);
- finish_wait(&ailp->xa_empty, &wait);
+ finish_wait(&ailp->ail_empty, &wait);
}
/*
@@ -672,7 +672,7 @@ xfs_trans_ail_update_bulk(
struct xfs_ail_cursor *cur,
struct xfs_log_item **log_items,
int nr_items,
- xfs_lsn_t lsn) __releases(ailp->xa_lock)
+ xfs_lsn_t lsn) __releases(ailp->ail_lock)
{
xfs_log_item_t *mlip;
int mlip_changed = 0;
@@ -705,13 +705,13 @@ xfs_trans_ail_update_bulk(
xfs_ail_splice(ailp, cur, &tmp, lsn);
if (mlip_changed) {
- if (!XFS_FORCED_SHUTDOWN(ailp->xa_mount))
- xlog_assign_tail_lsn_locked(ailp->xa_mount);
- spin_unlock(&ailp->xa_lock);
+ if (!XFS_FORCED_SHUTDOWN(ailp->ail_mount))
+ xlog_assign_tail_lsn_locked(ailp->ail_mount);
+ spin_unlock(&ailp->ail_lock);
- xfs_log_space_wake(ailp->xa_mount);
+ xfs_log_space_wake(ailp->ail_mount);
} else {
- spin_unlock(&ailp->xa_lock);
+ spin_unlock(&ailp->ail_lock);
}
}
@@ -756,13 +756,13 @@ void
xfs_trans_ail_delete(
struct xfs_ail *ailp,
struct xfs_log_item *lip,
- int shutdown_type) __releases(ailp->xa_lock)
+ int shutdown_type) __releases(ailp->ail_lock)
{
- struct xfs_mount *mp = ailp->xa_mount;
+ struct xfs_mount *mp = ailp->ail_mount;
bool mlip_changed;
if (!(lip->li_flags & XFS_LI_IN_AIL)) {
- spin_unlock(&ailp->xa_lock);
+ spin_unlock(&ailp->ail_lock);
if (!XFS_FORCED_SHUTDOWN(mp)) {
xfs_alert_tag(mp, XFS_PTAG_AILDELETE,
"%s: attempting to delete a log item that is not in the AIL",
@@ -776,13 +776,13 @@ xfs_trans_ail_delete(
if (mlip_changed) {
if (!XFS_FORCED_SHUTDOWN(mp))
xlog_assign_tail_lsn_locked(mp);
- if (list_empty(&ailp->xa_ail))
- wake_up_all(&ailp->xa_empty);
+ if (list_empty(&ailp->ail_head))
+ wake_up_all(&ailp->ail_empty);
}
- spin_unlock(&ailp->xa_lock);
+ spin_unlock(&ailp->ail_lock);
if (mlip_changed)
- xfs_log_space_wake(ailp->xa_mount);
+ xfs_log_space_wake(ailp->ail_mount);
}
int
@@ -795,16 +795,16 @@ xfs_trans_ail_init(
if (!ailp)
return -ENOMEM;
- ailp->xa_mount = mp;
- INIT_LIST_HEAD(&ailp->xa_ail);
- INIT_LIST_HEAD(&ailp->xa_cursors);
- spin_lock_init(&ailp->xa_lock);
- INIT_LIST_HEAD(&ailp->xa_buf_list);
- init_waitqueue_head(&ailp->xa_empty);
+ ailp->ail_mount = mp;
+ INIT_LIST_HEAD(&ailp->ail_head);
+ INIT_LIST_HEAD(&ailp->ail_cursors);
+ spin_lock_init(&ailp->ail_lock);
+ INIT_LIST_HEAD(&ailp->ail_buf_list);
+ init_waitqueue_head(&ailp->ail_empty);
- ailp->xa_task = kthread_run(xfsaild, ailp, "xfsaild/%s",
- ailp->xa_mount->m_fsname);
- if (IS_ERR(ailp->xa_task))
+ ailp->ail_task = kthread_run(xfsaild, ailp, "xfsaild/%s",
+ ailp->ail_mount->m_fsname);
+ if (IS_ERR(ailp->ail_task))
goto out_free_ailp;
mp->m_ail = ailp;
@@ -821,6 +821,6 @@ xfs_trans_ail_destroy(
{
struct xfs_ail *ailp = mp->m_ail;
- kthread_stop(ailp->xa_task);
+ kthread_stop(ailp->ail_task);
kmem_free(ailp);
}
diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c
index 653ce379d36b..a5d9dfc45d98 100644
--- a/fs/xfs/xfs_trans_buf.c
+++ b/fs/xfs/xfs_trans_buf.c
@@ -431,8 +431,8 @@ xfs_trans_brelse(
* If the fs has shutdown and we dropped the last reference, it may fall
* on us to release a (possibly dirty) bli if it never made it to the
* AIL (e.g., the aborted unpin already happened and didn't release it
- * due to our reference). Since we're already shutdown and need xa_lock,
- * just force remove from the AIL and release the bli here.
+ * due to our reference). Since we're already shutdown and need
+ * ail_lock, just force remove from the AIL and release the bli here.
*/
if (XFS_FORCED_SHUTDOWN(tp->t_mountp) && freed) {
xfs_trans_ail_remove(&bip->bli_item, SHUTDOWN_LOG_IO_ERROR);
diff --git a/fs/xfs/xfs_trans_inode.c b/fs/xfs/xfs_trans_inode.c
index 4a89da4b6fe7..07cea592dc01 100644
--- a/fs/xfs/xfs_trans_inode.c
+++ b/fs/xfs/xfs_trans_inode.c
@@ -98,10 +98,24 @@ xfs_trans_log_inode(
xfs_inode_t *ip,
uint flags)
{
+ struct inode *inode = VFS_I(ip);
+
ASSERT(ip->i_itemp != NULL);
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
/*
+ * Don't bother with i_lock for the I_DIRTY_TIME check here, as races
+ * don't matter - we either will need an extra transaction in 24 hours
+ * to log the timestamps, or will clear already cleared fields in the
+ * worst case.
+ */
+ if (inode->i_state & (I_DIRTY_TIME | I_DIRTY_TIME_EXPIRED)) {
+ spin_lock(&inode->i_lock);
+ inode->i_state &= ~(I_DIRTY_TIME | I_DIRTY_TIME_EXPIRED);
+ spin_unlock(&inode->i_lock);
+ }
+
+ /*
* Record the specific change for fdatasync optimisation. This
* allows fdatasync to skip log forces for inodes that are only
* timestamp dirty. We do this before the change count so that
diff --git a/fs/xfs/xfs_trans_priv.h b/fs/xfs/xfs_trans_priv.h
index b317a3644c00..be24b0c8a332 100644
--- a/fs/xfs/xfs_trans_priv.h
+++ b/fs/xfs/xfs_trans_priv.h
@@ -65,17 +65,17 @@ struct xfs_ail_cursor {
* Eventually we need to drive the locking in here as well.
*/
struct xfs_ail {
- struct xfs_mount *xa_mount;
- struct task_struct *xa_task;
- struct list_head xa_ail;
- xfs_lsn_t xa_target;
- xfs_lsn_t xa_target_prev;
- struct list_head xa_cursors;
- spinlock_t xa_lock;
- xfs_lsn_t xa_last_pushed_lsn;
- int xa_log_flush;
- struct list_head xa_buf_list;
- wait_queue_head_t xa_empty;
+ struct xfs_mount *ail_mount;
+ struct task_struct *ail_task;
+ struct list_head ail_head;
+ xfs_lsn_t ail_target;
+ xfs_lsn_t ail_target_prev;
+ struct list_head ail_cursors;
+ spinlock_t ail_lock;
+ xfs_lsn_t ail_last_pushed_lsn;
+ int ail_log_flush;
+ struct list_head ail_buf_list;
+ wait_queue_head_t ail_empty;
};
/*
@@ -84,7 +84,7 @@ struct xfs_ail {
void xfs_trans_ail_update_bulk(struct xfs_ail *ailp,
struct xfs_ail_cursor *cur,
struct xfs_log_item **log_items, int nr_items,
- xfs_lsn_t lsn) __releases(ailp->xa_lock);
+ xfs_lsn_t lsn) __releases(ailp->ail_lock);
/*
* Return a pointer to the first item in the AIL. If the AIL is empty, then
* return NULL.
@@ -93,7 +93,7 @@ static inline struct xfs_log_item *
xfs_ail_min(
struct xfs_ail *ailp)
{
- return list_first_entry_or_null(&ailp->xa_ail, struct xfs_log_item,
+ return list_first_entry_or_null(&ailp->ail_head, struct xfs_log_item,
li_ail);
}
@@ -101,14 +101,14 @@ static inline void
xfs_trans_ail_update(
struct xfs_ail *ailp,
struct xfs_log_item *lip,
- xfs_lsn_t lsn) __releases(ailp->xa_lock)
+ xfs_lsn_t lsn) __releases(ailp->ail_lock)
{
xfs_trans_ail_update_bulk(ailp, NULL, &lip, 1, lsn);
}
bool xfs_ail_delete_one(struct xfs_ail *ailp, struct xfs_log_item *lip);
void xfs_trans_ail_delete(struct xfs_ail *ailp, struct xfs_log_item *lip,
- int shutdown_type) __releases(ailp->xa_lock);
+ int shutdown_type) __releases(ailp->ail_lock);
static inline void
xfs_trans_ail_remove(
@@ -117,12 +117,12 @@ xfs_trans_ail_remove(
{
struct xfs_ail *ailp = lip->li_ailp;
- spin_lock(&ailp->xa_lock);
+ spin_lock(&ailp->ail_lock);
/* xfs_trans_ail_delete() drops the AIL lock */
if (lip->li_flags & XFS_LI_IN_AIL)
xfs_trans_ail_delete(ailp, lip, shutdown_type);
else
- spin_unlock(&ailp->xa_lock);
+ spin_unlock(&ailp->ail_lock);
}
void xfs_ail_push(struct xfs_ail *, xfs_lsn_t);
@@ -149,9 +149,9 @@ xfs_trans_ail_copy_lsn(
xfs_lsn_t *src)
{
ASSERT(sizeof(xfs_lsn_t) == 8); /* don't lock if it shrinks */
- spin_lock(&ailp->xa_lock);
+ spin_lock(&ailp->ail_lock);
*dst = *src;
- spin_unlock(&ailp->xa_lock);
+ spin_unlock(&ailp->ail_lock);
}
#else
static inline void
@@ -172,7 +172,7 @@ xfs_clear_li_failed(
struct xfs_buf *bp = lip->li_buf;
ASSERT(lip->li_flags & XFS_LI_IN_AIL);
- lockdep_assert_held(&lip->li_ailp->xa_lock);
+ lockdep_assert_held(&lip->li_ailp->ail_lock);
if (lip->li_flags & XFS_LI_FAILED) {
lip->li_flags &= ~XFS_LI_FAILED;
@@ -186,7 +186,7 @@ xfs_set_li_failed(
struct xfs_log_item *lip,
struct xfs_buf *bp)
{
- lockdep_assert_held(&lip->li_ailp->xa_lock);
+ lockdep_assert_held(&lip->li_ailp->ail_lock);
if (!(lip->li_flags & XFS_LI_FAILED)) {
xfs_buf_hold(bp);