From 0eacdd16adc425e61926876f15b58008aa5c4784 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 8 Oct 2020 00:48:22 +0200 Subject: gfs2: Turn gfs2_rbm_incr into gfs2_rbm_add Change gfs2_rbm_incr to advance an rbm by a given number of blocks. Use that in gfs2_reservation_check_and_update to save a gfs2_rbm_to_block -> gfs2_rbm_from_block round trip. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/rgrp.c | 55 +++++++++++++++++++++++++++++++++---------------------- 1 file changed, 33 insertions(+), 22 deletions(-) (limited to 'fs/gfs2') diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 5e8eef9990e3..47d171745f83 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -277,29 +277,38 @@ static int gfs2_rbm_from_block(struct gfs2_rbm *rbm, u64 block) } /** - * gfs2_rbm_incr - increment an rbm structure + * gfs2_rbm_add - add a number of blocks to an rbm * @rbm: The rbm with rgd already set correctly + * @blocks: The number of blocks to add to rpm * - * This function takes an existing rbm structure and increments it to the next - * viable block offset. - * - * Returns: If incrementing the offset would cause the rbm to go past the - * end of the rgrp, true is returned, otherwise false. + * This function takes an existing rbm structure and adds a number of blocks to + * it. * + * Returns: True if the new rbm would point past the end of the rgrp. */ -static bool gfs2_rbm_incr(struct gfs2_rbm *rbm) +static bool gfs2_rbm_add(struct gfs2_rbm *rbm, u32 blocks) { - if (rbm->offset + 1 < rbm_bi(rbm)->bi_blocks) { /* in the same bitmap */ - rbm->offset++; + struct gfs2_rgrpd *rgd = rbm->rgd; + struct gfs2_bitmap *bi = rgd->rd_bits + rbm->bii; + + if (rbm->offset + blocks < bi->bi_blocks) { + rbm->offset += blocks; return false; } - if (rbm->bii == rbm->rgd->rd_length - 1) /* at the last bitmap */ - return true; + blocks -= bi->bi_blocks - rbm->offset; - rbm->offset = 0; - rbm->bii++; - return false; + for(;;) { + bi++; + if (bi == rgd->rd_bits + rgd->rd_length) + return true; + if (blocks < bi->bi_blocks) { + rbm->offset = blocks; + rbm->bii = bi - rgd->rd_bits; + return false; + } + blocks -= bi->bi_blocks; + } } /** @@ -323,7 +332,7 @@ static bool gfs2_unaligned_extlen(struct gfs2_rbm *rbm, u32 n_unaligned, u32 *le (*len)--; if (*len == 0) return true; - if (gfs2_rbm_incr(rbm)) + if (gfs2_rbm_add(rbm, 1)) return true; } @@ -1639,7 +1648,6 @@ static int gfs2_reservation_check_and_update(struct gfs2_rbm *rbm, u64 block = gfs2_rbm_to_block(rbm); u32 extlen = 1; u64 nblock; - int ret; /* * If we have a minimum extent length, then skip over any extent @@ -1664,12 +1672,15 @@ static int gfs2_reservation_check_and_update(struct gfs2_rbm *rbm, maxext->len = extlen; maxext->rbm = *rbm; } -fail: - nblock = block + extlen; + } else { + u64 len = nblock - block; + if (len >= (u64)1 << 32) + return -E2BIG; + extlen = len; } - ret = gfs2_rbm_from_block(rbm, nblock); - if (ret < 0) - return ret; +fail: + if (gfs2_rbm_add(rbm, extlen)) + return -E2BIG; return 1; } @@ -2205,7 +2216,7 @@ static void rgblk_free(struct gfs2_sbd *sdp, struct gfs2_rgrpd *rgd, bi_prev = bi; } gfs2_setbit(&rbm, false, new_state); - gfs2_rbm_incr(&rbm); + gfs2_rbm_add(&rbm, 1); } } -- cgit v1.2.3-70-g09d2 From c65b76b893032a9fa69056656c771f5e2fc9ef0c Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 11 Oct 2018 18:56:40 +0200 Subject: gfs2: Only use struct gfs2_rbm for bitmap manipulations GFS2 uses struct gfs2_rbm to represent a filesystem block number as a bit position within a resource group. 
This representation is used in the bitmap manipulation code to prevent excessive conversions between block numbers and bit positions, but also in struct gfs2_blkreserv which is part of struct gfs2_inode, to mark the start of a reservation. In the inode, the bit position representation makes less sense: first, the start position is used as a block number about as often as a bit position; second, the bit position representation makes the code unnecessarily complicated and difficult to read. Therefore, change struct gfs2_blkreserv to represent the start of a reservation as a block number instead of a bit position. (This requires keeping track of the resource group in gfs2_blkreserv separately.) With that change, various things can be slightly simplified, and struct gfs2_rbm can be moved to rgrp.c. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/bmap.c | 2 +- fs/gfs2/incore.h | 30 +--------- fs/gfs2/rgrp.c | 154 ++++++++++++++++++++++++++++++--------------------- fs/gfs2/trace_gfs2.h | 10 ++-- fs/gfs2/trans.h | 2 +- 5 files changed, 101 insertions(+), 97 deletions(-) (limited to 'fs/gfs2') diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index 62d9081d1e26..e2ee3703f7df 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -1544,7 +1544,7 @@ more_rgrps: /* Must be done with the rgrp glock held: */ if (gfs2_rs_active(&ip->i_res) && - rgd == ip->i_res.rs_rbm.rgd) + rgd == ip->i_res.rs_rgd) gfs2_rs_deltree(&ip->i_res); } diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 8e1ab8ed4abc..cca806ff611c 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -125,31 +125,6 @@ struct gfs2_rgrpd { struct rb_root rd_rstree; /* multi-block reservation tree */ }; -struct gfs2_rbm { - struct gfs2_rgrpd *rgd; - u32 offset; /* The offset is bitmap relative */ - int bii; /* Bitmap index */ -}; - -static inline struct gfs2_bitmap *rbm_bi(const struct gfs2_rbm *rbm) -{ - return rbm->rgd->rd_bits + rbm->bii; -} - -static inline u64 gfs2_rbm_to_block(const struct gfs2_rbm *rbm) -{ - BUG_ON(rbm->offset >= rbm->rgd->rd_data); - return rbm->rgd->rd_data0 + (rbm_bi(rbm)->bi_start * GFS2_NBBY) + - rbm->offset; -} - -static inline bool gfs2_rbm_eq(const struct gfs2_rbm *rbm1, - const struct gfs2_rbm *rbm2) -{ - return (rbm1->rgd == rbm2->rgd) && (rbm1->bii == rbm2->bii) && - (rbm1->offset == rbm2->offset); -} - enum gfs2_state_bits { BH_Pinned = BH_PrivateStart, BH_Escaped = BH_PrivateStart + 1, @@ -313,8 +288,9 @@ struct gfs2_qadata { /* quota allocation data */ */ struct gfs2_blkreserv { - struct rb_node rs_node; /* link to other block reservations */ - struct gfs2_rbm rs_rbm; /* Start of reservation */ + struct rb_node rs_node; /* node within rd_rstree */ + struct gfs2_rgrpd *rs_rgd; + u64 rs_start; /* start of reservation */ u32 rs_free; /* how many blocks are still free */ }; diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 47d171745f83..391186382cee 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -36,6 +36,24 @@ #define BFITNOENT ((u32)~0) #define NO_BLOCK ((u64)~0) +struct gfs2_rbm { + struct gfs2_rgrpd *rgd; + u32 offset; /* The offset is bitmap relative */ + int bii; /* Bitmap index */ +}; + +static inline struct gfs2_bitmap *rbm_bi(const struct gfs2_rbm *rbm) +{ + return rbm->rgd->rd_bits + rbm->bii; +} + +static inline u64 gfs2_rbm_to_block(const struct gfs2_rbm *rbm) +{ + BUG_ON(rbm->offset >= rbm->rgd->rd_data); + return rbm->rgd->rd_data0 + (rbm_bi(rbm)->bi_start * GFS2_NBBY) + + rbm->offset; +} + /* * These routines are used by the resource group routines (rgrp.c) * to keep track of block allocation. 
Each block is represented by two @@ -175,7 +193,7 @@ static inline u64 gfs2_bit_search(const __le64 *ptr, u64 mask, u8 state) /** * rs_cmp - multi-block reservation range compare - * @blk: absolute file system block number of the new reservation + * @start: start of the new reservation * @len: number of blocks in the new reservation * @rs: existing reservation to compare against * @@ -183,13 +201,11 @@ static inline u64 gfs2_bit_search(const __le64 *ptr, u64 mask, u8 state) * -1 if the block range is before the start of the reservation * 0 if the block range overlaps with the reservation */ -static inline int rs_cmp(u64 blk, u32 len, struct gfs2_blkreserv *rs) +static inline int rs_cmp(u64 start, u32 len, struct gfs2_blkreserv *rs) { - u64 startblk = gfs2_rbm_to_block(&rs->rs_rbm); - - if (blk >= startblk + rs->rs_free) + if (start >= rs->rs_start + rs->rs_free) return 1; - if (blk + len - 1 < startblk) + if (rs->rs_start >= start + len) return -1; return 0; } @@ -311,13 +327,28 @@ static bool gfs2_rbm_add(struct gfs2_rbm *rbm, u32 blocks) } } +static struct gfs2_bitmap *gfs2_block_to_bitmap(struct gfs2_rgrpd *rgd, + u64 block) +{ + unsigned int delta = (sizeof(struct gfs2_rgrp) - + sizeof(struct gfs2_meta_header)) * GFS2_NBBY; + unsigned int rblock, bii; + + if (!rgrp_contains_block(rgd, block)) + return NULL; + rblock = block - rgd->rd_data0; + bii = (rblock + delta) / rgd->rd_sbd->sd_blocks_per_bitmap; + return rgd->rd_bits + bii; +} + /** * gfs2_unaligned_extlen - Look for free blocks which are not byte aligned * @rbm: Position to search (value/result) * @n_unaligned: Number of unaligned blocks to check * @len: Decremented for each block found (terminate on zero) * - * Returns: true if a non-free block is encountered + * Returns: true if a non-free block is encountered or the end of the resource + * group is reached. */ static bool gfs2_unaligned_extlen(struct gfs2_rbm *rbm, u32 n_unaligned, u32 *len) @@ -604,10 +635,11 @@ static void dump_rs(struct seq_file *seq, const struct gfs2_blkreserv *rs, { struct gfs2_inode *ip = container_of(rs, struct gfs2_inode, i_res); - gfs2_print_dbg(seq, "%s B: n:%llu s:%llu b:%u f:%u\n", fs_id_buf, + gfs2_print_dbg(seq, "%s B: n:%llu s:%llu f:%u\n", + fs_id_buf, (unsigned long long)ip->i_no_addr, - (unsigned long long)gfs2_rbm_to_block(&rs->rs_rbm), - rs->rs_rbm.offset, rs->rs_free); + (unsigned long long)rs->rs_start, + rs->rs_free); } /** @@ -622,30 +654,28 @@ static void __rs_deltree(struct gfs2_blkreserv *rs) if (!gfs2_rs_active(rs)) return; - rgd = rs->rs_rbm.rgd; + rgd = rs->rs_rgd; trace_gfs2_rs(rs, TRACE_RS_TREEDEL); rb_erase(&rs->rs_node, &rgd->rd_rstree); RB_CLEAR_NODE(&rs->rs_node); if (rs->rs_free) { - u64 last_block = gfs2_rbm_to_block(&rs->rs_rbm) + - rs->rs_free - 1; - struct gfs2_rbm last_rbm = { .rgd = rs->rs_rbm.rgd, }; + u64 last_block = rs->rs_start + rs->rs_free - 1; struct gfs2_bitmap *start, *last; /* return reserved blocks to the rgrp */ - BUG_ON(rs->rs_rbm.rgd->rd_reserved < rs->rs_free); - rs->rs_rbm.rgd->rd_reserved -= rs->rs_free; + BUG_ON(rs->rs_rgd->rd_reserved < rs->rs_free); + rs->rs_rgd->rd_reserved -= rs->rs_free; /* The rgrp extent failure point is likely not to increase; it will only do so if the freed blocks are somehow contiguous with a span of free blocks that follows. Still, it will force the number to be recalculated later. 
*/ rgd->rd_extfail_pt += rs->rs_free; rs->rs_free = 0; - if (gfs2_rbm_from_block(&last_rbm, last_block)) + start = gfs2_block_to_bitmap(rgd, rs->rs_start); + last = gfs2_block_to_bitmap(rgd, last_block); + if (!start || !last) return; - start = rbm_bi(&rs->rs_rbm); - last = rbm_bi(&last_rbm); do clear_bit(GBF_FULL, &start->bi_flags); while (start++ != last); @@ -661,7 +691,7 @@ void gfs2_rs_deltree(struct gfs2_blkreserv *rs) { struct gfs2_rgrpd *rgd; - rgd = rs->rs_rbm.rgd; + rgd = rs->rs_rgd; if (rgd) { spin_lock(&rgd->rd_rsspin); __rs_deltree(rs); @@ -1467,8 +1497,7 @@ static void rs_insert(struct gfs2_inode *ip) struct rb_node **newn, *parent = NULL; int rc; struct gfs2_blkreserv *rs = &ip->i_res; - struct gfs2_rgrpd *rgd = rs->rs_rbm.rgd; - u64 fsblock = gfs2_rbm_to_block(&rs->rs_rbm); + struct gfs2_rgrpd *rgd = rs->rs_rgd; BUG_ON(gfs2_rs_active(rs)); @@ -1479,7 +1508,7 @@ static void rs_insert(struct gfs2_inode *ip) rb_entry(*newn, struct gfs2_blkreserv, rs_node); parent = *newn; - rc = rs_cmp(fsblock, rs->rs_free, cur); + rc = rs_cmp(rs->rs_start, rs->rs_free, cur); if (rc > 0) newn = &((*newn)->rb_right); else if (rc < 0) @@ -1567,7 +1596,7 @@ static void rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip, ret = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, &extlen, ip, true); if (ret == 0) { - rs->rs_rbm = rbm; + rs->rs_start = gfs2_rbm_to_block(&rbm); rs->rs_free = extlen; rs_insert(ip); } else { @@ -1612,7 +1641,7 @@ static u64 gfs2_next_unreserved_block(struct gfs2_rgrpd *rgd, u64 block, if (n) { while ((rs_cmp(block, length, rs) == 0) && (&ip->i_res != rs)) { - block = gfs2_rbm_to_block(&rs->rs_rbm) + rs->rs_free; + block = rs->rs_start + rs->rs_free; n = n->rb_right; if (n == NULL) break; @@ -1949,7 +1978,7 @@ static bool gfs2_rgrp_used_recently(const struct gfs2_blkreserv *rs, u64 tdiff; tdiff = ktime_to_ns(ktime_sub(ktime_get_real(), - rs->rs_rbm.rgd->rd_gl->gl_dstamp)); + rs->rs_rgd->rd_gl->gl_dstamp)); return tdiff > (msecs * 1000 * 1000); } @@ -2027,45 +2056,45 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap) if (gfs2_assert_warn(sdp, ap->target)) return -EINVAL; if (gfs2_rs_active(rs)) { - begin = rs->rs_rbm.rgd; - } else if (rs->rs_rbm.rgd && - rgrp_contains_block(rs->rs_rbm.rgd, ip->i_goal)) { - begin = rs->rs_rbm.rgd; + begin = rs->rs_rgd; + } else if (rs->rs_rgd && + rgrp_contains_block(rs->rs_rgd, ip->i_goal)) { + begin = rs->rs_rgd; } else { check_and_update_goal(ip); - rs->rs_rbm.rgd = begin = gfs2_blk2rgrpd(sdp, ip->i_goal, 1); + rs->rs_rgd = begin = gfs2_blk2rgrpd(sdp, ip->i_goal, 1); } if (S_ISDIR(ip->i_inode.i_mode) && (ap->aflags & GFS2_AF_ORLOV)) skip = gfs2_orlov_skip(ip); - if (rs->rs_rbm.rgd == NULL) + if (rs->rs_rgd == NULL) return -EBADSLT; while (loops < 3) { rg_locked = 1; - if (!gfs2_glock_is_locked_by_me(rs->rs_rbm.rgd->rd_gl)) { + if (!gfs2_glock_is_locked_by_me(rs->rs_rgd->rd_gl)) { rg_locked = 0; if (skip && skip--) goto next_rgrp; if (!gfs2_rs_active(rs)) { if (loops == 0 && - !fast_to_acquire(rs->rs_rbm.rgd)) + !fast_to_acquire(rs->rs_rgd)) goto next_rgrp; if ((loops < 2) && gfs2_rgrp_used_recently(rs, 1000) && - gfs2_rgrp_congested(rs->rs_rbm.rgd, loops)) + gfs2_rgrp_congested(rs->rs_rgd, loops)) goto next_rgrp; } - error = gfs2_glock_nq_init(rs->rs_rbm.rgd->rd_gl, + error = gfs2_glock_nq_init(rs->rs_rgd->rd_gl, LM_ST_EXCLUSIVE, flags, &ip->i_rgd_gh); if (unlikely(error)) return error; if (!gfs2_rs_active(rs) && (loops < 2) && - gfs2_rgrp_congested(rs->rs_rbm.rgd, loops)) + gfs2_rgrp_congested(rs->rs_rgd, 
loops)) goto skip_rgrp; if (sdp->sd_args.ar_rgrplvb) { - error = update_rgrp_lvb(rs->rs_rbm.rgd); + error = update_rgrp_lvb(rs->rs_rgd); if (unlikely(error)) { gfs2_glock_dq_uninit(&ip->i_rgd_gh); return error; @@ -2074,24 +2103,24 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap) } /* Skip unusable resource groups */ - if ((rs->rs_rbm.rgd->rd_flags & (GFS2_RGF_NOALLOC | + if ((rs->rs_rgd->rd_flags & (GFS2_RGF_NOALLOC | GFS2_RDF_ERROR)) || - (loops == 0 && ap->target > rs->rs_rbm.rgd->rd_extfail_pt)) + (loops == 0 && ap->target > rs->rs_rgd->rd_extfail_pt)) goto skip_rgrp; if (sdp->sd_args.ar_rgrplvb) - gfs2_rgrp_bh_get(rs->rs_rbm.rgd); + gfs2_rgrp_bh_get(rs->rs_rgd); /* Get a reservation if we don't already have one */ if (!gfs2_rs_active(rs)) - rg_mblk_search(rs->rs_rbm.rgd, ip, ap); + rg_mblk_search(rs->rs_rgd, ip, ap); /* Skip rgrps when we can't get a reservation on first pass */ if (!gfs2_rs_active(rs) && (loops < 1)) goto check_rgrp; /* If rgrp has enough free space, use it */ - free_blocks = rgd_free(rs->rs_rbm.rgd, rs); + free_blocks = rgd_free(rs->rs_rgd, rs); if (free_blocks >= ap->target || (loops == 2 && ap->min_target && free_blocks >= ap->min_target)) { @@ -2100,8 +2129,8 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap) } check_rgrp: /* Check for unlinked inodes which can be reclaimed */ - if (rs->rs_rbm.rgd->rd_flags & GFS2_RDF_CHECK) - try_rgrp_unlink(rs->rs_rbm.rgd, &last_unlinked, + if (rs->rs_rgd->rd_flags & GFS2_RDF_CHECK) + try_rgrp_unlink(rs->rs_rgd, &last_unlinked, ip->i_no_addr); skip_rgrp: /* Drop reservation, if we couldn't use reserved rgrp */ @@ -2113,7 +2142,7 @@ skip_rgrp: gfs2_glock_dq_uninit(&ip->i_rgd_gh); next_rgrp: /* Find the next rgrp, and continue looking */ - if (gfs2_select_rgrp(&rs->rs_rbm.rgd, begin)) + if (gfs2_select_rgrp(&rs->rs_rgd, begin)) continue; if (skip) continue; @@ -2284,20 +2313,21 @@ static void gfs2_adjust_reservation(struct gfs2_inode *ip, { struct gfs2_blkreserv *rs = &ip->i_res; struct gfs2_rgrpd *rgd = rbm->rgd; - unsigned rlen; - u64 block; - int ret; spin_lock(&rgd->rd_rsspin); if (gfs2_rs_active(rs)) { - if (gfs2_rbm_eq(&rs->rs_rbm, rbm)) { - block = gfs2_rbm_to_block(rbm); - ret = gfs2_rbm_from_block(&rs->rs_rbm, block + len); + u64 start = gfs2_rbm_to_block(rbm); + + if (rs->rs_start == start) { + unsigned int rlen; + + rs->rs_start += len; rlen = min(rs->rs_free, len); rs->rs_free -= rlen; rgd->rd_reserved -= rlen; trace_gfs2_rs(rs, TRACE_RS_CLAIM); - if (rs->rs_free && !ret) + if (rs->rs_start < rgd->rd_data0 + rgd->rd_data && + rs->rs_free) goto out; /* We used up our block reservation, so we should reserve more blocks next time. 
*/ @@ -2326,15 +2356,13 @@ static void gfs2_set_alloc_start(struct gfs2_rbm *rbm, u64 goal; if (gfs2_rs_active(&ip->i_res)) { - *rbm = ip->i_res.rs_rbm; - return; + goal = ip->i_res.rs_start; + } else { + if (!dinode && rgrp_contains_block(rbm->rgd, ip->i_goal)) + goal = ip->i_goal; + else + goal = rbm->rgd->rd_last_alloc + rbm->rgd->rd_data0; } - - if (!dinode && rgrp_contains_block(rbm->rgd, ip->i_goal)) - goal = ip->i_goal; - else - goal = rbm->rgd->rd_last_alloc + rbm->rgd->rd_data0; - if (WARN_ON_ONCE(gfs2_rbm_from_block(rbm, goal))) { rbm->bii = 0; rbm->offset = 0; @@ -2357,7 +2385,7 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks, { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); struct buffer_head *dibh; - struct gfs2_rbm rbm = { .rgd = ip->i_res.rs_rbm.rgd, }; + struct gfs2_rbm rbm = { .rgd = ip->i_res.rs_rgd, }; unsigned int ndata; u64 block; /* block, within the file system scope */ int error; @@ -2589,7 +2617,7 @@ void gfs2_rlist_add(struct gfs2_inode *ip, struct gfs2_rgrp_list *rlist, return; rgd = gfs2_blk2rgrpd(sdp, block, 1); } else { - rgd = ip->i_res.rs_rbm.rgd; + rgd = ip->i_res.rs_rgd; if (!rgd || !rgrp_contains_block(rgd, block)) rgd = gfs2_blk2rgrpd(sdp, block, 1); } diff --git a/fs/gfs2/trace_gfs2.h b/fs/gfs2/trace_gfs2.h index 0b2f858d9a8c..e461a691e5e8 100644 --- a/fs/gfs2/trace_gfs2.h +++ b/fs/gfs2/trace_gfs2.h @@ -603,13 +603,13 @@ TRACE_EVENT(gfs2_rs, ), TP_fast_assign( - __entry->dev = rs->rs_rbm.rgd->rd_sbd->sd_vfs->s_dev; - __entry->rd_addr = rs->rs_rbm.rgd->rd_addr; - __entry->rd_free_clone = rs->rs_rbm.rgd->rd_free_clone; - __entry->rd_reserved = rs->rs_rbm.rgd->rd_reserved; + __entry->dev = rs->rs_rgd->rd_sbd->sd_vfs->s_dev; + __entry->rd_addr = rs->rs_rgd->rd_addr; + __entry->rd_free_clone = rs->rs_rgd->rd_free_clone; + __entry->rd_reserved = rs->rs_rgd->rd_reserved; __entry->inum = container_of(rs, struct gfs2_inode, i_res)->i_no_addr; - __entry->start = gfs2_rbm_to_block(&rs->rs_rbm); + __entry->start = rs->rs_start; __entry->free = rs->rs_free; __entry->func = func; ), diff --git a/fs/gfs2/trans.h b/fs/gfs2/trans.h index 83199ce5a5c5..b812c4958dc4 100644 --- a/fs/gfs2/trans.h +++ b/fs/gfs2/trans.h @@ -27,7 +27,7 @@ struct gfs2_glock; * block, or all of the blocks in the rg, whichever is smaller */ static inline unsigned int gfs2_rg_blocks(const struct gfs2_inode *ip, unsigned requested) { - struct gfs2_rgrpd *rgd = ip->i_res.rs_rbm.rgd; + struct gfs2_rgrpd *rgd = ip->i_res.rs_rgd; if (requested < rgd->rd_length) return requested + 1; -- cgit v1.2.3-70-g09d2 From 3ed08befeb09e199c258fc4f3878935f57a2bd9a Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Mon, 16 Mar 2020 15:36:09 +0100 Subject: gfs2: Get rid of unnecessary variable in gfs2_alloc_blocks Variable ndata is only used inside "if (!dinode)", so it can be replaced entirely with *nblocks. 
Signed-off-by: Andreas Gruenbacher --- fs/gfs2/rgrp.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'fs/gfs2') diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 391186382cee..4f3337fd6b23 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -2386,7 +2386,6 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks, struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); struct buffer_head *dibh; struct gfs2_rbm rbm = { .rgd = ip->i_res.rs_rgd, }; - unsigned int ndata; u64 block; /* block, within the file system scope */ int error; @@ -2412,12 +2411,8 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks, rbm.rgd->rd_last_alloc = block - rbm.rgd->rd_data0; if (gfs2_rs_active(&ip->i_res)) gfs2_adjust_reservation(ip, &rbm, *nblocks); - ndata = *nblocks; - if (dinode) - ndata--; - if (!dinode) { - ip->i_goal = block + ndata - 1; + ip->i_goal = block + *nblocks - 1; error = gfs2_meta_inode_buffer(ip, &dibh); if (error == 0) { struct gfs2_dinode *di = -- cgit v1.2.3-70-g09d2 From 4272006d956f49e428f87c476959318301f2d4d2 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Fri, 20 Mar 2020 13:31:41 +0100 Subject: gfs2: Minor gfs2_inplace_reserve cleanup Clean up the reservation size computation logic in gfs2_inplace_reserve a little. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/rgrp.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'fs/gfs2') diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 4f3337fd6b23..f759f79d2af7 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -2048,12 +2048,13 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap) struct gfs2_blkreserv *rs = &ip->i_res; int error = 0, rg_locked, flags = 0; u64 last_unlinked = NO_BLOCK; + u32 target = ap->target; int loops = 0; u32 free_blocks, skip = 0; if (sdp->sd_args.ar_rgrplvb) flags |= GL_SKIP; - if (gfs2_assert_warn(sdp, ap->target)) + if (gfs2_assert_warn(sdp, target)) return -EINVAL; if (gfs2_rs_active(rs)) { begin = rs->rs_rgd; @@ -2105,7 +2106,7 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap) /* Skip unusable resource groups */ if ((rs->rs_rgd->rd_flags & (GFS2_RGF_NOALLOC | GFS2_RDF_ERROR)) || - (loops == 0 && ap->target > rs->rs_rgd->rd_extfail_pt)) + (loops == 0 && target > rs->rs_rgd->rd_extfail_pt)) goto skip_rgrp; if (sdp->sd_args.ar_rgrplvb) @@ -2121,9 +2122,7 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap) /* If rgrp has enough free space, use it */ free_blocks = rgd_free(rs->rs_rgd, rs); - if (free_blocks >= ap->target || - (loops == 2 && ap->min_target && - free_blocks >= ap->min_target)) { + if (free_blocks >= target) { ap->allowed = free_blocks; return 0; } @@ -2159,9 +2158,12 @@ next_rgrp: return error; } /* Flushing the log may release space */ - if (loops == 2) + if (loops == 2) { + if (ap->min_target) + target = ap->min_target; gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_NORMAL | GFS2_LFC_INPLACE_RESERVE); + } } return -ENOSPC; -- cgit v1.2.3-70-g09d2 From 2fdc2fa21bc72ec06c0c9f0e30b88fe1f2486b75 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Tue, 6 Oct 2020 14:29:04 +0200 Subject: Revert "gfs2: Don't reject a supposedly full bitmap if we have blocks reserved" This reverts commit e79e0e1428188b24c3b57309ffa54a33c4ae40c4. 
It turns out that we're only setting the GBF_FULL flag of a bitmap if we've been scanning from the beginning of the bitmap until the end and we haven't found a single free block, and we're not skipping reservations in that process, either. This means that in gfs2_rbm_find, we can always skip bitmaps with the GBF_FULL flag set. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/rgrp.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'fs/gfs2') diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index f759f79d2af7..faf9f21f3e98 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -1754,8 +1754,7 @@ static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 *minext, while(1) { bi = rbm_bi(rbm); - if ((ip == NULL || !gfs2_rs_active(&ip->i_res)) && - test_bit(GBF_FULL, &bi->bi_flags) && + if (test_bit(GBF_FULL, &bi->bi_flags) && (state == GFS2_BLKST_FREE)) goto next_bitmap; -- cgit v1.2.3-70-g09d2 From a12c6fa1de8472f0eb56f7e5a60de2ffa4f6c043 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Tue, 6 Oct 2020 15:16:16 +0200 Subject: gfs2: Don't clear GBF_FULL flags in rs_deltree Removing a reservation doesn't make any actual space available, so don't clear the GBF_FULL flags in that case. Otherwise, we'll only spend more time scanning the bitmaps unnecessarily. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/rgrp.c | 25 +------------------------ 1 file changed, 1 insertion(+), 24 deletions(-) (limited to 'fs/gfs2') diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index faf9f21f3e98..f585a13cebde 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -327,20 +327,6 @@ static bool gfs2_rbm_add(struct gfs2_rbm *rbm, u32 blocks) } } -static struct gfs2_bitmap *gfs2_block_to_bitmap(struct gfs2_rgrpd *rgd, - u64 block) -{ - unsigned int delta = (sizeof(struct gfs2_rgrp) - - sizeof(struct gfs2_meta_header)) * GFS2_NBBY; - unsigned int rblock, bii; - - if (!rgrp_contains_block(rgd, block)) - return NULL; - rblock = block - rgd->rd_data0; - bii = (rblock + delta) / rgd->rd_sbd->sd_blocks_per_bitmap; - return rgd->rd_bits + bii; -} - /** * gfs2_unaligned_extlen - Look for free blocks which are not byte aligned * @rbm: Position to search (value/result) @@ -660,25 +646,16 @@ static void __rs_deltree(struct gfs2_blkreserv *rs) RB_CLEAR_NODE(&rs->rs_node); if (rs->rs_free) { - u64 last_block = rs->rs_start + rs->rs_free - 1; - struct gfs2_bitmap *start, *last; - /* return reserved blocks to the rgrp */ BUG_ON(rs->rs_rgd->rd_reserved < rs->rs_free); rs->rs_rgd->rd_reserved -= rs->rs_free; + /* The rgrp extent failure point is likely not to increase; it will only do so if the freed blocks are somehow contiguous with a span of free blocks that follows. Still, it will force the number to be recalculated later. */ rgd->rd_extfail_pt += rs->rs_free; rs->rs_free = 0; - start = gfs2_block_to_bitmap(rgd, rs->rs_start); - last = gfs2_block_to_bitmap(rgd, last_block); - if (!start || !last) - return; - do - clear_bit(GBF_FULL, &start->bi_flags); - while (start++ != last); } } -- cgit v1.2.3-70-g09d2 From 560b8eba6bf685bb2a29fd24e7b3455e7b8f795a Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Tue, 6 Oct 2020 15:26:22 +0200 Subject: gfs2: Set GBF_FULL flags when reading resource group When reading a resource group from disk or when receiving the resource group statistics from a Lock Value Block (LVB), set/clear the GBF_FULL flags of all bitmaps in that resource group according to whether or not the resource group is full. 
Signed-off-by: Andreas Gruenbacher --- fs/gfs2/rgrp.c | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) (limited to 'fs/gfs2') diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index f585a13cebde..f878d82750f7 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -1165,6 +1165,23 @@ static u32 count_unlinked(struct gfs2_rgrpd *rgd) return count; } +static void rgrp_set_bitmap_flags(struct gfs2_rgrpd *rgd) +{ + struct gfs2_bitmap *bi; + int x; + + if (rgd->rd_free) { + for (x = 0; x < rgd->rd_length; x++) { + bi = rgd->rd_bits + x; + clear_bit(GBF_FULL, &bi->bi_flags); + } + } else { + for (x = 0; x < rgd->rd_length; x++) { + bi = rgd->rd_bits + x; + set_bit(GBF_FULL, &bi->bi_flags); + } + } +} /** * gfs2_rgrp_bh_get - Read in a RG's header and bitmaps @@ -1208,9 +1225,8 @@ static int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd) } if (!(rgd->rd_flags & GFS2_RDF_UPTODATE)) { - for (x = 0; x < length; x++) - clear_bit(GBF_FULL, &rgd->rd_bits[x].bi_flags); gfs2_rgrp_in(rgd, (rgd->rd_bits[0].bi_bh)->b_data); + rgrp_set_bitmap_flags(rgd); rgd->rd_flags |= (GFS2_RDF_UPTODATE | GFS2_RDF_CHECK); rgd->rd_free_clone = rgd->rd_free; /* max out the rgrp allocation failure point */ @@ -1260,6 +1276,7 @@ static int update_rgrp_lvb(struct gfs2_rgrpd *rgd) if (rgd->rd_rgl->rl_unlinked == 0) rgd->rd_flags &= ~GFS2_RDF_CHECK; rgd->rd_free = be32_to_cpu(rgd->rd_rgl->rl_free); + rgrp_set_bitmap_flags(rgd); rgd->rd_free_clone = rgd->rd_free; rgd->rd_dinodes = be32_to_cpu(rgd->rd_rgl->rl_dinodes); rgd->rd_igeneration = be64_to_cpu(rgd->rd_rgl->rl_igeneration); -- cgit v1.2.3-70-g09d2 From 736b2f778f0a71ce54abe7e6d2c6f7591b95f7b8 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Mon, 7 Dec 2020 00:06:32 +0100 Subject: gfs2: Un-obfuscate function jdesc_find_i Clean up this function to show that it is trivial. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/super.c | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) (limited to 'fs/gfs2') diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 2f56acc41c04..ed7a829e9ffe 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -81,19 +81,12 @@ void gfs2_jindex_free(struct gfs2_sbd *sdp) static struct gfs2_jdesc *jdesc_find_i(struct list_head *head, unsigned int jid) { struct gfs2_jdesc *jd; - int found = 0; list_for_each_entry(jd, head, jd_list) { - if (jd->jd_jid == jid) { - found = 1; - break; - } + if (jd->jd_jid == jid) + return jd; } - - if (!found) - jd = NULL; - - return jd; + return NULL; } struct gfs2_jdesc *gfs2_jdesc_find(struct gfs2_sbd *sdp, unsigned int jid) -- cgit v1.2.3-70-g09d2 From 458094c2c6115cda721fd11a56f7e4f27cddd5b1 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Sat, 5 Dec 2020 13:21:50 +0100 Subject: gfs2: Simplify the buf_limit and databuf_limit definitions The BUF_OFFSET and DATABUF_OFFSET definitions are only used in buf_limit and databuf_limit, respectively, and the rounding done in those definitions is immediately wiped out by dividing by the element size. 
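A quick user-space sketch of the arithmetic (illustrative only; it assumes a 4096-byte block, sizeof(struct gfs2_log_descriptor) == 72, and sd_ldptrs == (sb_bsize - sizeof(struct gfs2_log_descriptor)) / sizeof(__be64)) shows that the limits come out the same with or without the rounding:

#include <stdio.h>

int main(void)
{
	unsigned int bsize = 4096;			/* assumed block size */
	unsigned int ld = 72;				/* assumed sizeof(struct gfs2_log_descriptor) */
	unsigned int buf_off = (ld + 7) & ~7u;		/* old BUF_OFFSET: 72, already 8-byte aligned */
	unsigned int databuf_off = (ld + 15) & ~15u;	/* old DATABUF_OFFSET: 72 rounded up to 80 */
	unsigned int ldptrs = (bsize - ld) / 8;		/* assumed sd_ldptrs value: 503 */

	printf("buf_limit:     old %u, new %u\n",
	       (bsize - buf_off) / 8, ldptrs);		/* 503 and 503 */
	printf("databuf_limit: old %u, new %u\n",
	       (bsize - databuf_off) / 16, ldptrs / 2);	/* 251 and 251 */
	return 0;
}

For the power-of-two block sizes gfs2 uses, the extra 8 bytes of alignment fall within the same 16-byte element, so the quotient is unchanged either way.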
Signed-off-by: Andreas Gruenbacher --- fs/gfs2/lops.h | 17 ++--------------- 1 file changed, 2 insertions(+), 15 deletions(-) (limited to 'fs/gfs2') diff --git a/fs/gfs2/lops.h b/fs/gfs2/lops.h index fbdbb08dcec6..3fca5bf239d3 100644 --- a/fs/gfs2/lops.h +++ b/fs/gfs2/lops.h @@ -10,13 +10,6 @@ #include #include "incore.h" -#define BUF_OFFSET \ - ((sizeof(struct gfs2_log_descriptor) + sizeof(__be64) - 1) & \ - ~(sizeof(__be64) - 1)) -#define DATABUF_OFFSET \ - ((sizeof(struct gfs2_log_descriptor) + (2 * sizeof(__be64) - 1)) & \ - ~(2 * sizeof(__be64) - 1)) - extern const struct gfs2_log_operations *gfs2_log_ops[]; extern void gfs2_log_incr_head(struct gfs2_sbd *sdp); extern u64 gfs2_log_bmap(struct gfs2_jdesc *jd, unsigned int lbn); @@ -29,18 +22,12 @@ extern int gfs2_find_jhead(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head, bool keep_cache); static inline unsigned int buf_limit(struct gfs2_sbd *sdp) { - unsigned int limit; - - limit = (sdp->sd_sb.sb_bsize - BUF_OFFSET) / sizeof(__be64); - return limit; + return sdp->sd_ldptrs; } static inline unsigned int databuf_limit(struct gfs2_sbd *sdp) { - unsigned int limit; - - limit = (sdp->sd_sb.sb_bsize - DATABUF_OFFSET) / (2 * sizeof(__be64)); - return limit; + return sdp->sd_ldptrs / 2; } static inline void lops_before_commit(struct gfs2_sbd *sdp, -- cgit v1.2.3-70-g09d2 From 5a4e9c607e03886815121fa975fcd8e0fa7252fe Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Sun, 6 Dec 2020 13:32:54 +0100 Subject: gfs2: Minor gfs2_write_revokes cleanups Clean up the computations in gfs2_write_revokes (no change in functionality). Signed-off-by: Andreas Gruenbacher --- fs/gfs2/log.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'fs/gfs2') diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index 2e9314091c81..ccce17fe605f 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -712,11 +712,13 @@ void gfs2_glock_remove_revoke(struct gfs2_glock *gl) void gfs2_write_revokes(struct gfs2_sbd *sdp) { /* number of revokes we still have room for */ - int max_revokes = (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_log_descriptor)) / sizeof(u64); + unsigned int max_revokes; gfs2_log_lock(sdp); - while (sdp->sd_log_num_revoke > max_revokes) - max_revokes += (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_meta_header)) / sizeof(u64); + max_revokes = sdp->sd_ldptrs; + if (sdp->sd_log_num_revoke > sdp->sd_ldptrs) + max_revokes += roundup(sdp->sd_log_num_revoke - sdp->sd_ldptrs, + sdp->sd_inptrs); max_revokes -= sdp->sd_log_num_revoke; if (!sdp->sd_log_num_revoke) { atomic_dec(&sdp->sd_log_blks_free); -- cgit v1.2.3-70-g09d2 From 6188e8777de48c8404b0bcca74a70ac83c37d773 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Sun, 6 Dec 2020 13:30:14 +0100 Subject: gfs2: Some documentation updates The calc_reserved description claims that buf_limit is 502 (on 4k filesystems), but it is actually 503. Fix / clarify the entire description. 
Signed-off-by: Andreas Gruenbacher --- fs/gfs2/log.c | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) (limited to 'fs/gfs2') diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index ccce17fe605f..12e8280f0806 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -50,10 +50,12 @@ unsigned int gfs2_struct2blk(struct gfs2_sbd *sdp, unsigned int nstruct) unsigned int blks; unsigned int first, second; + /* The initial struct gfs2_log_descriptor block */ blks = 1; first = sdp->sd_ldptrs; if (nstruct > first) { + /* Subsequent struct gfs2_meta_header blocks */ second = sdp->sd_inptrs; blks += DIV_ROUND_UP(nstruct - first, second); } @@ -507,24 +509,20 @@ static inline unsigned int log_distance(struct gfs2_sbd *sdp, unsigned int newer } /** - * calc_reserved - Calculate the number of blocks to reserve when - * refunding a transaction's unused buffers. + * calc_reserved - Calculate the number of blocks to keep reserved * @sdp: The GFS2 superblock * * This is complex. We need to reserve room for all our currently used - * metadata buffers (e.g. normal file I/O rewriting file time stamps) and - * all our journaled data buffers for journaled files (e.g. files in the + * metadata blocks (e.g. normal file I/O rewriting file time stamps) and + * all our journaled data blocks for journaled files (e.g. files in the * meta_fs like rindex, or files for which chattr +j was done.) - * If we don't reserve enough space, gfs2_log_refund and gfs2_log_flush - * will count it as free space (sd_log_blks_free) and corruption will follow. + * If we don't reserve enough space, corruption will follow. * - * We can have metadata bufs and jdata bufs in the same journal. So each - * type gets its own log header, for which we need to reserve a block. - * In fact, each type has the potential for needing more than one header - * in cases where we have more buffers than will fit on a journal page. + * We can have metadata blocks and jdata blocks in the same journal. Each + * type gets its own log descriptor, for which we need to reserve a block. + * In fact, each type has the potential for needing more than one log descriptor + * in cases where we have more blocks than will fit in a log descriptor. * Metadata journal entries take up half the space of journaled buffer entries. - * Thus, metadata entries have buf_limit (502) and journaled buffers have - * databuf_limit (251) before they cause a wrap around. * * Also, we need to reserve blocks for revoke journal entries and one for an * overall header for the lot. @@ -1008,7 +1006,7 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, u32 flags) if (sdp->sd_log_head != sdp->sd_log_flush_head) { log_flush_wait(sdp); log_write_header(sdp, flags); - } else if (sdp->sd_log_tail != current_tail(sdp) && !sdp->sd_log_idle){ + } else if (sdp->sd_log_tail != current_tail(sdp) && !sdp->sd_log_idle) { atomic_dec(&sdp->sd_log_blks_free); /* Adjust for unreserved buffer */ trace_gfs2_log_blocks(sdp, -1); log_write_header(sdp, flags); -- cgit v1.2.3-70-g09d2 From 625a8edd5e00e7c4c70a125a433ec7598d9f0c27 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Sun, 6 Dec 2020 20:10:51 +0100 Subject: gfs2: Minor debugging improvement Split the assert in gfs2_trans_end into two parts. 
Signed-off-by: Andreas Gruenbacher --- fs/gfs2/trans.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/gfs2') diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c index 6d4bf7ea7b3b..7705f04621f4 100644 --- a/fs/gfs2/trans.c +++ b/fs/gfs2/trans.c @@ -109,8 +109,8 @@ void gfs2_trans_end(struct gfs2_sbd *sdp) nbuf -= tr->tr_num_buf_rm; nbuf -= tr->tr_num_databuf_rm; - if (gfs2_assert_withdraw(sdp, (nbuf <= tr->tr_blocks) && - (tr->tr_num_revoke <= tr->tr_revokes))) + if (gfs2_assert_withdraw(sdp, nbuf <= tr->tr_blocks) || + gfs2_assert_withdraw(sdp, tr->tr_num_revoke <= tr->tr_revokes)) gfs2_print_trans(sdp, tr); gfs2_log_commit(sdp, tr); -- cgit v1.2.3-70-g09d2 From e7501bf88cd77ed3a1bc65c451600a847c80485b Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Sat, 19 Dec 2020 03:11:51 +0100 Subject: gfs2: Rename gfs2_{write => flush}_revokes Function gfs2_write_revokes doesn't actually write any revokes; instead, it adds revokes to the system transaction during a flush. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/log.c | 4 ++-- fs/gfs2/log.h | 2 +- fs/gfs2/lops.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) (limited to 'fs/gfs2') diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index 12e8280f0806..7375c007bde5 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -696,7 +696,7 @@ void gfs2_glock_remove_revoke(struct gfs2_glock *gl) } /** - * gfs2_write_revokes - Add as many revokes to the system transaction as we can + * gfs2_flush_revokes - Add as many revokes to the system transaction as we can * @sdp: The GFS2 superblock * * Our usual strategy is to defer writing revokes as much as we can in the hope @@ -707,7 +707,7 @@ void gfs2_glock_remove_revoke(struct gfs2_glock *gl) * been written back. This will basically come at no cost now, and will save * us from having to keep track of those blocks on the AIL2 list later. */ -void gfs2_write_revokes(struct gfs2_sbd *sdp) +void gfs2_flush_revokes(struct gfs2_sbd *sdp) { /* number of revokes we still have room for */ unsigned int max_revokes; diff --git a/fs/gfs2/log.h b/fs/gfs2/log.h index 79f97290146e..a9cdbc990edf 100644 --- a/fs/gfs2/log.h +++ b/fs/gfs2/log.h @@ -78,6 +78,6 @@ extern void log_flush_wait(struct gfs2_sbd *sdp); extern int gfs2_logd(void *data); extern void gfs2_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd); extern void gfs2_glock_remove_revoke(struct gfs2_glock *gl); -extern void gfs2_write_revokes(struct gfs2_sbd *sdp); +extern void gfs2_flush_revokes(struct gfs2_sbd *sdp); #endif /* __LOG_DOT_H__ */ diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c index 3922b26264f5..8658ebbcb4a9 100644 --- a/fs/gfs2/lops.c +++ b/fs/gfs2/lops.c @@ -845,7 +845,7 @@ static void revoke_lo_before_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr) struct page *page; unsigned int length; - gfs2_write_revokes(sdp); + gfs2_flush_revokes(sdp); if (!sdp->sd_log_num_revoke) return; -- cgit v1.2.3-70-g09d2 From 6e80674af06c0dca6e0153d492d437f3f07fe3e9 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Fri, 11 Dec 2020 14:45:23 +0100 Subject: gfs2: Clean up ail2_empty Clean up the logic in ail2_empty (no functional change). 
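That the restructured test matches the old wrap/a/b logic can be spot-checked with a throwaway user-space program (illustrative only; it simply mirrors the two forms of the condition and compares them exhaustively over a toy journal):

#include <assert.h>

/* Old form removed from ail2_empty(): rm = wrap ? (a || b) : (a && b). */
static int old_in_range(unsigned int old_tail, unsigned int new_tail,
			unsigned int first)
{
	int wrap = new_tail < old_tail;
	int a = old_tail <= first;
	int b = first < new_tail;

	return wrap ? (a || b) : (a && b);
}

/* New form: the same test, split into non-wrapping and wrapping cases. */
static int new_in_range(unsigned int old_tail, unsigned int new_tail,
			unsigned int first)
{
	if (old_tail <= new_tail)
		return old_tail <= first && first < new_tail;
	return old_tail <= first || first < new_tail;
}

int main(void)
{
	unsigned int o, n, f;

	for (o = 0; o < 32; o++)
		for (n = 0; n < 32; n++)
			for (f = 0; f < 32; f++)
				assert(old_in_range(o, n, f) ==
				       new_in_range(o, n, f));
	return 0;
}

Both forms agree on whether tr->tr_first lies between the old and new tail (modulo wraparound), so exactly the same transactions are freed as before.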
Signed-off-by: Andreas Gruenbacher --- fs/gfs2/log.c | 38 +++++++++++++++++++++----------------- 1 file changed, 21 insertions(+), 17 deletions(-) (limited to 'fs/gfs2') diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index 7375c007bde5..e4dc23a24569 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -367,29 +367,33 @@ static void gfs2_ail_empty_tr(struct gfs2_sbd *sdp, struct gfs2_trans *tr, } } +static void __ail2_empty(struct gfs2_sbd *sdp, struct gfs2_trans *tr) +{ + gfs2_ail_empty_tr(sdp, tr, &tr->tr_ail2_list); + list_del(&tr->tr_list); + gfs2_assert_warn(sdp, list_empty(&tr->tr_ail1_list)); + gfs2_assert_warn(sdp, list_empty(&tr->tr_ail2_list)); + gfs2_trans_free(sdp, tr); +} + static void ail2_empty(struct gfs2_sbd *sdp, unsigned int new_tail) { - struct gfs2_trans *tr, *safe; + struct list_head *ail2_list = &sdp->sd_ail2_list; unsigned int old_tail = sdp->sd_log_tail; - int wrap = (new_tail < old_tail); - int a, b, rm; + struct gfs2_trans *tr, *safe; spin_lock(&sdp->sd_ail_lock); - - list_for_each_entry_safe(tr, safe, &sdp->sd_ail2_list, tr_list) { - a = (old_tail <= tr->tr_first); - b = (tr->tr_first < new_tail); - rm = (wrap) ? (a || b) : (a && b); - if (!rm) - continue; - - gfs2_ail_empty_tr(sdp, tr, &tr->tr_ail2_list); - list_del(&tr->tr_list); - gfs2_assert_warn(sdp, list_empty(&tr->tr_ail1_list)); - gfs2_assert_warn(sdp, list_empty(&tr->tr_ail2_list)); - gfs2_trans_free(sdp, tr); + if (old_tail <= new_tail) { + list_for_each_entry_safe(tr, safe, ail2_list, tr_list) { + if (old_tail <= tr->tr_first && tr->tr_first < new_tail) + __ail2_empty(sdp, tr); + } + } else { + list_for_each_entry_safe(tr, safe, ail2_list, tr_list) { + if (old_tail <= tr->tr_first || tr->tr_first < new_tail) + __ail2_empty(sdp, tr); + } } - spin_unlock(&sdp->sd_ail_lock); } -- cgit v1.2.3-70-g09d2 From 15e20a301ab06575482c7ab3b442a6830cec928e Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 3 Feb 2021 16:15:27 +0100 Subject: gfs2: Use sb_start_intwrite in gfs2_ail_empty_gl Commit 2e60d7683c8d ("GFS2: update freeze code to use freeze/thaw_super on all nodes") optimized away the sb_start_intwrite ... sb_end_intwrite protection for the on-stack transactions in gfs2_ail_empty_gl with no explanation. I can't think of a valid reason for doing that, so revert that change. This simplifies the next commit. 
Signed-off-by: Andreas Gruenbacher --- fs/gfs2/glops.c | 5 ++++- fs/gfs2/trans.c | 8 +++----- 2 files changed, 7 insertions(+), 6 deletions(-) (limited to 'fs/gfs2') diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index 3faa421568b0..fd1f52fff170 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c @@ -127,9 +127,12 @@ static int gfs2_ail_empty_gl(struct gfs2_glock *gl) * on the stack */ tr.tr_reserved = 1 + gfs2_struct2blk(sdp, tr.tr_revokes); tr.tr_ip = _RET_IP_; + sb_start_intwrite(sdp->sd_vfs); ret = gfs2_log_reserve(sdp, tr.tr_reserved); - if (ret < 0) + if (ret < 0) { + sb_end_intwrite(sdp->sd_vfs); return ret; + } WARN_ON_ONCE(current->journal_info); current->journal_info = &tr; diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c index 7705f04621f4..ae040b570868 100644 --- a/fs/gfs2/trans.c +++ b/fs/gfs2/trans.c @@ -98,10 +98,9 @@ void gfs2_trans_end(struct gfs2_sbd *sdp) if (!test_bit(TR_TOUCHED, &tr->tr_flags)) { gfs2_log_release(sdp, tr->tr_reserved); - if (alloced) { + if (alloced) gfs2_trans_free(sdp, tr); - sb_end_intwrite(sdp->sd_vfs); - } + sb_end_intwrite(sdp->sd_vfs); return; } @@ -121,8 +120,7 @@ void gfs2_trans_end(struct gfs2_sbd *sdp) if (sdp->sd_vfs->s_flags & SB_SYNCHRONOUS) gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_NORMAL | GFS2_LFC_TRANS_END); - if (alloced) - sb_end_intwrite(sdp->sd_vfs); + sb_end_intwrite(sdp->sd_vfs); } static struct gfs2_bufdata *gfs2_alloc_bufdata(struct gfs2_glock *gl, -- cgit v1.2.3-70-g09d2 From c968f5788bc91fe4f86df1a68f0d6471396b4d78 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Fri, 29 Jan 2021 16:45:33 +0100 Subject: gfs2: Clean up on-stack transactions Replace the TR_ALLOCED flag by its inverse, TR_ONSTACK: that way, the flag only needs to be set in the exceptional case of on-stack transactions. Split off __gfs2_trans_begin from gfs2_trans_begin and use it to replace the open-coded version in gfs2_ail_empty_gl. 
Signed-off-by: Andreas Gruenbacher --- fs/gfs2/glops.c | 33 ++++++++++----------------------- fs/gfs2/incore.h | 2 +- fs/gfs2/log.c | 2 +- fs/gfs2/trans.c | 40 +++++++++++++++++++++++----------------- fs/gfs2/trans.h | 3 +++ 5 files changed, 38 insertions(+), 42 deletions(-) (limited to 'fs/gfs2') diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index fd1f52fff170..a067924341e3 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c @@ -86,16 +86,12 @@ static int gfs2_ail_empty_gl(struct gfs2_glock *gl) { struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; struct gfs2_trans tr; + unsigned int revokes; int ret; - memset(&tr, 0, sizeof(tr)); - INIT_LIST_HEAD(&tr.tr_buf); - INIT_LIST_HEAD(&tr.tr_databuf); - INIT_LIST_HEAD(&tr.tr_ail1_list); - INIT_LIST_HEAD(&tr.tr_ail2_list); - tr.tr_revokes = atomic_read(&gl->gl_ail_count); + revokes = atomic_read(&gl->gl_ail_count); - if (!tr.tr_revokes) { + if (!revokes) { bool have_revokes; bool log_in_flight; @@ -122,23 +118,14 @@ static int gfs2_ail_empty_gl(struct gfs2_glock *gl) return 0; } - /* A shortened, inline version of gfs2_trans_begin() - * tr->alloced is not set since the transaction structure is - * on the stack */ - tr.tr_reserved = 1 + gfs2_struct2blk(sdp, tr.tr_revokes); - tr.tr_ip = _RET_IP_; - sb_start_intwrite(sdp->sd_vfs); - ret = gfs2_log_reserve(sdp, tr.tr_reserved); - if (ret < 0) { - sb_end_intwrite(sdp->sd_vfs); - return ret; - } - WARN_ON_ONCE(current->journal_info); - current->journal_info = &tr; - - __gfs2_ail_flush(gl, 0, tr.tr_revokes); - + memset(&tr, 0, sizeof(tr)); + set_bit(TR_ONSTACK, &tr.tr_flags); + ret = __gfs2_trans_begin(&tr, sdp, 0, revokes, _RET_IP_); + if (ret) + goto flush; + __gfs2_ail_flush(gl, 0, revokes); gfs2_trans_end(sdp); + flush: gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_NORMAL | GFS2_LFC_AIL_EMPTY_GL); diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 8e1ab8ed4abc..c3f6dd378b10 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -490,7 +490,7 @@ struct gfs2_quota_data { enum { TR_TOUCHED = 1, TR_ATTACHED = 2, - TR_ALLOCED = 3, + TR_ONSTACK = 3, }; struct gfs2_trans { diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index e4dc23a24569..0fceb60907a2 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -1114,7 +1114,7 @@ static void log_refund(struct gfs2_sbd *sdp, struct gfs2_trans *tr) if (sdp->sd_log_tr) { gfs2_merge_trans(sdp, tr); } else if (tr->tr_num_buf_new || tr->tr_num_databuf_new) { - gfs2_assert_withdraw(sdp, test_bit(TR_ALLOCED, &tr->tr_flags)); + gfs2_assert_withdraw(sdp, !test_bit(TR_ONSTACK, &tr->tr_flags)); sdp->sd_log_tr = tr; set_bit(TR_ATTACHED, &tr->tr_flags); } diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c index ae040b570868..db29ca253853 100644 --- a/fs/gfs2/trans.c +++ b/fs/gfs2/trans.c @@ -37,10 +37,10 @@ static void gfs2_print_trans(struct gfs2_sbd *sdp, const struct gfs2_trans *tr) tr->tr_num_revoke, tr->tr_num_revoke_rm); } -int gfs2_trans_begin(struct gfs2_sbd *sdp, unsigned int blocks, - unsigned int revokes) +int __gfs2_trans_begin(struct gfs2_trans *tr, struct gfs2_sbd *sdp, + unsigned int blocks, unsigned int revokes, + unsigned long ip) { - struct gfs2_trans *tr; int error; if (current->journal_info) { @@ -52,15 +52,10 @@ int gfs2_trans_begin(struct gfs2_sbd *sdp, unsigned int blocks, if (!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) return -EROFS; - tr = kmem_cache_zalloc(gfs2_trans_cachep, GFP_NOFS); - if (!tr) - return -ENOMEM; - - tr->tr_ip = _RET_IP_; + tr->tr_ip = ip; tr->tr_blocks = blocks; tr->tr_revokes = revokes; tr->tr_reserved = 1; - set_bit(TR_ALLOCED, &tr->tr_flags); 
if (blocks) tr->tr_reserved += 6 + blocks; if (revokes) @@ -74,17 +69,28 @@ int gfs2_trans_begin(struct gfs2_sbd *sdp, unsigned int blocks, sb_start_intwrite(sdp->sd_vfs); error = gfs2_log_reserve(sdp, tr->tr_reserved); - if (error) - goto fail; + if (error) { + sb_end_intwrite(sdp->sd_vfs); + return error; + } current->journal_info = tr; return 0; +} -fail: - sb_end_intwrite(sdp->sd_vfs); - kmem_cache_free(gfs2_trans_cachep, tr); +int gfs2_trans_begin(struct gfs2_sbd *sdp, unsigned int blocks, + unsigned int revokes) +{ + struct gfs2_trans *tr; + int error; + tr = kmem_cache_zalloc(gfs2_trans_cachep, GFP_NOFS); + if (!tr) + return -ENOMEM; + error = __gfs2_trans_begin(tr, sdp, blocks, revokes, _RET_IP_); + if (error) + kmem_cache_free(gfs2_trans_cachep, tr); return error; } @@ -92,13 +98,12 @@ void gfs2_trans_end(struct gfs2_sbd *sdp) { struct gfs2_trans *tr = current->journal_info; s64 nbuf; - int alloced = test_bit(TR_ALLOCED, &tr->tr_flags); current->journal_info = NULL; if (!test_bit(TR_TOUCHED, &tr->tr_flags)) { gfs2_log_release(sdp, tr->tr_reserved); - if (alloced) + if (!test_bit(TR_ONSTACK, &tr->tr_flags)) gfs2_trans_free(sdp, tr); sb_end_intwrite(sdp->sd_vfs); return; @@ -113,7 +118,8 @@ void gfs2_trans_end(struct gfs2_sbd *sdp) gfs2_print_trans(sdp, tr); gfs2_log_commit(sdp, tr); - if (alloced && !test_bit(TR_ATTACHED, &tr->tr_flags)) + if (!test_bit(TR_ONSTACK, &tr->tr_flags) && + !test_bit(TR_ATTACHED, &tr->tr_flags)) gfs2_trans_free(sdp, tr); up_read(&sdp->sd_log_flush_lock); diff --git a/fs/gfs2/trans.h b/fs/gfs2/trans.h index 83199ce5a5c5..55f253015cf8 100644 --- a/fs/gfs2/trans.h +++ b/fs/gfs2/trans.h @@ -34,6 +34,9 @@ static inline unsigned int gfs2_rg_blocks(const struct gfs2_inode *ip, unsigned return rgd->rd_length; } +extern int __gfs2_trans_begin(struct gfs2_trans *tr, struct gfs2_sbd *sdp, + unsigned int blocks, unsigned int revokes, + unsigned long ip); extern int gfs2_trans_begin(struct gfs2_sbd *sdp, unsigned int blocks, unsigned int revokes); -- cgit v1.2.3-70-g09d2 From f3708fb59f6c2498e8ec4f29010375f600b68642 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Sun, 13 Dec 2020 09:21:34 +0100 Subject: gfs2: Get rid of sd_reserving_log This counter and the associated wait queue are only used so that gfs2_make_fs_ro can efficiently wait for all pending log space allocations to fail after setting the filesystem to read-only. This comes at the cost of waking up that wait queue very frequently. Instead, when gfs2_log_reserve fails because the filesystem has become read-only, Wake up sd_log_waitq. In gfs2_make_fs_ro, set the file system read-only and then wait until all the log space has been released. Give up and report the problem after a while. With that, sd_reserving_log and sd_reserving_log_wait can be removed. 
Signed-off-by: Andreas Gruenbacher --- fs/gfs2/incore.h | 3 --- fs/gfs2/log.c | 17 ++++++++++------- fs/gfs2/log.h | 1 + fs/gfs2/ops_fstype.c | 2 -- fs/gfs2/super.c | 12 ++++++------ fs/gfs2/trans.c | 2 ++ 6 files changed, 19 insertions(+), 18 deletions(-) (limited to 'fs/gfs2') diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index c3f6dd378b10..8f8676cf72ed 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -849,9 +849,6 @@ struct gfs2_sbd { int sd_log_error; /* First log error */ wait_queue_head_t sd_withdraw_wait; - atomic_t sd_reserving_log; - wait_queue_head_t sd_reserving_log_wait; - unsigned int sd_log_flush_head; spinlock_t sd_ail_lock; diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index 0fceb60907a2..0da05492e8b8 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -397,6 +397,15 @@ static void ail2_empty(struct gfs2_sbd *sdp, unsigned int new_tail) spin_unlock(&sdp->sd_ail_lock); } +/** + * gfs2_log_is_empty - Check if the log is empty + * @sdp: The GFS2 superblock + */ + +bool gfs2_log_is_empty(struct gfs2_sbd *sdp) { + return atomic_read(&sdp->sd_log_blks_free) == sdp->sd_jdesc->jd_blocks; +} + /** * gfs2_log_release - Release a given number of log blocks * @sdp: The GFS2 superblock @@ -461,13 +470,9 @@ retry: } while(free_blocks <= wanted); finish_wait(&sdp->sd_log_waitq, &wait); } - atomic_inc(&sdp->sd_reserving_log); if (atomic_cmpxchg(&sdp->sd_log_blks_free, free_blocks, - free_blocks - blks) != free_blocks) { - if (atomic_dec_and_test(&sdp->sd_reserving_log)) - wake_up(&sdp->sd_reserving_log_wait); + free_blocks - blks) != free_blocks) goto retry; - } atomic_sub(blks, &sdp->sd_log_blks_needed); trace_gfs2_log_blocks(sdp, -blks); @@ -483,8 +488,6 @@ retry: gfs2_log_release(sdp, blks); ret = -EROFS; } - if (atomic_dec_and_test(&sdp->sd_reserving_log)) - wake_up(&sdp->sd_reserving_log_wait); return ret; } diff --git a/fs/gfs2/log.h b/fs/gfs2/log.h index a9cdbc990edf..16efbe614279 100644 --- a/fs/gfs2/log.h +++ b/fs/gfs2/log.h @@ -64,6 +64,7 @@ static inline void gfs2_ordered_add_inode(struct gfs2_inode *ip) extern void gfs2_ordered_del_inode(struct gfs2_inode *ip); extern unsigned int gfs2_struct2blk(struct gfs2_sbd *sdp, unsigned int nstruct); extern void gfs2_remove_from_ail(struct gfs2_bufdata *bd); +extern bool gfs2_log_is_empty(struct gfs2_sbd *sdp); extern void gfs2_log_release(struct gfs2_sbd *sdp, unsigned int blks); extern int gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks); extern void gfs2_write_log_header(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd, diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 61fce59cb4d3..986dc2ebebf0 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -136,8 +136,6 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb) init_rwsem(&sdp->sd_log_flush_lock); atomic_set(&sdp->sd_log_in_flight, 0); - atomic_set(&sdp->sd_reserving_log, 0); - init_waitqueue_head(&sdp->sd_reserving_log_wait); init_waitqueue_head(&sdp->sd_log_flush_wait); atomic_set(&sdp->sd_freeze_state, SFS_UNFROZEN); mutex_init(&sdp->sd_freeze_mutex); diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index ed7a829e9ffe..f188277f7d48 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -645,13 +645,13 @@ int gfs2_make_fs_ro(struct gfs2_sbd *sdp) gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_SHUTDOWN | GFS2_LFC_MAKE_FS_RO); - wait_event(sdp->sd_reserving_log_wait, - atomic_read(&sdp->sd_reserving_log) == 0); - gfs2_assert_warn(sdp, atomic_read(&sdp->sd_log_blks_free) == - sdp->sd_jdesc->jd_blocks); + wait_event_timeout(sdp->sd_log_waitq, + 
gfs2_log_is_empty(sdp), + HZ * 5); + gfs2_assert_warn(sdp, gfs2_log_is_empty(sdp)); } else { - wait_event_timeout(sdp->sd_reserving_log_wait, - atomic_read(&sdp->sd_reserving_log) == 0, + wait_event_timeout(sdp->sd_log_waitq, + gfs2_log_is_empty(sdp), HZ * 5); } if (gfs2_holder_initialized(&freeze_gh)) diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c index db29ca253853..aefe450e009e 100644 --- a/fs/gfs2/trans.c +++ b/fs/gfs2/trans.c @@ -71,6 +71,8 @@ int __gfs2_trans_begin(struct gfs2_trans *tr, struct gfs2_sbd *sdp, error = gfs2_log_reserve(sdp, tr->tr_reserved); if (error) { sb_end_intwrite(sdp->sd_vfs); + if (error == -EROFS) + wake_up(&sdp->sd_log_waitq); return error; } -- cgit v1.2.3-70-g09d2 From c1eba1b0bca59316f34aa6f70fe5004abba8082d Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Sat, 12 Dec 2020 23:30:22 +0100 Subject: gfs2: Move lock flush locking to gfs2_trans_{begin,end} Move the read locking of sd_log_flush_lock from gfs2_log_reserve to gfs2_trans_begin, and its unlocking from gfs2_log_release to gfs2_trans_end. Use gfs2_log_release in two places in which it was open coded before. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/log.c | 28 +++------------------------- fs/gfs2/log.h | 2 +- fs/gfs2/trans.c | 19 ++++++++++++------- 3 files changed, 16 insertions(+), 33 deletions(-) (limited to 'fs/gfs2') diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index 0da05492e8b8..e7183c84ffc0 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -420,7 +420,6 @@ void gfs2_log_release(struct gfs2_sbd *sdp, unsigned int blks) trace_gfs2_log_blocks(sdp, blks); gfs2_assert_withdraw(sdp, atomic_read(&sdp->sd_log_blks_free) <= sdp->sd_jdesc->jd_blocks); - up_read(&sdp->sd_log_flush_lock); } /** @@ -439,22 +438,16 @@ void gfs2_log_release(struct gfs2_sbd *sdp, unsigned int blks) * with queued waiters, we use an exclusive wait. This means that when we * get woken with enough journal space to get our reservation, we need to * wake the next waiter on the list. 
- * - * Returns: errno */ -int gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks) +void gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks) { - int ret = 0; unsigned reserved_blks = 7 * (4096 / sdp->sd_vfs->s_blocksize); unsigned wanted = blks + reserved_blks; DEFINE_WAIT(wait); int did_wait = 0; unsigned int free_blocks; - if (gfs2_assert_warn(sdp, blks) || - gfs2_assert_warn(sdp, blks <= sdp->sd_jdesc->jd_blocks)) - return -EINVAL; atomic_add(blks, &sdp->sd_log_blks_needed); retry: free_blocks = atomic_read(&sdp->sd_log_blks_free); @@ -482,13 +475,6 @@ retry: */ if (unlikely(did_wait)) wake_up(&sdp->sd_log_waitq); - - down_read(&sdp->sd_log_flush_lock); - if (unlikely(!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags))) { - gfs2_log_release(sdp, blks); - ret = -EROFS; - } - return ret; } /** @@ -585,12 +571,7 @@ static void log_pull_tail(struct gfs2_sbd *sdp, unsigned int new_tail) unsigned int dist = log_distance(sdp, new_tail, sdp->sd_log_tail); ail2_empty(sdp, new_tail); - - atomic_add(dist, &sdp->sd_log_blks_free); - trace_gfs2_log_blocks(sdp, dist); - gfs2_assert_withdraw(sdp, atomic_read(&sdp->sd_log_blks_free) <= - sdp->sd_jdesc->jd_blocks); - + gfs2_log_release(sdp, dist); sdp->sd_log_tail = new_tail; } @@ -1127,10 +1108,7 @@ static void log_refund(struct gfs2_sbd *sdp, struct gfs2_trans *tr) maxres = sdp->sd_log_blks_reserved + tr->tr_reserved; gfs2_assert_withdraw(sdp, maxres >= reserved); unused = maxres - reserved; - atomic_add(unused, &sdp->sd_log_blks_free); - trace_gfs2_log_blocks(sdp, unused); - gfs2_assert_withdraw(sdp, atomic_read(&sdp->sd_log_blks_free) <= - sdp->sd_jdesc->jd_blocks); + gfs2_log_release(sdp, unused); sdp->sd_log_blks_reserved = reserved; gfs2_log_unlock(sdp); diff --git a/fs/gfs2/log.h b/fs/gfs2/log.h index 16efbe614279..cbc097ca9244 100644 --- a/fs/gfs2/log.h +++ b/fs/gfs2/log.h @@ -66,7 +66,7 @@ extern unsigned int gfs2_struct2blk(struct gfs2_sbd *sdp, unsigned int nstruct); extern void gfs2_remove_from_ail(struct gfs2_bufdata *bd); extern bool gfs2_log_is_empty(struct gfs2_sbd *sdp); extern void gfs2_log_release(struct gfs2_sbd *sdp, unsigned int blks); -extern int gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks); +extern void gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks); extern void gfs2_write_log_header(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd, u64 seq, u32 tail, u32 lblock, u32 flags, int op_flags); diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c index aefe450e009e..2269aa7ad69d 100644 --- a/fs/gfs2/trans.c +++ b/fs/gfs2/trans.c @@ -41,8 +41,6 @@ int __gfs2_trans_begin(struct gfs2_trans *tr, struct gfs2_sbd *sdp, unsigned int blocks, unsigned int revokes, unsigned long ip) { - int error; - if (current->journal_info) { gfs2_print_trans(sdp, current->journal_info); BUG(); @@ -66,14 +64,20 @@ int __gfs2_trans_begin(struct gfs2_trans *tr, struct gfs2_sbd *sdp, INIT_LIST_HEAD(&tr->tr_ail1_list); INIT_LIST_HEAD(&tr->tr_ail2_list); + if (gfs2_assert_warn(sdp, tr->tr_reserved <= sdp->sd_jdesc->jd_blocks)) + return -EINVAL; + sb_start_intwrite(sdp->sd_vfs); - error = gfs2_log_reserve(sdp, tr->tr_reserved); - if (error) { + gfs2_log_reserve(sdp, tr->tr_reserved); + + down_read(&sdp->sd_log_flush_lock); + if (unlikely(!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags))) { + gfs2_log_release(sdp, tr->tr_reserved); + up_read(&sdp->sd_log_flush_lock); sb_end_intwrite(sdp->sd_vfs); - if (error == -EROFS) - wake_up(&sdp->sd_log_waitq); - return error; + wake_up(&sdp->sd_log_waitq); + return -EROFS; } current->journal_info = tr; @@ -105,6 
+109,7 @@ void gfs2_trans_end(struct gfs2_sbd *sdp) if (!test_bit(TR_TOUCHED, &tr->tr_flags)) { gfs2_log_release(sdp, tr->tr_reserved); + up_read(&sdp->sd_log_flush_lock); if (!test_bit(TR_ONSTACK, &tr->tr_flags)) gfs2_trans_free(sdp, tr); sb_end_intwrite(sdp->sd_vfs); -- cgit v1.2.3-70-g09d2 From 4a3d049db42b42a36ae84eb8b59d2f5119737253 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Fri, 11 Dec 2020 01:36:25 +0100 Subject: gfs2: Don't wait for journal flush in clean_journal Commit 588bff95c94e added gfs2_write_log_header() and started using it in clean_journal(), with an additional call to log_flush_wait() at the end of gfs2_write_log_header() which is unnecessary for clean_journal(). Move that call out of gfs2_write_log_header() to restore the previous behavior. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/log.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'fs/gfs2') diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index e7183c84ffc0..cff95889b6f4 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -757,7 +757,7 @@ void gfs2_write_log_header(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd, u64 dblock; if (gfs2_withdrawn(sdp)) - goto out; + return; page = mempool_alloc(gfs2_page_pool, GFP_NOIO); lh = page_address(page); @@ -812,8 +812,6 @@ void gfs2_write_log_header(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd, gfs2_log_write(sdp, page, sb->s_blocksize, 0, dblock); gfs2_log_submit_bio(&sdp->sd_log_bio, REQ_OP_WRITE | op_flags); -out: - log_flush_wait(sdp); } /** @@ -842,6 +840,7 @@ static void log_write_header(struct gfs2_sbd *sdp, u32 flags) gfs2_write_log_header(sdp, sdp->sd_jdesc, sdp->sd_log_sequence++, tail, sdp->sd_log_flush_head, flags, op_flags); gfs2_log_incr_head(sdp); + log_flush_wait(sdp); if (sdp->sd_log_tail != tail) log_pull_tail(sdp, tail); -- cgit v1.2.3-70-g09d2 From 5ae8fff8d031b5728f4c0e36e971bba42bb78bea Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Sun, 13 Dec 2020 11:37:17 +0100 Subject: gfs2: Clean up gfs2_log_reserve Wake up log waiters in gfs2_log_release when log space has actually become available. This is a much better place for the wakeup than gfs2_logd. Check if enough log space is immeditely available before anything else. If there isn't, use io_wait_event to wait instead of open-coding it. 
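As an illustration of the resulting fast path, here is a stand-alone user-space sketch of the compare-and-exchange claim loop, with the wait reduced to a plain retry. All names in it (log_free, log_needed, try_claim, claim_blocks, release_blocks) are invented for the sketch; they merely stand in for sd_log_blks_free, sd_log_blks_needed, gfs2_log_reserve and gfs2_log_release, and io_wait_event is not modelled.

/* claim_sketch.c: user-space analogue of the lock-free reservation path.
 * Build with: cc -std=c11 claim_sketch.c
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_uint log_free;            /* stand-in for sd_log_blks_free   */
static atomic_uint log_needed;          /* stand-in for sd_log_blks_needed */

/* Try to carve @blks out of log_free without taking a lock.  Succeeds only
 * while at least @wanted blocks remain (@wanted is the request plus the
 * overhead kept back for log headers), mirroring "free_blocks >= wanted". */
static bool try_claim(unsigned int blks, unsigned int wanted)
{
        unsigned int free = atomic_load(&log_free);

        while (free >= wanted) {
                /* On failure, free is reloaded with the current value. */
                if (atomic_compare_exchange_weak(&log_free, &free, free - blks))
                        return true;
        }
        return false;
}

static void claim_blocks(unsigned int blks, unsigned int wanted)
{
        if (try_claim(blks, wanted))
                return;

        /* Slow path: announce the demand, then wait until a release pushes
         * log_free back above the watermark.  The kernel sleeps in
         * io_wait_event here; this sketch simply retries. */
        atomic_fetch_add(&log_needed, blks);
        while (!try_claim(blks, wanted))
                ;
        atomic_fetch_sub(&log_needed, blks);
}

static void release_blocks(unsigned int blks)
{
        atomic_fetch_add(&log_free, blks);
        /* The kernel wakes sd_log_waitq here when log_needed is non-zero;
         * the retrying claimers above just see the new value. */
}

int main(void)
{
        atomic_store(&log_free, 128);
        claim_blocks(10, 10 + 7);       /* 7 ~ reserved_blks overhead */
        release_blocks(10);
        printf("free blocks: %u\n", atomic_load(&log_free));
        return 0;
}

The point of the weak compare-and-exchange is that a failed attempt reloads the current free count, so the watermark check and the subtraction always operate on one consistent value without holding a lock.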
Signed-off-by: Andreas Gruenbacher --- fs/gfs2/log.c | 58 +++++++++++++++++++++++++-------------------------------- fs/gfs2/trans.c | 3 +-- 2 files changed, 26 insertions(+), 35 deletions(-) (limited to 'fs/gfs2') diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index cff95889b6f4..ca9fa481913d 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -415,11 +415,12 @@ bool gfs2_log_is_empty(struct gfs2_sbd *sdp) { void gfs2_log_release(struct gfs2_sbd *sdp, unsigned int blks) { - atomic_add(blks, &sdp->sd_log_blks_free); trace_gfs2_log_blocks(sdp, blks); gfs2_assert_withdraw(sdp, atomic_read(&sdp->sd_log_blks_free) <= sdp->sd_jdesc->jd_blocks); + if (atomic_read(&sdp->sd_log_blks_needed)) + wake_up(&sdp->sd_log_waitq); } /** @@ -444,36 +445,33 @@ void gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks) { unsigned reserved_blks = 7 * (4096 / sdp->sd_vfs->s_blocksize); unsigned wanted = blks + reserved_blks; - DEFINE_WAIT(wait); - int did_wait = 0; unsigned int free_blocks; - atomic_add(blks, &sdp->sd_log_blks_needed); -retry: free_blocks = atomic_read(&sdp->sd_log_blks_free); - if (unlikely(free_blocks <= wanted)) { - do { - prepare_to_wait_exclusive(&sdp->sd_log_waitq, &wait, - TASK_UNINTERRUPTIBLE); + while (free_blocks >= wanted) { + if (atomic_try_cmpxchg(&sdp->sd_log_blks_free, &free_blocks, + free_blocks - blks)) + return; + } + + atomic_add(blks, &sdp->sd_log_blks_needed); + for (;;) { + if (current != sdp->sd_logd_process) wake_up(&sdp->sd_logd_waitq); - did_wait = 1; - if (atomic_read(&sdp->sd_log_blks_free) <= wanted) - io_schedule(); - free_blocks = atomic_read(&sdp->sd_log_blks_free); - } while(free_blocks <= wanted); - finish_wait(&sdp->sd_log_waitq, &wait); + io_wait_event(sdp->sd_log_waitq, + (free_blocks = atomic_read(&sdp->sd_log_blks_free), + free_blocks >= wanted)); + do { + if (atomic_try_cmpxchg(&sdp->sd_log_blks_free, + &free_blocks, + free_blocks - blks)) + goto reserved; + } while (free_blocks >= wanted); } - if (atomic_cmpxchg(&sdp->sd_log_blks_free, free_blocks, - free_blocks - blks) != free_blocks) - goto retry; - atomic_sub(blks, &sdp->sd_log_blks_needed); - trace_gfs2_log_blocks(sdp, -blks); - /* - * If we waited, then so might others, wake them up _after_ we get - * our share of the log. 
- */ - if (unlikely(did_wait)) +reserved: + trace_gfs2_log_blocks(sdp, -blks); + if (atomic_sub_return(blks, &sdp->sd_log_blks_needed)) wake_up(&sdp->sd_log_waitq); } @@ -1107,7 +1105,8 @@ static void log_refund(struct gfs2_sbd *sdp, struct gfs2_trans *tr) maxres = sdp->sd_log_blks_reserved + tr->tr_reserved; gfs2_assert_withdraw(sdp, maxres >= reserved); unused = maxres - reserved; - gfs2_log_release(sdp, unused); + if (unused) + gfs2_log_release(sdp, unused); sdp->sd_log_blks_reserved = reserved; gfs2_log_unlock(sdp); @@ -1192,7 +1191,6 @@ int gfs2_logd(void *data) struct gfs2_sbd *sdp = data; unsigned long t = 1; DEFINE_WAIT(wait); - bool did_flush; while (!kthread_should_stop()) { @@ -1211,12 +1209,10 @@ int gfs2_logd(void *data) continue; } - did_flush = false; if (gfs2_jrnl_flush_reqd(sdp) || t == 0) { gfs2_ail1_empty(sdp, 0); gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_NORMAL | GFS2_LFC_LOGD_JFLUSH_REQD); - did_flush = true; } if (gfs2_ail_flush_reqd(sdp)) { @@ -1225,12 +1221,8 @@ int gfs2_logd(void *data) gfs2_ail1_empty(sdp, 0); gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_NORMAL | GFS2_LFC_LOGD_AIL_FLUSH_REQD); - did_flush = true; } - if (!gfs2_ail_flush_reqd(sdp) || did_flush) - wake_up(&sdp->sd_log_waitq); - t = gfs2_tune_get(sdp, gt_logd_secs) * HZ; try_to_freeze(); diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c index 2269aa7ad69d..cac93b2004cf 100644 --- a/fs/gfs2/trans.c +++ b/fs/gfs2/trans.c @@ -73,10 +73,9 @@ int __gfs2_trans_begin(struct gfs2_trans *tr, struct gfs2_sbd *sdp, down_read(&sdp->sd_log_flush_lock); if (unlikely(!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags))) { - gfs2_log_release(sdp, tr->tr_reserved); up_read(&sdp->sd_log_flush_lock); + gfs2_log_release(sdp, tr->tr_reserved); sb_end_intwrite(sdp->sd_vfs); - wake_up(&sdp->sd_log_waitq); return -EROFS; } -- cgit v1.2.3-70-g09d2 From 297de3180dd7ecbb3798f32e58691168587a8f85 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Sun, 6 Dec 2020 14:04:36 +0100 Subject: gfs2: Use a tighter bound in gfs2_trans_begin Use a tighter bound for the number of blocks required by transactions in gfs2_trans_begin: in the worst case, we'll have mixed data and metadata, so we'll need a log desciptor for each type. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/trans.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'fs/gfs2') diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c index cac93b2004cf..f73d6b8f3b53 100644 --- a/fs/gfs2/trans.c +++ b/fs/gfs2/trans.c @@ -54,8 +54,14 @@ int __gfs2_trans_begin(struct gfs2_trans *tr, struct gfs2_sbd *sdp, tr->tr_blocks = blocks; tr->tr_revokes = revokes; tr->tr_reserved = 1; - if (blocks) - tr->tr_reserved += 6 + blocks; + if (blocks) { + /* + * The reserved blocks are either used for data or metadata. + * We can have mixed data and metadata, each with its own log + * descriptor block; see calc_reserved(). + */ + tr->tr_reserved += blocks + 1 + DIV_ROUND_UP(blocks - 1, databuf_limit(sdp)); + } if (revokes) tr->tr_reserved += gfs2_struct2blk(sdp, revokes); INIT_LIST_HEAD(&tr->tr_databuf); -- cgit v1.2.3-70-g09d2 From 5cb738b5fbd2f3ebe9dec0e428577a4f2128adbe Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Sat, 19 Dec 2020 07:15:17 +0100 Subject: gfs2: Get rid of current_tail() Keep the current value of the updated log tail in the super block as sb_log_flush_tail instead of computing it on the fly. This avoids unnecessary sd_ail_lock taking and cleans up the code. 
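For illustration, a minimal user-space sketch of that caching scheme follows; struct tx, ail_list and update_flush_tail are invented stand-ins for struct gfs2_trans, sd_ail1_list and gfs2_log_update_flush_tail, and all locking is omitted.

/* flush_tail_sketch.c: keep the flush tail cached instead of walking the
 * AIL under a lock every time it is needed.
 */
#include <stdio.h>

struct tx {
        unsigned int first_block;       /* analogue of tr_first */
        struct tx *next;                /* newest-to-oldest AIL order */
};

static struct tx *ail_list;             /* newest entry first */
static unsigned int log_head;           /* analogue of sd_log_head */
static unsigned int log_flush_tail;     /* the cached value */

/* Recompute the cached tail whenever the AIL changes: it is the first log
 * block of the oldest transaction still in the AIL, or the head if the
 * AIL is empty. */
static void update_flush_tail(void)
{
        struct tx *oldest = ail_list;

        if (!oldest) {
                log_flush_tail = log_head;
                return;
        }
        while (oldest->next)
                oldest = oldest->next;
        log_flush_tail = oldest->first_block;
}

int main(void)
{
        struct tx a = { .first_block = 4 };
        struct tx b = { .first_block = 9, .next = &a };

        log_head = 20;
        ail_list = &b;                  /* a is the oldest transaction */
        update_flush_tail();
        printf("flush tail = %u\n", log_flush_tail);    /* 4 */

        ail_list = NULL;                /* AIL drained */
        update_flush_tail();
        printf("flush tail = %u\n", log_flush_tail);    /* 20 */
        return 0;
}

Readers of the tail then see a value that was correct as of the last AIL update, which is exactly what the on-the-fly computation provided, minus the sd_ail_lock round trip.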
Signed-off-by: Andreas Gruenbacher --- fs/gfs2/incore.h | 5 ++-- fs/gfs2/log.c | 72 ++++++++++++++++++++++++++++++-------------------------- fs/gfs2/log.h | 4 +++- 3 files changed, 44 insertions(+), 37 deletions(-) (limited to 'fs/gfs2') diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 8f8676cf72ed..51656b053170 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -838,8 +838,6 @@ struct gfs2_sbd { wait_queue_head_t sd_logd_waitq; u64 sd_log_sequence; - unsigned int sd_log_head; - unsigned int sd_log_tail; int sd_log_idle; struct rw_semaphore sd_log_flush_lock; @@ -849,6 +847,9 @@ struct gfs2_sbd { int sd_log_error; /* First log error */ wait_queue_head_t sd_withdraw_wait; + unsigned int sd_log_tail; + unsigned int sd_log_flush_tail; + unsigned int sd_log_head; unsigned int sd_log_flush_head; spinlock_t sd_ail_lock; diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index ca9fa481913d..6b5584fd8ff7 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -242,6 +242,28 @@ static void gfs2_ail1_start(struct gfs2_sbd *sdp) return gfs2_ail1_flush(sdp, &wbc); } +static void gfs2_log_update_flush_tail(struct gfs2_sbd *sdp) +{ + unsigned int new_flush_tail = sdp->sd_log_head; + struct gfs2_trans *tr; + + if (!list_empty(&sdp->sd_ail1_list)) { + tr = list_last_entry(&sdp->sd_ail1_list, + struct gfs2_trans, tr_list); + new_flush_tail = tr->tr_first; + } + sdp->sd_log_flush_tail = new_flush_tail; +} + +static void gfs2_log_update_head(struct gfs2_sbd *sdp) +{ + unsigned int new_head = sdp->sd_log_flush_head; + + if (sdp->sd_log_flush_tail == sdp->sd_log_head) + sdp->sd_log_flush_tail = new_head; + sdp->sd_log_head = new_head; +} + /** * gfs2_ail1_empty_one - Check whether or not a trans in the AIL has been synced * @sdp: the filesystem @@ -317,6 +339,7 @@ static int gfs2_ail1_empty(struct gfs2_sbd *sdp, int max_revokes) else oldest_tr = 0; } + gfs2_log_update_flush_tail(sdp); ret = list_empty(&sdp->sd_ail1_list); spin_unlock(&sdp->sd_ail_lock); @@ -544,30 +567,14 @@ static unsigned int calc_reserved(struct gfs2_sbd *sdp) return reserved; } -static unsigned int current_tail(struct gfs2_sbd *sdp) -{ - struct gfs2_trans *tr; - unsigned int tail; - - spin_lock(&sdp->sd_ail_lock); - - if (list_empty(&sdp->sd_ail1_list)) { - tail = sdp->sd_log_head; - } else { - tr = list_last_entry(&sdp->sd_ail1_list, struct gfs2_trans, - tr_list); - tail = tr->tr_first; - } - - spin_unlock(&sdp->sd_ail_lock); - - return tail; -} - -static void log_pull_tail(struct gfs2_sbd *sdp, unsigned int new_tail) +static void log_pull_tail(struct gfs2_sbd *sdp) { - unsigned int dist = log_distance(sdp, new_tail, sdp->sd_log_tail); + unsigned int new_tail = sdp->sd_log_flush_tail; + unsigned int dist; + if (new_tail == sdp->sd_log_tail) + return; + dist = log_distance(sdp, new_tail, sdp->sd_log_tail); ail2_empty(sdp, new_tail); gfs2_log_release(sdp, dist); sdp->sd_log_tail = new_tail; @@ -822,26 +829,23 @@ void gfs2_write_log_header(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd, static void log_write_header(struct gfs2_sbd *sdp, u32 flags) { - unsigned int tail; int op_flags = REQ_PREFLUSH | REQ_FUA | REQ_META | REQ_SYNC; enum gfs2_freeze_state state = atomic_read(&sdp->sd_freeze_state); gfs2_assert_withdraw(sdp, (state != SFS_FROZEN)); - tail = current_tail(sdp); if (test_bit(SDF_NOBARRIERS, &sdp->sd_flags)) { gfs2_ordered_wait(sdp); log_flush_wait(sdp); op_flags = REQ_SYNC | REQ_META | REQ_PRIO; } - sdp->sd_log_idle = (tail == sdp->sd_log_flush_head); - gfs2_write_log_header(sdp, sdp->sd_jdesc, sdp->sd_log_sequence++, tail, - 
sdp->sd_log_flush_head, flags, op_flags); + sdp->sd_log_idle = (sdp->sd_log_flush_tail == sdp->sd_log_flush_head); + gfs2_write_log_header(sdp, sdp->sd_jdesc, sdp->sd_log_sequence++, + sdp->sd_log_flush_tail, sdp->sd_log_flush_head, + flags, op_flags); gfs2_log_incr_head(sdp); log_flush_wait(sdp); - - if (sdp->sd_log_tail != tail) - log_pull_tail(sdp, tail); + log_pull_tail(sdp); } /** @@ -991,7 +995,7 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, u32 flags) if (sdp->sd_log_head != sdp->sd_log_flush_head) { log_flush_wait(sdp); log_write_header(sdp, flags); - } else if (sdp->sd_log_tail != current_tail(sdp) && !sdp->sd_log_idle) { + } else if (sdp->sd_log_tail != sdp->sd_log_flush_tail && !sdp->sd_log_idle) { atomic_dec(&sdp->sd_log_blks_free); /* Adjust for unreserved buffer */ trace_gfs2_log_blocks(sdp, -1); log_write_header(sdp, flags); @@ -1001,7 +1005,7 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, u32 flags) lops_after_commit(sdp, tr); gfs2_log_lock(sdp); - sdp->sd_log_head = sdp->sd_log_flush_head; + gfs2_log_update_head(sdp); sdp->sd_log_blks_reserved = 0; sdp->sd_log_committed_revoke = 0; @@ -1021,7 +1025,7 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, u32 flags) atomic_dec(&sdp->sd_log_blks_free); /* Adjust for unreserved buffer */ trace_gfs2_log_blocks(sdp, -1); log_write_header(sdp, flags); - sdp->sd_log_head = sdp->sd_log_flush_head; + gfs2_log_update_head(sdp); } if (flags & (GFS2_LOG_HEAD_FLUSH_SHUTDOWN | GFS2_LOG_HEAD_FLUSH_FREEZE)) @@ -1156,7 +1160,7 @@ static void gfs2_log_shutdown(struct gfs2_sbd *sdp) gfs2_assert_warn(sdp, sdp->sd_log_head == sdp->sd_log_tail); gfs2_assert_warn(sdp, list_empty(&sdp->sd_ail2_list)); - sdp->sd_log_head = sdp->sd_log_flush_head; + gfs2_log_update_head(sdp); sdp->sd_log_tail = sdp->sd_log_head; } diff --git a/fs/gfs2/log.h b/fs/gfs2/log.h index cbc097ca9244..b36a3539f352 100644 --- a/fs/gfs2/log.h +++ b/fs/gfs2/log.h @@ -43,7 +43,9 @@ static inline void gfs2_log_pointers_init(struct gfs2_sbd *sdp, if (++value == sdp->sd_jdesc->jd_blocks) { value = 0; } - sdp->sd_log_head = sdp->sd_log_tail = value; + sdp->sd_log_tail = value; + sdp->sd_log_flush_tail = value; + sdp->sd_log_head = value; } static inline void gfs2_ordered_add_inode(struct gfs2_inode *ip) -- cgit v1.2.3-70-g09d2 From 76fce6548961a0c6246c4796e71800cdc63d5851 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Sat, 19 Dec 2020 10:54:51 +0100 Subject: gfs2: Move function gfs2_ail_empty_tr Move this function further up in log.c so that we can use it in the next patch. 
Signed-off-by: Andreas Gruenbacher --- fs/gfs2/log.c | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) (limited to 'fs/gfs2') diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index 6b5584fd8ff7..6fd4ded1e357 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -264,6 +264,23 @@ static void gfs2_log_update_head(struct gfs2_sbd *sdp) sdp->sd_log_head = new_head; } +/** + * gfs2_ail_empty_tr - empty one of the ail lists of a transaction + */ + +static void gfs2_ail_empty_tr(struct gfs2_sbd *sdp, struct gfs2_trans *tr, + struct list_head *head) +{ + struct gfs2_bufdata *bd; + + while (!list_empty(head)) { + bd = list_first_entry(head, struct gfs2_bufdata, + bd_ail_st_list); + gfs2_assert(sdp, bd->bd_tr == tr); + gfs2_remove_from_ail(bd); + } +} + /** * gfs2_ail1_empty_one - Check whether or not a trans in the AIL has been synced * @sdp: the filesystem @@ -373,23 +390,6 @@ static void gfs2_ail1_wait(struct gfs2_sbd *sdp) spin_unlock(&sdp->sd_ail_lock); } -/** - * gfs2_ail_empty_tr - empty one of the ail lists for a transaction - */ - -static void gfs2_ail_empty_tr(struct gfs2_sbd *sdp, struct gfs2_trans *tr, - struct list_head *head) -{ - struct gfs2_bufdata *bd; - - while (!list_empty(head)) { - bd = list_first_entry(head, struct gfs2_bufdata, - bd_ail_st_list); - gfs2_assert(sdp, bd->bd_tr == tr); - gfs2_remove_from_ail(bd); - } -} - static void __ail2_empty(struct gfs2_sbd *sdp, struct gfs2_trans *tr) { gfs2_ail_empty_tr(sdp, tr, &tr->tr_ail2_list); -- cgit v1.2.3-70-g09d2 From f38e998fbbb5da6a097ecd4b2700ba95eabab0c9 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Mon, 5 Oct 2020 19:39:18 +0200 Subject: gfs2: Also reflect single-block allocations in rgd->rd_extfail_pt Pass a non-NULL minext to gfs2_rbm_find even for single-block allocations. In gfs2_rbm_find, also set rgd->rd_extfail_pt when a single-block allocation fails in a resource group: there is no reason for treating that case differently. In gfs2_reservation_check_and_update, only check how many free blocks we have if more than one block is requested; we already know there's at least one free block. In addition, when allocating N blocks fails in gfs2_rbm_find, we need to set rd_extfail_pt to N - 1 rather than N: rd_extfail_pt defines the biggest allocation that might still succeed. Finally, reset rd_extfail_pt when updating the resource group statistics in update_rgrp_lvb, as we already do in gfs2_rgrp_bh_get. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/rgrp.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) (limited to 'fs/gfs2') diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index f878d82750f7..ad14a920f321 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -1278,6 +1278,8 @@ static int update_rgrp_lvb(struct gfs2_rgrpd *rgd) rgd->rd_free = be32_to_cpu(rgd->rd_rgl->rl_free); rgrp_set_bitmap_flags(rgd); rgd->rd_free_clone = rgd->rd_free; + /* max out the rgrp allocation failure point */ + rgd->rd_extfail_pt = rgd->rd_free; rgd->rd_dinodes = be32_to_cpu(rgd->rd_rgl->rl_dinodes); rgd->rd_igeneration = be64_to_cpu(rgd->rd_rgl->rl_igeneration); return 0; @@ -1676,7 +1678,7 @@ static int gfs2_reservation_check_and_update(struct gfs2_rbm *rbm, * If we have a minimum extent length, then skip over any extent * which is less than the min extent length in size. 
*/ - if (minext) { + if (minext > 1) { extlen = gfs2_free_extlen(rbm, minext); if (extlen <= maxext->len) goto fail; @@ -1711,7 +1713,7 @@ fail: * gfs2_rbm_find - Look for blocks of a particular state * @rbm: Value/result starting position and final position * @state: The state which we want to find - * @minext: Pointer to the requested extent length (NULL for a single block) + * @minext: Pointer to the requested extent length * This is updated to be the actual reservation size. * @ip: If set, check for reservations * @nowrap: Stop looking at the end of the rgrp, rather than wrapping @@ -1767,8 +1769,7 @@ static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 *minext, if (ip == NULL) return 0; - ret = gfs2_reservation_check_and_update(rbm, ip, - minext ? *minext : 0, + ret = gfs2_reservation_check_and_update(rbm, ip, *minext, &maxext); if (ret == 0) return 0; @@ -1800,7 +1801,7 @@ next_iter: break; } - if (minext == NULL || state != GFS2_BLKST_FREE) + if (state != GFS2_BLKST_FREE) return -ENOSPC; /* If the extent was too small, and it's smaller than the smallest @@ -1808,7 +1809,7 @@ next_iter: useless to search this rgrp again for this amount or more. */ if (wrapped && (scan_from_start || rbm->bii > last_bii) && *minext < rbm->rgd->rd_extfail_pt) - rbm->rgd->rd_extfail_pt = *minext; + rbm->rgd->rd_extfail_pt = *minext - 1; /* If the maximum extent we found is big enough to fulfill the minimum requirements, use it anyway. */ @@ -2382,14 +2383,15 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks, struct buffer_head *dibh; struct gfs2_rbm rbm = { .rgd = ip->i_res.rs_rgd, }; u64 block; /* block, within the file system scope */ + u32 minext = 1; int error; gfs2_set_alloc_start(&rbm, ip, dinode); - error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, NULL, ip, false); + error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, &minext, ip, false); if (error == -ENOSPC) { gfs2_set_alloc_start(&rbm, ip, dinode); - error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, NULL, NULL, false); + error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, &minext, NULL, false); } /* Since all blocks are reserved in advance, this shouldn't happen */ -- cgit v1.2.3-70-g09d2 From 3d39fcd16d885ebb0d9a2e345accb0e5ae13fed9 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Fri, 9 Oct 2020 19:30:16 +0200 Subject: gfs2: Only pass reservation down to gfs2_rbm_find Only pass the current reservation down to gfs2_rbm_find rather than the entire inode; we don't need any of the other information. 
Signed-off-by: Andreas Gruenbacher --- fs/gfs2/rgrp.c | 26 +++++++++++++------------- fs/gfs2/rgrp.h | 2 +- 2 files changed, 14 insertions(+), 14 deletions(-) (limited to 'fs/gfs2') diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index ad14a920f321..deb553624b3c 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -79,7 +79,7 @@ static const char valid_change[16] = { }; static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 *minext, - const struct gfs2_inode *ip, bool nowrap); + struct gfs2_blkreserv *rs, bool nowrap); /** @@ -1590,7 +1590,7 @@ static void rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip, if (WARN_ON(gfs2_rbm_from_block(&rbm, goal))) return; - ret = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, &extlen, ip, true); + ret = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, &extlen, &ip->i_res, true); if (ret == 0) { rs->rs_start = gfs2_rbm_to_block(&rbm); rs->rs_free = extlen; @@ -1606,7 +1606,7 @@ static void rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip, * @rgd: The resource group * @block: The starting block * @length: The required length - * @ip: Ignore any reservations for this inode + * @ignore_rs: Reservation to ignore * * If the block does not appear in any reservation, then return the * block number unchanged. If it does appear in the reservation, then @@ -1616,7 +1616,7 @@ static void rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip, static u64 gfs2_next_unreserved_block(struct gfs2_rgrpd *rgd, u64 block, u32 length, - const struct gfs2_inode *ip) + struct gfs2_blkreserv *ignore_rs) { struct gfs2_blkreserv *rs; struct rb_node *n; @@ -1636,7 +1636,7 @@ static u64 gfs2_next_unreserved_block(struct gfs2_rgrpd *rgd, u64 block, } if (n) { - while ((rs_cmp(block, length, rs) == 0) && (&ip->i_res != rs)) { + while (rs_cmp(block, length, rs) == 0 && rs != ignore_rs) { block = rs->rs_start + rs->rs_free; n = n->rb_right; if (n == NULL) @@ -1652,7 +1652,7 @@ static u64 gfs2_next_unreserved_block(struct gfs2_rgrpd *rgd, u64 block, /** * gfs2_reservation_check_and_update - Check for reservations during block alloc * @rbm: The current position in the resource group - * @ip: The inode for which we are searching for blocks + * @rs: Our own reservation * @minext: The minimum extent length * @maxext: A pointer to the maximum extent structure * @@ -1666,7 +1666,7 @@ static u64 gfs2_next_unreserved_block(struct gfs2_rgrpd *rgd, u64 block, */ static int gfs2_reservation_check_and_update(struct gfs2_rbm *rbm, - const struct gfs2_inode *ip, + struct gfs2_blkreserv *rs, u32 minext, struct gfs2_extent *maxext) { @@ -1688,7 +1688,7 @@ static int gfs2_reservation_check_and_update(struct gfs2_rbm *rbm, * Check the extent which has been found against the reservations * and skip if parts of it are already reserved */ - nblock = gfs2_next_unreserved_block(rbm->rgd, block, extlen, ip); + nblock = gfs2_next_unreserved_block(rbm->rgd, block, extlen, rs); if (nblock == block) { if (!minext || extlen >= minext) return 0; @@ -1715,7 +1715,7 @@ fail: * @state: The state which we want to find * @minext: Pointer to the requested extent length * This is updated to be the actual reservation size. - * @ip: If set, check for reservations + * @rs: Our own reservation (NULL to skip checking for reservations) * @nowrap: Stop looking at the end of the rgrp, rather than wrapping * around until we've reached the starting point. 
* @@ -1729,7 +1729,7 @@ fail: */ static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 *minext, - const struct gfs2_inode *ip, bool nowrap) + struct gfs2_blkreserv *rs, bool nowrap) { bool scan_from_start = rbm->bii == 0 && rbm->offset == 0; struct buffer_head *bh; @@ -1766,10 +1766,10 @@ static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 *minext, goto next_bitmap; } rbm->offset = offset; - if (ip == NULL) + if (!rs) return 0; - ret = gfs2_reservation_check_and_update(rbm, ip, *minext, + ret = gfs2_reservation_check_and_update(rbm, rs, *minext, &maxext); if (ret == 0) return 0; @@ -2387,7 +2387,7 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks, int error; gfs2_set_alloc_start(&rbm, ip, dinode); - error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, &minext, ip, false); + error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, &minext, &ip->i_res, false); if (error == -ENOSPC) { gfs2_set_alloc_start(&rbm, ip, dinode); diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h index 9a587ada51ed..be1b2034f5ee 100644 --- a/fs/gfs2/rgrp.h +++ b/fs/gfs2/rgrp.h @@ -77,7 +77,7 @@ extern int gfs2_fitrim(struct file *filp, void __user *argp); /* This is how to tell if a reservation is in the rgrp tree: */ static inline bool gfs2_rs_active(const struct gfs2_blkreserv *rs) { - return rs && !RB_EMPTY_NODE(&rs->rs_node); + return !RB_EMPTY_NODE(&rs->rs_node); } static inline int rgrp_contains_block(struct gfs2_rgrpd *rgd, u64 block) -- cgit v1.2.3-70-g09d2 From b2598965dc84bbcf8dd54accc80f60820e5d4965 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Mon, 12 Oct 2020 17:23:50 +0200 Subject: gfs2: Don't search for unreserved space twice If gfs2_inplace_reserve has chosen a resource group but it couldn't make a reservation there, there are too many other reservations in that resource group. In that case, don't even try to respect existing reservations in gfs2_alloc_blocks. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/rgrp.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'fs/gfs2') diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index deb553624b3c..95ff4a3c89de 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -2384,11 +2384,12 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks, struct gfs2_rbm rbm = { .rgd = ip->i_res.rs_rgd, }; u64 block; /* block, within the file system scope */ u32 minext = 1; - int error; - - gfs2_set_alloc_start(&rbm, ip, dinode); - error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, &minext, &ip->i_res, false); + int error = -ENOSPC; + if (gfs2_rs_active(&ip->i_res)) { + gfs2_set_alloc_start(&rbm, ip, dinode); + error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, &minext, &ip->i_res, false); + } if (error == -ENOSPC) { gfs2_set_alloc_start(&rbm, ip, dinode); error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, &minext, NULL, false); -- cgit v1.2.3-70-g09d2 From 0ec9b9ea4f83303bfd8f052a3d8b2bd179b002e1 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 21 Oct 2020 16:37:54 +0200 Subject: gfs2: Check for active reservation in gfs2_release In gfs2_release, check if the inode has an active reservation to avoid unnecessary lock taking. 
Signed-off-by: Andreas Gruenbacher --- fs/gfs2/file.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/gfs2') diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index b39b339feddc..177c4d74ca30 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -716,10 +716,10 @@ static int gfs2_release(struct inode *inode, struct file *file) kfree(file->private_data); file->private_data = NULL; - if (file->f_mode & FMODE_WRITE) { + if (gfs2_rs_active(&ip->i_res)) gfs2_rs_delete(ip, &inode->i_writecount); + if (file->f_mode & FMODE_WRITE) gfs2_qa_put(ip); - } return 0; } -- cgit v1.2.3-70-g09d2 From 07974d2a2a98bc3ce15f3411ebe5175c0af8f1c3 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 22 Oct 2020 20:34:29 +0200 Subject: gfs2: Rename rs_{free -> requested} and rd_{reserved -> requested} We keep track of what we've so far been referring to as reservations in rd_rstree: the nodes in that tree indicate where in a resource group we'd like to allocate the next couple of blocks for a particular inode. Local processes take those as hints, but they may still "steal" blocks from those extents, so when actually allocating a block, we must double check in the bitmap whether that block is actually still free. Likewise, other cluster nodes may "steal" such blocks as well. One of the following patches introduces resource group glock sharing, i.e., sharing of an exclusively locked resource group glock among local processes to speed up allocations. To make that work, we'll need to keep track of how many blocks we've actually reserved for each inode, so we end up with two different kinds of reservations. Distinguish these two kinds by referring to blocks which are reserved but may still be "stolen" as "requested". This rename also makes it more obvious that rs_requested and rd_requested are strongly related. 
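A rough user-space sketch of why these blocks are only "requested": the bitmap, not the reservation tree, decides whether a block can actually be handed out, so another allocator may consume blocks that fall inside someone else's requested extent. Everything below (bitmap_test, bitmap_set, alloc_block, struct requested_extent) is invented for the sketch and is not a GFS2 interface.

/* steal_sketch.c: the bitmap is authoritative; requested extents are hints. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define RG_BLOCKS 64

static uint64_t bitmap;                 /* bit set => block already in use */

struct requested_extent {               /* analogue of rs_start / rs_requested */
        unsigned int start;
        unsigned int requested;
};

static bool bitmap_test(unsigned int block) { return bitmap & (1ULL << block); }
static void bitmap_set(unsigned int block)  { bitmap |= 1ULL << block; }

/* Allocate one block at or after @goal.  A requested extent belonging to
 * another inode may cover @goal, but only the bitmap counts, so used
 * blocks are simply skipped, effectively "stealing" from that extent. */
static int alloc_block(unsigned int goal)
{
        for (unsigned int b = goal; b < RG_BLOCKS; b++) {
                if (!bitmap_test(b)) {
                        bitmap_set(b);
                        return b;
                }
        }
        return -1;
}

int main(void)
{
        struct requested_extent other = { .start = 10, .requested = 8 };

        bitmap_set(10);         /* block 10 of that extent is already used */
        printf("allocated block %d inside extent [%u, +%u)\n",
               alloc_block(other.start), other.start, other.requested);
        return 0;
}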
Signed-off-by: Andreas Gruenbacher --- fs/gfs2/incore.h | 6 +++--- fs/gfs2/rgrp.c | 42 +++++++++++++++++++++--------------------- fs/gfs2/trace_gfs2.h | 18 +++++++++--------- 3 files changed, 33 insertions(+), 33 deletions(-) (limited to 'fs/gfs2') diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index cca806ff611c..0640d0c70a75 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -106,7 +106,7 @@ struct gfs2_rgrpd { u32 rd_data; /* num of data blocks in rgrp */ u32 rd_bitbytes; /* number of bytes in data bitmaps */ u32 rd_free; - u32 rd_reserved; /* number of blocks reserved */ + u32 rd_requested; /* number of blocks in rd_rstree */ u32 rd_free_clone; u32 rd_dinodes; u64 rd_igeneration; @@ -290,8 +290,8 @@ struct gfs2_qadata { /* quota allocation data */ struct gfs2_blkreserv { struct rb_node rs_node; /* node within rd_rstree */ struct gfs2_rgrpd *rs_rgd; - u64 rs_start; /* start of reservation */ - u32 rs_free; /* how many blocks are still free */ + u64 rs_start; + u32 rs_requested; }; /* diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 95ff4a3c89de..bc8d1ab9e07f 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -203,7 +203,7 @@ static inline u64 gfs2_bit_search(const __le64 *ptr, u64 mask, u8 state) */ static inline int rs_cmp(u64 start, u32 len, struct gfs2_blkreserv *rs) { - if (start >= rs->rs_start + rs->rs_free) + if (start >= rs->rs_start + rs->rs_requested) return 1; if (rs->rs_start >= start + len) return -1; @@ -625,7 +625,7 @@ static void dump_rs(struct seq_file *seq, const struct gfs2_blkreserv *rs, fs_id_buf, (unsigned long long)ip->i_no_addr, (unsigned long long)rs->rs_start, - rs->rs_free); + rs->rs_requested); } /** @@ -645,17 +645,17 @@ static void __rs_deltree(struct gfs2_blkreserv *rs) rb_erase(&rs->rs_node, &rgd->rd_rstree); RB_CLEAR_NODE(&rs->rs_node); - if (rs->rs_free) { - /* return reserved blocks to the rgrp */ - BUG_ON(rs->rs_rgd->rd_reserved < rs->rs_free); - rs->rs_rgd->rd_reserved -= rs->rs_free; + if (rs->rs_requested) { + /* return requested blocks to the rgrp */ + BUG_ON(rs->rs_rgd->rd_requested < rs->rs_requested); + rs->rs_rgd->rd_requested -= rs->rs_requested; /* The rgrp extent failure point is likely not to increase; it will only do so if the freed blocks are somehow contiguous with a span of free blocks that follows. Still, it will force the number to be recalculated later. 
*/ - rgd->rd_extfail_pt += rs->rs_free; - rs->rs_free = 0; + rgd->rd_extfail_pt += rs->rs_requested; + rs->rs_requested = 0; } } @@ -672,7 +672,7 @@ void gfs2_rs_deltree(struct gfs2_blkreserv *rs) if (rgd) { spin_lock(&rgd->rd_rsspin); __rs_deltree(rs); - BUG_ON(rs->rs_free); + BUG_ON(rs->rs_requested); spin_unlock(&rgd->rd_rsspin); } } @@ -1504,7 +1504,7 @@ static void rs_insert(struct gfs2_inode *ip) rb_entry(*newn, struct gfs2_blkreserv, rs_node); parent = *newn; - rc = rs_cmp(rs->rs_start, rs->rs_free, cur); + rc = rs_cmp(rs->rs_start, rs->rs_requested, cur); if (rc > 0) newn = &((*newn)->rb_right); else if (rc < 0) @@ -1520,7 +1520,7 @@ static void rs_insert(struct gfs2_inode *ip) rb_insert_color(&rs->rs_node, &rgd->rd_rstree); /* Do our rgrp accounting for the reservation */ - rgd->rd_reserved += rs->rs_free; /* blocks reserved */ + rgd->rd_requested += rs->rs_requested; /* blocks requested */ spin_unlock(&rgd->rd_rsspin); trace_gfs2_rs(rs, TRACE_RS_INSERT); } @@ -1541,9 +1541,9 @@ static inline u32 rgd_free(struct gfs2_rgrpd *rgd, struct gfs2_blkreserv *rs) { u32 tot_reserved, tot_free; - if (WARN_ON_ONCE(rgd->rd_reserved < rs->rs_free)) + if (WARN_ON_ONCE(rgd->rd_requested < rs->rs_requested)) return 0; - tot_reserved = rgd->rd_reserved - rs->rs_free; + tot_reserved = rgd->rd_requested - rs->rs_requested; if (rgd->rd_free_clone < tot_reserved) tot_reserved = 0; @@ -1578,7 +1578,7 @@ static void rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip, extlen = max_t(u32, atomic_read(&ip->i_sizehint), ap->target); extlen = clamp(extlen, (u32)RGRP_RSRV_MINBLKS, free_blocks); } - if ((rgd->rd_free_clone < rgd->rd_reserved) || (free_blocks < extlen)) + if ((rgd->rd_free_clone < rgd->rd_requested) || (free_blocks < extlen)) return; /* Find bitmap block that contains bits for goal block */ @@ -1593,7 +1593,7 @@ static void rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip, ret = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, &extlen, &ip->i_res, true); if (ret == 0) { rs->rs_start = gfs2_rbm_to_block(&rbm); - rs->rs_free = extlen; + rs->rs_requested = extlen; rs_insert(ip); } else { if (goal == rgd->rd_last_alloc + rgd->rd_data0) @@ -1637,7 +1637,7 @@ static u64 gfs2_next_unreserved_block(struct gfs2_rgrpd *rgd, u64 block, if (n) { while (rs_cmp(block, length, rs) == 0 && rs != ignore_rs) { - block = rs->rs_start + rs->rs_free; + block = rs->rs_start + rs->rs_requested; n = n->rb_right; if (n == NULL) break; @@ -2263,7 +2263,7 @@ void gfs2_rgrp_dump(struct seq_file *seq, struct gfs2_rgrpd *rgd, fs_id_buf, (unsigned long long)rgd->rd_addr, rgd->rd_flags, rgd->rd_free, rgd->rd_free_clone, rgd->rd_dinodes, - rgd->rd_reserved, rgd->rd_extfail_pt); + rgd->rd_requested, rgd->rd_extfail_pt); if (rgd->rd_sbd->sd_args.ar_rgrplvb) { struct gfs2_rgrp_lvb *rgl = rgd->rd_rgl; @@ -2318,12 +2318,12 @@ static void gfs2_adjust_reservation(struct gfs2_inode *ip, unsigned int rlen; rs->rs_start += len; - rlen = min(rs->rs_free, len); - rs->rs_free -= rlen; - rgd->rd_reserved -= rlen; + rlen = min(rs->rs_requested, len); + rs->rs_requested -= rlen; + rgd->rd_requested -= rlen; trace_gfs2_rs(rs, TRACE_RS_CLAIM); if (rs->rs_start < rgd->rd_data0 + rgd->rd_data && - rs->rs_free) + rs->rs_requested) goto out; /* We used up our block reservation, so we should reserve more blocks next time. 
*/ diff --git a/fs/gfs2/trace_gfs2.h b/fs/gfs2/trace_gfs2.h index e461a691e5e8..d24bdcdd42e5 100644 --- a/fs/gfs2/trace_gfs2.h +++ b/fs/gfs2/trace_gfs2.h @@ -560,7 +560,7 @@ TRACE_EVENT(gfs2_block_alloc, __field( u8, block_state ) __field( u64, rd_addr ) __field( u32, rd_free_clone ) - __field( u32, rd_reserved ) + __field( u32, rd_requested ) ), TP_fast_assign( @@ -571,7 +571,7 @@ TRACE_EVENT(gfs2_block_alloc, __entry->block_state = block_state; __entry->rd_addr = rgd->rd_addr; __entry->rd_free_clone = rgd->rd_free_clone; - __entry->rd_reserved = rgd->rd_reserved; + __entry->rd_requested = rgd->rd_requested; ), TP_printk("%u,%u bmap %llu alloc %llu/%lu %s rg:%llu rf:%u rr:%lu", @@ -581,7 +581,7 @@ TRACE_EVENT(gfs2_block_alloc, (unsigned long)__entry->len, block_state_name(__entry->block_state), (unsigned long long)__entry->rd_addr, - __entry->rd_free_clone, (unsigned long)__entry->rd_reserved) + __entry->rd_free_clone, (unsigned long)__entry->rd_requested) ); /* Keep track of multi-block reservations as they are allocated/freed */ @@ -595,10 +595,10 @@ TRACE_EVENT(gfs2_rs, __field( dev_t, dev ) __field( u64, rd_addr ) __field( u32, rd_free_clone ) - __field( u32, rd_reserved ) + __field( u32, rd_requested ) __field( u64, inum ) __field( u64, start ) - __field( u32, free ) + __field( u32, requested ) __field( u8, func ) ), @@ -606,11 +606,11 @@ TRACE_EVENT(gfs2_rs, __entry->dev = rs->rs_rgd->rd_sbd->sd_vfs->s_dev; __entry->rd_addr = rs->rs_rgd->rd_addr; __entry->rd_free_clone = rs->rs_rgd->rd_free_clone; - __entry->rd_reserved = rs->rs_rgd->rd_reserved; + __entry->rd_requested = rs->rs_rgd->rd_requested; __entry->inum = container_of(rs, struct gfs2_inode, i_res)->i_no_addr; __entry->start = rs->rs_start; - __entry->free = rs->rs_free; + __entry->requested = rs->rs_requested; __entry->func = func; ), @@ -620,8 +620,8 @@ TRACE_EVENT(gfs2_rs, (unsigned long long)__entry->start, (unsigned long long)__entry->rd_addr, (unsigned long)__entry->rd_free_clone, - (unsigned long)__entry->rd_reserved, - rs_func_name(__entry->func), (unsigned long)__entry->free) + (unsigned long)__entry->rd_requested, + rs_func_name(__entry->func), (unsigned long)__entry->requested) ); #endif /* _TRACE_GFS2_H */ -- cgit v1.2.3-70-g09d2 From 725d0e9d464d567cd9290e29879d8bffc92013f8 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Tue, 2 Oct 2018 14:59:54 +0100 Subject: gfs2: Add per-reservation reserved block accounting Add a rs_reserved field to struct gfs2_blkreserv to keep track of the number of blocks reserved by this particular reservation, and a rd_reserved field to struct gfs2_rgrpd to keep track of the total number of reserved blocks in the resource group. Those blocks are exclusively reserved, as opposed to the rs_requested / rd_requested blocks which are tracked in the reservation tree (rd_rstree) and which can be stolen if necessary. When making a reservation with gfs2_inplace_reserve, rs_reserved is set to somewhere between ap->min_target and ap->target depending on the number of free blocks in the resource group. When allocating blocks with gfs2_alloc_blocks, rs_reserved is decremented accordingly. Eventually, any reserved but not consumed blocks are returned to the resource group by gfs2_inplace_release. 
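The accounting can be summarized in a small user-space sketch. The structures and helpers below are invented stand-ins for rd_free_clone / rd_reserved, rs_reserved, gfs2_inplace_reserve, gfs2_alloc_blocks and gfs2_inplace_release; the rd_rsspin locking and the requested-block bookkeeping are left out.

/* reserve_sketch.c: reserve between min_target and target blocks, consume
 * some of them, and return the rest.
 */
#include <assert.h>
#include <stdio.h>

struct rgrp {
        unsigned int free_clone;        /* allocatable blocks */
        unsigned int reserved;          /* sum of all active rs_reserved */
};

struct resv {
        struct rgrp *rgd;
        unsigned int reserved;          /* blocks exclusively held */
};

/* Grant between @min_target and @target blocks, limited by what is still
 * unreserved in the resource group.  Returns 0 on success. */
static int inplace_reserve(struct resv *rs, struct rgrp *rgd,
                           unsigned int target, unsigned int min_target)
{
        unsigned int available = rgd->free_clone - rgd->reserved;

        if (available < min_target)
                return -1;              /* caller would try another rgrp */
        rs->rgd = rgd;
        rs->reserved = target < available ? target : available;
        rgd->reserved += rs->reserved;
        return 0;
}

/* Consume @n blocks from the reservation and from the resource group. */
static void alloc_blocks(struct resv *rs, unsigned int n)
{
        assert(rs->reserved >= n);
        rs->reserved -= n;
        rs->rgd->reserved -= n;
        rs->rgd->free_clone -= n;
}

/* Return whatever was reserved but never allocated. */
static void inplace_release(struct resv *rs)
{
        rs->rgd->reserved -= rs->reserved;
        rs->reserved = 0;
}

int main(void)
{
        struct rgrp rgd = { .free_clone = 100 };
        struct resv rs = { 0 };

        if (inplace_reserve(&rs, &rgd, 32, 1) == 0) {
                alloc_blocks(&rs, 20);
                inplace_release(&rs);
        }
        printf("free=%u reserved=%u\n", rgd.free_clone, rgd.reserved);
        return 0;
}

The invariant the new BUG_ONs in the patch defend is visible here as well: rd_reserved never exceeds rd_free_clone, and a reservation never hands out more blocks than it was granted.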
Signed-off-by: Andreas Gruenbacher --- fs/gfs2/file.c | 4 +-- fs/gfs2/incore.h | 2 ++ fs/gfs2/lops.c | 1 + fs/gfs2/rgrp.c | 80 +++++++++++++++++++++++++++++++++++++++------------- fs/gfs2/trace_gfs2.h | 23 +++++++++++---- 5 files changed, 82 insertions(+), 28 deletions(-) (limited to 'fs/gfs2') diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 177c4d74ca30..294087516ce0 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -1115,8 +1115,8 @@ static long __gfs2_fallocate(struct file *file, int mode, loff_t offset, loff_t goto out_qunlock; /* check if the selected rgrp limits our max_blks further */ - if (ap.allowed && ap.allowed < max_blks) - max_blks = ap.allowed; + if (ip->i_res.rs_reserved < max_blks) + max_blks = ip->i_res.rs_reserved; /* Almost done. Calculate bytes that can be written using * max_blks. We also recompute max_bytes, data_blocks and diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 0640d0c70a75..2679ba54798c 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -107,6 +107,7 @@ struct gfs2_rgrpd { u32 rd_bitbytes; /* number of bytes in data bitmaps */ u32 rd_free; u32 rd_requested; /* number of blocks in rd_rstree */ + u32 rd_reserved; /* number of reserved blocks */ u32 rd_free_clone; u32 rd_dinodes; u64 rd_igeneration; @@ -292,6 +293,7 @@ struct gfs2_blkreserv { struct gfs2_rgrpd *rs_rgd; u64 rs_start; u32 rs_requested; + u32 rs_reserved; /* number of reserved blocks */ }; /* diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c index 3922b26264f5..802bc15f9f11 100644 --- a/fs/gfs2/lops.c +++ b/fs/gfs2/lops.c @@ -84,6 +84,7 @@ static void maybe_release_space(struct gfs2_bufdata *bd) bd->bd_bh->b_data + bi->bi_offset, bi->bi_bytes); clear_bit(GBF_FULL, &bi->bi_flags); rgd->rd_free_clone = rgd->rd_free; + BUG_ON(rgd->rd_free_clone < rgd->rd_reserved); rgd->rd_extfail_pt = rgd->rd_free; } diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index bc8d1ab9e07f..f1df5e75364a 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -1229,6 +1229,7 @@ static int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd) rgrp_set_bitmap_flags(rgd); rgd->rd_flags |= (GFS2_RDF_UPTODATE | GFS2_RDF_CHECK); rgd->rd_free_clone = rgd->rd_free; + BUG_ON(rgd->rd_reserved); /* max out the rgrp allocation failure point */ rgd->rd_extfail_pt = rgd->rd_free; } @@ -1278,6 +1279,7 @@ static int update_rgrp_lvb(struct gfs2_rgrpd *rgd) rgd->rd_free = be32_to_cpu(rgd->rd_rgl->rl_free); rgrp_set_bitmap_flags(rgd); rgd->rd_free_clone = rgd->rd_free; + BUG_ON(rgd->rd_reserved); /* max out the rgrp allocation failure point */ rgd->rd_extfail_pt = rgd->rd_free; rgd->rd_dinodes = be32_to_cpu(rgd->rd_rgl->rl_dinodes); @@ -1568,17 +1570,26 @@ static void rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip, u64 goal; struct gfs2_blkreserv *rs = &ip->i_res; u32 extlen; - u32 free_blocks = rgd_free(rgd, rs); + u32 free_blocks, blocks_available; int ret; struct inode *inode = &ip->i_inode; + spin_lock(&rgd->rd_rsspin); + free_blocks = rgd_free(rgd, rs); + if (rgd->rd_free_clone < rgd->rd_requested) + free_blocks = 0; + blocks_available = rgd->rd_free_clone - rgd->rd_reserved; + if (rgd == rs->rs_rgd) + blocks_available += rs->rs_reserved; + spin_unlock(&rgd->rd_rsspin); + if (S_ISDIR(inode->i_mode)) extlen = 1; else { extlen = max_t(u32, atomic_read(&ip->i_sizehint), ap->target); extlen = clamp(extlen, (u32)RGRP_RSRV_MINBLKS, free_blocks); } - if ((rgd->rd_free_clone < rgd->rd_requested) || (free_blocks < extlen)) + if (free_blocks < extlen || blocks_available < extlen) return; /* Find bitmap block that contains bits for goal 
block */ @@ -2027,8 +2038,7 @@ static inline int fast_to_acquire(struct gfs2_rgrpd *rgd) * We try our best to find an rgrp that has at least ap->target blocks * available. After a couple of passes (loops == 2), the prospects of finding * such an rgrp diminish. At this stage, we return the first rgrp that has - * at least ap->min_target blocks available. Either way, we set ap->allowed to - * the number of blocks available in the chosen rgrp. + * at least ap->min_target blocks available. * * Returns: 0 on success, * -ENOMEM if a suitable rgrp can't be found @@ -2044,7 +2054,9 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap) u64 last_unlinked = NO_BLOCK; u32 target = ap->target; int loops = 0; - u32 free_blocks, skip = 0; + u32 free_blocks, blocks_available, skip = 0; + + BUG_ON(rs->rs_reserved); if (sdp->sd_args.ar_rgrplvb) flags |= GL_SKIP; @@ -2065,6 +2077,8 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap) return -EBADSLT; while (loops < 3) { + struct gfs2_rgrpd *rgd; + rg_locked = 1; if (!gfs2_glock_is_locked_by_me(rs->rs_rgd->rd_gl)) { @@ -2115,11 +2129,20 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap) goto check_rgrp; /* If rgrp has enough free space, use it */ - free_blocks = rgd_free(rs->rs_rgd, rs); - if (free_blocks >= target) { - ap->allowed = free_blocks; - return 0; + rgd = rs->rs_rgd; + spin_lock(&rgd->rd_rsspin); + free_blocks = rgd_free(rgd, rs); + blocks_available = rgd->rd_free_clone - rgd->rd_reserved; + if (free_blocks < target || blocks_available < target) { + spin_unlock(&rgd->rd_rsspin); + goto check_rgrp; } + rs->rs_reserved = ap->target; + if (rs->rs_reserved > blocks_available) + rs->rs_reserved = blocks_available; + rgd->rd_reserved += rs->rs_reserved; + spin_unlock(&rgd->rd_rsspin); + return 0; check_rgrp: /* Check for unlinked inodes which can be reclaimed */ if (rs->rs_rgd->rd_flags & GFS2_RDF_CHECK) @@ -2172,6 +2195,17 @@ next_rgrp: void gfs2_inplace_release(struct gfs2_inode *ip) { + struct gfs2_blkreserv *rs = &ip->i_res; + + if (rs->rs_reserved) { + struct gfs2_rgrpd *rgd = rs->rs_rgd; + + spin_lock(&rgd->rd_rsspin); + BUG_ON(rgd->rd_reserved < rs->rs_reserved); + rgd->rd_reserved -= rs->rs_reserved; + spin_unlock(&rgd->rd_rsspin); + rs->rs_reserved = 0; + } if (gfs2_holder_initialized(&ip->i_rgd_gh)) gfs2_glock_dq_uninit(&ip->i_rgd_gh); } @@ -2259,11 +2293,11 @@ void gfs2_rgrp_dump(struct seq_file *seq, struct gfs2_rgrpd *rgd, struct gfs2_blkreserv *trs; const struct rb_node *n; - gfs2_print_dbg(seq, "%s R: n:%llu f:%02x b:%u/%u i:%u r:%u e:%u\n", + gfs2_print_dbg(seq, "%s R: n:%llu f:%02x b:%u/%u i:%u q:%u r:%u e:%u\n", fs_id_buf, (unsigned long long)rgd->rd_addr, rgd->rd_flags, rgd->rd_free, rgd->rd_free_clone, rgd->rd_dinodes, - rgd->rd_requested, rgd->rd_extfail_pt); + rgd->rd_requested, rgd->rd_reserved, rgd->rd_extfail_pt); if (rgd->rd_sbd->sd_args.ar_rgrplvb) { struct gfs2_rgrp_lvb *rgl = rgd->rd_rgl; @@ -2310,7 +2344,8 @@ static void gfs2_adjust_reservation(struct gfs2_inode *ip, struct gfs2_blkreserv *rs = &ip->i_res; struct gfs2_rgrpd *rgd = rbm->rgd; - spin_lock(&rgd->rd_rsspin); + BUG_ON(rs->rs_reserved < len); + rs->rs_reserved -= len; if (gfs2_rs_active(rs)) { u64 start = gfs2_rbm_to_block(rbm); @@ -2324,15 +2359,13 @@ static void gfs2_adjust_reservation(struct gfs2_inode *ip, trace_gfs2_rs(rs, TRACE_RS_CLAIM); if (rs->rs_start < rgd->rd_data0 + rgd->rd_data && rs->rs_requested) - goto out; + return; /* We used up our block reservation, so we should 
reserve more blocks next time. */ atomic_add(RGRP_RSRV_ADDBLKS, &ip->i_sizehint); } __rs_deltree(rs); } -out: - spin_unlock(&rgd->rd_rsspin); } /** @@ -2386,6 +2419,8 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks, u32 minext = 1; int error = -ENOSPC; + BUG_ON(ip->i_res.rs_reserved < *nblocks); + if (gfs2_rs_active(&ip->i_res)) { gfs2_set_alloc_start(&rbm, ip, dinode); error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, &minext, &ip->i_res, false); @@ -2407,8 +2442,6 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks, gfs2_alloc_extent(&rbm, dinode, nblocks); block = gfs2_rbm_to_block(&rbm); rbm.rgd->rd_last_alloc = block - rbm.rgd->rd_data0; - if (gfs2_rs_active(&ip->i_res)) - gfs2_adjust_reservation(ip, &rbm, *nblocks); if (!dinode) { ip->i_goal = block + *nblocks - 1; error = gfs2_meta_inode_buffer(ip, &dibh); @@ -2421,12 +2454,20 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks, brelse(dibh); } } - if (rbm.rgd->rd_free < *nblocks) { + spin_lock(&rbm.rgd->rd_rsspin); + gfs2_adjust_reservation(ip, &rbm, *nblocks); + if (rbm.rgd->rd_free < *nblocks || rbm.rgd->rd_reserved < *nblocks) { fs_warn(sdp, "nblocks=%u\n", *nblocks); + spin_unlock(&rbm.rgd->rd_rsspin); goto rgrp_error; } - + BUG_ON(rbm.rgd->rd_reserved < *nblocks); + BUG_ON(rbm.rgd->rd_free_clone < *nblocks); + BUG_ON(rbm.rgd->rd_free < *nblocks); + rbm.rgd->rd_reserved -= *nblocks; + rbm.rgd->rd_free_clone -= *nblocks; rbm.rgd->rd_free -= *nblocks; + spin_unlock(&rbm.rgd->rd_rsspin); if (dinode) { rbm.rgd->rd_dinodes++; *generation = rbm.rgd->rd_igeneration++; @@ -2443,7 +2484,6 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks, gfs2_quota_change(ip, *nblocks, ip->i_inode.i_uid, ip->i_inode.i_gid); - rbm.rgd->rd_free_clone -= *nblocks; trace_gfs2_block_alloc(ip, rbm.rgd, block, *nblocks, dinode ? 
GFS2_BLKST_DINODE : GFS2_BLKST_USED); *bn = block; diff --git a/fs/gfs2/trace_gfs2.h b/fs/gfs2/trace_gfs2.h index d24bdcdd42e5..bd6c8e9e49db 100644 --- a/fs/gfs2/trace_gfs2.h +++ b/fs/gfs2/trace_gfs2.h @@ -561,6 +561,7 @@ TRACE_EVENT(gfs2_block_alloc, __field( u64, rd_addr ) __field( u32, rd_free_clone ) __field( u32, rd_requested ) + __field( u32, rd_reserved ) ), TP_fast_assign( @@ -572,16 +573,19 @@ TRACE_EVENT(gfs2_block_alloc, __entry->rd_addr = rgd->rd_addr; __entry->rd_free_clone = rgd->rd_free_clone; __entry->rd_requested = rgd->rd_requested; + __entry->rd_reserved = rgd->rd_reserved; ), - TP_printk("%u,%u bmap %llu alloc %llu/%lu %s rg:%llu rf:%u rr:%lu", + TP_printk("%u,%u bmap %llu alloc %llu/%lu %s rg:%llu rf:%u rq:%u rr:%u", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->inum, (unsigned long long)__entry->start, (unsigned long)__entry->len, block_state_name(__entry->block_state), (unsigned long long)__entry->rd_addr, - __entry->rd_free_clone, (unsigned long)__entry->rd_requested) + __entry->rd_free_clone, + __entry->rd_requested, + __entry->rd_reserved) ); /* Keep track of multi-block reservations as they are allocated/freed */ @@ -596,9 +600,11 @@ TRACE_EVENT(gfs2_rs, __field( u64, rd_addr ) __field( u32, rd_free_clone ) __field( u32, rd_requested ) + __field( u32, rd_reserved ) __field( u64, inum ) __field( u64, start ) __field( u32, requested ) + __field( u32, reserved ) __field( u8, func ) ), @@ -607,21 +613,26 @@ TRACE_EVENT(gfs2_rs, __entry->rd_addr = rs->rs_rgd->rd_addr; __entry->rd_free_clone = rs->rs_rgd->rd_free_clone; __entry->rd_requested = rs->rs_rgd->rd_requested; + __entry->rd_reserved = rs->rs_rgd->rd_reserved; __entry->inum = container_of(rs, struct gfs2_inode, i_res)->i_no_addr; __entry->start = rs->rs_start; __entry->requested = rs->rs_requested; + __entry->reserved = rs->rs_reserved; __entry->func = func; ), - TP_printk("%u,%u bmap %llu resrv %llu rg:%llu rf:%lu rr:%lu %s f:%lu", + TP_printk("%u,%u bmap %llu resrv %llu rg:%llu rf:%u rq:%u rr:%u %s q:%u r:%u", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->inum, (unsigned long long)__entry->start, (unsigned long long)__entry->rd_addr, - (unsigned long)__entry->rd_free_clone, - (unsigned long)__entry->rd_requested, - rs_func_name(__entry->func), (unsigned long)__entry->requested) + __entry->rd_free_clone, + __entry->rd_requested, + __entry->rd_reserved, + rs_func_name(__entry->func), + __entry->requested, + __entry->reserved) ); #endif /* _TRACE_GFS2_H */ -- cgit v1.2.3-70-g09d2 From 9e514605c77451745ea9fca5a26fc3153893686a Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Mon, 8 Feb 2021 20:44:26 +0100 Subject: gfs2: Add local resource group locking Prepare for treating resource group glocks as exclusive among nodes but shared among all tasks running on a node: introduce another layer of node-specific locking that the local tasks can use to coordinate their accesses. 
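The layering can be pictured with a small user-space analogue: an outer lock that keeps other nodes out (the role of the resource group glock), and an inner per-resource mutex that serializes the local tasks which will later share that glock. All names in the sketch are invented; it is not GFS2 code. Build with: cc -std=c11 -pthread local_lock_sketch.c

/* local_lock_sketch.c: two-level locking, coarse lock shared locally,
 * fine-grained mutex for local mutual exclusion.
 */
#include <pthread.h>
#include <stdio.h>

struct rgrp {
        pthread_rwlock_t glock;         /* stand-in for the cluster-wide glock */
        pthread_mutex_t local;          /* stand-in for rd_mutex */
        unsigned int free;              /* modified only under both locks */
};

static struct rgrp rg = {
        .glock = PTHREAD_RWLOCK_INITIALIZER,
        .local = PTHREAD_MUTEX_INITIALIZER,
        .free  = 1000,
};

static void *worker(void *arg)
{
        (void)arg;
        /* Shared on the outer lock: several local tasks may hold the
         * "glock" at once (what LM_FLAG_NODE_SCOPE later permits)... */
        pthread_rwlock_rdlock(&rg.glock);
        /* ...so the inner mutex provides the actual mutual exclusion,
         * as rgrp_lock_local / rgrp_unlock_local do in the patch. */
        pthread_mutex_lock(&rg.local);
        rg.free -= 1;
        pthread_mutex_unlock(&rg.local);
        pthread_rwlock_unlock(&rg.glock);
        return NULL;
}

int main(void)
{
        pthread_t t[4];

        for (int i = 0; i < 4; i++)
                pthread_create(&t[i], NULL, worker, NULL);
        for (int i = 0; i < 4; i++)
                pthread_join(t[i], NULL);
        printf("free blocks: %u\n", rg.free);   /* 996 */
        return 0;
}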
Signed-off-by: Andreas Gruenbacher --- fs/gfs2/incore.h | 2 ++ fs/gfs2/lops.c | 6 +++++- fs/gfs2/rgrp.c | 54 ++++++++++++++++++++++++++++++++++++++++++++++++------ fs/gfs2/rgrp.h | 4 ++++ 4 files changed, 59 insertions(+), 7 deletions(-) (limited to 'fs/gfs2') diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 2679ba54798c..92f5a920ce61 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -20,6 +20,7 @@ #include #include #include +#include #define DIO_WAIT 0x00000010 #define DIO_METADATA 0x00000020 @@ -123,6 +124,7 @@ struct gfs2_rgrpd { #define GFS2_RDF_PREFERRED 0x80000000 /* This rgrp is preferred */ #define GFS2_RDF_MASK 0xf0000000 /* mask for internal flags */ spinlock_t rd_rsspin; /* protects reservation related vars */ + struct mutex rd_mutex; struct rb_root rd_rstree; /* multi-block reservation tree */ }; diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c index 802bc15f9f11..dffa6cc8b5b8 100644 --- a/fs/gfs2/lops.c +++ b/fs/gfs2/lops.c @@ -76,8 +76,9 @@ static void maybe_release_space(struct gfs2_bufdata *bd) unsigned int index = bd->bd_bh->b_blocknr - gl->gl_name.ln_number; struct gfs2_bitmap *bi = rgd->rd_bits + index; + rgrp_lock_local(rgd); if (bi->bi_clone == NULL) - return; + goto out; if (sdp->sd_args.ar_discard) gfs2_rgrp_send_discards(sdp, rgd->rd_data0, bd->bd_bh, bi, 1, NULL); memcpy(bi->bi_clone + bi->bi_offset, @@ -86,6 +87,9 @@ static void maybe_release_space(struct gfs2_bufdata *bd) rgd->rd_free_clone = rgd->rd_free; BUG_ON(rgd->rd_free_clone < rgd->rd_reserved); rgd->rd_extfail_pt = rgd->rd_free; + +out: + rgrp_unlock_local(rgd); } /** diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index f1df5e75364a..f45b787307d0 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -920,6 +920,7 @@ static int read_rindex_entry(struct gfs2_inode *ip) rgd->rd_data = be32_to_cpu(buf.ri_data); rgd->rd_bitbytes = be32_to_cpu(buf.ri_bitbytes); spin_lock_init(&rgd->rd_rsspin); + mutex_init(&rgd->rd_mutex); error = compute_bitstructs(rgd); if (error) @@ -1449,9 +1450,11 @@ int gfs2_fitrim(struct file *filp, void __user *argp) /* Trim each bitmap in the rgrp */ for (x = 0; x < rgd->rd_length; x++) { struct gfs2_bitmap *bi = rgd->rd_bits + x; + rgrp_lock_local(rgd); ret = gfs2_rgrp_send_discards(sdp, rgd->rd_data0, NULL, bi, minlen, &amt); + rgrp_unlock_local(rgd); if (ret) { gfs2_glock_dq_uninit(&gh); goto out; @@ -1463,9 +1466,11 @@ int gfs2_fitrim(struct file *filp, void __user *argp) ret = gfs2_trans_begin(sdp, RES_RG_HDR, 0); if (ret == 0) { bh = rgd->rd_bits[0].bi_bh; + rgrp_lock_local(rgd); rgd->rd_flags |= GFS2_RGF_TRIMMED; gfs2_trans_add_meta(rgd->rd_gl, bh); gfs2_rgrp_out(rgd, bh->b_data); + rgrp_unlock_local(rgd); gfs2_trans_end(sdp); } } @@ -2050,7 +2055,8 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap) struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); struct gfs2_rgrpd *begin = NULL; struct gfs2_blkreserv *rs = &ip->i_res; - int error = 0, rg_locked, flags = 0; + int error = 0, flags = 0; + bool rg_locked; u64 last_unlinked = NO_BLOCK; u32 target = ap->target; int loops = 0; @@ -2079,10 +2085,10 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap) while (loops < 3) { struct gfs2_rgrpd *rgd; - rg_locked = 1; - - if (!gfs2_glock_is_locked_by_me(rs->rs_rgd->rd_gl)) { - rg_locked = 0; + rg_locked = gfs2_glock_is_locked_by_me(rs->rs_rgd->rd_gl); + if (rg_locked) { + rgrp_lock_local(rs->rs_rgd); + } else { if (skip && skip--) goto next_rgrp; if (!gfs2_rs_active(rs)) { @@ -2099,12 +2105,14 @@ int gfs2_inplace_reserve(struct 
gfs2_inode *ip, struct gfs2_alloc_parms *ap) &ip->i_rgd_gh); if (unlikely(error)) return error; + rgrp_lock_local(rs->rs_rgd); if (!gfs2_rs_active(rs) && (loops < 2) && gfs2_rgrp_congested(rs->rs_rgd, loops)) goto skip_rgrp; if (sdp->sd_args.ar_rgrplvb) { error = update_rgrp_lvb(rs->rs_rgd); if (unlikely(error)) { + rgrp_unlock_local(rs->rs_rgd); gfs2_glock_dq_uninit(&ip->i_rgd_gh); return error; } @@ -2142,6 +2150,7 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap) rs->rs_reserved = blocks_available; rgd->rd_reserved += rs->rs_reserved; spin_unlock(&rgd->rd_rsspin); + rgrp_unlock_local(rs->rs_rgd); return 0; check_rgrp: /* Check for unlinked inodes which can be reclaimed */ @@ -2149,6 +2158,8 @@ check_rgrp: try_rgrp_unlink(rs->rs_rgd, &last_unlinked, ip->i_no_addr); skip_rgrp: + rgrp_unlock_local(rs->rs_rgd); + /* Drop reservation, if we couldn't use reserved rgrp */ if (gfs2_rs_active(rs)) gfs2_rs_deltree(rs); @@ -2293,6 +2304,7 @@ void gfs2_rgrp_dump(struct seq_file *seq, struct gfs2_rgrpd *rgd, struct gfs2_blkreserv *trs; const struct rb_node *n; + spin_lock(&rgd->rd_rsspin); gfs2_print_dbg(seq, "%s R: n:%llu f:%02x b:%u/%u i:%u q:%u r:%u e:%u\n", fs_id_buf, (unsigned long long)rgd->rd_addr, rgd->rd_flags, @@ -2306,7 +2318,6 @@ void gfs2_rgrp_dump(struct seq_file *seq, struct gfs2_rgrpd *rgd, be32_to_cpu(rgl->rl_free), be32_to_cpu(rgl->rl_dinodes)); } - spin_lock(&rgd->rd_rsspin); for (n = rb_first(&rgd->rd_rstree); n; n = rb_next(&trs->rs_node)) { trs = rb_entry(n, struct gfs2_blkreserv, rs_node); dump_rs(seq, trs, fs_id_buf); @@ -2421,6 +2432,7 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks, BUG_ON(ip->i_res.rs_reserved < *nblocks); + rgrp_lock_local(rbm.rgd); if (gfs2_rs_active(&ip->i_res)) { gfs2_set_alloc_start(&rbm, ip, dinode); error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, &minext, &ip->i_res, false); @@ -2477,6 +2489,7 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks, gfs2_trans_add_meta(rbm.rgd->rd_gl, rbm.rgd->rd_bits[0].bi_bh); gfs2_rgrp_out(rbm.rgd, rbm.rgd->rd_bits[0].bi_bh->b_data); + rgrp_unlock_local(rbm.rgd); gfs2_statfs_change(sdp, 0, -(s64)*nblocks, dinode ? 
1 : 0); if (dinode) @@ -2490,6 +2503,7 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks, return 0; rgrp_error: + rgrp_unlock_local(rbm.rgd); gfs2_rgrp_error(rbm.rgd); return -EIO; } @@ -2509,12 +2523,14 @@ void __gfs2_free_blocks(struct gfs2_inode *ip, struct gfs2_rgrpd *rgd, { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); + rgrp_lock_local(rgd); rgblk_free(sdp, rgd, bstart, blen, GFS2_BLKST_FREE); trace_gfs2_block_alloc(ip, rgd, bstart, blen, GFS2_BLKST_FREE); rgd->rd_free += blen; rgd->rd_flags &= ~GFS2_RGF_TRIMMED; gfs2_trans_add_meta(rgd->rd_gl, rgd->rd_bits[0].bi_bh); gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); + rgrp_unlock_local(rgd); /* Directories keep their data in the metadata address space */ if (meta || ip->i_depth || gfs2_is_jdata(ip)) @@ -2550,17 +2566,20 @@ void gfs2_unlink_di(struct inode *inode) rgd = gfs2_blk2rgrpd(sdp, blkno, true); if (!rgd) return; + rgrp_lock_local(rgd); rgblk_free(sdp, rgd, blkno, 1, GFS2_BLKST_UNLINKED); trace_gfs2_block_alloc(ip, rgd, blkno, 1, GFS2_BLKST_UNLINKED); gfs2_trans_add_meta(rgd->rd_gl, rgd->rd_bits[0].bi_bh); gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); be32_add_cpu(&rgd->rd_rgl->rl_unlinked, 1); + rgrp_unlock_local(rgd); } void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip) { struct gfs2_sbd *sdp = rgd->rd_sbd; + rgrp_lock_local(rgd); rgblk_free(sdp, rgd, ip->i_no_addr, 1, GFS2_BLKST_FREE); if (!rgd->rd_dinodes) gfs2_consist_rgrpd(rgd); @@ -2569,6 +2588,7 @@ void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip) gfs2_trans_add_meta(rgd->rd_gl, rgd->rd_bits[0].bi_bh); gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); + rgrp_unlock_local(rgd); be32_add_cpu(&rgd->rd_rgl->rl_unlinked, -1); gfs2_statfs_change(sdp, 0, +1, -1); @@ -2583,6 +2603,10 @@ void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip) * @no_addr: The block number to check * @type: The block type we are looking for * + * The inode glock of @no_addr must be held. The @type to check for is either + * GFS2_BLKST_DINODE or GFS2_BLKST_UNLINKED; checking for type GFS2_BLKST_FREE + * or GFS2_BLKST_USED would make no sense. + * * Returns: 0 if the block type matches the expected type * -ESTALE if it doesn't match * or -ve errno if something went wrong while checking @@ -2606,6 +2630,13 @@ int gfs2_check_blk_type(struct gfs2_sbd *sdp, u64 no_addr, unsigned int type) rbm.rgd = rgd; error = gfs2_rbm_from_block(&rbm, no_addr); if (!WARN_ON_ONCE(error)) { + /* + * No need to take the local resource group lock here; the + * inode glock of @no_addr provides the necessary + * synchronization in case the block is an inode. (In case + * the block is not an inode, the block type will not match + * the @type we are looking for.) 
+ */ if (gfs2_testbit(&rbm, false) != type) error = -ESTALE; } @@ -2730,3 +2761,14 @@ void gfs2_rlist_free(struct gfs2_rgrp_list *rlist) } } +void rgrp_lock_local(struct gfs2_rgrpd *rgd) +{ + BUG_ON(!gfs2_glock_is_held_excl(rgd->rd_gl) && + !test_bit(SDF_NORECOVERY, &rgd->rd_sbd->sd_flags)); + mutex_lock(&rgd->rd_mutex); +} + +void rgrp_unlock_local(struct gfs2_rgrpd *rgd) +{ + mutex_unlock(&rgd->rd_mutex); +} diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h index be1b2034f5ee..a6855fd796e0 100644 --- a/fs/gfs2/rgrp.h +++ b/fs/gfs2/rgrp.h @@ -88,4 +88,8 @@ static inline int rgrp_contains_block(struct gfs2_rgrpd *rgd, u64 block) } extern void check_and_update_goal(struct gfs2_inode *ip); + +extern void rgrp_lock_local(struct gfs2_rgrpd *rgd); +extern void rgrp_unlock_local(struct gfs2_rgrpd *rgd); + #endif /* __RGRP_DOT_H__ */ -- cgit v1.2.3-70-g09d2 From 06e908cd9ead1f215cc30132aac5ce132a352fbe Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Wed, 18 Apr 2018 13:58:19 -0700 Subject: gfs2: Allow node-wide exclusive glock sharing Introduce a new LM_FLAG_NODE_SCOPE glock holder flag: when taking a glock in LM_ST_EXCLUSIVE (EX) mode and with the LM_FLAG_NODE_SCOPE flag set, the exclusive lock is shared among all local processes who are holding the glock in EX mode and have the LM_FLAG_NODE_SCOPE flag set. From the point of view of other nodes, the lock is still held exclusively. A future patch will start using this flag to improve performance with rgrp sharing. Signed-off-by: Bob Peterson Signed-off-by: Andreas Gruenbacher --- fs/gfs2/glock.c | 22 +++++++++++++++++++--- fs/gfs2/glock.h | 6 ++++++ 2 files changed, 25 insertions(+), 3 deletions(-) (limited to 'fs/gfs2') diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index d87a5bc3607b..9567520d79f7 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -313,9 +313,23 @@ void gfs2_glock_put(struct gfs2_glock *gl) static inline int may_grant(const struct gfs2_glock *gl, const struct gfs2_holder *gh) { const struct gfs2_holder *gh_head = list_first_entry(&gl->gl_holders, const struct gfs2_holder, gh_list); - if ((gh->gh_state == LM_ST_EXCLUSIVE || - gh_head->gh_state == LM_ST_EXCLUSIVE) && gh != gh_head) - return 0; + + if (gh != gh_head) { + /** + * Here we make a special exception to grant holders who agree + * to share the EX lock with other holders who also have the + * bit set. If the original holder has the LM_FLAG_NODE_SCOPE bit + * set, we grant more holders with the bit set. + */ + if (gh_head->gh_state == LM_ST_EXCLUSIVE && + (gh_head->gh_flags & LM_FLAG_NODE_SCOPE) && + gh->gh_state == LM_ST_EXCLUSIVE && + (gh->gh_flags & LM_FLAG_NODE_SCOPE)) + return 1; + if ((gh->gh_state == LM_ST_EXCLUSIVE || + gh_head->gh_state == LM_ST_EXCLUSIVE)) + return 0; + } if (gl->gl_state == gh->gh_state) return 1; if (gh->gh_flags & GL_EXACT) @@ -2030,6 +2044,8 @@ static const char *hflags2str(char *buf, u16 flags, unsigned long iflags) *p++ = 'A'; if (flags & LM_FLAG_PRIORITY) *p++ = 'p'; + if (flags & LM_FLAG_NODE_SCOPE) + *p++ = 'n'; if (flags & GL_ASYNC) *p++ = 'a'; if (flags & GL_EXACT) diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h index 53813364517b..31a8f2f649b5 100644 --- a/fs/gfs2/glock.h +++ b/fs/gfs2/glock.h @@ -75,6 +75,11 @@ enum { * request and directly join the other shared lock. A shared lock request * without the priority flag might be forced to wait until the deferred * requested had acquired and released the lock. + * + * LM_FLAG_NODE_SCOPE + * This holder agrees to share the lock within this node.
In other words, + * the glock is held in EX mode according to DLM, but local holders on the + * same node can share it. */ #define LM_FLAG_TRY 0x0001 @@ -82,6 +87,7 @@ enum { #define LM_FLAG_NOEXP 0x0004 #define LM_FLAG_ANY 0x0008 #define LM_FLAG_PRIORITY 0x0010 +#define LM_FLAG_NODE_SCOPE 0x0020 #define GL_ASYNC 0x0040 #define GL_EXACT 0x0080 #define GL_SKIP 0x0100 -- cgit v1.2.3-70-g09d2 From 4fc7ec31c3c44031e8a8872bb8432cf4f6cb6ddd Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Tue, 24 Apr 2018 10:35:02 -0700 Subject: gfs2: Use resource group glock sharing This patch takes advantage of the new glock holder sharing feature for resource groups. We have already introduced local resource group locking in a previous patch, so competing accesses of local processes are already under control. Signed-off-by: Bob Peterson Signed-off-by: Andreas Gruenbacher --- fs/gfs2/bmap.c | 2 +- fs/gfs2/inode.c | 6 +++--- fs/gfs2/rgrp.c | 10 +++++----- fs/gfs2/super.c | 3 ++- fs/gfs2/xattr.c | 6 ++++-- 5 files changed, 15 insertions(+), 12 deletions(-) (limited to 'fs/gfs2') diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index e2ee3703f7df..cf6ccdd00587 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -1538,7 +1538,7 @@ more_rgrps: goto out; } ret = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, - 0, rd_gh); + LM_FLAG_NODE_SCOPE, rd_gh); if (ret) goto out; diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index c1b77e8d6b1c..4d3d8624a95b 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -1145,7 +1145,7 @@ static int gfs2_unlink(struct inode *dir, struct dentry *dentry) if (!rgd) goto out_inodes; - gfs2_holder_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + 2); + gfs2_holder_init(rgd->rd_gl, LM_ST_EXCLUSIVE, LM_FLAG_NODE_SCOPE, ghs + 2); error = gfs2_glock_nq(ghs); /* parent */ @@ -1450,8 +1450,8 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, error = -ENOENT; goto out_gunlock; } - error = gfs2_glock_nq_init(nrgd->rd_gl, LM_ST_EXCLUSIVE, 0, - &rd_gh); + error = gfs2_glock_nq_init(nrgd->rd_gl, LM_ST_EXCLUSIVE, + LM_FLAG_NODE_SCOPE, &rd_gh); if (error) goto out_gunlock; } diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index f45b787307d0..89c37a845e64 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -1442,7 +1442,8 @@ int gfs2_fitrim(struct file *filp, void __user *argp) while (1) { - ret = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, &gh); + ret = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, + LM_FLAG_NODE_SCOPE, &gh); if (ret) goto out; @@ -2055,7 +2056,7 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap) struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); struct gfs2_rgrpd *begin = NULL; struct gfs2_blkreserv *rs = &ip->i_res; - int error = 0, flags = 0; + int error = 0, flags = LM_FLAG_NODE_SCOPE; bool rg_locked; u64 last_unlinked = NO_BLOCK; u32 target = ap->target; @@ -2736,9 +2737,8 @@ void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist) sizeof(struct gfs2_holder), GFP_NOFS | __GFP_NOFAIL); for (x = 0; x < rlist->rl_rgrps; x++) - gfs2_holder_init(rlist->rl_rgd[x]->rd_gl, - LM_ST_EXCLUSIVE, 0, - &rlist->rl_ghs[x]); + gfs2_holder_init(rlist->rl_rgd[x]->rd_gl, LM_ST_EXCLUSIVE, + LM_FLAG_NODE_SCOPE, &rlist->rl_ghs[x]); } /** diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 2f56acc41c04..e138a2d15c77 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -1229,7 +1229,8 @@ static int gfs2_dinode_dealloc(struct gfs2_inode *ip) goto out_qs; } - error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, &gh); + error = gfs2_glock_nq_init(rgd->rd_gl, 
LM_ST_EXCLUSIVE, + LM_FLAG_NODE_SCOPE, &gh); if (error) goto out_qs; diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c index 9d7667bc4292..d332bf31f0ae 100644 --- a/fs/gfs2/xattr.c +++ b/fs/gfs2/xattr.c @@ -259,7 +259,8 @@ static int ea_dealloc_unstuffed(struct gfs2_inode *ip, struct buffer_head *bh, return -EIO; } - error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, &rg_gh); + error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, + LM_FLAG_NODE_SCOPE, &rg_gh); if (error) return error; @@ -1385,7 +1386,8 @@ static int ea_dealloc_block(struct gfs2_inode *ip) return -EIO; } - error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, &gh); + error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, + LM_FLAG_NODE_SCOPE, &gh); if (error) return error; -- cgit v1.2.3-70-g09d2 From 71b219f4e50b12efffbc8107408e17904f9c47e6 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Fri, 29 Jan 2021 19:56:37 +0100 Subject: gfs2: Minor calc_reserved cleanup No functional change. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/log.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) (limited to 'fs/gfs2') diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index 6fd4ded1e357..0c28ddd0e410 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -546,17 +546,14 @@ static inline unsigned int log_distance(struct gfs2_sbd *sdp, unsigned int newer static unsigned int calc_reserved(struct gfs2_sbd *sdp) { unsigned int reserved = 0; - unsigned int mbuf; - unsigned int dbuf; + unsigned int blocks; struct gfs2_trans *tr = sdp->sd_log_tr; if (tr) { - mbuf = tr->tr_num_buf_new - tr->tr_num_buf_rm; - dbuf = tr->tr_num_databuf_new - tr->tr_num_databuf_rm; - reserved = mbuf + dbuf; - /* Account for header blocks */ - reserved += DIV_ROUND_UP(mbuf, buf_limit(sdp)); - reserved += DIV_ROUND_UP(dbuf, databuf_limit(sdp)); + blocks = tr->tr_num_buf_new - tr->tr_num_buf_rm; + reserved += blocks + DIV_ROUND_UP(blocks, buf_limit(sdp)); + blocks = tr->tr_num_databuf_new - tr->tr_num_databuf_rm; + reserved += blocks + DIV_ROUND_UP(blocks, databuf_limit(sdp)); } if (sdp->sd_log_committed_revoke > 0) -- cgit v1.2.3-70-g09d2 From fe3e397668775e20ad0962459733158838b926af Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 10 Dec 2020 12:49:56 +0100 Subject: gfs2: Rework the log space allocation logic The current log space allocation logic is hard to understand or extend. The principle is that when the log is flushed, we may or may not have a transaction active that has space allocated in the log. To deal with that, we set aside a magical number of blocks to be used in case we don't have an active transaction. It isn't clear that the pool will always be big enough. In addition, we can't return unused log space at the end of a transaction, so the number of blocks allocated must exactly match the number of blocks used. Simplify this as follows: * When transactions are allocated or merged, always reserve enough blocks to flush the transaction (err on the safe side). * In gfs2_log_flush, return any allocated blocks that haven't been used. * Maintain a pool of spare blocks big enough to do one log flush, as before. * In gfs2_log_flush, when we have no active transaction, allocate a suitable number of blocks. For that, use the spare pool when called from logd, and leave the pool alone otherwise. This means that when the log is almost full, logd will still be able to do one more log flush, which will result in more log space becoming available. This will make the log space allocator code easier to work with in the future.
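
[Editor's note: the following standalone C11 sketch is an illustration of the try-then-block reservation pattern described above, not part of the patch series. A lock-free fast path refuses to dip below a spare pool, and a blocking fallback waits for space. The names log_try_reserve, log_reserve, SPARE_POOL and the sched_yield()-based wait are stand-ins for sd_log_blks_free, GFS2_LOG_FLUSH_MIN_BLOCKS and the logd wait queue; they do not reflect the kernel implementation.]

/* Simplified model of the reservation scheme: free_blocks stands in for
 * sd_log_blks_free, SPARE_POOL for the blocks kept back so that one more
 * log flush is always possible.  Build with -std=c11. */
#include <stdatomic.h>
#include <stdbool.h>
#include <sched.h>
#include <stdio.h>

#define SPARE_POOL 4

static atomic_uint free_blocks;

/* Fail immediately if granting blks would eat into the last taboo blocks. */
static bool log_try_reserve(unsigned int blks, unsigned int taboo)
{
	unsigned int free = atomic_load(&free_blocks);

	while (free >= blks + taboo) {
		if (atomic_compare_exchange_weak(&free_blocks, &free,
						 free - blks))
			return true;
	}
	return false;
}

/* Blocking variant: the kernel wakes up logd and sleeps on a wait queue;
 * here we simply yield until log_release() has returned enough space. */
static void log_reserve(unsigned int blks, unsigned int taboo)
{
	while (!log_try_reserve(blks, taboo))
		sched_yield();
}

/* Return blocks that were reserved but not used by the flush. */
static void log_release(unsigned int blks)
{
	atomic_fetch_add(&free_blocks, blks);
}

int main(void)
{
	atomic_init(&free_blocks, 32);	/* pretend journal size */

	log_reserve(10, SPARE_POOL);	/* an ordinary transaction */
	log_release(3);			/* give back what the flush didn't use */

	/* logd passes taboo = 0, so it may use the spare pool for one flush: */
	if (log_try_reserve(SPARE_POOL, 0))
		printf("logd flush reserved from the spare pool\n");
	return 0;
}
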
Signed-off-by: Andreas Gruenbacher --- fs/gfs2/log.c | 162 +++++++++++++++++++++++++++++++++----------------------- fs/gfs2/log.h | 7 +++ fs/gfs2/trans.c | 4 +- 3 files changed, 104 insertions(+), 69 deletions(-) (limited to 'fs/gfs2') diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index 0c28ddd0e410..913150e60f52 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -447,15 +447,42 @@ void gfs2_log_release(struct gfs2_sbd *sdp, unsigned int blks) } /** - * gfs2_log_reserve - Make a log reservation + * __gfs2_log_try_reserve - Try to make a log reservation + * @sdp: The GFS2 superblock + * @blks: The number of blocks to reserve + * @taboo_blks: The number of blocks to leave free + * + * Try to do the same as __gfs2_log_reserve(), but fail if no more log + * space is immediately available. + */ +static bool __gfs2_log_try_reserve(struct gfs2_sbd *sdp, unsigned int blks, + unsigned int taboo_blks) +{ + unsigned wanted = blks + taboo_blks; + unsigned int free_blocks; + + free_blocks = atomic_read(&sdp->sd_log_blks_free); + while (free_blocks >= wanted) { + if (atomic_try_cmpxchg(&sdp->sd_log_blks_free, &free_blocks, + free_blocks - blks)) { + trace_gfs2_log_blocks(sdp, -blks); + return true; + } + } + return false; +} + +/** + * __gfs2_log_reserve - Make a log reservation * @sdp: The GFS2 superblock * @blks: The number of blocks to reserve + * @taboo_blks: The number of blocks to leave free * - * Note that we never give out the last few blocks of the journal. Thats - * due to the fact that there is a small number of header blocks - * associated with each log flush. The exact number can't be known until - * flush time, so we ensure that we have just enough free blocks at all - * times to avoid running out during a log flush. + * @taboo_blks is set to 0 for logd, and to GFS2_LOG_FLUSH_MIN_BLOCKS + * for all other processes. This ensures that when the log is almost full, + * logd will still be able to call gfs2_log_flush one more time without + * blocking, which will advance the tail and make some more log space + * available. * * We no longer flush the log here, instead we wake up logd to do that * for us. To avoid the thundering herd and to ensure that we deal fairly @@ -464,19 +491,12 @@ void gfs2_log_release(struct gfs2_sbd *sdp, unsigned int blks) * wake the next waiter on the list. 
*/ -void gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks) +static void __gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks, + unsigned int taboo_blks) { - unsigned reserved_blks = 7 * (4096 / sdp->sd_vfs->s_blocksize); - unsigned wanted = blks + reserved_blks; + unsigned wanted = blks + taboo_blks; unsigned int free_blocks; - free_blocks = atomic_read(&sdp->sd_log_blks_free); - while (free_blocks >= wanted) { - if (atomic_try_cmpxchg(&sdp->sd_log_blks_free, &free_blocks, - free_blocks - blks)) - return; - } - atomic_add(blks, &sdp->sd_log_blks_needed); for (;;) { if (current != sdp->sd_logd_process) @@ -498,6 +518,19 @@ reserved: wake_up(&sdp->sd_log_waitq); } +/** + * gfs2_log_reserve - Make a log reservation + * @sdp: The GFS2 superblock + * @blks: The number of blocks to reserve + */ + +void gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks) +{ + if (__gfs2_log_try_reserve(sdp, blks, GFS2_LOG_FLUSH_MIN_BLOCKS)) + return; + __gfs2_log_reserve(sdp, blks, GFS2_LOG_FLUSH_MIN_BLOCKS); +} + /** * log_distance - Compute distance between two journal blocks * @sdp: The GFS2 superblock @@ -545,7 +578,7 @@ static inline unsigned int log_distance(struct gfs2_sbd *sdp, unsigned int newer */ static unsigned int calc_reserved(struct gfs2_sbd *sdp) { - unsigned int reserved = 0; + unsigned int reserved = GFS2_LOG_FLUSH_MIN_BLOCKS; unsigned int blocks; struct gfs2_trans *tr = sdp->sd_log_tr; @@ -557,10 +590,7 @@ static unsigned int calc_reserved(struct gfs2_sbd *sdp) } if (sdp->sd_log_committed_revoke > 0) - reserved += gfs2_struct2blk(sdp, sdp->sd_log_committed_revoke); - /* One for the overall header */ - if (reserved) - reserved++; + reserved += gfs2_struct2blk(sdp, sdp->sd_log_committed_revoke) - 1; return reserved; } @@ -708,29 +738,8 @@ void gfs2_flush_revokes(struct gfs2_sbd *sdp) max_revokes += roundup(sdp->sd_log_num_revoke - sdp->sd_ldptrs, sdp->sd_inptrs); max_revokes -= sdp->sd_log_num_revoke; - if (!sdp->sd_log_num_revoke) { - atomic_dec(&sdp->sd_log_blks_free); - /* If no blocks have been reserved, we need to also - * reserve a block for the header */ - if (!sdp->sd_log_blks_reserved) { - atomic_dec(&sdp->sd_log_blks_free); - trace_gfs2_log_blocks(sdp, -2); - } else { - trace_gfs2_log_blocks(sdp, -1); - } - } gfs2_ail1_empty(sdp, max_revokes); gfs2_log_unlock(sdp); - - if (!sdp->sd_log_num_revoke) { - atomic_inc(&sdp->sd_log_blks_free); - if (!sdp->sd_log_blks_reserved) { - atomic_inc(&sdp->sd_log_blks_free); - trace_gfs2_log_blocks(sdp, 2); - } else { - trace_gfs2_log_blocks(sdp, 1); - } - } } /** @@ -843,6 +852,7 @@ static void log_write_header(struct gfs2_sbd *sdp, u32 flags) gfs2_log_incr_head(sdp); log_flush_wait(sdp); log_pull_tail(sdp); + gfs2_log_update_head(sdp); } /** @@ -942,10 +952,14 @@ static void trans_drain(struct gfs2_trans *tr) void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, u32 flags) { struct gfs2_trans *tr = NULL; + unsigned int reserved_blocks = 0, used_blocks = 0; enum gfs2_freeze_state state = atomic_read(&sdp->sd_freeze_state); + unsigned int first_log_head; down_write(&sdp->sd_log_flush_lock); + trace_gfs2_log_flush(sdp, 1, flags); +repeat: /* * Do this check while holding the log_flush_lock to prevent new * buffers from being added to the ail via gfs2_pin() @@ -956,22 +970,41 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, u32 flags) /* Log might have been flushed while we waited for the flush lock */ if (gl && !test_bit(GLF_LFLUSH, &gl->gl_flags)) goto out; - trace_gfs2_log_flush(sdp, 1, flags); - if 
(flags & GFS2_LOG_HEAD_FLUSH_SHUTDOWN) - clear_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags); + first_log_head = sdp->sd_log_head; + sdp->sd_log_flush_head = first_log_head; - sdp->sd_log_flush_head = sdp->sd_log_head; tr = sdp->sd_log_tr; - if (tr) { - sdp->sd_log_tr = NULL; - tr->tr_first = sdp->sd_log_flush_head; - if (unlikely (state == SFS_FROZEN)) - if (gfs2_assert_withdraw_delayed(sdp, - !tr->tr_num_buf_new && !tr->tr_num_databuf_new)) - goto out_withdraw; + if (tr || sdp->sd_log_num_revoke) { + if (reserved_blocks) + gfs2_log_release(sdp, reserved_blocks); + reserved_blocks = sdp->sd_log_blks_reserved; + if (tr) { + sdp->sd_log_tr = NULL; + tr->tr_first = first_log_head; + if (unlikely (state == SFS_FROZEN)) + if (gfs2_assert_withdraw_delayed(sdp, + !tr->tr_num_buf_new && !tr->tr_num_databuf_new)) + goto out_withdraw; + } + } else if (!reserved_blocks) { + unsigned int taboo_blocks = GFS2_LOG_FLUSH_MIN_BLOCKS; + + reserved_blocks = GFS2_LOG_FLUSH_MIN_BLOCKS; + if (current == sdp->sd_logd_process) + taboo_blocks = 0; + + if (!__gfs2_log_try_reserve(sdp, reserved_blocks, taboo_blocks)) { + up_write(&sdp->sd_log_flush_lock); + __gfs2_log_reserve(sdp, reserved_blocks, taboo_blocks); + down_write(&sdp->sd_log_flush_lock); + goto repeat; + } } + if (flags & GFS2_LOG_HEAD_FLUSH_SHUTDOWN) + clear_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags); + if (unlikely(state == SFS_FROZEN)) if (gfs2_assert_withdraw_delayed(sdp, !sdp->sd_log_num_revoke)) goto out_withdraw; @@ -993,8 +1026,6 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, u32 flags) log_flush_wait(sdp); log_write_header(sdp, flags); } else if (sdp->sd_log_tail != sdp->sd_log_flush_tail && !sdp->sd_log_idle) { - atomic_dec(&sdp->sd_log_blks_free); /* Adjust for unreserved buffer */ - trace_gfs2_log_blocks(sdp, -1); log_write_header(sdp, flags); } if (gfs2_withdrawn(sdp)) @@ -1002,7 +1033,6 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, u32 flags) lops_after_commit(sdp, tr); gfs2_log_lock(sdp); - gfs2_log_update_head(sdp); sdp->sd_log_blks_reserved = 0; sdp->sd_log_committed_revoke = 0; @@ -1019,10 +1049,7 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, u32 flags) empty_ail1_list(sdp); if (gfs2_withdrawn(sdp)) goto out_withdraw; - atomic_dec(&sdp->sd_log_blks_free); /* Adjust for unreserved buffer */ - trace_gfs2_log_blocks(sdp, -1); log_write_header(sdp, flags); - gfs2_log_update_head(sdp); } if (flags & (GFS2_LOG_HEAD_FLUSH_SHUTDOWN | GFS2_LOG_HEAD_FLUSH_FREEZE)) @@ -1032,12 +1059,17 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, u32 flags) } out_end: - trace_gfs2_log_flush(sdp, 0, flags); + used_blocks = log_distance(sdp, sdp->sd_log_flush_head, first_log_head); + if (gfs2_assert_withdraw_delayed(sdp, used_blocks <= reserved_blocks)) + goto out; out: + if (used_blocks != reserved_blocks) + gfs2_log_release(sdp, reserved_blocks - used_blocks); up_write(&sdp->sd_log_flush_lock); gfs2_trans_free(sdp, tr); if (gfs2_withdrawing(sdp)) gfs2_withdraw(sdp); + trace_gfs2_log_flush(sdp, 0, flags); return; out_withdraw: @@ -1150,15 +1182,11 @@ static void gfs2_log_shutdown(struct gfs2_sbd *sdp) gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke); gfs2_assert_withdraw(sdp, list_empty(&sdp->sd_ail1_list)); - sdp->sd_log_flush_head = sdp->sd_log_head; - log_write_header(sdp, GFS2_LOG_HEAD_UNMOUNT | GFS2_LFC_SHUTDOWN); + log_pull_tail(sdp); gfs2_assert_warn(sdp, sdp->sd_log_head == sdp->sd_log_tail); gfs2_assert_warn(sdp, list_empty(&sdp->sd_ail2_list)); - - gfs2_log_update_head(sdp); - 
sdp->sd_log_tail = sdp->sd_log_head; } static inline int gfs2_jrnl_flush_reqd(struct gfs2_sbd *sdp) @@ -1213,7 +1241,7 @@ int gfs2_logd(void *data) if (gfs2_jrnl_flush_reqd(sdp) || t == 0) { gfs2_ail1_empty(sdp, 0); gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_NORMAL | - GFS2_LFC_LOGD_JFLUSH_REQD); + GFS2_LFC_LOGD_JFLUSH_REQD); } if (gfs2_ail_flush_reqd(sdp)) { @@ -1221,7 +1249,7 @@ int gfs2_logd(void *data) gfs2_ail1_wait(sdp); gfs2_ail1_empty(sdp, 0); gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_NORMAL | - GFS2_LFC_LOGD_AIL_FLUSH_REQD); + GFS2_LFC_LOGD_AIL_FLUSH_REQD); } t = gfs2_tune_get(sdp, gt_logd_secs) * HZ; diff --git a/fs/gfs2/log.h b/fs/gfs2/log.h index b36a3539f352..e7f4a8d6be64 100644 --- a/fs/gfs2/log.h +++ b/fs/gfs2/log.h @@ -13,6 +13,13 @@ #include "incore.h" #include "inode.h" +/* + * The minimum amount of log space required for a log flush is one block for + * revokes and one block for the log header. Log flushes other than + * GFS2_LOG_HEAD_FLUSH_NORMAL may write one or two more log headers. + */ +#define GFS2_LOG_FLUSH_MIN_BLOCKS 4 + /** * gfs2_log_lock - acquire the right to mess with the log manager * @sdp: the filesystem diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c index f73d6b8f3b53..231ca1a41a73 100644 --- a/fs/gfs2/trans.c +++ b/fs/gfs2/trans.c @@ -53,7 +53,7 @@ int __gfs2_trans_begin(struct gfs2_trans *tr, struct gfs2_sbd *sdp, tr->tr_ip = ip; tr->tr_blocks = blocks; tr->tr_revokes = revokes; - tr->tr_reserved = 1; + tr->tr_reserved = GFS2_LOG_FLUSH_MIN_BLOCKS; if (blocks) { /* * The reserved blocks are either used for data or metadata. @@ -63,7 +63,7 @@ int __gfs2_trans_begin(struct gfs2_trans *tr, struct gfs2_sbd *sdp, tr->tr_reserved += blocks + 1 + DIV_ROUND_UP(blocks - 1, databuf_limit(sdp)); } if (revokes) - tr->tr_reserved += gfs2_struct2blk(sdp, revokes); + tr->tr_reserved += gfs2_struct2blk(sdp, revokes) - 1; INIT_LIST_HEAD(&tr->tr_databuf); INIT_LIST_HEAD(&tr->tr_buf); INIT_LIST_HEAD(&tr->tr_list); -- cgit v1.2.3-70-g09d2 From 2129b4288852cf872c42870c7f6e813ce0611199 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 17 Dec 2020 16:14:30 +0100 Subject: gfs2: Per-revoke accounting in transactions In the log, revokes are stored as a revoke descriptor (struct gfs2_log_descriptor), followed by zero or more additional revoke blocks (struct gfs2_meta_header). On filesystems with a blocksize of 4k, the revoke descriptor contains up to 503 revokes, and the metadata blocks contain up to 509 revokes each. We've so far been reserving space for revokes in transactions in block granularity, so a lot more space than necessary was being allocated and then released again. This patch switches to assigning revokes to transactions individually instead. Initially, space for the revoke descriptor is reserved and handed out to transactions. When more revokes than that are reserved, additional revoke blocks are added. When the log is flushed, the space for the additional revoke blocks is released, but we keep the space for the revoke descriptor block allocated. Transactions may still reserve more revokes than they will actually need in the end, but now we won't overshoot the target as much, and by only returning the space for excess revokes at log flush time, we further reduce the amount of contention between processes. 
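
[Editor's note: to make the arithmetic concrete, here is a worked example, not part of the patch, using the 4k-blocksize figures quoted above. The variables ldptrs, inptrs and available are stand-ins for sd_ldptrs, sd_inptrs and sd_log_revokes_available, and the slow path mirrors the DIV_ROUND_UP calculation that the new reservation code performs.]

/* Revoke accounting arithmetic for a 4k block size: 503 revokes fit in the
 * always-reserved descriptor block, 509 in each additional revoke block. */
#include <stdio.h>

#define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))

int main(void)
{
	const unsigned int ldptrs = 503;	/* revokes in the descriptor block */
	const unsigned int inptrs = 509;	/* revokes per additional block */
	unsigned int available = ldptrs;	/* pool, reset to ldptrs at flush time */
	unsigned int revokes = 700;		/* revokes a transaction reserves */

	if (revokes <= available) {
		/* Fast path: hand them out from the pool, no extra blocks. */
		available -= revokes;
		printf("served %u revokes from the pool, %u left\n",
		       revokes, available);
	} else {
		/* Slow path: reserve whole extra blocks and return the surplus
		 * revokes to the pool for other transactions to use. */
		unsigned int blks = DIV_ROUND_UP(revokes, inptrs);
		unsigned int extra = blks * inptrs - revokes;

		available += extra;
		printf("%u extra revoke block(s), %u surplus revokes returned "
		       "to the pool (now %u)\n", blks, extra, available);
	}
	return 0;
}
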
Signed-off-by: Andreas Gruenbacher --- fs/gfs2/glops.c | 8 +--- fs/gfs2/incore.h | 3 +- fs/gfs2/log.c | 114 ++++++++++++++++++++++++++++++++++++++++----------- fs/gfs2/log.h | 6 ++- fs/gfs2/lops.c | 1 - fs/gfs2/ops_fstype.c | 7 ++++ fs/gfs2/trans.c | 34 +++++++++++---- 7 files changed, 131 insertions(+), 42 deletions(-) (limited to 'fs/gfs2') diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index a067924341e3..8e32d569c8bf 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c @@ -136,19 +136,15 @@ void gfs2_ail_flush(struct gfs2_glock *gl, bool fsync) { struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; unsigned int revokes = atomic_read(&gl->gl_ail_count); - unsigned int max_revokes = (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_log_descriptor)) / sizeof(u64); int ret; if (!revokes) return; - while (revokes > max_revokes) - max_revokes += (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_meta_header)) / sizeof(u64); - - ret = gfs2_trans_begin(sdp, 0, max_revokes); + ret = gfs2_trans_begin(sdp, 0, revokes); if (ret) return; - __gfs2_ail_flush(gl, fsync, max_revokes); + __gfs2_ail_flush(gl, fsync, revokes); gfs2_trans_end(sdp); gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_NORMAL | GFS2_LFC_AIL_FLUSH); diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 51656b053170..30f67c3c87b0 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -506,7 +506,6 @@ struct gfs2_trans { unsigned int tr_num_buf_rm; unsigned int tr_num_databuf_rm; unsigned int tr_num_revoke; - unsigned int tr_num_revoke_rm; struct list_head tr_list; struct list_head tr_databuf; @@ -821,7 +820,6 @@ struct gfs2_sbd { struct gfs2_trans *sd_log_tr; unsigned int sd_log_blks_reserved; - int sd_log_committed_revoke; atomic_t sd_log_pinned; unsigned int sd_log_num_revoke; @@ -834,6 +832,7 @@ struct gfs2_sbd { atomic_t sd_log_thresh2; atomic_t sd_log_blks_free; atomic_t sd_log_blks_needed; + atomic_t sd_log_revokes_available; wait_queue_head_t sd_log_waitq; wait_queue_head_t sd_logd_waitq; diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index 913150e60f52..68e7afdd19a8 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -429,6 +429,32 @@ bool gfs2_log_is_empty(struct gfs2_sbd *sdp) { return atomic_read(&sdp->sd_log_blks_free) == sdp->sd_jdesc->jd_blocks; } +static bool __gfs2_log_try_reserve_revokes(struct gfs2_sbd *sdp, unsigned int revokes) +{ + unsigned int available; + + available = atomic_read(&sdp->sd_log_revokes_available); + while (available >= revokes) { + if (atomic_try_cmpxchg(&sdp->sd_log_revokes_available, + &available, available - revokes)) + return true; + } + return false; +} + +/** + * gfs2_log_release_revokes - Release a given number of revokes + * @sdp: The GFS2 superblock + * @revokes: The number of revokes to release + * + * sdp->sd_log_flush_lock must be held. + */ +void gfs2_log_release_revokes(struct gfs2_sbd *sdp, unsigned int revokes) +{ + if (revokes) + atomic_add(revokes, &sdp->sd_log_revokes_available); +} + /** * gfs2_log_release - Release a given number of log blocks * @sdp: The GFS2 superblock @@ -519,15 +545,59 @@ reserved: } /** - * gfs2_log_reserve - Make a log reservation + * gfs2_log_try_reserve - Try to make a log reservation * @sdp: The GFS2 superblock - * @blks: The number of blocks to reserve + * @tr: The transaction + * @extra_revokes: The number of additional revokes reserved (output) + * + * This is similar to gfs2_log_reserve, but sdp->sd_log_flush_lock must be + * held for correct revoke accounting. 
*/ -void gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks) +bool gfs2_log_try_reserve(struct gfs2_sbd *sdp, struct gfs2_trans *tr, + unsigned int *extra_revokes) { + unsigned int blks = tr->tr_reserved; + unsigned int revokes = tr->tr_revokes; + unsigned int revoke_blks = 0; + + *extra_revokes = 0; + if (revokes && !__gfs2_log_try_reserve_revokes(sdp, revokes)) { + revoke_blks = DIV_ROUND_UP(revokes, sdp->sd_inptrs); + *extra_revokes = revoke_blks * sdp->sd_inptrs - revokes; + blks += revoke_blks; + } + if (!blks) + return true; if (__gfs2_log_try_reserve(sdp, blks, GFS2_LOG_FLUSH_MIN_BLOCKS)) - return; + return true; + if (!revoke_blks) + gfs2_log_release_revokes(sdp, revokes); + return false; +} + +/** + * gfs2_log_reserve - Make a log reservation + * @sdp: The GFS2 superblock + * @tr: The transaction + * @extra_revokes: The number of additional revokes reserved (output) + * + * sdp->sd_log_flush_lock must not be held. + */ + +void gfs2_log_reserve(struct gfs2_sbd *sdp, struct gfs2_trans *tr, + unsigned int *extra_revokes) +{ + unsigned int blks = tr->tr_reserved; + unsigned int revokes = tr->tr_revokes; + unsigned int revoke_blks = 0; + + *extra_revokes = 0; + if (revokes) { + revoke_blks = DIV_ROUND_UP(revokes, sdp->sd_inptrs); + *extra_revokes = revoke_blks * sdp->sd_inptrs - revokes; + blks += revoke_blks; + } __gfs2_log_reserve(sdp, blks, GFS2_LOG_FLUSH_MIN_BLOCKS); } @@ -588,9 +658,6 @@ static unsigned int calc_reserved(struct gfs2_sbd *sdp) blocks = tr->tr_num_databuf_new - tr->tr_num_databuf_rm; reserved += blocks + DIV_ROUND_UP(blocks, databuf_limit(sdp)); } - - if (sdp->sd_log_committed_revoke > 0) - reserved += gfs2_struct2blk(sdp, sdp->sd_log_committed_revoke) - 1; return reserved; } @@ -730,14 +797,9 @@ void gfs2_glock_remove_revoke(struct gfs2_glock *gl) void gfs2_flush_revokes(struct gfs2_sbd *sdp) { /* number of revokes we still have room for */ - unsigned int max_revokes; + unsigned int max_revokes = atomic_read(&sdp->sd_log_revokes_available); gfs2_log_lock(sdp); - max_revokes = sdp->sd_ldptrs; - if (sdp->sd_log_num_revoke > sdp->sd_ldptrs) - max_revokes += roundup(sdp->sd_log_num_revoke - sdp->sd_ldptrs, - sdp->sd_inptrs); - max_revokes -= sdp->sd_log_num_revoke; gfs2_ail1_empty(sdp, max_revokes); gfs2_log_unlock(sdp); } @@ -955,6 +1017,7 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, u32 flags) unsigned int reserved_blocks = 0, used_blocks = 0; enum gfs2_freeze_state state = atomic_read(&sdp->sd_freeze_state); unsigned int first_log_head; + unsigned int reserved_revokes = 0; down_write(&sdp->sd_log_flush_lock); trace_gfs2_log_flush(sdp, 1, flags); @@ -979,13 +1042,15 @@ repeat: if (reserved_blocks) gfs2_log_release(sdp, reserved_blocks); reserved_blocks = sdp->sd_log_blks_reserved; + reserved_revokes = sdp->sd_log_num_revoke; if (tr) { sdp->sd_log_tr = NULL; tr->tr_first = first_log_head; - if (unlikely (state == SFS_FROZEN)) + if (unlikely (state == SFS_FROZEN)) { if (gfs2_assert_withdraw_delayed(sdp, !tr->tr_num_buf_new && !tr->tr_num_databuf_new)) goto out_withdraw; + } } } else if (!reserved_blocks) { unsigned int taboo_blocks = GFS2_LOG_FLUSH_MIN_BLOCKS; @@ -1000,17 +1065,15 @@ repeat: down_write(&sdp->sd_log_flush_lock); goto repeat; } + BUG_ON(sdp->sd_log_num_revoke); } if (flags & GFS2_LOG_HEAD_FLUSH_SHUTDOWN) clear_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags); if (unlikely(state == SFS_FROZEN)) - if (gfs2_assert_withdraw_delayed(sdp, !sdp->sd_log_num_revoke)) + if (gfs2_assert_withdraw_delayed(sdp, !reserved_revokes)) goto out_withdraw; 
- if (gfs2_assert_withdraw_delayed(sdp, - sdp->sd_log_num_revoke == sdp->sd_log_committed_revoke)) - goto out_withdraw; gfs2_ordered_write(sdp); if (gfs2_withdrawn(sdp)) @@ -1034,7 +1097,6 @@ repeat: gfs2_log_lock(sdp); sdp->sd_log_blks_reserved = 0; - sdp->sd_log_committed_revoke = 0; spin_lock(&sdp->sd_ail_lock); if (tr && !list_empty(&tr->tr_ail1_list)) { @@ -1060,11 +1122,16 @@ repeat: out_end: used_blocks = log_distance(sdp, sdp->sd_log_flush_head, first_log_head); - if (gfs2_assert_withdraw_delayed(sdp, used_blocks <= reserved_blocks)) - goto out; + reserved_revokes += atomic_read(&sdp->sd_log_revokes_available); + atomic_set(&sdp->sd_log_revokes_available, sdp->sd_ldptrs); + gfs2_assert_withdraw(sdp, reserved_revokes % sdp->sd_inptrs == sdp->sd_ldptrs); + if (reserved_revokes > sdp->sd_ldptrs) + reserved_blocks += (reserved_revokes - sdp->sd_ldptrs) / sdp->sd_inptrs; out: - if (used_blocks != reserved_blocks) + if (used_blocks != reserved_blocks) { + gfs2_assert_withdraw_delayed(sdp, used_blocks < reserved_blocks); gfs2_log_release(sdp, reserved_blocks - used_blocks); + } up_write(&sdp->sd_log_flush_lock); gfs2_trans_free(sdp, tr); if (gfs2_withdrawing(sdp)) @@ -1105,8 +1172,8 @@ static void gfs2_merge_trans(struct gfs2_sbd *sdp, struct gfs2_trans *new) old->tr_num_databuf_new += new->tr_num_databuf_new; old->tr_num_buf_rm += new->tr_num_buf_rm; old->tr_num_databuf_rm += new->tr_num_databuf_rm; + old->tr_revokes += new->tr_revokes; old->tr_num_revoke += new->tr_num_revoke; - old->tr_num_revoke_rm += new->tr_num_revoke_rm; list_splice_tail_init(&new->tr_databuf, &old->tr_databuf); list_splice_tail_init(&new->tr_buf, &old->tr_buf); @@ -1133,7 +1200,6 @@ static void log_refund(struct gfs2_sbd *sdp, struct gfs2_trans *tr) set_bit(TR_ATTACHED, &tr->tr_flags); } - sdp->sd_log_committed_revoke += tr->tr_num_revoke - tr->tr_num_revoke_rm; reserved = calc_reserved(sdp); maxres = sdp->sd_log_blks_reserved + tr->tr_reserved; gfs2_assert_withdraw(sdp, maxres >= reserved); diff --git a/fs/gfs2/log.h b/fs/gfs2/log.h index e7f4a8d6be64..eea58015710e 100644 --- a/fs/gfs2/log.h +++ b/fs/gfs2/log.h @@ -74,8 +74,12 @@ extern void gfs2_ordered_del_inode(struct gfs2_inode *ip); extern unsigned int gfs2_struct2blk(struct gfs2_sbd *sdp, unsigned int nstruct); extern void gfs2_remove_from_ail(struct gfs2_bufdata *bd); extern bool gfs2_log_is_empty(struct gfs2_sbd *sdp); +extern void gfs2_log_release_revokes(struct gfs2_sbd *sdp, unsigned int revokes); extern void gfs2_log_release(struct gfs2_sbd *sdp, unsigned int blks); -extern void gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks); +extern bool gfs2_log_try_reserve(struct gfs2_sbd *sdp, struct gfs2_trans *tr, + unsigned int *extra_revokes); +extern void gfs2_log_reserve(struct gfs2_sbd *sdp, struct gfs2_trans *tr, + unsigned int *extra_revokes); extern void gfs2_write_log_header(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd, u64 seq, u32 tail, u32 lblock, u32 flags, int op_flags); diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c index 8658ebbcb4a9..dfe8537cb88e 100644 --- a/fs/gfs2/lops.c +++ b/fs/gfs2/lops.c @@ -857,7 +857,6 @@ static void revoke_lo_before_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr) sdp->sd_log_num_revoke--; if (offset + sizeof(u64) > sdp->sd_sb.sb_bsize) { - gfs2_log_write_page(sdp, page); page = mempool_alloc(gfs2_page_pool, GFP_NOIO); mh = page_address(page); diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 986dc2ebebf0..316ca5fc99e8 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -315,6 
+315,13 @@ static int gfs2_read_sb(struct gfs2_sbd *sdp, int silent) sizeof(struct gfs2_meta_header)) * GFS2_NBBY; /* not the rgrp bitmap, subsequent bitmaps only */ + /* + * We always keep at least one block reserved for revokes in + * transactions. This greatly simplifies allocating additional + * revoke blocks. + */ + atomic_set(&sdp->sd_log_revokes_available, sdp->sd_ldptrs); + /* Compute maximum reservation required to add a entry to a directory */ hash_blocks = DIV_ROUND_UP(sizeof(u64) * BIT(GFS2_DIR_MAX_DEPTH), diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c index 231ca1a41a73..ab96cf0bf26b 100644 --- a/fs/gfs2/trans.c +++ b/fs/gfs2/trans.c @@ -31,16 +31,18 @@ static void gfs2_print_trans(struct gfs2_sbd *sdp, const struct gfs2_trans *tr) fs_warn(sdp, "blocks=%u revokes=%u reserved=%u touched=%u\n", tr->tr_blocks, tr->tr_revokes, tr->tr_reserved, test_bit(TR_TOUCHED, &tr->tr_flags)); - fs_warn(sdp, "Buf %u/%u Databuf %u/%u Revoke %u/%u\n", + fs_warn(sdp, "Buf %u/%u Databuf %u/%u Revoke %u\n", tr->tr_num_buf_new, tr->tr_num_buf_rm, tr->tr_num_databuf_new, tr->tr_num_databuf_rm, - tr->tr_num_revoke, tr->tr_num_revoke_rm); + tr->tr_num_revoke); } int __gfs2_trans_begin(struct gfs2_trans *tr, struct gfs2_sbd *sdp, unsigned int blocks, unsigned int revokes, unsigned long ip) { + unsigned int extra_revokes; + if (current->journal_info) { gfs2_print_trans(sdp, current->journal_info); BUG(); @@ -62,8 +64,6 @@ int __gfs2_trans_begin(struct gfs2_trans *tr, struct gfs2_sbd *sdp, */ tr->tr_reserved += blocks + 1 + DIV_ROUND_UP(blocks - 1, databuf_limit(sdp)); } - if (revokes) - tr->tr_reserved += gfs2_struct2blk(sdp, revokes) - 1; INIT_LIST_HEAD(&tr->tr_databuf); INIT_LIST_HEAD(&tr->tr_buf); INIT_LIST_HEAD(&tr->tr_list); @@ -75,10 +75,26 @@ int __gfs2_trans_begin(struct gfs2_trans *tr, struct gfs2_sbd *sdp, sb_start_intwrite(sdp->sd_vfs); - gfs2_log_reserve(sdp, tr->tr_reserved); + /* + * Try the reservations under sd_log_flush_lock to prevent log flushes + * from creating inconsistencies between the number of allocated and + * reserved revokes. If that fails, do a full-block allocation outside + * of the lock to avoid stalling log flushes. Then, allot the + * appropriate number of blocks to revokes, use as many revokes locally + * as needed, and "release" the surplus into the revokes pool. 
+ */ down_read(&sdp->sd_log_flush_lock); + if (gfs2_log_try_reserve(sdp, tr, &extra_revokes)) + goto reserved; + up_read(&sdp->sd_log_flush_lock); + gfs2_log_reserve(sdp, tr, &extra_revokes); + down_read(&sdp->sd_log_flush_lock); + +reserved: + gfs2_log_release_revokes(sdp, extra_revokes); if (unlikely(!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags))) { + gfs2_log_release_revokes(sdp, tr->tr_revokes); up_read(&sdp->sd_log_flush_lock); gfs2_log_release(sdp, tr->tr_reserved); sb_end_intwrite(sdp->sd_vfs); @@ -113,14 +129,17 @@ void gfs2_trans_end(struct gfs2_sbd *sdp) current->journal_info = NULL; if (!test_bit(TR_TOUCHED, &tr->tr_flags)) { - gfs2_log_release(sdp, tr->tr_reserved); + gfs2_log_release_revokes(sdp, tr->tr_revokes); up_read(&sdp->sd_log_flush_lock); + gfs2_log_release(sdp, tr->tr_reserved); if (!test_bit(TR_ONSTACK, &tr->tr_flags)) gfs2_trans_free(sdp, tr); sb_end_intwrite(sdp->sd_vfs); return; } + gfs2_log_release_revokes(sdp, tr->tr_revokes - tr->tr_num_revoke); + nbuf = tr->tr_num_buf_new + tr->tr_num_databuf_new; nbuf -= tr->tr_num_buf_rm; nbuf -= tr->tr_num_databuf_rm; @@ -278,7 +297,6 @@ void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd) void gfs2_trans_remove_revoke(struct gfs2_sbd *sdp, u64 blkno, unsigned int len) { struct gfs2_bufdata *bd, *tmp; - struct gfs2_trans *tr = current->journal_info; unsigned int n = len; gfs2_log_lock(sdp); @@ -290,7 +308,7 @@ void gfs2_trans_remove_revoke(struct gfs2_sbd *sdp, u64 blkno, unsigned int len) if (bd->bd_gl) gfs2_glock_remove_revoke(bd->bd_gl); kmem_cache_free(gfs2_bufdata_cachep, bd); - tr->tr_num_revoke_rm++; + gfs2_log_release_revokes(sdp, 1); if (--n == 0) break; } -- cgit v1.2.3-70-g09d2