From 0d0d110720d7960b77c03c9f2597faaff4b484ae Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Mon, 16 Sep 2013 14:52:00 +0200 Subject: GFS2: d_splice_alias() can't return error unless it was given an IS_ERR(inode), which isn't the case here. So clean up the unnecessary error handling in gfs2_create_inode(). This paves the way for real fixes (hence the stable Cc). Signed-off-by: Miklos Szeredi Signed-off-by: Steven Whitehouse Cc: stable@vger.kernel.org --- fs/gfs2/inode.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 64915eeae5a7..6d7f976aa328 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -584,7 +584,7 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, if (!IS_ERR(inode)) { d = d_splice_alias(inode, dentry); error = 0; - if (file && !IS_ERR(d)) { + if (file) { if (d == NULL) d = dentry; if (S_ISREG(inode->i_mode)) @@ -593,8 +593,6 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, error = finish_no_open(file, d); } gfs2_glock_dq_uninit(ghs); - if (IS_ERR(d)) - return PTR_ERR(d); return error; } else if (error != -ENOENT) { goto fail_gunlock; -- cgit v1.2.3-70-g09d2 From 6aa7640f306a1dd0923d29d9190c5686907d5140 Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Wed, 11 Sep 2013 13:44:01 -0500 Subject: GFS2: optimize rbm_from_block wrt bi_start In function gfs2_rbm_from_block, it starts by checking if the block falls within the first bitmap. It does so by checking if the rbm's offset is less than (rbm->bi->bi_start + rbm->bi->bi_len) * GFS2_NBBY. However, the first bitmap will always have bi_start==0. Therefore this is an unnecessary calculation in a function that gets called billions of times. This patch removes the reference to bi_start. 
Signed-off-by: Bob Peterson Signed-off-by: Steven Whitehouse --- fs/gfs2/rgrp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 69317435faa7..7a6fa03bb32a 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -262,7 +262,7 @@ static int gfs2_rbm_from_block(struct gfs2_rbm *rbm, u64 block) rbm->bi = rbm->rgd->rd_bits; rbm->offset = (u32)(rblock); /* Check if the block is within the first block */ - if (rbm->offset < (rbm->bi->bi_start + rbm->bi->bi_len) * GFS2_NBBY) + if (rbm->offset < rbm->bi->bi_len * GFS2_NBBY) return 0; /* Adjust for the size diff between gfs2_meta_header and gfs2_rgrp */ -- cgit v1.2.3-70-g09d2 From 7e230f5774336c5c180d8aeab0e2cb1326c73fa3 Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Wed, 11 Sep 2013 13:44:02 -0500 Subject: GFS2: introduce bi_blocks for optimization This patch introduces a new field in the bitmap structure called bi_blocks. Its purpose is to save us from constantly multiplying bi_len by the constant GFS2_NBBY. It also paves the way for more optimization in a future patch. 
Signed-off-by: Bob Peterson Signed-off-by: Steven Whitehouse --- fs/gfs2/incore.h | 1 + fs/gfs2/rgrp.c | 6 +++++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 26aabd7caba7..f1a3243dfaf2 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -71,6 +71,7 @@ struct gfs2_bitmap { u32 bi_offset; u32 bi_start; u32 bi_len; + u32 bi_blocks; }; struct gfs2_rgrpd { diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 7a6fa03bb32a..7d64a27683d9 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -262,7 +262,7 @@ static int gfs2_rbm_from_block(struct gfs2_rbm *rbm, u64 block) rbm->bi = rbm->rgd->rd_bits; rbm->offset = (u32)(rblock); /* Check if the block is within the first block */ - if (rbm->offset < rbm->bi->bi_len * GFS2_NBBY) + if (rbm->offset < rbm->bi->bi_blocks) return 0; /* Adjust for the size diff between gfs2_meta_header and gfs2_rgrp */ @@ -743,18 +743,21 @@ static int compute_bitstructs(struct gfs2_rgrpd *rgd) bi->bi_offset = sizeof(struct gfs2_rgrp); bi->bi_start = 0; bi->bi_len = bytes; + bi->bi_blocks = bytes * GFS2_NBBY; /* header block */ } else if (x == 0) { bytes = sdp->sd_sb.sb_bsize - sizeof(struct gfs2_rgrp); bi->bi_offset = sizeof(struct gfs2_rgrp); bi->bi_start = 0; bi->bi_len = bytes; + bi->bi_blocks = bytes * GFS2_NBBY; /* last block */ } else if (x + 1 == length) { bytes = bytes_left; bi->bi_offset = sizeof(struct gfs2_meta_header); bi->bi_start = rgd->rd_bitbytes - bytes_left; bi->bi_len = bytes; + bi->bi_blocks = bytes * GFS2_NBBY; /* other blocks */ } else { bytes = sdp->sd_sb.sb_bsize - @@ -762,6 +765,7 @@ static int compute_bitstructs(struct gfs2_rgrpd *rgd) bi->bi_offset = sizeof(struct gfs2_meta_header); bi->bi_start = rgd->rd_bitbytes - bytes_left; bi->bi_len = bytes; + bi->bi_blocks = bytes * GFS2_NBBY; } bytes_left -= bytes; -- cgit v1.2.3-70-g09d2 From b8708905199a85eebbd820f98d18e045c32077bf Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Wed, 11 Sep 2013 13:44:05 -0500 Subject: 
GFS2: Do not reset flags on active reservations When we used try locks for rgrps on block allocations, it was important to clear the flags field so that we used a blocking hold on the glock. Now that we're not doing try locks, clearing flags is unnecessary, and a waste of time. In fact, it's probably doing the wrong thing because it clears the GL_SKIP bit that was set for the lvb tracking purposes. Signed-off-by: Bob Peterson Signed-off-by: Steven Whitehouse --- fs/gfs2/rgrp.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 7d64a27683d9..8e5003820aa7 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -1825,7 +1825,6 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested, u32 aflags) return -EINVAL; if (gfs2_rs_active(rs)) { begin = rs->rs_rbm.rgd; - flags = 0; /* Yoda: Do or do not. There is no try */ } else if (ip->i_rgd && rgrp_contains_block(ip->i_rgd, ip->i_goal)) { rs->rs_rbm.rgd = begin = ip->i_rgd; } else { -- cgit v1.2.3-70-g09d2 From e579ed4f446e64748a2d26eed8f8b28f728495bd Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Tue, 17 Sep 2013 13:12:15 -0400 Subject: GFS2: Introduce rbm field bii This is a respin of the original patch. As Steve pointed out, the introduction of field bii makes it easy to eliminate bi itself. This revised patch does just that, replacing bi with bii. This patch adds a new field to the rbm structure, called bii, which is an index into the array of bitmaps for an rgrp. This replaces *bi which was a pointer to the bitmap. This is being done for further optimizations. 
Signed-off-by: Bob Peterson Signed-off-by: Steven Whitehouse --- fs/gfs2/incore.h | 12 +++++-- fs/gfs2/rgrp.c | 107 +++++++++++++++++++++++++++++-------------------------- 2 files changed, 65 insertions(+), 54 deletions(-) diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index f1a3243dfaf2..8c8f110d8e35 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -102,19 +102,25 @@ struct gfs2_rgrpd { struct gfs2_rbm { struct gfs2_rgrpd *rgd; - struct gfs2_bitmap *bi; /* Bitmap must belong to the rgd */ u32 offset; /* The offset is bitmap relative */ + int bii; /* Bitmap index */ }; +static inline struct gfs2_bitmap *rbm_bi(const struct gfs2_rbm *rbm) +{ + return rbm->rgd->rd_bits + rbm->bii; +} + static inline u64 gfs2_rbm_to_block(const struct gfs2_rbm *rbm) { - return rbm->rgd->rd_data0 + (rbm->bi->bi_start * GFS2_NBBY) + rbm->offset; + return rbm->rgd->rd_data0 + (rbm_bi(rbm)->bi_start * GFS2_NBBY) + + rbm->offset; } static inline bool gfs2_rbm_eq(const struct gfs2_rbm *rbm1, const struct gfs2_rbm *rbm2) { - return (rbm1->rgd == rbm2->rgd) && (rbm1->bi == rbm2->bi) && + return (rbm1->rgd == rbm2->rgd) && (rbm1->bii == rbm2->bii) && (rbm1->offset == rbm2->offset); } diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 8e5003820aa7..dd3c4d3d7f41 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -81,11 +81,12 @@ static inline void gfs2_setbit(const struct gfs2_rbm *rbm, bool do_clone, unsigned char new_state) { unsigned char *byte1, *byte2, *end, cur_state; - unsigned int buflen = rbm->bi->bi_len; + struct gfs2_bitmap *bi = rbm_bi(rbm); + unsigned int buflen = bi->bi_len; const unsigned int bit = (rbm->offset % GFS2_NBBY) * GFS2_BIT_SIZE; - byte1 = rbm->bi->bi_bh->b_data + rbm->bi->bi_offset + (rbm->offset / GFS2_NBBY); - end = rbm->bi->bi_bh->b_data + rbm->bi->bi_offset + buflen; + byte1 = bi->bi_bh->b_data + bi->bi_offset + (rbm->offset / GFS2_NBBY); + end = bi->bi_bh->b_data + bi->bi_offset + buflen; BUG_ON(byte1 >= end); @@ -95,18 +96,17 @@ static inline void 
gfs2_setbit(const struct gfs2_rbm *rbm, bool do_clone, printk(KERN_WARNING "GFS2: buf_blk = 0x%x old_state=%d, " "new_state=%d\n", rbm->offset, cur_state, new_state); printk(KERN_WARNING "GFS2: rgrp=0x%llx bi_start=0x%x\n", - (unsigned long long)rbm->rgd->rd_addr, - rbm->bi->bi_start); + (unsigned long long)rbm->rgd->rd_addr, bi->bi_start); printk(KERN_WARNING "GFS2: bi_offset=0x%x bi_len=0x%x\n", - rbm->bi->bi_offset, rbm->bi->bi_len); + bi->bi_offset, bi->bi_len); dump_stack(); gfs2_consist_rgrpd(rbm->rgd); return; } *byte1 ^= (cur_state ^ new_state) << bit; - if (do_clone && rbm->bi->bi_clone) { - byte2 = rbm->bi->bi_clone + rbm->bi->bi_offset + (rbm->offset / GFS2_NBBY); + if (do_clone && bi->bi_clone) { + byte2 = bi->bi_clone + bi->bi_offset + (rbm->offset / GFS2_NBBY); cur_state = (*byte2 >> bit) & GFS2_BIT_MASK; *byte2 ^= (cur_state ^ new_state) << bit; } @@ -121,7 +121,8 @@ static inline void gfs2_setbit(const struct gfs2_rbm *rbm, bool do_clone, static inline u8 gfs2_testbit(const struct gfs2_rbm *rbm) { - const u8 *buffer = rbm->bi->bi_bh->b_data + rbm->bi->bi_offset; + struct gfs2_bitmap *bi = rbm_bi(rbm); + const u8 *buffer = bi->bi_bh->b_data + bi->bi_offset; const u8 *byte; unsigned int bit; @@ -252,25 +253,23 @@ static u32 gfs2_bitfit(const u8 *buf, const unsigned int len, static int gfs2_rbm_from_block(struct gfs2_rbm *rbm, u64 block) { u64 rblock = block - rbm->rgd->rd_data0; - u32 x; if (WARN_ON_ONCE(rblock > UINT_MAX)) return -EINVAL; if (block >= rbm->rgd->rd_data0 + rbm->rgd->rd_data) return -E2BIG; - rbm->bi = rbm->rgd->rd_bits; + rbm->bii = 0; rbm->offset = (u32)(rblock); /* Check if the block is within the first block */ - if (rbm->offset < rbm->bi->bi_blocks) + if (rbm->offset < rbm_bi(rbm)->bi_blocks) return 0; /* Adjust for the size diff between gfs2_meta_header and gfs2_rgrp */ rbm->offset += (sizeof(struct gfs2_rgrp) - sizeof(struct gfs2_meta_header)) * GFS2_NBBY; - x = rbm->offset / rbm->rgd->rd_sbd->sd_blocks_per_bitmap; - rbm->offset 
-= x * rbm->rgd->rd_sbd->sd_blocks_per_bitmap; - rbm->bi += x; + rbm->bii = rbm->offset / rbm->rgd->rd_sbd->sd_blocks_per_bitmap; + rbm->offset -= rbm->bii * rbm->rgd->rd_sbd->sd_blocks_per_bitmap; return 0; } @@ -328,6 +327,7 @@ static u32 gfs2_free_extlen(const struct gfs2_rbm *rrbm, u32 len) u32 chunk_size; u8 *ptr, *start, *end; u64 block; + struct gfs2_bitmap *bi; if (n_unaligned && gfs2_unaligned_extlen(&rbm, 4 - n_unaligned, &len)) @@ -336,11 +336,12 @@ static u32 gfs2_free_extlen(const struct gfs2_rbm *rrbm, u32 len) n_unaligned = len & 3; /* Start is now byte aligned */ while (len > 3) { - start = rbm.bi->bi_bh->b_data; - if (rbm.bi->bi_clone) - start = rbm.bi->bi_clone; - end = start + rbm.bi->bi_bh->b_size; - start += rbm.bi->bi_offset; + bi = rbm_bi(&rbm); + start = bi->bi_bh->b_data; + if (bi->bi_clone) + start = bi->bi_clone; + end = start + bi->bi_bh->b_size; + start += bi->bi_offset; BUG_ON(rbm.offset & 3); start += (rbm.offset / GFS2_NBBY); bytes = min_t(u32, len / GFS2_NBBY, (end - start)); @@ -605,11 +606,13 @@ static void __rs_deltree(struct gfs2_blkreserv *rs) RB_CLEAR_NODE(&rs->rs_node); if (rs->rs_free) { + struct gfs2_bitmap *bi = rbm_bi(&rs->rs_rbm); + /* return reserved blocks to the rgrp */ BUG_ON(rs->rs_rbm.rgd->rd_reserved < rs->rs_free); rs->rs_rbm.rgd->rd_reserved -= rs->rs_free; rs->rs_free = 0; - clear_bit(GBF_FULL, &rs->rs_rbm.bi->bi_flags); + clear_bit(GBF_FULL, &bi->bi_flags); smp_mb__after_clear_bit(); } } @@ -1558,14 +1561,14 @@ static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 minext, const struct gfs2_inode *ip, bool nowrap) { struct buffer_head *bh; - struct gfs2_bitmap *initial_bi; + int initial_bii; u32 initial_offset; u32 offset; u8 *buffer; - int index; int n = 0; int iters = rbm->rgd->rd_length; int ret; + struct gfs2_bitmap *bi; /* If we are not starting at the beginning of a bitmap, then we * need to add one to the bitmap count to ensure that we search @@ -1575,52 +1578,53 @@ static int gfs2_rbm_find(struct 
gfs2_rbm *rbm, u8 state, u32 minext, iters++; while(1) { - if (test_bit(GBF_FULL, &rbm->bi->bi_flags) && + bi = rbm_bi(rbm); + if (test_bit(GBF_FULL, &bi->bi_flags) && (state == GFS2_BLKST_FREE)) goto next_bitmap; - bh = rbm->bi->bi_bh; - buffer = bh->b_data + rbm->bi->bi_offset; + bh = bi->bi_bh; + buffer = bh->b_data + bi->bi_offset; WARN_ON(!buffer_uptodate(bh)); - if (state != GFS2_BLKST_UNLINKED && rbm->bi->bi_clone) - buffer = rbm->bi->bi_clone + rbm->bi->bi_offset; + if (state != GFS2_BLKST_UNLINKED && bi->bi_clone) + buffer = bi->bi_clone + bi->bi_offset; initial_offset = rbm->offset; - offset = gfs2_bitfit(buffer, rbm->bi->bi_len, rbm->offset, state); + offset = gfs2_bitfit(buffer, bi->bi_len, rbm->offset, state); if (offset == BFITNOENT) goto bitmap_full; rbm->offset = offset; if (ip == NULL) return 0; - initial_bi = rbm->bi; + initial_bii = rbm->bii; ret = gfs2_reservation_check_and_update(rbm, ip, minext); if (ret == 0) return 0; if (ret > 0) { - n += (rbm->bi - initial_bi); + n += (rbm->bii - initial_bii); goto next_iter; } if (ret == -E2BIG) { - index = 0; + rbm->bii = 0; rbm->offset = 0; - n += (rbm->bi - initial_bi); + n += (rbm->bii - initial_bii); goto res_covered_end_of_rgrp; } return ret; bitmap_full: /* Mark bitmap as full and fall through */ - if ((state == GFS2_BLKST_FREE) && initial_offset == 0) - set_bit(GBF_FULL, &rbm->bi->bi_flags); + if ((state == GFS2_BLKST_FREE) && initial_offset == 0) { + struct gfs2_bitmap *bi = rbm_bi(rbm); + set_bit(GBF_FULL, &bi->bi_flags); + } next_bitmap: /* Find next bitmap in the rgrp */ rbm->offset = 0; - index = rbm->bi - rbm->rgd->rd_bits; - index++; - if (index == rbm->rgd->rd_length) - index = 0; + rbm->bii++; + if (rbm->bii == rbm->rgd->rd_length) + rbm->bii = 0; res_covered_end_of_rgrp: - rbm->bi = &rbm->rgd->rd_bits[index]; - if ((index == 0) && nowrap) + if ((rbm->bii == 0) && nowrap) break; n++; next_iter: @@ -1649,7 +1653,7 @@ static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, 
u64 skip struct gfs2_inode *ip; int error; int found = 0; - struct gfs2_rbm rbm = { .rgd = rgd, .bi = rgd->rd_bits, .offset = 0 }; + struct gfs2_rbm rbm = { .rgd = rgd, .bii = 0, .offset = 0 }; while (1) { down_write(&sdp->sd_log_flush_lock); @@ -1976,14 +1980,14 @@ static void gfs2_alloc_extent(const struct gfs2_rbm *rbm, bool dinode, *n = 1; block = gfs2_rbm_to_block(rbm); - gfs2_trans_add_meta(rbm->rgd->rd_gl, rbm->bi->bi_bh); + gfs2_trans_add_meta(rbm->rgd->rd_gl, rbm_bi(rbm)->bi_bh); gfs2_setbit(rbm, true, dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED); block++; while (*n < elen) { ret = gfs2_rbm_from_block(&pos, block); if (ret || gfs2_testbit(&pos) != GFS2_BLKST_FREE) break; - gfs2_trans_add_meta(pos.rgd->rd_gl, pos.bi->bi_bh); + gfs2_trans_add_meta(pos.rgd->rd_gl, rbm_bi(&pos)->bi_bh); gfs2_setbit(&pos, true, GFS2_BLKST_USED); (*n)++; block++; @@ -2004,6 +2008,7 @@ static struct gfs2_rgrpd *rgblk_free(struct gfs2_sbd *sdp, u64 bstart, u32 blen, unsigned char new_state) { struct gfs2_rbm rbm; + struct gfs2_bitmap *bi; rbm.rgd = gfs2_blk2rgrpd(sdp, bstart, 1); if (!rbm.rgd) { @@ -2014,15 +2019,15 @@ static struct gfs2_rgrpd *rgblk_free(struct gfs2_sbd *sdp, u64 bstart, while (blen--) { gfs2_rbm_from_block(&rbm, bstart); + bi = rbm_bi(&rbm); bstart++; - if (!rbm.bi->bi_clone) { - rbm.bi->bi_clone = kmalloc(rbm.bi->bi_bh->b_size, - GFP_NOFS | __GFP_NOFAIL); - memcpy(rbm.bi->bi_clone + rbm.bi->bi_offset, - rbm.bi->bi_bh->b_data + rbm.bi->bi_offset, - rbm.bi->bi_len); + if (!bi->bi_clone) { + bi->bi_clone = kmalloc(bi->bi_bh->b_size, + GFP_NOFS | __GFP_NOFAIL); + memcpy(bi->bi_clone + bi->bi_offset, + bi->bi_bh->b_data + bi->bi_offset, bi->bi_len); } - gfs2_trans_add_meta(rbm.rgd->rd_gl, rbm.bi->bi_bh); + gfs2_trans_add_meta(rbm.rgd->rd_gl, bi->bi_bh); gfs2_setbit(&rbm, false, new_state); } -- cgit v1.2.3-70-g09d2 From 149ed7f51e279916e7a7a1eef5cec50da67d7cfe Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Tue, 17 Sep 2013 13:14:35 -0400 Subject: GFS2: new 
function gfs2_rbm_incr Since the previous patch eliminated bi in favor of bii, this follow-on patch needed to be adjusted accordingly. Here is the revised version. This patch adds a new function, gfs2_rbm_incr, which increments an rbm structure. This is more efficient than calling gfs2_rbm_to_block, incrementing, then calling gfs2_rbm_from_block. Signed-off-by: Bob Peterson Signed-off-by: Steven Whitehouse --- fs/gfs2/rgrp.c | 30 +++++++++++++++++++++++++++--- 1 file changed, 27 insertions(+), 3 deletions(-) diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index dd3c4d3d7f41..285dd363199a 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -273,6 +273,32 @@ static int gfs2_rbm_from_block(struct gfs2_rbm *rbm, u64 block) return 0; } +/** + * gfs2_rbm_incr - increment an rbm structure + * @rbm: The rbm with rgd already set correctly + * + * This function takes an existing rbm structure and increments it to the next + * viable block offset. + * + * Returns: If incrementing the offset would cause the rbm to go past the + * end of the rgrp, true is returned, otherwise false. 
+ * + */ + +static bool gfs2_rbm_incr(struct gfs2_rbm *rbm) +{ + if (rbm->offset + 1 < rbm_bi(rbm)->bi_blocks) { /* in the same bitmap */ + rbm->offset++; + return false; + } + if (rbm->bii == rbm->rgd->rd_length - 1) /* at the last bitmap */ + return true; + + rbm->offset = 0; + rbm->bii++; + return false; +} + /** * gfs2_unaligned_extlen - Look for free blocks which are not byte aligned * @rbm: Position to search (value/result) @@ -284,7 +310,6 @@ static int gfs2_rbm_from_block(struct gfs2_rbm *rbm, u64 block) static bool gfs2_unaligned_extlen(struct gfs2_rbm *rbm, u32 n_unaligned, u32 *len) { - u64 block; u32 n; u8 res; @@ -295,8 +320,7 @@ static bool gfs2_unaligned_extlen(struct gfs2_rbm *rbm, u32 n_unaligned, u32 *le (*len)--; if (*len == 0) return true; - block = gfs2_rbm_to_block(rbm); - if (gfs2_rbm_from_block(rbm, block + 1)) + if (gfs2_rbm_incr(rbm)) return true; } -- cgit v1.2.3-70-g09d2 From 5ca1db41ecdeb0358b968265fadb755213558a85 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Mon, 23 Sep 2013 13:21:04 +0100 Subject: GFS2: fix dentry leaks We need to dput() the result of d_splice_alias(), unless it is passed to finish_no_open(). Edited by Steven Whitehouse in order to make it apply to the current GFS2 git tree, and taking account of a prerequisite patch which hasn't been applied. 
Signed-off-by: Miklos Szeredi Signed-off-by: Steven Whitehouse Cc: stable@vger.kernel.org --- fs/gfs2/inode.c | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 6d7f976aa328..cd58611912f5 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -585,12 +585,14 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, d = d_splice_alias(inode, dentry); error = 0; if (file) { - if (d == NULL) - d = dentry; - if (S_ISREG(inode->i_mode)) - error = finish_open(file, d, gfs2_open_common, opened); - else + if (S_ISREG(inode->i_mode)) { + WARN_ON(d != NULL); + error = finish_open(file, dentry, gfs2_open_common, opened); + } else { error = finish_no_open(file, d); + } + } else { + dput(d); } gfs2_glock_dq_uninit(ghs); return error; @@ -777,8 +779,10 @@ static struct dentry *__gfs2_lookup(struct inode *dir, struct dentry *dentry, error = finish_open(file, dentry, gfs2_open_common, opened); gfs2_glock_dq_uninit(&gh); - if (error) + if (error) { + dput(d); return ERR_PTR(error); + } return d; } @@ -1159,14 +1163,16 @@ static int gfs2_atomic_open(struct inode *dir, struct dentry *dentry, d = __gfs2_lookup(dir, dentry, file, opened); if (IS_ERR(d)) return PTR_ERR(d); - if (d == NULL) - d = dentry; - if (d->d_inode) { + if (d != NULL) + dentry = d; + if (dentry->d_inode) { if (!(*opened & FILE_OPENED)) - return finish_no_open(file, d); + return finish_no_open(file, dentry); + dput(d); return 0; } + BUG_ON(d != NULL); if (!(flags & O_CREAT)) return -ENOENT; -- cgit v1.2.3-70-g09d2 From af5c269799feaef110e59ce55b497cdd08712b0c Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Fri, 27 Sep 2013 12:49:33 +0100 Subject: GFS2: Clean up reservation removal The reservation for an inode should be cleared when it is truncated so that we can start again at a different offset for future allocations. 
We could try and do better than that, by resetting the search based on where the truncation started from, but this is only a first step. In addition, there are three callers of gfs2_rs_delete() but only one of those should really be testing the value of i_writecount. While we get away with that in the other cases currently, I think it would be better if we made that test specific to the one case which requires it. Signed-off-by: Steven Whitehouse --- fs/gfs2/bmap.c | 4 +++- fs/gfs2/file.c | 2 +- fs/gfs2/inode.c | 2 +- fs/gfs2/rgrp.c | 7 +++---- fs/gfs2/rgrp.h | 2 +- fs/gfs2/super.c | 2 +- 6 files changed, 10 insertions(+), 9 deletions(-) diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index 62a65fc448dc..21ad0f11cad4 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -1279,6 +1279,7 @@ do_grow_qunlock: int gfs2_setattr_size(struct inode *inode, u64 newsize) { + struct gfs2_inode *ip = GFS2_I(inode); int ret; u64 oldsize; @@ -1294,7 +1295,7 @@ int gfs2_setattr_size(struct inode *inode, u64 newsize) inode_dio_wait(inode); - ret = gfs2_rs_alloc(GFS2_I(inode)); + ret = gfs2_rs_alloc(ip); if (ret) goto out; @@ -1304,6 +1305,7 @@ int gfs2_setattr_size(struct inode *inode, u64 newsize) goto out; } + gfs2_rs_deltree(ip->i_res); ret = do_shrink(inode, oldsize, newsize); out: put_write_access(inode); diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 0621b46d474d..9ad20edc9c27 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -620,7 +620,7 @@ static int gfs2_release(struct inode *inode, struct file *file) if (!(file->f_mode & FMODE_WRITE)) return 0; - gfs2_rs_delete(ip); + gfs2_rs_delete(ip, &inode->i_writecount); return 0; } diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index cd58611912f5..4b79c19100d2 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -711,7 +711,7 @@ fail_gunlock2: fail_free_inode: if (ip->i_gl) gfs2_glock_put(ip->i_gl); - gfs2_rs_delete(ip); + gfs2_rs_delete(ip, NULL); free_inode_nonrcu(inode); inode = NULL; fail_gunlock: diff --git a/fs/gfs2/rgrp.c 
b/fs/gfs2/rgrp.c index 285dd363199a..d4d10fadab79 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -661,14 +661,13 @@ void gfs2_rs_deltree(struct gfs2_blkreserv *rs) /** * gfs2_rs_delete - delete a multi-block reservation * @ip: The inode for this reservation + * @wcount: The inode's write count, or NULL * */ -void gfs2_rs_delete(struct gfs2_inode *ip) +void gfs2_rs_delete(struct gfs2_inode *ip, atomic_t *wcount) { - struct inode *inode = &ip->i_inode; - down_write(&ip->i_rw_mutex); - if (ip->i_res && atomic_read(&inode->i_writecount) <= 1) { + if (ip->i_res && ((wcount == NULL) || (atomic_read(wcount) <= 1))) { gfs2_rs_deltree(ip->i_res); BUG_ON(ip->i_res->rs_free); kmem_cache_free(gfs2_rsrv_cachep, ip->i_res); diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h index 5b3f4a896e6c..57ea16ba3414 100644 --- a/fs/gfs2/rgrp.h +++ b/fs/gfs2/rgrp.h @@ -48,7 +48,7 @@ extern int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *n, extern int gfs2_rs_alloc(struct gfs2_inode *ip); extern void gfs2_rs_deltree(struct gfs2_blkreserv *rs); -extern void gfs2_rs_delete(struct gfs2_inode *ip); +extern void gfs2_rs_delete(struct gfs2_inode *ip, atomic_t *wcount); extern void __gfs2_free_blocks(struct gfs2_inode *ip, u64 bstart, u32 blen, int meta); extern void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen); extern void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip); diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index e5639dec66c4..35da5b19c0de 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -1526,7 +1526,7 @@ out_unlock: out: /* Case 3 starts here */ truncate_inode_pages(&inode->i_data, 0); - gfs2_rs_delete(ip); + gfs2_rs_delete(ip, NULL); gfs2_ordered_del_inode(ip); clear_inode(inode); gfs2_dir_hash_inval(ip); -- cgit v1.2.3-70-g09d2 From 7b9cff467144c8c62268db1b0948df089caa0999 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Wed, 2 Oct 2013 11:13:25 +0100 Subject: GFS2: Add allocation parameters structure This patch adds a 
structure to contain allocation parameters with the intention of future expansion of this structure. The idea is that we should be able to add more information about the allocation in the future in order to allow the allocator to make a better job of placing the requests on-disk. There is no functional difference from applying this patch. Signed-off-by: Steven Whitehouse --- fs/gfs2/aops.c | 4 +++- fs/gfs2/bmap.c | 3 ++- fs/gfs2/file.c | 8 ++++++-- fs/gfs2/incore.h | 14 ++++++++++++++ fs/gfs2/inode.c | 12 ++++++++---- fs/gfs2/quota.c | 8 ++++++-- fs/gfs2/rgrp.c | 18 +++++++++--------- fs/gfs2/rgrp.h | 2 +- fs/gfs2/xattr.c | 3 ++- 9 files changed, 51 insertions(+), 21 deletions(-) diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c index 1f7d8057ea68..b7fc035a6943 100644 --- a/fs/gfs2/aops.c +++ b/fs/gfs2/aops.c @@ -611,12 +611,14 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping, gfs2_write_calc_reserv(ip, len, &data_blocks, &ind_blocks); if (alloc_required) { + struct gfs2_alloc_parms ap = { .aflags = 0, }; error = gfs2_quota_lock_check(ip); if (error) goto out_unlock; requested = data_blocks + ind_blocks; - error = gfs2_inplace_reserve(ip, requested, 0); + ap.target = requested; + error = gfs2_inplace_reserve(ip, &ap); if (error) goto out_qunlock; } diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index 21ad0f11cad4..fe0500c0af7a 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -1216,6 +1216,7 @@ static int do_grow(struct inode *inode, u64 size) { struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_sbd *sdp = GFS2_SB(inode); + struct gfs2_alloc_parms ap = { .target = 1, }; struct buffer_head *dibh; int error; int unstuff = 0; @@ -1226,7 +1227,7 @@ static int do_grow(struct inode *inode, u64 size) if (error) return error; - error = gfs2_inplace_reserve(ip, 1, 0); + error = gfs2_inplace_reserve(ip, &ap); if (error) goto do_grow_qunlock; unstuff = 1; diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 9ad20edc9c27..efc078f0ee4e 100644 --- 
a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -383,6 +383,7 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) struct inode *inode = file_inode(vma->vm_file); struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_sbd *sdp = GFS2_SB(inode); + struct gfs2_alloc_parms ap = { .aflags = 0, }; unsigned long last_index; u64 pos = page->index << PAGE_CACHE_SHIFT; unsigned int data_blocks, ind_blocks, rblocks; @@ -430,7 +431,8 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) if (ret) goto out_unlock; gfs2_write_calc_reserv(ip, PAGE_CACHE_SIZE, &data_blocks, &ind_blocks); - ret = gfs2_inplace_reserve(ip, data_blocks + ind_blocks, 0); + ap.target = data_blocks + ind_blocks; + ret = gfs2_inplace_reserve(ip, &ap); if (ret) goto out_quota_unlock; @@ -800,6 +802,7 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset, struct inode *inode = file_inode(file); struct gfs2_sbd *sdp = GFS2_SB(inode); struct gfs2_inode *ip = GFS2_I(inode); + struct gfs2_alloc_parms ap = { .aflags = 0, }; unsigned int data_blocks = 0, ind_blocks = 0, rblocks; loff_t bytes, max_bytes; int error; @@ -850,7 +853,8 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset, retry: gfs2_write_calc_reserv(ip, bytes, &data_blocks, &ind_blocks); - error = gfs2_inplace_reserve(ip, data_blocks + ind_blocks, 0); + ap.target = data_blocks + ind_blocks; + error = gfs2_inplace_reserve(ip, &ap); if (error) { if (error == -ENOSPC && bytes > sdp->sd_sb.sb_bsize) { bytes >>= 1; diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 8c8f110d8e35..082c8fa7fab9 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -285,6 +285,20 @@ struct gfs2_blkreserv { unsigned int rs_qa_qd_num; }; +/* + * Allocation parameters + * @target: The number of blocks we'd ideally like to allocate + * @aflags: The flags (e.g. Orlov flag) + * + * The intent is to gradually expand this structure over time in + * order to give more information, e.g. 
alignment, min extent size + * to the allocation code. + */ +struct gfs2_alloc_parms { + u32 target; + u32 aflags; +}; + enum { GLF_LOCK = 1, GLF_DEMOTE = 3, diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 4b79c19100d2..5a7ca3d1d1cf 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -379,6 +379,7 @@ static void munge_mode_uid_gid(const struct gfs2_inode *dip, static int alloc_dinode(struct gfs2_inode *ip, u32 flags) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); + struct gfs2_alloc_parms ap = { .target = RES_DINODE, .aflags = flags, }; int error; int dblocks = 1; @@ -386,7 +387,7 @@ static int alloc_dinode(struct gfs2_inode *ip, u32 flags) if (error) goto out; - error = gfs2_inplace_reserve(ip, RES_DINODE, flags); + error = gfs2_inplace_reserve(ip, &ap); if (error) goto out_quota; @@ -472,6 +473,7 @@ static int link_dinode(struct gfs2_inode *dip, const struct qstr *name, struct gfs2_inode *ip, int arq) { struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); + struct gfs2_alloc_parms ap = { .target = sdp->sd_max_dirres, }; int error; if (arq) { @@ -479,7 +481,7 @@ static int link_dinode(struct gfs2_inode *dip, const struct qstr *name, if (error) goto fail_quota_locks; - error = gfs2_inplace_reserve(dip, sdp->sd_max_dirres, 0); + error = gfs2_inplace_reserve(dip, &ap); if (error) goto fail_quota_locks; @@ -874,11 +876,12 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir, error = 0; if (alloc_required) { + struct gfs2_alloc_parms ap = { .target = sdp->sd_max_dirres, }; error = gfs2_quota_lock_check(dip); if (error) goto out_gunlock; - error = gfs2_inplace_reserve(dip, sdp->sd_max_dirres, 0); + error = gfs2_inplace_reserve(dip, &ap); if (error) goto out_gunlock_q; @@ -1387,11 +1390,12 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, goto out_gunlock; if (alloc_required) { + struct gfs2_alloc_parms ap = { .target = sdp->sd_max_dirres, }; error = gfs2_quota_lock_check(ndip); if (error) goto out_gunlock; - error = 
gfs2_inplace_reserve(ndip, sdp->sd_max_dirres, 0); + error = gfs2_inplace_reserve(ndip, &ap); if (error) goto out_gunlock_q; diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index db441359ee8c..8fe7a0a87c80 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -763,6 +763,7 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda) { struct gfs2_sbd *sdp = (*qda)->qd_gl->gl_sbd; struct gfs2_inode *ip = GFS2_I(sdp->sd_quota_inode); + struct gfs2_alloc_parms ap = { .aflags = 0, }; unsigned int data_blocks, ind_blocks; struct gfs2_holder *ghs, i_gh; unsigned int qx, x; @@ -815,7 +816,8 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda) blocks = num_qd * data_blocks + RES_DINODE + num_qd + 3; reserved = 1 + (nalloc * (data_blocks + ind_blocks)); - error = gfs2_inplace_reserve(ip, reserved, 0); + ap.target = reserved; + error = gfs2_inplace_reserve(ip, &ap); if (error) goto out_alloc; @@ -1573,10 +1575,12 @@ static int gfs2_set_dqblk(struct super_block *sb, struct kqid qid, if (gfs2_is_stuffed(ip)) alloc_required = 1; if (alloc_required) { + struct gfs2_alloc_parms ap = { .aflags = 0, }; gfs2_write_calc_reserv(ip, sizeof(struct gfs2_quota), &data_blocks, &ind_blocks); blocks = 1 + data_blocks + ind_blocks; - error = gfs2_inplace_reserve(ip, blocks, 0); + ap.target = blocks; + error = gfs2_inplace_reserve(ip, &ap); if (error) goto out_i; blocks += gfs2_rg_blocks(ip, blocks); diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index d4d10fadab79..4f0984a607b3 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -1422,12 +1422,12 @@ static void rs_insert(struct gfs2_inode *ip) * rg_mblk_search - find a group of multiple free blocks to form a reservation * @rgd: the resource group descriptor * @ip: pointer to the inode for which we're reserving blocks - * @requested: number of blocks required for this allocation + * @ap: the allocation parameters * */ static void rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip, - unsigned requested) + 
const struct gfs2_alloc_parms *ap) { struct gfs2_rbm rbm = { .rgd = rgd, }; u64 goal; @@ -1440,7 +1440,7 @@ static void rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip, if (S_ISDIR(inode->i_mode)) extlen = 1; else { - extlen = max_t(u32, atomic_read(&rs->rs_sizehint), requested); + extlen = max_t(u32, atomic_read(&rs->rs_sizehint), ap->target); extlen = clamp(extlen, RGRP_RSRV_MINBLKS, free_blocks); } if ((rgd->rd_free_clone < rgd->rd_reserved) || (free_blocks < extlen)) @@ -1831,12 +1831,12 @@ static bool gfs2_select_rgrp(struct gfs2_rgrpd **pos, const struct gfs2_rgrpd *b /** * gfs2_inplace_reserve - Reserve space in the filesystem * @ip: the inode to reserve space for - * @requested: the number of blocks to be reserved + * @ap: the allocation parameters * * Returns: errno */ -int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested, u32 aflags) +int gfs2_inplace_reserve(struct gfs2_inode *ip, const struct gfs2_alloc_parms *ap) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); struct gfs2_rgrpd *begin = NULL; @@ -1848,7 +1848,7 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested, u32 aflags) if (sdp->sd_args.ar_rgrplvb) flags |= GL_SKIP; - if (gfs2_assert_warn(sdp, requested)) + if (gfs2_assert_warn(sdp, ap->target)) return -EINVAL; if (gfs2_rs_active(rs)) { begin = rs->rs_rbm.rgd; @@ -1857,7 +1857,7 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested, u32 aflags) } else { rs->rs_rbm.rgd = begin = gfs2_blk2rgrpd(sdp, ip->i_goal, 1); } - if (S_ISDIR(ip->i_inode.i_mode) && (aflags & GFS2_AF_ORLOV)) + if (S_ISDIR(ip->i_inode.i_mode) && (ap->aflags & GFS2_AF_ORLOV)) skip = gfs2_orlov_skip(ip); if (rs->rs_rbm.rgd == NULL) return -EBADSLT; @@ -1899,14 +1899,14 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested, u32 aflags) /* Get a reservation if we don't already have one */ if (!gfs2_rs_active(rs)) - rg_mblk_search(rs->rs_rbm.rgd, ip, requested); + rg_mblk_search(rs->rs_rbm.rgd, ip, ap); /* Skip rgrps when we 
can't get a reservation on first pass */ if (!gfs2_rs_active(rs) && (loops < 1)) goto check_rgrp; /* If rgrp has enough free space, use it */ - if (rs->rs_rbm.rgd->rd_free_clone >= requested) { + if (rs->rs_rbm.rgd->rd_free_clone >= ap->target) { ip->i_rgd = rs->rs_rbm.rgd; return 0; } diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h index 57ea16ba3414..3a10d2ffbbe7 100644 --- a/fs/gfs2/rgrp.h +++ b/fs/gfs2/rgrp.h @@ -40,7 +40,7 @@ extern void gfs2_rgrp_go_unlock(struct gfs2_holder *gh); extern struct gfs2_alloc *gfs2_alloc_get(struct gfs2_inode *ip); #define GFS2_AF_ORLOV 1 -extern int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested, u32 flags); +extern int gfs2_inplace_reserve(struct gfs2_inode *ip, const struct gfs2_alloc_parms *ap); extern void gfs2_inplace_release(struct gfs2_inode *ip); extern int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *n, diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c index ecd37f30ab91..8c6a6f6bdba9 100644 --- a/fs/gfs2/xattr.c +++ b/fs/gfs2/xattr.c @@ -723,6 +723,7 @@ static int ea_alloc_skeleton(struct gfs2_inode *ip, struct gfs2_ea_request *er, unsigned int blks, ea_skeleton_call_t skeleton_call, void *private) { + struct gfs2_alloc_parms ap = { .target = blks }; struct buffer_head *dibh; int error; @@ -734,7 +735,7 @@ static int ea_alloc_skeleton(struct gfs2_inode *ip, struct gfs2_ea_request *er, if (error) return error; - error = gfs2_inplace_reserve(ip, blks, 0); + error = gfs2_inplace_reserve(ip, &ap); if (error) goto out_gunlock_q; -- cgit v1.2.3-70-g09d2 From 9e07f2cb3d7a93f4b1b18fc6e4dd6911dcba2442 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Wed, 2 Oct 2013 14:42:45 +0100 Subject: GFS2: Speed up starting point selection for block allocation When setting the starting point for block allocation, there were calls to both gfs2_rbm_to_block() and gfs2_rbm_from_block() in the common case of there being an active reservation. 
The gfs2_rbm_from_block() function can be quite slow, and since the two conversions were effectively a no-op, it makes sense to avoid them entirely in this case. There is no functional change here, but the code should be a bit more efficient after this patch. Signed-off-by: Steven Whitehouse --- fs/gfs2/rgrp.c | 41 +++++++++++++++++++++++++++++++---------- 1 file changed, 31 insertions(+), 10 deletions(-) diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 4f0984a607b3..4d83abdd5635 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -2133,6 +2133,35 @@ out: spin_unlock(&rgd->rd_rsspin); } +/** + * gfs2_set_alloc_start - Set starting point for block allocation + * @rbm: The rbm which will be set to the required location + * @ip: The gfs2 inode + * @dinode: Flag to say if allocation includes a new inode + * + * This sets the starting point from the reservation if one is active + * otherwise it falls back to guessing a start point based on the + * inode's goal block or the last allocation point in the rgrp. 
+ */ + +static void gfs2_set_alloc_start(struct gfs2_rbm *rbm, + const struct gfs2_inode *ip, bool dinode) +{ + u64 goal; + + if (gfs2_rs_active(ip->i_res)) { + *rbm = ip->i_res->rs_rbm; + return; + } + + if (!dinode && rgrp_contains_block(rbm->rgd, ip->i_goal)) + goal = ip->i_goal; + else + goal = rbm->rgd->rd_last_alloc + rbm->rgd->rd_data0; + + gfs2_rbm_from_block(rbm, goal); +} + /** * gfs2_alloc_blocks - Allocate one or more blocks of data and/or a dinode * @ip: the inode to allocate the block for @@ -2151,22 +2180,14 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks, struct buffer_head *dibh; struct gfs2_rbm rbm = { .rgd = ip->i_rgd, }; unsigned int ndata; - u64 goal; u64 block; /* block, within the file system scope */ int error; - if (gfs2_rs_active(ip->i_res)) - goal = gfs2_rbm_to_block(&ip->i_res->rs_rbm); - else if (!dinode && rgrp_contains_block(rbm.rgd, ip->i_goal)) - goal = ip->i_goal; - else - goal = rbm.rgd->rd_last_alloc + rbm.rgd->rd_data0; - - gfs2_rbm_from_block(&rbm, goal); + gfs2_set_alloc_start(&rbm, ip, dinode); error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, 0, ip, false); if (error == -ENOSPC) { - gfs2_rbm_from_block(&rbm, goal); + gfs2_set_alloc_start(&rbm, ip, dinode); error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, 0, NULL, false); } -- cgit v1.2.3-70-g09d2 From 26e43a15d44a02e380904ac6113fe0f2f1ddea8a Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Wed, 2 Oct 2013 14:47:02 +0100 Subject: GFS2: Move gfs2_icbit_munge into quota.c This function is only called twice, and both callers are quota related, so lets move this function into quota.c and make it static. 
Signed-off-by: Steven Whitehouse --- fs/gfs2/quota.c | 20 ++++++++++++++++++++ fs/gfs2/util.c | 20 -------------------- fs/gfs2/util.h | 2 -- 3 files changed, 20 insertions(+), 22 deletions(-) diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index 8fe7a0a87c80..fd1ec5243d2e 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -289,6 +289,26 @@ static void slot_hold(struct gfs2_quota_data *qd) spin_unlock(&qd_lru_lock); } +static void gfs2_icbit_munge(struct gfs2_sbd *sdp, unsigned char **bitmap, + unsigned int bit, int new_value) +{ + unsigned int c, o, b = bit; + int old_value; + + c = b / (8 * PAGE_SIZE); + b %= 8 * PAGE_SIZE; + o = b / 8; + b %= 8; + + old_value = (bitmap[c][o] & (1 << b)); + gfs2_assert_withdraw(sdp, !old_value != !new_value); + + if (new_value) + bitmap[c][o] |= 1 << b; + else + bitmap[c][o] &= ~(1 << b); +} + static void slot_put(struct gfs2_quota_data *qd) { struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd; diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c index 6402fb69d71b..f7109f689e61 100644 --- a/fs/gfs2/util.c +++ b/fs/gfs2/util.c @@ -268,23 +268,3 @@ int gfs2_io_error_bh_i(struct gfs2_sbd *sdp, struct buffer_head *bh, return rv; } -void gfs2_icbit_munge(struct gfs2_sbd *sdp, unsigned char **bitmap, - unsigned int bit, int new_value) -{ - unsigned int c, o, b = bit; - int old_value; - - c = b / (8 * PAGE_SIZE); - b %= 8 * PAGE_SIZE; - o = b / 8; - b %= 8; - - old_value = (bitmap[c][o] & (1 << b)); - gfs2_assert_withdraw(sdp, !old_value != !new_value); - - if (new_value) - bitmap[c][o] |= 1 << b; - else - bitmap[c][o] &= ~(1 << b); -} - diff --git a/fs/gfs2/util.h b/fs/gfs2/util.h index 80535739ac7b..b7ffb09b99ea 100644 --- a/fs/gfs2/util.h +++ b/fs/gfs2/util.h @@ -164,8 +164,6 @@ static inline unsigned int gfs2_tune_get_i(struct gfs2_tune *gt, #define gfs2_tune_get(sdp, field) \ gfs2_tune_get_i(&(sdp)->sd_tune, &(sdp)->sd_tune.field) -void gfs2_icbit_munge(struct gfs2_sbd *sdp, unsigned char **bitmap, - unsigned int bit, int new_value); int 
gfs2_lm_withdraw(struct gfs2_sbd *sdp, char *fmt, ...); #endif /* __UTIL_DOT_H__ */ -- cgit v1.2.3-70-g09d2 From bef292a72daf215c00aa20f68603de181afbb4d3 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Thu, 3 Oct 2013 18:43:20 +0100 Subject: GFS2: Remove obsolete quota tunable There is no need for a paramater which relates to the internals of quota to be exposed to users. The only possible use would be to turn it up so large that the memory allocation fails. So lets remove it and set it to a sensible value which ensures that we don't ask for multipage allocations. Currently the size of struct gfs2_holder means that the caluclated value is identical to the previous default value, so there should be no functional change. Signed-off-by: Steven Whitehouse Cc: Abhijith Das --- fs/gfs2/incore.h | 1 - fs/gfs2/ops_fstype.c | 1 - fs/gfs2/quota.c | 2 +- fs/gfs2/sys.c | 2 -- 4 files changed, 1 insertion(+), 5 deletions(-) diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 082c8fa7fab9..37b3cd795d6c 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -537,7 +537,6 @@ struct gfs2_tune { unsigned int gt_logd_secs; - unsigned int gt_quota_simul_sync; /* Max quotavals to sync at once */ unsigned int gt_quota_warn_period; /* Secs between quota warn msgs */ unsigned int gt_quota_scale_num; /* Numerator */ unsigned int gt_quota_scale_den; /* Denominator */ diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 19ff5e8c285c..8e40fda985d6 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -51,7 +51,6 @@ static void gfs2_tune_init(struct gfs2_tune *gt) { spin_lock_init(>->gt_spin); - gt->gt_quota_simul_sync = 64; gt->gt_quota_warn_period = 10; gt->gt_quota_scale_num = 1; gt->gt_quota_scale_den = 1; diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index fd1ec5243d2e..658abe26a098 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -1140,7 +1140,7 @@ int gfs2_quota_sync(struct super_block *sb, int type) { struct gfs2_sbd *sdp = sb->s_fs_info; struct 
gfs2_quota_data **qda; - unsigned int max_qd = gfs2_tune_get(sdp, gt_quota_simul_sync); + unsigned int max_qd = PAGE_SIZE/sizeof(struct gfs2_holder); unsigned int num_qd; unsigned int x; int error = 0; diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c index aa5c48044966..d09f6edda0ff 100644 --- a/fs/gfs2/sys.c +++ b/fs/gfs2/sys.c @@ -587,7 +587,6 @@ TUNE_ATTR(max_readahead, 0); TUNE_ATTR(complain_secs, 0); TUNE_ATTR(statfs_slow, 0); TUNE_ATTR(new_files_jdata, 0); -TUNE_ATTR(quota_simul_sync, 1); TUNE_ATTR(statfs_quantum, 1); TUNE_ATTR_3(quota_scale, quota_scale_show, quota_scale_store); @@ -597,7 +596,6 @@ static struct attribute *tune_attrs[] = { &tune_attr_max_readahead.attr, &tune_attr_complain_secs.attr, &tune_attr_statfs_slow.attr, - &tune_attr_quota_simul_sync.attr, &tune_attr_statfs_quantum.attr, &tune_attr_quota_scale.attr, &tune_attr_new_files_jdata.attr, -- cgit v1.2.3-70-g09d2 From 1bf59bf6ded8411058b4c9acc45d9dd8d6965464 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Fri, 4 Oct 2013 11:14:46 +0100 Subject: GFS2: Make two similar quota code fragments into a function There should be no functional change bar the removal of a test of the MS_READONLY flag which would never be reachable. This merges the common code from qd_fish and qd_trylock into a single function and calls it from both those places. 
Signed-off-by: Steven Whitehouse Cc: Abhijith Das --- fs/gfs2/quota.c | 60 +++++++++++++++++++++++++-------------------------------- 1 file changed, 26 insertions(+), 34 deletions(-) diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index 658abe26a098..75414e773ab4 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -383,6 +383,25 @@ static void bh_put(struct gfs2_quota_data *qd) mutex_unlock(&sdp->sd_quota_mutex); } +static int qd_check_sync(struct gfs2_sbd *sdp, struct gfs2_quota_data *qd, + u64 *sync_gen) +{ + if (test_bit(QDF_LOCKED, &qd->qd_flags) || + !test_bit(QDF_CHANGE, &qd->qd_flags) || + (sync_gen && (qd->qd_sync_gen >= *sync_gen))) + return 0; + + list_move_tail(&qd->qd_list, &sdp->sd_quota_list); + + set_bit(QDF_LOCKED, &qd->qd_flags); + gfs2_assert_warn(sdp, atomic_read(&qd->qd_count)); + atomic_inc(&qd->qd_count); + qd->qd_change_sync = qd->qd_change; + gfs2_assert_warn(sdp, qd->qd_slot_count); + qd->qd_slot_count++; + return 1; +} + static int qd_fish(struct gfs2_sbd *sdp, struct gfs2_quota_data **qdp) { struct gfs2_quota_data *qd = NULL; @@ -397,22 +416,9 @@ static int qd_fish(struct gfs2_sbd *sdp, struct gfs2_quota_data **qdp) spin_lock(&qd_lru_lock); list_for_each_entry(qd, &sdp->sd_quota_list, qd_list) { - if (test_bit(QDF_LOCKED, &qd->qd_flags) || - !test_bit(QDF_CHANGE, &qd->qd_flags) || - qd->qd_sync_gen >= sdp->sd_quota_sync_gen) - continue; - - list_move_tail(&qd->qd_list, &sdp->sd_quota_list); - - set_bit(QDF_LOCKED, &qd->qd_flags); - gfs2_assert_warn(sdp, atomic_read(&qd->qd_count)); - atomic_inc(&qd->qd_count); - qd->qd_change_sync = qd->qd_change; - gfs2_assert_warn(sdp, qd->qd_slot_count); - qd->qd_slot_count++; - found = 1; - - break; + found = qd_check_sync(sdp, qd, &sdp->sd_quota_sync_gen); + if (found) + break; } if (!found) @@ -439,28 +445,14 @@ static int qd_fish(struct gfs2_sbd *sdp, struct gfs2_quota_data **qdp) static int qd_trylock(struct gfs2_quota_data *qd) { struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd; - - if 
(sdp->sd_vfs->s_flags & MS_RDONLY) - return 0; + int found; spin_lock(&qd_lru_lock); + found = qd_check_sync(sdp, qd, NULL); + spin_unlock(&qd_lru_lock); - if (test_bit(QDF_LOCKED, &qd->qd_flags) || - !test_bit(QDF_CHANGE, &qd->qd_flags)) { - spin_unlock(&qd_lru_lock); + if (!found) return 0; - } - - list_move_tail(&qd->qd_list, &sdp->sd_quota_list); - - set_bit(QDF_LOCKED, &qd->qd_flags); - gfs2_assert_warn(sdp, atomic_read(&qd->qd_count)); - atomic_inc(&qd->qd_count); - qd->qd_change_sync = qd->qd_change; - gfs2_assert_warn(sdp, qd->qd_slot_count); - qd->qd_slot_count++; - - spin_unlock(&qd_lru_lock); gfs2_assert_warn(sdp, qd->qd_change_sync); if (bh_get(qd)) { -- cgit v1.2.3-70-g09d2 From aabd7c72f52145fcf13f9251770b0b0246b5e406 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Fri, 4 Oct 2013 11:31:05 +0100 Subject: GFS2: Inline qd_trylock into gfs2_quota_unlock The function qd_trylock was not a trylock despite its name and can be inlined into gfs2_quota_unlock in order to make the code a bit clearer. There should be no functional change as a result of this patch. 
Signed-off-by: Steven Whitehouse Cc: Abhijith Das --- fs/gfs2/quota.c | 45 ++++++++++++++++++++------------------------- 1 file changed, 20 insertions(+), 25 deletions(-) diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index 75414e773ab4..cfb4cdeddacb 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -442,29 +442,6 @@ static int qd_fish(struct gfs2_sbd *sdp, struct gfs2_quota_data **qdp) return 0; } -static int qd_trylock(struct gfs2_quota_data *qd) -{ - struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd; - int found; - - spin_lock(&qd_lru_lock); - found = qd_check_sync(sdp, qd, NULL); - spin_unlock(&qd_lru_lock); - - if (!found) - return 0; - - gfs2_assert_warn(sdp, qd->qd_change_sync); - if (bh_get(qd)) { - clear_bit(QDF_LOCKED, &qd->qd_flags); - slot_put(qd); - qd_put(qd); - return 0; - } - - return 1; -} - static void qd_unlock(struct gfs2_quota_data *qd) { gfs2_assert_warn(qd->qd_gl->gl_sbd, @@ -1015,9 +992,11 @@ static int need_sync(struct gfs2_quota_data *qd) void gfs2_quota_unlock(struct gfs2_inode *ip) { + struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); struct gfs2_quota_data *qda[4]; unsigned int count = 0; unsigned int x; + int found; if (!test_and_clear_bit(GIF_QD_LOCKED, &ip->i_flags)) goto out; @@ -1030,9 +1009,25 @@ void gfs2_quota_unlock(struct gfs2_inode *ip) sync = need_sync(qd); gfs2_glock_dq_uninit(&ip->i_res->rs_qa_qd_ghs[x]); + if (!sync) + continue; + + spin_lock(&qd_lru_lock); + found = qd_check_sync(sdp, qd, NULL); + spin_unlock(&qd_lru_lock); + + if (!found) + continue; + + gfs2_assert_warn(sdp, qd->qd_change_sync); + if (bh_get(qd)) { + clear_bit(QDF_LOCKED, &qd->qd_flags); + slot_put(qd); + qd_put(qd); + continue; + } - if (sync && qd_trylock(qd)) - qda[count++] = qd; + qda[count++] = qd; } if (count) { -- cgit v1.2.3-70-g09d2 From e46c772dbafc2059b0c7ee87c6a7232baadaf6c7 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Fri, 4 Oct 2013 12:29:34 +0100 Subject: GFS2: Protect quota sync generation Now that gfs2_quota_sync can be potentially 
called from multiple threads, we should protect this bit of code, and the sync generation number in particular in order to ensure that there are no races when syncing quotas. Signed-off-by: Steven Whitehouse Cc: Abhijith Das --- fs/gfs2/incore.h | 1 + fs/gfs2/ops_fstype.c | 1 + fs/gfs2/quota.c | 6 ++++-- 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 37b3cd795d6c..2ab4f8d8f4c4 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -714,6 +714,7 @@ struct gfs2_sbd { struct list_head sd_quota_list; atomic_t sd_quota_count; struct mutex sd_quota_mutex; + struct mutex sd_quota_sync_mutex; wait_queue_head_t sd_quota_wait; struct list_head sd_trunc_list; spinlock_t sd_trunc_lock; diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 8e40fda985d6..82303b474958 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -93,6 +93,7 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb) INIT_LIST_HEAD(&sdp->sd_quota_list); mutex_init(&sdp->sd_quota_mutex); + mutex_init(&sdp->sd_quota_sync_mutex); init_waitqueue_head(&sdp->sd_quota_wait); INIT_LIST_HEAD(&sdp->sd_trunc_list); spin_lock_init(&sdp->sd_trunc_lock); diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index cfb4cdeddacb..4a9726aa191f 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -1132,12 +1132,13 @@ int gfs2_quota_sync(struct super_block *sb, int type) unsigned int x; int error = 0; - sdp->sd_quota_sync_gen++; - qda = kcalloc(max_qd, sizeof(struct gfs2_quota_data *), GFP_KERNEL); if (!qda) return -ENOMEM; + mutex_lock(&sdp->sd_quota_sync_mutex); + sdp->sd_quota_sync_gen++; + do { num_qd = 0; @@ -1162,6 +1163,7 @@ int gfs2_quota_sync(struct super_block *sb, int type) } } while (!error && num_qd == max_qd); + mutex_unlock(&sdp->sd_quota_sync_mutex); kfree(qda); return error; -- cgit v1.2.3-70-g09d2 From e66cf161098a634dc96e32d0089c5767cf25668a Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Tue, 15 Oct 2013 15:18:08 +0100 Subject: 
GFS2: Use lockref for glocks Currently glocks have an atomic reference count and also a spinlock which covers various internal fields, such as the state. This intent of this patch is to replace the spinlock and the atomic reference count with a lockref structure. This contains a spinlock which we can continue to use as before, and a reference counter which is used in conjuction with the spinlock to replace the previous atomic counter. As a result of this there are some new rules for reference counting on glocks. We need to distinguish between reference count changes under gl_spin (which are now just increment or decrement of the new counter, provided the count cannot hit zero) and those which are outside of gl_spin, but which now take gl_spin internally. The conversion is relatively straight forward. There is probably some further clean up which can be done, but the priority at this stage is to make the change in as simple a manner as possible. A consequence of this change is that the reference count is being decoupled from the lru list processing. This should allow future adoption of the lru_list code with glocks in due course. The reason for using the "dead" state and not just relying on 0 being the "invalid state" is so that in due course 0 ref counts can be allowable. The intent is to eventually be able to remove the ref count changes which are currently hidden away in state_change(). 
Signed-off-by: Steven Whitehouse --- fs/gfs2/glock.c | 83 ++++++++++++++++++++++++------------------------- fs/gfs2/glock.h | 2 -- fs/gfs2/glops.c | 4 +-- fs/gfs2/incore.h | 5 +-- include/linux/lockref.h | 6 ++++ lib/lockref.c | 1 + 6 files changed, 52 insertions(+), 49 deletions(-) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index c2f41b4d00b9..e66a8009aff1 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -31,6 +31,7 @@ #include #include #include +#include #include "gfs2.h" #include "incore.h" @@ -129,10 +130,10 @@ void gfs2_glock_free(struct gfs2_glock *gl) * */ -void gfs2_glock_hold(struct gfs2_glock *gl) +static void gfs2_glock_hold(struct gfs2_glock *gl) { - GLOCK_BUG_ON(gl, atomic_read(&gl->gl_ref) == 0); - atomic_inc(&gl->gl_ref); + GLOCK_BUG_ON(gl, __lockref_is_dead(&gl->gl_lockref)); + lockref_get(&gl->gl_lockref); } /** @@ -186,20 +187,6 @@ static void gfs2_glock_remove_from_lru(struct gfs2_glock *gl) spin_unlock(&lru_lock); } -/** - * gfs2_glock_put_nolock() - Decrement reference count on glock - * @gl: The glock to put - * - * This function should only be used if the caller has its own reference - * to the glock, in addition to the one it is dropping. 
- */ - -void gfs2_glock_put_nolock(struct gfs2_glock *gl) -{ - if (atomic_dec_and_test(&gl->gl_ref)) - GLOCK_BUG_ON(gl, 1); -} - /** * gfs2_glock_put() - Decrement reference count on glock * @gl: The glock to put @@ -211,17 +198,22 @@ void gfs2_glock_put(struct gfs2_glock *gl) struct gfs2_sbd *sdp = gl->gl_sbd; struct address_space *mapping = gfs2_glock2aspace(gl); - if (atomic_dec_and_lock(&gl->gl_ref, &lru_lock)) { - __gfs2_glock_remove_from_lru(gl); - spin_unlock(&lru_lock); - spin_lock_bucket(gl->gl_hash); - hlist_bl_del_rcu(&gl->gl_list); - spin_unlock_bucket(gl->gl_hash); - GLOCK_BUG_ON(gl, !list_empty(&gl->gl_holders)); - GLOCK_BUG_ON(gl, mapping && mapping->nrpages); - trace_gfs2_glock_put(gl); - sdp->sd_lockstruct.ls_ops->lm_put_lock(gl); - } + if (lockref_put_or_lock(&gl->gl_lockref)) + return; + + lockref_mark_dead(&gl->gl_lockref); + + spin_lock(&lru_lock); + __gfs2_glock_remove_from_lru(gl); + spin_unlock(&lru_lock); + spin_unlock(&gl->gl_lockref.lock); + spin_lock_bucket(gl->gl_hash); + hlist_bl_del_rcu(&gl->gl_list); + spin_unlock_bucket(gl->gl_hash); + GLOCK_BUG_ON(gl, !list_empty(&gl->gl_holders)); + GLOCK_BUG_ON(gl, mapping && mapping->nrpages); + trace_gfs2_glock_put(gl); + sdp->sd_lockstruct.ls_ops->lm_put_lock(gl); } /** @@ -244,7 +236,7 @@ static struct gfs2_glock *search_bucket(unsigned int hash, continue; if (gl->gl_sbd != sdp) continue; - if (atomic_inc_not_zero(&gl->gl_ref)) + if (lockref_get_not_dead(&gl->gl_lockref)) return gl; } @@ -396,10 +388,11 @@ static void state_change(struct gfs2_glock *gl, unsigned int new_state) held2 = (new_state != LM_ST_UNLOCKED); if (held1 != held2) { + GLOCK_BUG_ON(gl, __lockref_is_dead(&gl->gl_lockref)); if (held2) - gfs2_glock_hold(gl); + gl->gl_lockref.count++; else - gfs2_glock_put_nolock(gl); + gl->gl_lockref.count--; } if (held1 && held2 && list_empty(&gl->gl_holders)) clear_bit(GLF_QUEUED, &gl->gl_flags); @@ -626,9 +619,9 @@ out: out_sched: clear_bit(GLF_LOCK, &gl->gl_flags); 
smp_mb__after_clear_bit(); - gfs2_glock_hold(gl); + gl->gl_lockref.count++; if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0) - gfs2_glock_put_nolock(gl); + gl->gl_lockref.count--; return; out_unlock: @@ -754,7 +747,7 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number, gl->gl_sbd = sdp; gl->gl_flags = 0; gl->gl_name = name; - atomic_set(&gl->gl_ref, 1); + gl->gl_lockref.count = 1; gl->gl_state = LM_ST_UNLOCKED; gl->gl_target = LM_ST_UNLOCKED; gl->gl_demote_state = LM_ST_EXCLUSIVE; @@ -1356,10 +1349,10 @@ void gfs2_glock_complete(struct gfs2_glock *gl, int ret) } } - spin_unlock(&gl->gl_spin); + gl->gl_lockref.count++; set_bit(GLF_REPLY_PENDING, &gl->gl_flags); - smp_wmb(); - gfs2_glock_hold(gl); + spin_unlock(&gl->gl_spin); + if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0) gfs2_glock_put(gl); } @@ -1404,15 +1397,19 @@ __acquires(&lru_lock) while(!list_empty(list)) { gl = list_entry(list->next, struct gfs2_glock, gl_lru); list_del_init(&gl->gl_lru); + if (!spin_trylock(&gl->gl_spin)) { + list_add(&gl->gl_lru, &lru_list); + atomic_inc(&lru_count); + continue; + } clear_bit(GLF_LRU, &gl->gl_flags); - gfs2_glock_hold(gl); spin_unlock(&lru_lock); - spin_lock(&gl->gl_spin); + gl->gl_lockref.count++; if (demote_ok(gl)) handle_callback(gl, LM_ST_UNLOCKED, 0, false); WARN_ON(!test_and_clear_bit(GLF_LOCK, &gl->gl_flags)); if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0) - gfs2_glock_put_nolock(gl); + gl->gl_lockref.count--; spin_unlock(&gl->gl_spin); spin_lock(&lru_lock); } @@ -1493,7 +1490,7 @@ static void examine_bucket(glock_examiner examiner, const struct gfs2_sbd *sdp, rcu_read_lock(); hlist_bl_for_each_entry_rcu(gl, pos, head, gl_list) { - if ((gl->gl_sbd == sdp) && atomic_inc_not_zero(&gl->gl_ref)) + if ((gl->gl_sbd == sdp) && lockref_get_not_dead(&gl->gl_lockref)) examiner(gl); } rcu_read_unlock(); @@ -1746,7 +1743,7 @@ int gfs2_dump_glock(struct seq_file *seq, const struct gfs2_glock *gl) state2str(gl->gl_demote_state), 
dtime, atomic_read(&gl->gl_ail_count), atomic_read(&gl->gl_revokes), - atomic_read(&gl->gl_ref), gl->gl_hold_time); + (int)gl->gl_lockref.count, gl->gl_hold_time); list_for_each_entry(gh, &gl->gl_holders, gh_list) { error = dump_holder(seq, gh); @@ -1902,7 +1899,7 @@ static int gfs2_glock_iter_next(struct gfs2_glock_iter *gi) gi->nhash = 0; } /* Skip entries for other sb and dead entries */ - } while (gi->sdp != gi->gl->gl_sbd || atomic_read(&gi->gl->gl_ref) == 0); + } while (gi->sdp != gi->gl->gl_sbd || __lockref_is_dead(&gl->gl_lockref)); return 0; } diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h index 69f66e3d22bf..6647d77366ba 100644 --- a/fs/gfs2/glock.h +++ b/fs/gfs2/glock.h @@ -181,8 +181,6 @@ static inline struct address_space *gfs2_glock2aspace(struct gfs2_glock *gl) extern int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number, const struct gfs2_glock_operations *glops, int create, struct gfs2_glock **glp); -extern void gfs2_glock_hold(struct gfs2_glock *gl); -extern void gfs2_glock_put_nolock(struct gfs2_glock *gl); extern void gfs2_glock_put(struct gfs2_glock *gl); extern void gfs2_holder_init(struct gfs2_glock *gl, unsigned int state, unsigned flags, struct gfs2_holder *gh); diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index e2e0a90396e7..db908f697139 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c @@ -525,9 +525,9 @@ static void iopen_go_callback(struct gfs2_glock *gl, bool remote) if (gl->gl_demote_state == LM_ST_UNLOCKED && gl->gl_state == LM_ST_SHARED && ip) { - gfs2_glock_hold(gl); + gl->gl_lockref.count++; if (queue_work(gfs2_delete_workqueue, &gl->gl_delete) == 0) - gfs2_glock_put_nolock(gl); + gl->gl_lockref.count--; } } diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 2ab4f8d8f4c4..bb88e417231f 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -21,6 +21,7 @@ #include #include #include +#include #define DIO_WAIT 0x00000010 #define DIO_METADATA 0x00000020 @@ -321,9 +322,9 @@ struct gfs2_glock { struct gfs2_sbd *gl_sbd; unsigned 
long gl_flags; /* GLF_... */ struct lm_lockname gl_name; - atomic_t gl_ref; - spinlock_t gl_spin; + struct lockref gl_lockref; +#define gl_spin gl_lockref.lock /* State fields protected by gl_spin */ unsigned int gl_state:2, /* Current state */ diff --git a/include/linux/lockref.h b/include/linux/lockref.h index f279ed9a9163..13dfd36a3294 100644 --- a/include/linux/lockref.h +++ b/include/linux/lockref.h @@ -36,4 +36,10 @@ extern int lockref_put_or_lock(struct lockref *); extern void lockref_mark_dead(struct lockref *); extern int lockref_get_not_dead(struct lockref *); +/* Must be called under spinlock for reliable results */ +static inline int __lockref_is_dead(const struct lockref *l) +{ + return ((int)l->count < 0); +} + #endif /* __LINUX_LOCKREF_H */ diff --git a/lib/lockref.c b/lib/lockref.c index e2cd2c0a8821..8ff162fe3413 100644 --- a/lib/lockref.c +++ b/lib/lockref.c @@ -136,6 +136,7 @@ void lockref_mark_dead(struct lockref *lockref) assert_spin_locked(&lockref->lock); lockref->count = -128; } +EXPORT_SYMBOL(lockref_mark_dead); /** * lockref_get_not_dead - Increments count unless the ref is dead -- cgit v1.2.3-70-g09d2 From 9b9f039d570bddc1653cda2e38f4331dcacfcde5 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Fri, 1 Nov 2013 14:52:06 -0400 Subject: GFS2: Use reflink for quota data cache This patch adds reflink support to the quota data cache. It looks a bit strange because we still don't have a sensible split in the lookup by id and the lru list. That is coming in later patches though. The intent here is just to swap the current ref count for reflinks in all cases with as little as possible other change. 
Signed-off-by: Steven Whitehouse Signed-off-by: Abhijith Das Tested-by: Abhijith Das --- fs/gfs2/incore.h | 2 +- fs/gfs2/quota.c | 42 ++++++++++++++++++++++++++++-------------- 2 files changed, 29 insertions(+), 15 deletions(-) diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index bb88e417231f..9d778044cc6e 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -422,7 +422,7 @@ struct gfs2_quota_data { struct list_head qd_list; struct list_head qd_reclaim; - atomic_t qd_count; + struct lockref qd_lockref; struct kqid qd_id; unsigned long qd_flags; /* QDF_... */ diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index 4a9726aa191f..ed089118c171 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -50,6 +50,7 @@ #include #include #include +#include #include "gfs2.h" #include "incore.h" @@ -148,7 +149,8 @@ static int qd_alloc(struct gfs2_sbd *sdp, struct kqid qid, if (!qd) return -ENOMEM; - atomic_set(&qd->qd_count, 1); + qd->qd_lockref.count = 1; + spin_lock_init(&qd->qd_lockref.lock); qd->qd_id = qid; qd->qd_slot = -1; INIT_LIST_HEAD(&qd->qd_reclaim); @@ -180,13 +182,12 @@ static int qd_get(struct gfs2_sbd *sdp, struct kqid qid, spin_lock(&qd_lru_lock); list_for_each_entry(qd, &sdp->sd_quota_list, qd_list) { if (qid_eq(qd->qd_id, qid)) { - if (!atomic_read(&qd->qd_count) && - !list_empty(&qd->qd_reclaim)) { + lockref_get(&qd->qd_lockref); + if (!list_empty(&qd->qd_reclaim)) { /* Remove it from reclaim list */ list_del_init(&qd->qd_reclaim); atomic_dec(&qd_lru_count); } - atomic_inc(&qd->qd_count); found = 1; break; } @@ -222,18 +223,24 @@ static int qd_get(struct gfs2_sbd *sdp, struct kqid qid, static void qd_hold(struct gfs2_quota_data *qd) { struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd; - gfs2_assert(sdp, atomic_read(&qd->qd_count)); - atomic_inc(&qd->qd_count); + gfs2_assert(sdp, !__lockref_is_dead(&qd->qd_lockref)); + lockref_get(&qd->qd_lockref); } static void qd_put(struct gfs2_quota_data *qd) { - if (atomic_dec_and_lock(&qd->qd_count, &qd_lru_lock)) { + 
spin_lock(&qd_lru_lock); + + if (!lockref_put_or_lock(&qd->qd_lockref)) { + /* Add to the reclaim list */ list_add_tail(&qd->qd_reclaim, &qd_lru_list); atomic_inc(&qd_lru_count); - spin_unlock(&qd_lru_lock); + + spin_unlock(&qd->qd_lockref.lock); } + + spin_unlock(&qd_lru_lock); } static int slot_get(struct gfs2_quota_data *qd) @@ -394,8 +401,8 @@ static int qd_check_sync(struct gfs2_sbd *sdp, struct gfs2_quota_data *qd, list_move_tail(&qd->qd_list, &sdp->sd_quota_list); set_bit(QDF_LOCKED, &qd->qd_flags); - gfs2_assert_warn(sdp, atomic_read(&qd->qd_count)); - atomic_inc(&qd->qd_count); + gfs2_assert_warn(sdp, !__lockref_is_dead(&qd->qd_lockref)); + lockref_get(&qd->qd_lockref); qd->qd_change_sync = qd->qd_change; gfs2_assert_warn(sdp, qd->qd_slot_count); qd->qd_slot_count++; @@ -1303,15 +1310,22 @@ void gfs2_quota_cleanup(struct gfs2_sbd *sdp) while (!list_empty(head)) { qd = list_entry(head->prev, struct gfs2_quota_data, qd_list); - if (atomic_read(&qd->qd_count) > 1 || - (atomic_read(&qd->qd_count) && - !test_bit(QDF_CHANGE, &qd->qd_flags))) { + /* + * To be removed in due course... 
we should be able to + * ensure that all refs to the qd have done by this point + * so that this rather odd test is not required + */ + spin_lock(&qd->qd_lockref.lock); + if (qd->qd_lockref.count > 1 || + (qd->qd_lockref.count && !test_bit(QDF_CHANGE, &qd->qd_flags))) { + spin_unlock(&qd->qd_lockref.lock); list_move(&qd->qd_list, head); spin_unlock(&qd_lru_lock); schedule(); spin_lock(&qd_lru_lock); continue; } + spin_unlock(&qd->qd_lockref.lock); list_del(&qd->qd_list); /* Also remove if this qd exists in the reclaim list */ @@ -1322,7 +1336,7 @@ void gfs2_quota_cleanup(struct gfs2_sbd *sdp) atomic_dec(&sdp->sd_quota_count); spin_unlock(&qd_lru_lock); - if (!atomic_read(&qd->qd_count)) { + if (!qd->qd_lockref.count) { gfs2_assert_warn(sdp, !qd->qd_change); gfs2_assert_warn(sdp, !qd->qd_slot_count); } else -- cgit v1.2.3-70-g09d2 From 7d80823e1d83e35977d77ae201bf63af3317ad0a Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Fri, 1 Nov 2013 14:52:08 -0400 Subject: GFS2: Rename quota qd_lru_lock qd_lock This is a straightforward rename which is in preparation for introducing the generic list_lru infrastructure in the following patch. 
Signed-off-by: Steven Whitehouse Signed-off-by: Abhijith Das Tested-by: Abhijith Das --- fs/gfs2/quota.c | 70 ++++++++++++++++++++++++++++----------------------------- 1 file changed, 35 insertions(+), 35 deletions(-) diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index ed089118c171..466516ac5e57 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -74,7 +74,7 @@ struct gfs2_quota_change_host { static LIST_HEAD(qd_lru_list); static atomic_t qd_lru_count = ATOMIC_INIT(0); -static DEFINE_SPINLOCK(qd_lru_lock); +static DEFINE_SPINLOCK(qd_lock); unsigned long gfs2_qd_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) @@ -87,7 +87,7 @@ unsigned long gfs2_qd_shrink_scan(struct shrinker *shrink, if (!(sc->gfp_mask & __GFP_FS)) return SHRINK_STOP; - spin_lock(&qd_lru_lock); + spin_lock(&qd_lock); while (nr_to_scan && !list_empty(&qd_lru_list)) { qd = list_entry(qd_lru_list.next, struct gfs2_quota_data, qd_reclaim); @@ -106,13 +106,13 @@ unsigned long gfs2_qd_shrink_scan(struct shrinker *shrink, /* Delete it from the common reclaim list */ list_del_init(&qd->qd_reclaim); atomic_dec(&qd_lru_count); - spin_unlock(&qd_lru_lock); + spin_unlock(&qd_lock); kmem_cache_free(gfs2_quotad_cachep, qd); - spin_lock(&qd_lru_lock); + spin_lock(&qd_lock); nr_to_scan--; freed++; } - spin_unlock(&qd_lru_lock); + spin_unlock(&qd_lock); return freed; } @@ -179,7 +179,7 @@ static int qd_get(struct gfs2_sbd *sdp, struct kqid qid, for (;;) { found = 0; - spin_lock(&qd_lru_lock); + spin_lock(&qd_lock); list_for_each_entry(qd, &sdp->sd_quota_list, qd_list) { if (qid_eq(qd->qd_id, qid)) { lockref_get(&qd->qd_lockref); @@ -203,7 +203,7 @@ static int qd_get(struct gfs2_sbd *sdp, struct kqid qid, new_qd = NULL; } - spin_unlock(&qd_lru_lock); + spin_unlock(&qd_lock); if (qd) { if (new_qd) { @@ -229,7 +229,7 @@ static void qd_hold(struct gfs2_quota_data *qd) static void qd_put(struct gfs2_quota_data *qd) { - spin_lock(&qd_lru_lock); + spin_lock(&qd_lock); if 
(!lockref_put_or_lock(&qd->qd_lockref)) { @@ -240,7 +240,7 @@ static void qd_put(struct gfs2_quota_data *qd) spin_unlock(&qd->qd_lockref.lock); } - spin_unlock(&qd_lru_lock); + spin_unlock(&qd_lock); } static int slot_get(struct gfs2_quota_data *qd) @@ -249,10 +249,10 @@ static int slot_get(struct gfs2_quota_data *qd) unsigned int c, o = 0, b; unsigned char byte = 0; - spin_lock(&qd_lru_lock); + spin_lock(&qd_lock); if (qd->qd_slot_count++) { - spin_unlock(&qd_lru_lock); + spin_unlock(&qd_lock); return 0; } @@ -276,13 +276,13 @@ found: sdp->sd_quota_bitmap[c][o] |= 1 << b; - spin_unlock(&qd_lru_lock); + spin_unlock(&qd_lock); return 0; fail: qd->qd_slot_count--; - spin_unlock(&qd_lru_lock); + spin_unlock(&qd_lock); return -ENOSPC; } @@ -290,10 +290,10 @@ static void slot_hold(struct gfs2_quota_data *qd) { struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd; - spin_lock(&qd_lru_lock); + spin_lock(&qd_lock); gfs2_assert(sdp, qd->qd_slot_count); qd->qd_slot_count++; - spin_unlock(&qd_lru_lock); + spin_unlock(&qd_lock); } static void gfs2_icbit_munge(struct gfs2_sbd *sdp, unsigned char **bitmap, @@ -320,13 +320,13 @@ static void slot_put(struct gfs2_quota_data *qd) { struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd; - spin_lock(&qd_lru_lock); + spin_lock(&qd_lock); gfs2_assert(sdp, qd->qd_slot_count); if (!--qd->qd_slot_count) { gfs2_icbit_munge(sdp, sdp->sd_quota_bitmap, qd->qd_slot, 0); qd->qd_slot = -1; } - spin_unlock(&qd_lru_lock); + spin_unlock(&qd_lock); } static int bh_get(struct gfs2_quota_data *qd) @@ -420,7 +420,7 @@ static int qd_fish(struct gfs2_sbd *sdp, struct gfs2_quota_data **qdp) if (sdp->sd_vfs->s_flags & MS_RDONLY) return 0; - spin_lock(&qd_lru_lock); + spin_lock(&qd_lock); list_for_each_entry(qd, &sdp->sd_quota_list, qd_list) { found = qd_check_sync(sdp, qd, &sdp->sd_quota_sync_gen); @@ -431,7 +431,7 @@ static int qd_fish(struct gfs2_sbd *sdp, struct gfs2_quota_data **qdp) if (!found) qd = NULL; - spin_unlock(&qd_lru_lock); + spin_unlock(&qd_lock); if (qd) { 
gfs2_assert_warn(sdp, qd->qd_change_sync); @@ -598,9 +598,9 @@ static void do_qc(struct gfs2_quota_data *qd, s64 change) x = be64_to_cpu(qc->qc_change) + change; qc->qc_change = cpu_to_be64(x); - spin_lock(&qd_lru_lock); + spin_lock(&qd_lock); qd->qd_change = x; - spin_unlock(&qd_lru_lock); + spin_unlock(&qd_lock); if (!x) { gfs2_assert_warn(sdp, test_bit(QDF_CHANGE, &qd->qd_flags)); @@ -972,9 +972,9 @@ static int need_sync(struct gfs2_quota_data *qd) if (!qd->qd_qb.qb_limit) return 0; - spin_lock(&qd_lru_lock); + spin_lock(&qd_lock); value = qd->qd_change; - spin_unlock(&qd_lru_lock); + spin_unlock(&qd_lock); spin_lock(&gt->gt_spin); num = gt->gt_quota_scale_num; @@ -1019,9 +1019,9 @@ void gfs2_quota_unlock(struct gfs2_inode *ip) if (!sync) continue; - spin_lock(&qd_lru_lock); + spin_lock(&qd_lock); found = qd_check_sync(sdp, qd, NULL); - spin_unlock(&qd_lru_lock); + spin_unlock(&qd_lock); if (!found) continue; @@ -1083,9 +1083,9 @@ int gfs2_quota_check(struct gfs2_inode *ip, kuid_t uid, kgid_t gid) continue; value = (s64)be64_to_cpu(qd->qd_qb.qb_value); - spin_lock(&qd_lru_lock); + spin_lock(&qd_lock); value += qd->qd_change; - spin_unlock(&qd_lru_lock); + spin_unlock(&qd_lock); if (be64_to_cpu(qd->qd_qb.qb_limit) && (s64)be64_to_cpu(qd->qd_qb.qb_limit) < value) { print_message(qd, "exceeded"); @@ -1276,11 +1276,11 @@ int gfs2_quota_init(struct gfs2_sbd *sdp) qd->qd_slot = slot; qd->qd_slot_count = 1; - spin_lock(&qd_lru_lock); + spin_lock(&qd_lock); gfs2_icbit_munge(sdp, sdp->sd_quota_bitmap, slot, 1); list_add(&qd->qd_list, &sdp->sd_quota_list); atomic_inc(&sdp->sd_quota_count); - spin_unlock(&qd_lru_lock); + spin_unlock(&qd_lock); found++; } @@ -1306,7 +1306,7 @@ void gfs2_quota_cleanup(struct gfs2_sbd *sdp) struct gfs2_quota_data *qd; unsigned int x; - spin_lock(&qd_lru_lock); + spin_lock(&qd_lock); while (!list_empty(head)) { qd = list_entry(head->prev, struct gfs2_quota_data, qd_list); @@ -1320,9 +1320,9 @@ void gfs2_quota_cleanup(struct gfs2_sbd *sdp) 
(qd->qd_lockref.count && !test_bit(QDF_CHANGE, &qd->qd_flags))) { spin_unlock(&qd->qd_lockref.lock); list_move(&qd->qd_list, head); - spin_unlock(&qd_lru_lock); + spin_unlock(&qd_lock); schedule(); - spin_lock(&qd_lru_lock); + spin_lock(&qd_lock); continue; } spin_unlock(&qd->qd_lockref.lock); @@ -1334,7 +1334,7 @@ void gfs2_quota_cleanup(struct gfs2_sbd *sdp) atomic_dec(&qd_lru_count); } atomic_dec(&sdp->sd_quota_count); - spin_unlock(&qd_lru_lock); + spin_unlock(&qd_lock); if (!qd->qd_lockref.count) { gfs2_assert_warn(sdp, !qd->qd_change); @@ -1346,9 +1346,9 @@ void gfs2_quota_cleanup(struct gfs2_sbd *sdp) gfs2_glock_put(qd->qd_gl); kmem_cache_free(gfs2_quotad_cachep, qd); - spin_lock(&qd_lru_lock); + spin_lock(&qd_lock); } - spin_unlock(&qd_lru_lock); + spin_unlock(&qd_lock); gfs2_assert_warn(sdp, !atomic_read(&sdp->sd_quota_count)); -- cgit v1.2.3-70-g09d2 From 2147dbfd059eb7fefcfd5934f74f25f0693d4a1f Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Mon, 4 Nov 2013 10:15:08 +0000 Subject: GFS2: Use generic list_lru for quota By using the generic list_lru code, we can now separate the per-sb quota list locking from the lru locking. The lru lock is made into the inner-most lock. As a result of this new lock order, we may occasionally see items on the per-sb quota list which are "dead" so that the two places where we traverse that list are updated to take account of that. As a result of this patch, the gfs2 quota shrinker is now NUMA zone aware, and we are also laying the foundations for further improvements in due course. 
Signed-off-by: Steven Whitehouse Signed-off-by: Abhijith Das Tested-by: Abhijith Das Cc: Dave Chinner --- fs/gfs2/incore.h | 5 +-- fs/gfs2/main.c | 19 ++++----- fs/gfs2/quota.c | 118 ++++++++++++++++++++++++++++++++----------------------- fs/gfs2/quota.h | 9 ++--- 4 files changed, 85 insertions(+), 66 deletions(-) diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 9d778044cc6e..ba1ea67f4eeb 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -420,11 +420,10 @@ enum { struct gfs2_quota_data { struct list_head qd_list; - struct list_head qd_reclaim; - + struct kqid qd_id; struct lockref qd_lockref; + struct list_head qd_lru; - struct kqid qd_id; unsigned long qd_flags; /* QDF_... */ s64 qd_change; diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c index 351586e24e30..0650db2541ef 100644 --- a/fs/gfs2/main.c +++ b/fs/gfs2/main.c @@ -31,12 +31,6 @@ struct workqueue_struct *gfs2_control_wq; -static struct shrinker qd_shrinker = { - .count_objects = gfs2_qd_shrink_count, - .scan_objects = gfs2_qd_shrink_scan, - .seeks = DEFAULT_SEEKS, -}; - static void gfs2_init_inode_once(void *foo) { struct gfs2_inode *ip = foo; @@ -87,6 +81,10 @@ static int __init init_gfs2_fs(void) if (error) return error; + error = list_lru_init(&gfs2_qd_lru); + if (error) + goto fail_lru; + error = gfs2_glock_init(); if (error) goto fail; @@ -139,7 +137,7 @@ static int __init init_gfs2_fs(void) if (!gfs2_rsrv_cachep) goto fail; - register_shrinker(&qd_shrinker); + register_shrinker(&gfs2_qd_shrinker); error = register_filesystem(&gfs2_fs_type); if (error) @@ -179,7 +177,9 @@ fail_wq: fail_unregister: unregister_filesystem(&gfs2_fs_type); fail: - unregister_shrinker(&qd_shrinker); + list_lru_destroy(&gfs2_qd_lru); +fail_lru: + unregister_shrinker(&gfs2_qd_shrinker); gfs2_glock_exit(); if (gfs2_rsrv_cachep) @@ -214,13 +214,14 @@ fail: static void __exit exit_gfs2_fs(void) { - unregister_shrinker(&qd_shrinker); + unregister_shrinker(&gfs2_qd_shrinker); gfs2_glock_exit(); gfs2_unregister_debugfs(); 
unregister_filesystem(&gfs2_fs_type); unregister_filesystem(&gfs2meta_fs_type); destroy_workqueue(gfs_recovery_wq); destroy_workqueue(gfs2_control_wq); + list_lru_destroy(&gfs2_qd_lru); rcu_barrier(); diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index 466516ac5e57..453b50eaddec 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -51,6 +51,7 @@ #include #include #include +#include #include "gfs2.h" #include "incore.h" @@ -72,29 +73,25 @@ struct gfs2_quota_change_host { struct kqid qc_id; }; -static LIST_HEAD(qd_lru_list); -static atomic_t qd_lru_count = ATOMIC_INIT(0); +/* Lock order: qd_lock -> qd->lockref.lock -> lru lock */ static DEFINE_SPINLOCK(qd_lock); +struct list_lru gfs2_qd_lru; -unsigned long gfs2_qd_shrink_scan(struct shrinker *shrink, - struct shrink_control *sc) +static void gfs2_qd_dispose(struct list_head *list) { struct gfs2_quota_data *qd; struct gfs2_sbd *sdp; - int nr_to_scan = sc->nr_to_scan; - long freed = 0; - if (!(sc->gfp_mask & __GFP_FS)) - return SHRINK_STOP; - - spin_lock(&qd_lock); - while (nr_to_scan && !list_empty(&qd_lru_list)) { - qd = list_entry(qd_lru_list.next, - struct gfs2_quota_data, qd_reclaim); + while (!list_empty(list)) { + qd = list_entry(list->next, struct gfs2_quota_data, qd_lru); sdp = qd->qd_gl->gl_sbd; + list_del(&qd->qd_lru); + /* Free from the filesystem-specific list */ + spin_lock(&qd_lock); list_del(&qd->qd_list); + spin_unlock(&qd_lock); gfs2_assert_warn(sdp, !qd->qd_change); gfs2_assert_warn(sdp, !qd->qd_slot_count); @@ -104,24 +101,59 @@ unsigned long gfs2_qd_shrink_scan(struct shrinker *shrink, atomic_dec(&sdp->sd_quota_count); /* Delete it from the common reclaim list */ - list_del_init(&qd->qd_reclaim); - atomic_dec(&qd_lru_count); - spin_unlock(&qd_lock); kmem_cache_free(gfs2_quotad_cachep, qd); - spin_lock(&qd_lock); - nr_to_scan--; - freed++; } - spin_unlock(&qd_lock); +} + + +static enum lru_status gfs2_qd_isolate(struct list_head *item, spinlock_t *lock, void *arg) +{ + struct list_head *dispose = 
arg; + struct gfs2_quota_data *qd = list_entry(item, struct gfs2_quota_data, qd_lru); + + if (!spin_trylock(&qd->qd_lockref.lock)) + return LRU_SKIP; + + if (qd->qd_lockref.count == 0) { + lockref_mark_dead(&qd->qd_lockref); + list_move(&qd->qd_lru, dispose); + } + + spin_unlock(&qd->qd_lockref.lock); + return LRU_REMOVED; +} + +static unsigned long gfs2_qd_shrink_scan(struct shrinker *shrink, + struct shrink_control *sc) +{ + LIST_HEAD(dispose); + unsigned long freed; + + if (!(sc->gfp_mask & __GFP_FS)) + return SHRINK_STOP; + + freed = list_lru_walk_node(&gfs2_qd_lru, sc->nid, gfs2_qd_isolate, + &dispose, &sc->nr_to_scan); + + gfs2_qd_dispose(&dispose); + return freed; } -unsigned long gfs2_qd_shrink_count(struct shrinker *shrink, - struct shrink_control *sc) +static unsigned long gfs2_qd_shrink_count(struct shrinker *shrink, + struct shrink_control *sc) { - return vfs_pressure_ratio(atomic_read(&qd_lru_count)); + return vfs_pressure_ratio(list_lru_count_node(&gfs2_qd_lru, sc->nid)); } +struct shrinker gfs2_qd_shrinker = { + .count_objects = gfs2_qd_shrink_count, + .scan_objects = gfs2_qd_shrink_scan, + .seeks = DEFAULT_SEEKS, + .flags = SHRINKER_NUMA_AWARE, +}; + + static u64 qd2index(struct gfs2_quota_data *qd) { struct kqid qid = qd->qd_id; @@ -153,7 +185,7 @@ static int qd_alloc(struct gfs2_sbd *sdp, struct kqid qid, spin_lock_init(&qd->qd_lockref.lock); qd->qd_id = qid; qd->qd_slot = -1; - INIT_LIST_HEAD(&qd->qd_reclaim); + INIT_LIST_HEAD(&qd->qd_lru); error = gfs2_glock_get(sdp, qd2index(qd), &gfs2_quota_glops, CREATE, &qd->qd_gl); @@ -181,13 +213,9 @@ static int qd_get(struct gfs2_sbd *sdp, struct kqid qid, found = 0; spin_lock(&qd_lock); list_for_each_entry(qd, &sdp->sd_quota_list, qd_list) { - if (qid_eq(qd->qd_id, qid)) { - lockref_get(&qd->qd_lockref); - if (!list_empty(&qd->qd_reclaim)) { - /* Remove it from reclaim list */ - list_del_init(&qd->qd_reclaim); - atomic_dec(&qd_lru_count); - } + if (qid_eq(qd->qd_id, qid) && + 
lockref_get_not_dead(&qd->qd_lockref)) { + list_lru_del(&gfs2_qd_lru, &qd->qd_lru); found = 1; break; } @@ -229,18 +257,13 @@ static void qd_hold(struct gfs2_quota_data *qd) static void qd_put(struct gfs2_quota_data *qd) { - spin_lock(&qd_lock); - - if (!lockref_put_or_lock(&qd->qd_lockref)) { + if (lockref_put_or_lock(&qd->qd_lockref)) + return; - /* Add to the reclaim list */ - list_add_tail(&qd->qd_reclaim, &qd_lru_list); - atomic_inc(&qd_lru_count); + qd->qd_lockref.count = 0; + list_lru_add(&gfs2_qd_lru, &qd->qd_lru); + spin_unlock(&qd->qd_lockref.lock); - spin_unlock(&qd->qd_lockref.lock); - } - - spin_unlock(&qd_lock); } static int slot_get(struct gfs2_quota_data *qd) @@ -398,11 +421,11 @@ static int qd_check_sync(struct gfs2_sbd *sdp, struct gfs2_quota_data *qd, (sync_gen && (qd->qd_sync_gen >= *sync_gen))) return 0; - list_move_tail(&qd->qd_list, &sdp->sd_quota_list); + if (!lockref_get_not_dead(&qd->qd_lockref)) + return 0; + list_move_tail(&qd->qd_list, &sdp->sd_quota_list); set_bit(QDF_LOCKED, &qd->qd_flags); - gfs2_assert_warn(sdp, !__lockref_is_dead(&qd->qd_lockref)); - lockref_get(&qd->qd_lockref); qd->qd_change_sync = qd->qd_change; gfs2_assert_warn(sdp, qd->qd_slot_count); qd->qd_slot_count++; @@ -1329,10 +1352,7 @@ void gfs2_quota_cleanup(struct gfs2_sbd *sdp) list_del(&qd->qd_list); /* Also remove if this qd exists in the reclaim list */ - if (!list_empty(&qd->qd_reclaim)) { - list_del_init(&qd->qd_reclaim); - atomic_dec(&qd_lru_count); - } + list_lru_del(&gfs2_qd_lru, &qd->qd_lru); atomic_dec(&sdp->sd_quota_count); spin_unlock(&qd_lock); @@ -1487,7 +1507,7 @@ static int gfs2_quota_get_xstate(struct super_block *sb, } fqs->qs_uquota.qfs_nextents = 1; /* unsupported */ fqs->qs_gquota = fqs->qs_uquota; /* its the same inode in both cases */ - fqs->qs_incoredqs = atomic_read(&qd_lru_count); + fqs->qs_incoredqs = list_lru_count(&gfs2_qd_lru); return 0; } diff --git a/fs/gfs2/quota.h b/fs/gfs2/quota.h index 0f64d9deb1b0..96e4f34a03b0 100644 --- 
a/fs/gfs2/quota.h +++ b/fs/gfs2/quota.h @@ -10,9 +10,10 @@ #ifndef __QUOTA_DOT_H__ #define __QUOTA_DOT_H__ +#include <linux/list_lru.h> + struct gfs2_inode; struct gfs2_sbd; -struct shrink_control; #define NO_UID_QUOTA_CHANGE INVALID_UID #define NO_GID_QUOTA_CHANGE INVALID_GID @@ -53,10 +54,8 @@ static inline int gfs2_quota_lock_check(struct gfs2_inode *ip) return ret; } -extern unsigned long gfs2_qd_shrink_count(struct shrinker *shrink, - struct shrink_control *sc); -extern unsigned long gfs2_qd_shrink_scan(struct shrinker *shrink, - struct shrink_control *sc); extern const struct quotactl_ops gfs2_quotactl_ops; +extern struct shrinker gfs2_qd_shrinker; +extern struct list_lru gfs2_qd_lru; #endif /* __QUOTA_DOT_H__ */ -- cgit v1.2.3-70-g09d2