diff options
-rw-r--r-- | fs/xfs/libxfs/xfs_bmap.c | 72 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_da_btree.c | 8 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_format.h | 62 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_ialloc.c | 6 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_sb.c | 12 | ||||
-rw-r--r-- | fs/xfs/xfs_bmap_util.c | 31 | ||||
-rw-r--r-- | fs/xfs/xfs_error.c | 2 | ||||
-rw-r--r-- | fs/xfs/xfs_file.c | 84 | ||||
-rw-r--r-- | fs/xfs/xfs_filestream.c | 2 | ||||
-rw-r--r-- | fs/xfs/xfs_fsops.c | 20 | ||||
-rw-r--r-- | fs/xfs/xfs_inode.c | 532 | ||||
-rw-r--r-- | fs/xfs/xfs_inode.h | 36 | ||||
-rw-r--r-- | fs/xfs/xfs_ioctl.c | 5 | ||||
-rw-r--r-- | fs/xfs/xfs_iomap.c | 3 | ||||
-rw-r--r-- | fs/xfs/xfs_iops.c | 101 | ||||
-rw-r--r-- | fs/xfs/xfs_linux.h | 9 | ||||
-rw-r--r-- | fs/xfs/xfs_log_recover.c | 4 | ||||
-rw-r--r-- | fs/xfs/xfs_mount.c | 918 | ||||
-rw-r--r-- | fs/xfs/xfs_mount.h | 95 | ||||
-rw-r--r-- | fs/xfs/xfs_mru_cache.c | 2 | ||||
-rw-r--r-- | fs/xfs/xfs_pnfs.c | 4 | ||||
-rw-r--r-- | fs/xfs/xfs_qm.c | 5 | ||||
-rw-r--r-- | fs/xfs/xfs_super.c | 107 | ||||
-rw-r--r-- | fs/xfs/xfs_super.h | 2 | ||||
-rw-r--r-- | fs/xfs/xfs_trace.h | 23 | ||||
-rw-r--r-- | fs/xfs/xfs_trans.c | 234 |
26 files changed, 890 insertions, 1489 deletions
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index e8696f5a8041..aeffeaaac0ec 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -244,30 +244,6 @@ xfs_bmap_forkoff_reset( } } -/* - * Debug/sanity checking code - */ - -STATIC int -xfs_bmap_sanity_check( - struct xfs_mount *mp, - struct xfs_buf *bp, - int level) -{ - struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); - - if (block->bb_magic != cpu_to_be32(XFS_BMAP_CRC_MAGIC) && - block->bb_magic != cpu_to_be32(XFS_BMAP_MAGIC)) - return 0; - - if (be16_to_cpu(block->bb_level) != level || - be16_to_cpu(block->bb_numrecs) == 0 || - be16_to_cpu(block->bb_numrecs) > mp->m_bmap_dmxr[level != 0]) - return 0; - - return 1; -} - #ifdef DEBUG STATIC struct xfs_buf * xfs_bmap_get_bp( @@ -410,9 +386,6 @@ xfs_bmap_check_leaf_extents( goto error_norelse; } block = XFS_BUF_TO_BLOCK(bp); - XFS_WANT_CORRUPTED_GOTO(mp, - xfs_bmap_sanity_check(mp, bp, level), - error0); if (level == 0) break; @@ -1312,8 +1285,6 @@ xfs_bmap_read_extents( if (error) return error; block = XFS_BUF_TO_BLOCK(bp); - XFS_WANT_CORRUPTED_GOTO(mp, - xfs_bmap_sanity_check(mp, bp, level), error0); if (level == 0) break; pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]); @@ -1346,9 +1317,6 @@ xfs_bmap_read_extents( XFS_ERRLEVEL_LOW, ip->i_mount, block); goto error0; } - XFS_WANT_CORRUPTED_GOTO(mp, - xfs_bmap_sanity_check(mp, bp, 0), - error0); /* * Read-ahead the next leaf block, if any. */ @@ -2215,9 +2183,8 @@ xfs_bmap_add_extent_delay_real( diff = (int)(temp + temp2 - startblockval(PREV.br_startblock) - (bma->cur ? bma->cur->bc_private.b.allocated : 0)); if (diff > 0) { - error = xfs_icsb_modify_counters(bma->ip->i_mount, - XFS_SBS_FDBLOCKS, - -((int64_t)diff), 0); + error = xfs_mod_fdblocks(bma->ip->i_mount, + -((int64_t)diff), false); ASSERT(!error); if (error) goto done; @@ -2268,9 +2235,8 @@ xfs_bmap_add_extent_delay_real( temp += bma->cur->bc_private.b.allocated; ASSERT(temp <= da_old); if (temp < da_old) - xfs_icsb_modify_counters(bma->ip->i_mount, - XFS_SBS_FDBLOCKS, - (int64_t)(da_old - temp), 0); + xfs_mod_fdblocks(bma->ip->i_mount, + (int64_t)(da_old - temp), false); } /* clear out the allocated field, done with it now in any case. */ @@ -2948,8 +2914,8 @@ xfs_bmap_add_extent_hole_delay( } if (oldlen != newlen) { ASSERT(oldlen > newlen); - xfs_icsb_modify_counters(ip->i_mount, XFS_SBS_FDBLOCKS, - (int64_t)(oldlen - newlen), 0); + xfs_mod_fdblocks(ip->i_mount, (int64_t)(oldlen - newlen), + false); /* * Nothing to do for disk quota accounting here. */ @@ -4166,18 +4132,15 @@ xfs_bmapi_reserve_delalloc( ASSERT(indlen > 0); if (rt) { - error = xfs_mod_incore_sb(mp, XFS_SBS_FREXTENTS, - -((int64_t)extsz), 0); + error = xfs_mod_frextents(mp, -((int64_t)extsz)); } else { - error = xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, - -((int64_t)alen), 0); + error = xfs_mod_fdblocks(mp, -((int64_t)alen), false); } if (error) goto out_unreserve_quota; - error = xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, - -((int64_t)indlen), 0); + error = xfs_mod_fdblocks(mp, -((int64_t)indlen), false); if (error) goto out_unreserve_blocks; @@ -4204,9 +4167,9 @@ xfs_bmapi_reserve_delalloc( out_unreserve_blocks: if (rt) - xfs_mod_incore_sb(mp, XFS_SBS_FREXTENTS, extsz, 0); + xfs_mod_frextents(mp, extsz); else - xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, alen, 0); + xfs_mod_fdblocks(mp, alen, false); out_unreserve_quota: if (XFS_IS_QUOTA_ON(mp)) xfs_trans_unreserve_quota_nblks(NULL, ip, (long)alen, 0, rt ? @@ -5019,10 +4982,8 @@ xfs_bmap_del_extent( * Nothing to do for disk quota accounting here. */ ASSERT(da_old >= da_new); - if (da_old > da_new) { - xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, - (int64_t)(da_old - da_new), 0); - } + if (da_old > da_new) + xfs_mod_fdblocks(mp, (int64_t)(da_old - da_new), false); done: *logflagsp = flags; return error; @@ -5291,14 +5252,13 @@ xfs_bunmapi( rtexts = XFS_FSB_TO_B(mp, del.br_blockcount); do_div(rtexts, mp->m_sb.sb_rextsize); - xfs_mod_incore_sb(mp, XFS_SBS_FREXTENTS, - (int64_t)rtexts, 0); + xfs_mod_frextents(mp, (int64_t)rtexts); (void)xfs_trans_reserve_quota_nblks(NULL, ip, -((long)del.br_blockcount), 0, XFS_QMOPT_RES_RTBLKS); } else { - xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, - (int64_t)del.br_blockcount, 0); + xfs_mod_fdblocks(mp, (int64_t)del.br_blockcount, + false); (void)xfs_trans_reserve_quota_nblks(NULL, ip, -((long)del.br_blockcount), 0, XFS_QMOPT_RES_REGBLKS); diff --git a/fs/xfs/libxfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c index 9cb0115c6bd1..2385f8cd08ab 100644 --- a/fs/xfs/libxfs/xfs_da_btree.c +++ b/fs/xfs/libxfs/xfs_da_btree.c @@ -538,12 +538,12 @@ xfs_da3_root_split( oldroot = blk1->bp->b_addr; if (oldroot->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC) || oldroot->hdr.info.magic == cpu_to_be16(XFS_DA3_NODE_MAGIC)) { - struct xfs_da3_icnode_hdr nodehdr; + struct xfs_da3_icnode_hdr icnodehdr; - dp->d_ops->node_hdr_from_disk(&nodehdr, oldroot); + dp->d_ops->node_hdr_from_disk(&icnodehdr, oldroot); btree = dp->d_ops->node_tree_p(oldroot); - size = (int)((char *)&btree[nodehdr.count] - (char *)oldroot); - level = nodehdr.level; + size = (int)((char *)&btree[icnodehdr.count] - (char *)oldroot); + level = icnodehdr.level; /* * we are about to copy oldroot to bp, so set up the type diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h index 8eb718979383..4daaa662337b 100644 --- a/fs/xfs/libxfs/xfs_format.h +++ b/fs/xfs/libxfs/xfs_format.h @@ -264,68 +264,6 @@ typedef struct xfs_dsb { /* must be padded to 64 bit alignment */ } xfs_dsb_t; -/* - * Sequence number values for the fields. - */ -typedef enum { - XFS_SBS_MAGICNUM, XFS_SBS_BLOCKSIZE, XFS_SBS_DBLOCKS, XFS_SBS_RBLOCKS, - XFS_SBS_REXTENTS, XFS_SBS_UUID, XFS_SBS_LOGSTART, XFS_SBS_ROOTINO, - XFS_SBS_RBMINO, XFS_SBS_RSUMINO, XFS_SBS_REXTSIZE, XFS_SBS_AGBLOCKS, - XFS_SBS_AGCOUNT, XFS_SBS_RBMBLOCKS, XFS_SBS_LOGBLOCKS, - XFS_SBS_VERSIONNUM, XFS_SBS_SECTSIZE, XFS_SBS_INODESIZE, - XFS_SBS_INOPBLOCK, XFS_SBS_FNAME, XFS_SBS_BLOCKLOG, - XFS_SBS_SECTLOG, XFS_SBS_INODELOG, XFS_SBS_INOPBLOG, XFS_SBS_AGBLKLOG, - XFS_SBS_REXTSLOG, XFS_SBS_INPROGRESS, XFS_SBS_IMAX_PCT, XFS_SBS_ICOUNT, - XFS_SBS_IFREE, XFS_SBS_FDBLOCKS, XFS_SBS_FREXTENTS, XFS_SBS_UQUOTINO, - XFS_SBS_GQUOTINO, XFS_SBS_QFLAGS, XFS_SBS_FLAGS, XFS_SBS_SHARED_VN, - XFS_SBS_INOALIGNMT, XFS_SBS_UNIT, XFS_SBS_WIDTH, XFS_SBS_DIRBLKLOG, - XFS_SBS_LOGSECTLOG, XFS_SBS_LOGSECTSIZE, XFS_SBS_LOGSUNIT, - XFS_SBS_FEATURES2, XFS_SBS_BAD_FEATURES2, XFS_SBS_FEATURES_COMPAT, - XFS_SBS_FEATURES_RO_COMPAT, XFS_SBS_FEATURES_INCOMPAT, - XFS_SBS_FEATURES_LOG_INCOMPAT, XFS_SBS_CRC, XFS_SBS_PAD, - XFS_SBS_PQUOTINO, XFS_SBS_LSN, - XFS_SBS_FIELDCOUNT -} xfs_sb_field_t; - -/* - * Mask values, defined based on the xfs_sb_field_t values. - * Only define the ones we're using. - */ -#define XFS_SB_MVAL(x) (1LL << XFS_SBS_ ## x) -#define XFS_SB_UUID XFS_SB_MVAL(UUID) -#define XFS_SB_FNAME XFS_SB_MVAL(FNAME) -#define XFS_SB_ROOTINO XFS_SB_MVAL(ROOTINO) -#define XFS_SB_RBMINO XFS_SB_MVAL(RBMINO) -#define XFS_SB_RSUMINO XFS_SB_MVAL(RSUMINO) -#define XFS_SB_VERSIONNUM XFS_SB_MVAL(VERSIONNUM) -#define XFS_SB_UQUOTINO XFS_SB_MVAL(UQUOTINO) -#define XFS_SB_GQUOTINO XFS_SB_MVAL(GQUOTINO) -#define XFS_SB_QFLAGS XFS_SB_MVAL(QFLAGS) -#define XFS_SB_SHARED_VN XFS_SB_MVAL(SHARED_VN) -#define XFS_SB_UNIT XFS_SB_MVAL(UNIT) -#define XFS_SB_WIDTH XFS_SB_MVAL(WIDTH) -#define XFS_SB_ICOUNT XFS_SB_MVAL(ICOUNT) -#define XFS_SB_IFREE XFS_SB_MVAL(IFREE) -#define XFS_SB_FDBLOCKS XFS_SB_MVAL(FDBLOCKS) -#define XFS_SB_FEATURES2 (XFS_SB_MVAL(FEATURES2) | \ - XFS_SB_MVAL(BAD_FEATURES2)) -#define XFS_SB_FEATURES_COMPAT XFS_SB_MVAL(FEATURES_COMPAT) -#define XFS_SB_FEATURES_RO_COMPAT XFS_SB_MVAL(FEATURES_RO_COMPAT) -#define XFS_SB_FEATURES_INCOMPAT XFS_SB_MVAL(FEATURES_INCOMPAT) -#define XFS_SB_FEATURES_LOG_INCOMPAT XFS_SB_MVAL(FEATURES_LOG_INCOMPAT) -#define XFS_SB_CRC XFS_SB_MVAL(CRC) -#define XFS_SB_PQUOTINO XFS_SB_MVAL(PQUOTINO) -#define XFS_SB_NUM_BITS ((int)XFS_SBS_FIELDCOUNT) -#define XFS_SB_ALL_BITS ((1LL << XFS_SB_NUM_BITS) - 1) -#define XFS_SB_MOD_BITS \ - (XFS_SB_UUID | XFS_SB_ROOTINO | XFS_SB_RBMINO | XFS_SB_RSUMINO | \ - XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO | XFS_SB_GQUOTINO | \ - XFS_SB_QFLAGS | XFS_SB_SHARED_VN | XFS_SB_UNIT | XFS_SB_WIDTH | \ - XFS_SB_ICOUNT | XFS_SB_IFREE | XFS_SB_FDBLOCKS | XFS_SB_FEATURES2 | \ - XFS_SB_FEATURES_COMPAT | XFS_SB_FEATURES_RO_COMPAT | \ - XFS_SB_FEATURES_INCOMPAT | XFS_SB_FEATURES_LOG_INCOMPAT | \ - XFS_SB_PQUOTINO) - /* * Misc. Flags - warning - these will be cleared by xfs_repair unless diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c index db0444893e96..07349a183a11 100644 --- a/fs/xfs/libxfs/xfs_ialloc.c +++ b/fs/xfs/libxfs/xfs_ialloc.c @@ -376,7 +376,8 @@ xfs_ialloc_ag_alloc( */ newlen = args.mp->m_ialloc_inos; if (args.mp->m_maxicount && - args.mp->m_sb.sb_icount + newlen > args.mp->m_maxicount) + percpu_counter_read(&args.mp->m_icount) + newlen > + args.mp->m_maxicount) return -ENOSPC; args.minlen = args.maxlen = args.mp->m_ialloc_blks; /* @@ -1340,7 +1341,8 @@ xfs_dialloc( * inode. */ if (mp->m_maxicount && - mp->m_sb.sb_icount + mp->m_ialloc_inos > mp->m_maxicount) { + percpu_counter_read(&mp->m_icount) + mp->m_ialloc_inos > + mp->m_maxicount) { noroom = 1; okalloc = 0; } diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c index f3ea02bf893e..dc4bfc5d88fc 100644 --- a/fs/xfs/libxfs/xfs_sb.c +++ b/fs/xfs/libxfs/xfs_sb.c @@ -735,17 +735,15 @@ xfs_initialize_perag_data( btree += pag->pagf_btreeblks; xfs_perag_put(pag); } - /* - * Overwrite incore superblock counters with just-read data - */ + + /* Overwrite incore superblock counters with just-read data */ spin_lock(&mp->m_sb_lock); sbp->sb_ifree = ifree; sbp->sb_icount = ialloc; sbp->sb_fdblocks = bfree + bfreelst + btree; spin_unlock(&mp->m_sb_lock); - /* Fixup the per-cpu counters as well. */ - xfs_icsb_reinit_counters(mp); + xfs_reinit_percpu_counters(mp); return 0; } @@ -763,6 +761,10 @@ xfs_log_sb( struct xfs_mount *mp = tp->t_mountp; struct xfs_buf *bp = xfs_trans_getsb(tp, mp, 0); + mp->m_sb.sb_icount = percpu_counter_sum(&mp->m_icount); + mp->m_sb.sb_ifree = percpu_counter_sum(&mp->m_ifree); + mp->m_sb.sb_fdblocks = percpu_counter_sum(&mp->m_fdblocks); + xfs_sb_to_disk(XFS_BUF_TO_SBP(bp), &mp->m_sb); xfs_trans_buf_set_type(tp, bp, XFS_BLFT_SB_BUF); xfs_trans_log_buf(tp, bp, 0, sizeof(struct xfs_dsb)); diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index fe1f11b96d0d..1bd539321799 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -1660,13 +1660,6 @@ xfs_swap_extent_flush( /* Verify O_DIRECT for ftmp */ if (VFS_I(ip)->i_mapping->nrpages) return -EINVAL; - - /* - * Don't try to swap extents on mmap()d files because we can't lock - * out races against page faults safely. - */ - if (mapping_mapped(VFS_I(ip)->i_mapping)) - return -EBUSY; return 0; } @@ -1694,13 +1687,14 @@ xfs_swap_extents( } /* - * Lock up the inodes against other IO and truncate to begin with. - * Then we can ensure the inodes are flushed and have no page cache - * safely. Once we have done this we can take the ilocks and do the rest - * of the checks. + * Lock the inodes against other IO, page faults and truncate to + * begin with. Then we can ensure the inodes are flushed and have no + * page cache safely. Once we have done this we can take the ilocks and + * do the rest of the checks. */ - lock_flags = XFS_IOLOCK_EXCL; + lock_flags = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL; xfs_lock_two_inodes(ip, tip, XFS_IOLOCK_EXCL); + xfs_lock_two_inodes(ip, tip, XFS_MMAPLOCK_EXCL); /* Verify that both files have the same format */ if ((ip->i_d.di_mode & S_IFMT) != (tip->i_d.di_mode & S_IFMT)) { @@ -1727,8 +1721,16 @@ xfs_swap_extents( xfs_trans_cancel(tp, 0); goto out_unlock; } + + /* + * Lock and join the inodes to the tansaction so that transaction commit + * or cancel will unlock the inodes from this point onwards. + */ xfs_lock_two_inodes(ip, tip, XFS_ILOCK_EXCL); lock_flags |= XFS_ILOCK_EXCL; + xfs_trans_ijoin(tp, ip, lock_flags); + xfs_trans_ijoin(tp, tip, lock_flags); + /* Verify all data are being swapped */ if (sxp->sx_offset != 0 || @@ -1781,9 +1783,6 @@ xfs_swap_extents( goto out_trans_cancel; } - xfs_trans_ijoin(tp, ip, lock_flags); - xfs_trans_ijoin(tp, tip, lock_flags); - /* * Before we've swapped the forks, lets set the owners of the forks * appropriately. We have to do this as we are demand paging the btree @@ -1917,5 +1916,5 @@ out_unlock: out_trans_cancel: xfs_trans_cancel(tp, 0); - goto out_unlock; + goto out; } diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c index 3ee186ac1093..338e50bbfd1e 100644 --- a/fs/xfs/xfs_error.c +++ b/fs/xfs/xfs_error.c @@ -131,7 +131,7 @@ xfs_error_report( { if (level <= xfs_error_level) { xfs_alert_tag(mp, XFS_PTAG_ERROR_REPORT, - "Internal error %s at line %d of file %s. Caller %pF", + "Internal error %s at line %d of file %s. Caller %pS", tag, linenum, filename, ra); xfs_stack_trace(); diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index edeaccc7961a..dc5f609bea88 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -397,7 +397,8 @@ STATIC int /* error (positive) */ xfs_zero_last_block( struct xfs_inode *ip, xfs_fsize_t offset, - xfs_fsize_t isize) + xfs_fsize_t isize, + bool *did_zeroing) { struct xfs_mount *mp = ip->i_mount; xfs_fileoff_t last_fsb = XFS_B_TO_FSBT(mp, isize); @@ -425,6 +426,7 @@ xfs_zero_last_block( zero_len = mp->m_sb.sb_blocksize - zero_offset; if (isize + zero_len > offset) zero_len = offset - isize; + *did_zeroing = true; return xfs_iozero(ip, isize, zero_len); } @@ -443,7 +445,8 @@ int /* error (positive) */ xfs_zero_eof( struct xfs_inode *ip, xfs_off_t offset, /* starting I/O offset */ - xfs_fsize_t isize) /* current inode size */ + xfs_fsize_t isize, /* current inode size */ + bool *did_zeroing) { struct xfs_mount *mp = ip->i_mount; xfs_fileoff_t start_zero_fsb; @@ -465,7 +468,7 @@ xfs_zero_eof( * We only zero a part of that block so it is handled specially. */ if (XFS_B_FSB_OFFSET(mp, isize) != 0) { - error = xfs_zero_last_block(ip, offset, isize); + error = xfs_zero_last_block(ip, offset, isize, did_zeroing); if (error) return error; } @@ -525,6 +528,7 @@ xfs_zero_eof( if (error) return error; + *did_zeroing = true; start_zero_fsb = imap.br_startoff + imap.br_blockcount; ASSERT(start_zero_fsb <= (end_zero_fsb + 1)); } @@ -567,13 +571,15 @@ restart: * having to redo all checks before. */ if (*pos > i_size_read(inode)) { + bool zero = false; + if (*iolock == XFS_IOLOCK_SHARED) { xfs_rw_iunlock(ip, *iolock); *iolock = XFS_IOLOCK_EXCL; xfs_rw_ilock(ip, *iolock); goto restart; } - error = xfs_zero_eof(ip, *pos, i_size_read(inode)); + error = xfs_zero_eof(ip, *pos, i_size_read(inode), &zero); if (error) return error; } @@ -846,6 +852,9 @@ xfs_file_fallocate( if (error) goto out_unlock; + xfs_ilock(ip, XFS_MMAPLOCK_EXCL); + iolock |= XFS_MMAPLOCK_EXCL; + if (mode & FALLOC_FL_PUNCH_HOLE) { error = xfs_free_file_space(ip, offset, len); if (error) @@ -1028,20 +1037,6 @@ xfs_file_mmap( } /* - * mmap()d file has taken write protection fault and is being made - * writable. We can set the page state up correctly for a writable - * page, which means we can do correct delalloc accounting (ENOSPC - * checking!) and unwritten extent mapping. - */ -STATIC int -xfs_vm_page_mkwrite( - struct vm_area_struct *vma, - struct vm_fault *vmf) -{ - return block_page_mkwrite(vma, vmf, xfs_get_blocks); -} - -/* * This type is designed to indicate the type of offset we would like * to search from page cache for xfs_seek_hole_data(). */ @@ -1416,6 +1411,55 @@ xfs_file_llseek( } } +/* + * Locking for serialisation of IO during page faults. This results in a lock + * ordering of: + * + * mmap_sem (MM) + * i_mmap_lock (XFS - truncate serialisation) + * page_lock (MM) + * i_lock (XFS - extent map serialisation) + */ +STATIC int +xfs_filemap_fault( + struct vm_area_struct *vma, + struct vm_fault *vmf) +{ + struct xfs_inode *ip = XFS_I(vma->vm_file->f_mapping->host); + int error; + + trace_xfs_filemap_fault(ip); + + xfs_ilock(ip, XFS_MMAPLOCK_SHARED); + error = filemap_fault(vma, vmf); + xfs_iunlock(ip, XFS_MMAPLOCK_SHARED); + + return error; +} + +/* + * mmap()d file has taken write protection fault and is being made writable. We + * can set the page state up correctly for a writable page, which means we can + * do correct delalloc accounting (ENOSPC checking!) and unwritten extent + * mapping. + */ +STATIC int +xfs_filemap_page_mkwrite( + struct vm_area_struct *vma, + struct vm_fault *vmf) +{ + struct xfs_inode *ip = XFS_I(vma->vm_file->f_mapping->host); + int error; + + trace_xfs_filemap_page_mkwrite(ip); + + xfs_ilock(ip, XFS_MMAPLOCK_SHARED); + error = block_page_mkwrite(vma, vmf, xfs_get_blocks); + xfs_iunlock(ip, XFS_MMAPLOCK_SHARED); + + return error; +} + const struct file_operations xfs_file_operations = { .llseek = xfs_file_llseek, .read = new_sync_read, @@ -1448,7 +1492,7 @@ const struct file_operations xfs_dir_file_operations = { }; static const struct vm_operations_struct xfs_file_vm_ops = { - .fault = filemap_fault, + .fault = xfs_filemap_fault, .map_pages = filemap_map_pages, - .page_mkwrite = xfs_vm_page_mkwrite, + .page_mkwrite = xfs_filemap_page_mkwrite, }; diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c index a2e86e8a0fea..8f9f854376c6 100644 --- a/fs/xfs/xfs_filestream.c +++ b/fs/xfs/xfs_filestream.c @@ -322,7 +322,7 @@ xfs_filestream_lookup_ag( pip = xfs_filestream_get_parent(ip); if (!pip) - goto out; + return NULLAGNUMBER; mru = xfs_mru_cache_lookup(mp->m_filestream, pip->i_ino); if (mru) { diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index 74efe5b760dc..cb7e8a29dfb6 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c @@ -637,12 +637,13 @@ xfs_fs_counts( xfs_mount_t *mp, xfs_fsop_counts_t *cnt) { - xfs_icsb_sync_counters(mp, XFS_ICSB_LAZY_COUNT); + cnt->allocino = percpu_counter_read_positive(&mp->m_icount); + cnt->freeino = percpu_counter_read_positive(&mp->m_ifree); + cnt->freedata = percpu_counter_read_positive(&mp->m_fdblocks) - + XFS_ALLOC_SET_ASIDE(mp); + spin_lock(&mp->m_sb_lock); - cnt->freedata = mp->m_sb.sb_fdblocks - XFS_ALLOC_SET_ASIDE(mp); cnt->freertx = mp->m_sb.sb_frextents; - cnt->freeino = mp->m_sb.sb_ifree; - cnt->allocino = mp->m_sb.sb_icount; spin_unlock(&mp->m_sb_lock); return 0; } @@ -692,14 +693,9 @@ xfs_reserve_blocks( * what to do. This means that the amount of free space can * change while we do this, so we need to retry if we end up * trying to reserve more space than is available. - * - * We also use the xfs_mod_incore_sb() interface so that we - * don't have to care about whether per cpu counter are - * enabled, disabled or even compiled in.... */ retry: spin_lock(&mp->m_sb_lock); - xfs_icsb_sync_counters_locked(mp, 0); /* * If our previous reservation was larger than the current value, @@ -716,7 +712,8 @@ retry: } else { __int64_t free; - free = mp->m_sb.sb_fdblocks - XFS_ALLOC_SET_ASIDE(mp); + free = percpu_counter_sum(&mp->m_fdblocks) - + XFS_ALLOC_SET_ASIDE(mp); if (!free) goto out; /* ENOSPC and fdblks_delta = 0 */ @@ -755,8 +752,7 @@ out: * the extra reserve blocks from the reserve..... */ int error; - error = xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, - fdblks_delta, 0); + error = xfs_mod_fdblocks(mp, fdblks_delta, 0); if (error == -ENOSPC) goto retry; } diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index d0414f305967..d6ebc85192b7 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -117,24 +117,34 @@ xfs_ilock_attr_map_shared( } /* - * The xfs inode contains 2 locks: a multi-reader lock called the - * i_iolock and a multi-reader lock called the i_lock. This routine - * allows either or both of the locks to be obtained. + * The xfs inode contains 3 multi-reader locks: the i_iolock the i_mmap_lock and + * the i_lock. This routine allows various combinations of the locks to be + * obtained. * - * The 2 locks should always be ordered so that the IO lock is - * obtained first in order to prevent deadlock. + * The 3 locks should always be ordered so that the IO lock is obtained first, + * the mmap lock second and the ilock last in order to prevent deadlock. * - * ip -- the inode being locked - * lock_flags -- this parameter indicates the inode's locks - * to be locked. It can be: - * XFS_IOLOCK_SHARED, - * XFS_IOLOCK_EXCL, - * XFS_ILOCK_SHARED, - * XFS_ILOCK_EXCL, - * XFS_IOLOCK_SHARED | XFS_ILOCK_SHARED, - * XFS_IOLOCK_SHARED | XFS_ILOCK_EXCL, - * XFS_IOLOCK_EXCL | XFS_ILOCK_SHARED, - * XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL + * Basic locking order: + * + * i_iolock -> i_mmap_lock -> page_lock -> i_ilock + * + * mmap_sem locking order: + * + * i_iolock -> page lock -> mmap_sem + * mmap_sem -> i_mmap_lock -> page_lock + * + * The difference in mmap_sem locking order mean that we cannot hold the + * i_mmap_lock over syscall based read(2)/write(2) based IO. These IO paths can + * fault in pages during copy in/out (for buffered IO) or require the mmap_sem + * in get_user_pages() to map the user pages into the kernel address space for + * direct IO. Similarly the i_iolock cannot be taken inside a page fault because + * page faults already hold the mmap_sem. + * + * Hence to serialise fully against both syscall and mmap based IO, we need to + * take both the i_iolock and the i_mmap_lock. These locks should *only* be both + * taken in places where we need to invalidate the page cache in a race + * free manner (e.g. truncate, hole punch and other extent manipulation + * functions). */ void xfs_ilock( @@ -150,6 +160,8 @@ xfs_ilock( */ ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) != (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)); + ASSERT((lock_flags & (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL)) != + (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL)); ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) != (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)); ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0); @@ -159,6 +171,11 @@ xfs_ilock( else if (lock_flags & XFS_IOLOCK_SHARED) mraccess_nested(&ip->i_iolock, XFS_IOLOCK_DEP(lock_flags)); + if (lock_flags & XFS_MMAPLOCK_EXCL) + mrupdate_nested(&ip->i_mmaplock, XFS_MMAPLOCK_DEP(lock_flags)); + else if (lock_flags & XFS_MMAPLOCK_SHARED) + mraccess_nested(&ip->i_mmaplock, XFS_MMAPLOCK_DEP(lock_flags)); + if (lock_flags & XFS_ILOCK_EXCL) mrupdate_nested(&ip->i_lock, XFS_ILOCK_DEP(lock_flags)); else if (lock_flags & XFS_ILOCK_SHARED) @@ -191,6 +208,8 @@ xfs_ilock_nowait( */ ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) != (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)); + ASSERT((lock_flags & (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL)) != + (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL)); ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) != (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)); ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0); @@ -202,21 +221,35 @@ xfs_ilock_nowait( if (!mrtryaccess(&ip->i_iolock)) goto out; } + + if (lock_flags & XFS_MMAPLOCK_EXCL) { + if (!mrtryupdate(&ip->i_mmaplock)) + goto out_undo_iolock; + } else if (lock_flags & XFS_MMAPLOCK_SHARED) { + if (!mrtryaccess(&ip->i_mmaplock)) + goto out_undo_iolock; + } + if (lock_flags & XFS_ILOCK_EXCL) { if (!mrtryupdate(&ip->i_lock)) - goto out_undo_iolock; + goto out_undo_mmaplock; } else if (lock_flags & XFS_ILOCK_SHARED) { if (!mrtryaccess(&ip->i_lock)) - goto out_undo_iolock; + goto out_undo_mmaplock; } return 1; - out_undo_iolock: +out_undo_mmaplock: + if (lock_flags & XFS_MMAPLOCK_EXCL) + mrunlock_excl(&ip->i_mmaplock); + else if (lock_flags & XFS_MMAPLOCK_SHARED) + mrunlock_shared(&ip->i_mmaplock); +out_undo_iolock: if (lock_flags & XFS_IOLOCK_EXCL) mrunlock_excl(&ip->i_iolock); else if (lock_flags & XFS_IOLOCK_SHARED) mrunlock_shared(&ip->i_iolock); - out: +out: return 0; } @@ -244,6 +277,8 @@ xfs_iunlock( */ ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) != (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)); + ASSERT((lock_flags & (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL)) != + (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL)); ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) != (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)); ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0); @@ -254,6 +289,11 @@ xfs_iunlock( else if (lock_flags & XFS_IOLOCK_SHARED) mrunlock_shared(&ip->i_iolock); + if (lock_flags & XFS_MMAPLOCK_EXCL) + mrunlock_excl(&ip->i_mmaplock); + else if (lock_flags & XFS_MMAPLOCK_SHARED) + mrunlock_shared(&ip->i_mmaplock); + if (lock_flags & XFS_ILOCK_EXCL) mrunlock_excl(&ip->i_lock); else if (lock_flags & XFS_ILOCK_SHARED) @@ -271,11 +311,14 @@ xfs_ilock_demote( xfs_inode_t *ip, uint lock_flags) { - ASSERT(lock_flags & (XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL)); - ASSERT((lock_flags & ~(XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL)) == 0); + ASSERT(lock_flags & (XFS_IOLOCK_EXCL|XFS_MMAPLOCK_EXCL|XFS_ILOCK_EXCL)); + ASSERT((lock_flags & + ~(XFS_IOLOCK_EXCL|XFS_MMAPLOCK_EXCL|XFS_ILOCK_EXCL)) == 0); if (lock_flags & XFS_ILOCK_EXCL) mrdemote(&ip->i_lock); + if (lock_flags & XFS_MMAPLOCK_EXCL) + mrdemote(&ip->i_mmaplock); if (lock_flags & XFS_IOLOCK_EXCL) mrdemote(&ip->i_iolock); @@ -294,6 +337,12 @@ xfs_isilocked( return rwsem_is_locked(&ip->i_lock.mr_lock); } + if (lock_flags & (XFS_MMAPLOCK_EXCL|XFS_MMAPLOCK_SHARED)) { + if (!(lock_flags & XFS_MMAPLOCK_SHARED)) + return !!ip->i_mmaplock.mr_writer; + return rwsem_is_locked(&ip->i_mmaplock.mr_lock); + } + if (lock_flags & (XFS_IOLOCK_EXCL|XFS_IOLOCK_SHARED)) { if (!(lock_flags & XFS_IOLOCK_SHARED)) return !!ip->i_iolock.mr_writer; @@ -314,14 +363,27 @@ int xfs_lock_delays; #endif /* - * Bump the subclass so xfs_lock_inodes() acquires each lock with - * a different value + * Bump the subclass so xfs_lock_inodes() acquires each lock with a different + * value. This shouldn't be called for page fault locking, but we also need to + * ensure we don't overrun the number of lockdep subclasses for the iolock or + * mmaplock as that is limited to 12 by the mmap lock lockdep annotations. */ static inline int xfs_lock_inumorder(int lock_mode, int subclass) { - if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)) + if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)) { + ASSERT(subclass + XFS_LOCK_INUMORDER < + (1 << (XFS_MMAPLOCK_SHIFT - XFS_IOLOCK_SHIFT))); lock_mode |= (subclass + XFS_LOCK_INUMORDER) << XFS_IOLOCK_SHIFT; + } + + if (lock_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)) { + ASSERT(subclass + XFS_LOCK_INUMORDER < + (1 << (XFS_ILOCK_SHIFT - XFS_MMAPLOCK_SHIFT))); + lock_mode |= (subclass + XFS_LOCK_INUMORDER) << + XFS_MMAPLOCK_SHIFT; + } + if (lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)) lock_mode |= (subclass + XFS_LOCK_INUMORDER) << XFS_ILOCK_SHIFT; @@ -329,15 +391,14 @@ xfs_lock_inumorder(int lock_mode, int subclass) } /* - * The following routine will lock n inodes in exclusive mode. - * We assume the caller calls us with the inodes in i_ino order. + * The following routine will lock n inodes in exclusive mode. We assume the + * caller calls us with the inodes in i_ino order. * - * We need to detect deadlock where an inode that we lock - * is in the AIL and we start waiting for another inode that is locked - * by a thread in a long running transaction (such as truncate). This can - * result in deadlock since the long running trans might need to wait - * for the inode we just locked in order to push the tail and free space - * in the log. + * We need to detect deadlock where an inode that we lock is in the AIL and we + * start waiting for another inode that is locked by a thread in a long running + * transaction (such as truncate). This can result in deadlock since the long + * running trans might need to wait for the inode we just locked in order to + * push the tail and free space in the log. */ void xfs_lock_inodes( @@ -348,30 +409,27 @@ xfs_lock_inodes( int attempts = 0, i, j, try_lock; xfs_log_item_t *lp; - ASSERT(ips && (inodes >= 2)); /* we need at least two */ + /* currently supports between 2 and 5 inodes */ + ASSERT(ips && inodes >= 2 && inodes <= 5); try_lock = 0; i = 0; - again: for (; i < inodes; i++) { ASSERT(ips[i]); - if (i && (ips[i] == ips[i-1])) /* Already locked */ + if (i && (ips[i] == ips[i - 1])) /* Already locked */ continue; /* - * If try_lock is not set yet, make sure all locked inodes - * are not in the AIL. - * If any are, set try_lock to be used later. + * If try_lock is not set yet, make sure all locked inodes are + * not in the AIL. If any are, set try_lock to be used later. */ - if (!try_lock) { for (j = (i - 1); j >= 0 && !try_lock; j--) { lp = (xfs_log_item_t *)ips[j]->i_itemp; - if (lp && (lp->li_flags & XFS_LI_IN_AIL)) { + if (lp && (lp->li_flags & XFS_LI_IN_AIL)) try_lock++; - } } } @@ -381,51 +439,42 @@ again: * we can't get any, we must release all we have * and try again. */ + if (!try_lock) { + xfs_ilock(ips[i], xfs_lock_inumorder(lock_mode, i)); + continue; + } + + /* try_lock means we have an inode locked that is in the AIL. */ + ASSERT(i != 0); + if (xfs_ilock_nowait(ips[i], xfs_lock_inumorder(lock_mode, i))) + continue; - if (try_lock) { - /* try_lock must be 0 if i is 0. */ + /* + * Unlock all previous guys and try again. xfs_iunlock will try + * to push the tail if the inode is in the AIL. + */ + attempts++; + for (j = i - 1; j >= 0; j--) { /* - * try_lock means we have an inode locked - * that is in the AIL. + * Check to see if we've already unlocked this one. Not + * the first one going back, and the inode ptr is the + * same. */ - ASSERT(i != 0); - if (!xfs_ilock_nowait(ips[i], xfs_lock_inumorder(lock_mode, i))) { - attempts++; - - /* - * Unlock all previous guys and try again. - * xfs_iunlock will try to push the tail - * if the inode is in the AIL. - */ - - for(j = i - 1; j >= 0; j--) { - - /* - * Check to see if we've already - * unlocked this one. - * Not the first one going back, - * and the inode ptr is the same. - */ - if ((j != (i - 1)) && ips[j] == - ips[j+1]) - continue; - - xfs_iunlock(ips[j], lock_mode); - } + if (j != (i - 1) && ips[j] == ips[j + 1]) + continue; - if ((attempts % 5) == 0) { - delay(1); /* Don't just spin the CPU */ + xfs_iunlock(ips[j], lock_mode); + } + + if ((attempts % 5) == 0) { + delay(1); /* Don't just spin the CPU */ #ifdef DEBUG - xfs_lock_delays++; + xfs_lock_delays++; #endif - } - i = 0; - try_lock = 0; - goto again; - } - } else { - xfs_ilock(ips[i], xfs_lock_inumorder(lock_mode, i)); } + i = 0; + try_lock = 0; + goto again; } #ifdef DEBUG @@ -440,10 +489,10 @@ again: } /* - * xfs_lock_two_inodes() can only be used to lock one type of lock - * at a time - the iolock or the ilock, but not both at once. If - * we lock both at once, lockdep will report false positives saying - * we have violated locking orders. + * xfs_lock_two_inodes() can only be used to lock one type of lock at a time - + * the iolock, the mmaplock or the ilock, but not more than one at a time. If we + * lock more than one at a time, lockdep will report false positives saying we + * have violated locking orders. */ void xfs_lock_two_inodes( @@ -455,8 +504,12 @@ xfs_lock_two_inodes( int attempts = 0; xfs_log_item_t *lp; - if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)) - ASSERT((lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)) == 0); + if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)) { + ASSERT(!(lock_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL))); + ASSERT(!(lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL))); + } else if (lock_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)) + ASSERT(!(lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL))); + ASSERT(ip0->i_ino != ip1->i_ino); if (ip0->i_ino > ip1->i_ino) { @@ -2615,19 +2668,22 @@ xfs_remove( /* * Enter all inodes for a rename transaction into a sorted array. */ +#define __XFS_SORT_INODES 5 STATIC void xfs_sort_for_rename( - xfs_inode_t *dp1, /* in: old (source) directory inode */ - xfs_inode_t *dp2, /* in: new (target) directory inode */ - xfs_inode_t *ip1, /* in: inode of old entry */ - xfs_inode_t *ip2, /* in: inode of new entry, if it - already exists, NULL otherwise. */ - xfs_inode_t **i_tab,/* out: array of inode returned, sorted */ - int *num_inodes) /* out: number of inodes in array */ + struct xfs_inode *dp1, /* in: old (source) directory inode */ + struct xfs_inode *dp2, /* in: new (target) directory inode */ + struct xfs_inode *ip1, /* in: inode of old entry */ + struct xfs_inode *ip2, /* in: inode of new entry */ + struct xfs_inode *wip, /* in: whiteout inode */ + struct xfs_inode **i_tab,/* out: sorted array of inodes */ + int *num_inodes) /* in/out: inodes in array */ { - xfs_inode_t *temp; int i, j; + ASSERT(*num_inodes == __XFS_SORT_INODES); + memset(i_tab, 0, *num_inodes * sizeof(struct xfs_inode *)); + /* * i_tab contains a list of pointers to inodes. We initialize * the table here & we'll sort it. We will then use it to @@ -2635,25 +2691,24 @@ xfs_sort_for_rename( * * Note that the table may contain duplicates. e.g., dp1 == dp2. */ - i_tab[0] = dp1; - i_tab[1] = dp2; - i_tab[2] = ip1; - if (ip2) { - *num_inodes = 4; - i_tab[3] = ip2; - } else { - *num_inodes = 3; - i_tab[3] = NULL; - } + i = 0; + i_tab[i++] = dp1; + i_tab[i++] = dp2; + i_tab[i++] = ip1; + if (ip2) + i_tab[i++] = ip2; + if (wip) + i_tab[i++] = wip; + *num_inodes = i; /* * Sort the elements via bubble sort. (Remember, there are at - * most 4 elements to sort, so this is adequate.) + * most 5 elements to sort, so this is adequate.) */ for (i = 0; i < *num_inodes; i++) { for (j = 1; j < *num_inodes; j++) { if (i_tab[j]->i_ino < i_tab[j-1]->i_ino) { - temp = i_tab[j]; + struct xfs_inode *temp = i_tab[j]; i_tab[j] = i_tab[j-1]; i_tab[j-1] = temp; } @@ -2661,6 +2716,31 @@ xfs_sort_for_rename( } } +static int +xfs_finish_rename( + struct xfs_trans *tp, + struct xfs_bmap_free *free_list) +{ + int committed = 0; + int error; + + /* + * If this is a synchronous mount, make sure that the rename transaction + * goes to disk before returning to the user. + */ + if (tp->t_mountp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) + xfs_trans_set_sync(tp); + + error = xfs_bmap_finish(&tp, free_list, &committed); + if (error) { + xfs_bmap_cancel(free_list); + xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT); + return error; + } + + return xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); +} + /* * xfs_cross_rename() * @@ -2689,14 +2769,14 @@ xfs_cross_rename( ip2->i_ino, first_block, free_list, spaceres); if (error) - goto out; + goto out_trans_abort; /* Swap inode number for dirent in second parent */ error = xfs_dir_replace(tp, dp2, name2, ip1->i_ino, first_block, free_list, spaceres); if (error) - goto out; + goto out_trans_abort; /* * If we're renaming one or more directories across different parents, @@ -2711,16 +2791,16 @@ xfs_cross_rename( dp1->i_ino, first_block, free_list, spaceres); if (error) - goto out; + goto out_trans_abort; /* transfer ip2 ".." reference to dp1 */ if (!S_ISDIR(ip1->i_d.di_mode)) { error = xfs_droplink(tp, dp2); if (error) - goto out; + goto out_trans_abort; error = xfs_bumplink(tp, dp1); if (error) - goto out; + goto out_trans_abort; } /* @@ -2738,16 +2818,16 @@ xfs_cross_rename( dp2->i_ino, first_block, free_list, spaceres); if (error) - goto out; + goto out_trans_abort; /* transfer ip1 ".." reference to dp2 */ if (!S_ISDIR(ip2->i_d.di_mode)) { error = xfs_droplink(tp, dp1); if (error) - goto out; + goto out_trans_abort; error = xfs_bumplink(tp, dp2); if (error) - goto out; + goto out_trans_abort; } /* @@ -2775,66 +2855,108 @@ xfs_cross_rename( } xfs_trans_ichgtime(tp, dp1, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); xfs_trans_log_inode(tp, dp1, XFS_ILOG_CORE); -out: + return xfs_finish_rename(tp, free_list); + +out_trans_abort: + xfs_bmap_cancel(free_list); + xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT); return error; } /* + * xfs_rename_alloc_whiteout() + * + * Return a referenced, unlinked, unlocked inode that that can be used as a + * whiteout in a rename transaction. We use a tmpfile inode here so that if we + * crash between allocating the inode and linking it into the rename transaction + * recovery will free the inode and we won't leak it. + */ +static int +xfs_rename_alloc_whiteout( + struct xfs_inode *dp, + struct xfs_inode **wip) +{ + struct xfs_inode *tmpfile; + int error; + + error = xfs_create_tmpfile(dp, NULL, S_IFCHR | WHITEOUT_MODE, &tmpfile); + if (error) + return error; + + /* Satisfy xfs_bumplink that this is a real tmpfile */ + xfs_finish_inode_setup(tmpfile); + VFS_I(tmpfile)->i_state |= I_LINKABLE; + + *wip = tmpfile; + return 0; +} + +/* * xfs_rename */ int xfs_rename( - xfs_inode_t *src_dp, - struct xfs_name *src_name, - xfs_inode_t *src_ip, - xfs_inode_t *target_dp, - struct xfs_name *target_name, - xfs_inode_t *target_ip, - unsigned int flags) + struct xfs_inode *src_dp, + struct xfs_name *src_name, + struct xfs_inode *src_ip, + struct xfs_inode *target_dp, + struct xfs_name *target_name, + struct xfs_inode *target_ip, + unsigned int flags) { - xfs_trans_t *tp = NULL; - xfs_mount_t *mp = src_dp->i_mount; - int new_parent; /* moving to a new dir */ - int src_is_directory; /* src_name is a directory */ - int error; - xfs_bmap_free_t free_list; - xfs_fsblock_t first_block; - int cancel_flags; - int committed; - xfs_inode_t *inodes[4]; - int spaceres; - int num_inodes; + struct xfs_mount *mp = src_dp->i_mount; + struct xfs_trans *tp; + struct xfs_bmap_free free_list; + xfs_fsblock_t first_block; + struct xfs_inode *wip = NULL; /* whiteout inode */ + struct xfs_inode *inodes[__XFS_SORT_INODES]; + int num_inodes = __XFS_SORT_INODES; + bool new_parent = (src_dp != target_dp); + bool src_is_directory = S_ISDIR(src_ip->i_d.di_mode); + int cancel_flags = 0; + int spaceres; + int error; trace_xfs_rename(src_dp, target_dp, src_name, target_name); - new_parent = (src_dp != target_dp); - src_is_directory = S_ISDIR(src_ip->i_d.di_mode); + if ((flags & RENAME_EXCHANGE) && !target_ip) + return -EINVAL; + + /* + * If we are doing a whiteout operation, allocate the whiteout inode + * we will be placing at the target and ensure the type is set + * appropriately. + */ + if (flags & RENAME_WHITEOUT) { + ASSERT(!(flags & (RENAME_NOREPLACE | RENAME_EXCHANGE))); + error = xfs_rename_alloc_whiteout(target_dp, &wip); + if (error) + return error; + + /* setup target dirent info as whiteout */ + src_name->type = XFS_DIR3_FT_CHRDEV; + } - xfs_sort_for_rename(src_dp, target_dp, src_ip, target_ip, + xfs_sort_for_rename(src_dp, target_dp, src_ip, target_ip, wip, inodes, &num_inodes); - xfs_bmap_init(&free_list, &first_block); tp = xfs_trans_alloc(mp, XFS_TRANS_RENAME); - cancel_flags = XFS_TRANS_RELEASE_LOG_RES; spaceres = XFS_RENAME_SPACE_RES(mp, target_name->len); error = xfs_trans_reserve(tp, &M_RES(mp)->tr_rename, spaceres, 0); if (error == -ENOSPC) { spaceres = 0; error = xfs_trans_reserve(tp, &M_RES(mp)->tr_rename, 0, 0); } - if (error) { - xfs_trans_cancel(tp, 0); - goto std_return; - } + if (error) + goto out_trans_cancel; + cancel_flags = XFS_TRANS_RELEASE_LOG_RES; /* * Attach the dquots to the inodes */ error = xfs_qm_vop_rename_dqattach(inodes); - if (error) { - xfs_trans_cancel(tp, cancel_flags); - goto std_return; - } + if (error) + goto out_trans_cancel; /* * Lock all the participating inodes. Depending upon whether @@ -2855,6 +2977,8 @@ xfs_rename( xfs_trans_ijoin(tp, src_ip, XFS_ILOCK_EXCL); if (target_ip) xfs_trans_ijoin(tp, target_ip, XFS_ILOCK_EXCL); + if (wip) + xfs_trans_ijoin(tp, wip, XFS_ILOCK_EXCL); /* * If we are using project inheritance, we only allow renames @@ -2864,20 +2988,16 @@ xfs_rename( if (unlikely((target_dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) && (xfs_get_projid(target_dp) != xfs_get_projid(src_ip)))) { error = -EXDEV; - goto error_return; + goto out_trans_cancel; } - /* - * Handle RENAME_EXCHANGE flags - */ - if (flags & RENAME_EXCHANGE) { - error = xfs_cross_rename(tp, src_dp, src_name, src_ip, - target_dp, target_name, target_ip, - &free_list, &first_block, spaceres); - if (error) - goto abort_return; - goto finish_rename; - } + xfs_bmap_init(&free_list, &first_block); + + /* RENAME_EXCHANGE is unique from here on. */ + if (flags & RENAME_EXCHANGE) + return xfs_cross_rename(tp, src_dp, src_name, src_ip, + target_dp, target_name, target_ip, + &free_list, &first_block, spaceres); /* * Set up the target. @@ -2890,7 +3010,7 @@ xfs_rename( if (!spaceres) { error = xfs_dir_canenter(tp, target_dp, target_name); if (error) - goto error_return; + goto out_trans_cancel; } /* * If target does not exist and the rename crosses @@ -2901,9 +3021,9 @@ xfs_rename( src_ip->i_ino, &first_block, &free_list, spaceres); if (error == -ENOSPC) - goto error_return; + goto out_bmap_cancel; if (error) - goto abort_return; + goto out_trans_abort; xfs_trans_ichgtime(tp, target_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); @@ -2911,7 +3031,7 @@ xfs_rename( if (new_parent && src_is_directory) { error = xfs_bumplink(tp, target_dp); if (error) - goto abort_return; + goto out_trans_abort; } } else { /* target_ip != NULL */ /* @@ -2926,7 +3046,7 @@ xfs_rename( if (!(xfs_dir_isempty(target_ip)) || (target_ip->i_d.di_nlink > 2)) { error = -EEXIST; - goto error_return; + goto out_trans_cancel; } } @@ -2943,7 +3063,7 @@ xfs_rename( src_ip->i_ino, &first_block, &free_list, spaceres); if (error) - goto abort_return; + goto out_trans_abort; xfs_trans_ichgtime(tp, target_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); @@ -2954,7 +3074,7 @@ xfs_rename( */ error = xfs_droplink(tp, target_ip); if (error) - goto abort_return; + goto out_trans_abort; if (src_is_directory) { /* @@ -2962,7 +3082,7 @@ xfs_rename( */ error = xfs_droplink(tp, target_ip); if (error) - goto abort_return; + goto out_trans_abort; } } /* target_ip != NULL */ @@ -2979,7 +3099,7 @@ xfs_rename( &first_block, &free_list, spaceres); ASSERT(error != -EEXIST); if (error) - goto abort_return; + goto out_trans_abort; } /* @@ -3005,49 +3125,67 @@ xfs_rename( */ error = xfs_droplink(tp, src_dp); if (error) - goto abort_return; + goto out_trans_abort; } - error = xfs_dir_removename(tp, src_dp, src_name, src_ip->i_ino, + /* + * For whiteouts, we only need to update the source dirent with the + * inode number of the whiteout inode rather than removing it + * altogether. + */ + if (wip) { + error = xfs_dir_replace(tp, src_dp, src_name, wip->i_ino, &first_block, &free_list, spaceres); + } else + error = xfs_dir_removename(tp, src_dp, src_name, src_ip->i_ino, + &first_block, &free_list, spaceres); if (error) - goto abort_return; - - xfs_trans_ichgtime(tp, src_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); - xfs_trans_log_inode(tp, src_dp, XFS_ILOG_CORE); - if (new_parent) - xfs_trans_log_inode(tp, target_dp, XFS_ILOG_CORE); + goto out_trans_abort; -finish_rename: /* - * If this is a synchronous mount, make sure that the - * rename transaction goes to disk before returning to - * the user. + * For whiteouts, we need to bump the link count on the whiteout inode. + * This means that failures all the way up to this point leave the inode + * on the unlinked list and so cleanup is a simple matter of dropping + * the remaining reference to it. If we fail here after bumping the link + * count, we're shutting down the filesystem so we'll never see the + * intermediate state on disk. */ - if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) { - xfs_trans_set_sync(tp); - } + if (wip) { + ASSERT(wip->i_d.di_nlink == 0); + error = xfs_bumplink(tp, wip); + if (error) + goto out_trans_abort; + error = xfs_iunlink_remove(tp, wip); + if (error) + goto out_trans_abort; + xfs_trans_log_inode(tp, wip, XFS_ILOG_CORE); - error = xfs_bmap_finish(&tp, &free_list, &committed); - if (error) { - xfs_bmap_cancel(&free_list); - xfs_trans_cancel(tp, (XFS_TRANS_RELEASE_LOG_RES | - XFS_TRANS_ABORT)); - goto std_return; + /* + * Now we have a real link, clear the "I'm a tmpfile" state + * flag from the inode so it doesn't accidentally get misused in + * future. + */ + VFS_I(wip)->i_state &= ~I_LINKABLE; } - /* - * trans_commit will unlock src_ip, target_ip & decrement - * the vnode references. - */ - return xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); + xfs_trans_ichgtime(tp, src_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); + xfs_trans_log_inode(tp, src_dp, XFS_ILOG_CORE); + if (new_parent) + xfs_trans_log_inode(tp, target_dp, XFS_ILOG_CORE); - abort_return: + error = xfs_finish_rename(tp, &free_list); + if (wip) + IRELE(wip); + return error; + +out_trans_abort: cancel_flags |= XFS_TRANS_ABORT; - error_return: +out_bmap_cancel: xfs_bmap_cancel(&free_list); +out_trans_cancel: xfs_trans_cancel(tp, cancel_flags); - std_return: + if (wip) + IRELE(wip); return error; } diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 8e82b41d2050..8f22d20368d8 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -56,6 +56,7 @@ typedef struct xfs_inode { struct xfs_inode_log_item *i_itemp; /* logging information */ mrlock_t i_lock; /* inode lock */ mrlock_t i_iolock; /* inode IO lock */ + mrlock_t i_mmaplock; /* inode mmap IO lock */ atomic_t i_pincount; /* inode pin count */ spinlock_t i_flags_lock; /* inode i_flags lock */ /* Miscellaneous state. */ @@ -263,15 +264,20 @@ static inline int xfs_isiflocked(struct xfs_inode *ip) #define XFS_IOLOCK_SHARED (1<<1) #define XFS_ILOCK_EXCL (1<<2) #define XFS_ILOCK_SHARED (1<<3) +#define XFS_MMAPLOCK_EXCL (1<<4) +#define XFS_MMAPLOCK_SHARED (1<<5) #define XFS_LOCK_MASK (XFS_IOLOCK_EXCL | XFS_IOLOCK_SHARED \ - | XFS_ILOCK_EXCL | XFS_ILOCK_SHARED) + | XFS_ILOCK_EXCL | XFS_ILOCK_SHARED \ + | XFS_MMAPLOCK_EXCL | XFS_MMAPLOCK_SHARED) #define XFS_LOCK_FLAGS \ { XFS_IOLOCK_EXCL, "IOLOCK_EXCL" }, \ { XFS_IOLOCK_SHARED, "IOLOCK_SHARED" }, \ { XFS_ILOCK_EXCL, "ILOCK_EXCL" }, \ - { XFS_ILOCK_SHARED, "ILOCK_SHARED" } + { XFS_ILOCK_SHARED, "ILOCK_SHARED" }, \ + { XFS_MMAPLOCK_EXCL, "MMAPLOCK_EXCL" }, \ + { XFS_MMAPLOCK_SHARED, "MMAPLOCK_SHARED" } /* @@ -302,17 +308,26 @@ static inline int xfs_isiflocked(struct xfs_inode *ip) #define XFS_IOLOCK_SHIFT 16 #define XFS_IOLOCK_PARENT (XFS_LOCK_PARENT << XFS_IOLOCK_SHIFT) +#define XFS_MMAPLOCK_SHIFT 20 + #define XFS_ILOCK_SHIFT 24 #define XFS_ILOCK_PARENT (XFS_LOCK_PARENT << XFS_ILOCK_SHIFT) #define XFS_ILOCK_RTBITMAP (XFS_LOCK_RTBITMAP << XFS_ILOCK_SHIFT) #define XFS_ILOCK_RTSUM (XFS_LOCK_RTSUM << XFS_ILOCK_SHIFT) -#define XFS_IOLOCK_DEP_MASK 0x00ff0000 +#define XFS_IOLOCK_DEP_MASK 0x000f0000 +#define XFS_MMAPLOCK_DEP_MASK 0x00f00000 #define XFS_ILOCK_DEP_MASK 0xff000000 -#define XFS_LOCK_DEP_MASK (XFS_IOLOCK_DEP_MASK | XFS_ILOCK_DEP_MASK) +#define XFS_LOCK_DEP_MASK (XFS_IOLOCK_DEP_MASK | \ + XFS_MMAPLOCK_DEP_MASK | \ + XFS_ILOCK_DEP_MASK) -#define XFS_IOLOCK_DEP(flags) (((flags) & XFS_IOLOCK_DEP_MASK) >> XFS_IOLOCK_SHIFT) -#define XFS_ILOCK_DEP(flags) (((flags) & XFS_ILOCK_DEP_MASK) >> XFS_ILOCK_SHIFT) +#define XFS_IOLOCK_DEP(flags) (((flags) & XFS_IOLOCK_DEP_MASK) \ + >> XFS_IOLOCK_SHIFT) +#define XFS_MMAPLOCK_DEP(flags) (((flags) & XFS_MMAPLOCK_DEP_MASK) \ + >> XFS_MMAPLOCK_SHIFT) +#define XFS_ILOCK_DEP(flags) (((flags) & XFS_ILOCK_DEP_MASK) \ + >> XFS_ILOCK_SHIFT) /* * For multiple groups support: if S_ISGID bit is set in the parent @@ -384,10 +399,11 @@ enum xfs_prealloc_flags { XFS_PREALLOC_INVISIBLE = (1 << 4), }; -int xfs_update_prealloc_flags(struct xfs_inode *, - enum xfs_prealloc_flags); -int xfs_zero_eof(struct xfs_inode *, xfs_off_t, xfs_fsize_t); -int xfs_iozero(struct xfs_inode *, loff_t, size_t); +int xfs_update_prealloc_flags(struct xfs_inode *ip, + enum xfs_prealloc_flags flags); +int xfs_zero_eof(struct xfs_inode *ip, xfs_off_t offset, + xfs_fsize_t isize, bool *did_zeroing); +int xfs_iozero(struct xfs_inode *ip, loff_t pos, size_t count); /* from xfs_iops.c */ diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index ac4feae45eb3..4ee44ddfdfb7 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -631,7 +631,7 @@ xfs_ioc_space( if (filp->f_flags & O_DSYNC) flags |= XFS_PREALLOC_SYNC; - if (ioflags & XFS_IO_INVIS) + if (ioflags & XFS_IO_INVIS) flags |= XFS_PREALLOC_INVISIBLE; error = mnt_want_write_file(filp); @@ -643,6 +643,9 @@ xfs_ioc_space( if (error) goto out_unlock; + xfs_ilock(ip, XFS_MMAPLOCK_EXCL); + iolock |= XFS_MMAPLOCK_EXCL; + switch (bf->l_whence) { case 0: /*SEEK_SET*/ break; diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index ccb1dd0d509e..38e633bad8c2 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -460,8 +460,7 @@ xfs_iomap_prealloc_size( alloc_blocks = XFS_FILEOFF_MIN(roundup_pow_of_two(MAXEXTLEN), alloc_blocks); - xfs_icsb_sync_counters(mp, XFS_ICSB_LAZY_COUNT); - freesp = mp->m_sb.sb_fdblocks; + freesp = percpu_counter_read_positive(&mp->m_fdblocks); if (freesp < mp->m_low_space[XFS_LOWSP_5_PCNT]) { shift = 2; if (freesp < mp->m_low_space[XFS_LOWSP_4_PCNT]) diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index d7782ae1af3c..015d6a366b16 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -394,7 +394,7 @@ xfs_vn_rename( struct xfs_name oname; struct xfs_name nname; - if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE)) + if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT)) return -EINVAL; /* if we are exchanging files, we need to set i_mode of both files */ @@ -756,6 +756,7 @@ xfs_setattr_size( int error; uint lock_flags = 0; uint commit_flags = 0; + bool did_zeroing = false; trace_xfs_setattr(ip); @@ -770,6 +771,7 @@ xfs_setattr_size( return error; ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL)); + ASSERT(xfs_isilocked(ip, XFS_MMAPLOCK_EXCL)); ASSERT(S_ISREG(ip->i_d.di_mode)); ASSERT((iattr->ia_valid & (ATTR_UID|ATTR_GID|ATTR_ATIME|ATTR_ATIME_SET| ATTR_MTIME_SET|ATTR_KILL_PRIV|ATTR_TIMES_SET)) == 0); @@ -799,20 +801,16 @@ xfs_setattr_size( return error; /* - * Now we can make the changes. Before we join the inode to the - * transaction, take care of the part of the truncation that must be - * done without the inode lock. This needs to be done before joining - * the inode to the transaction, because the inode cannot be unlocked - * once it is a part of the transaction. + * File data changes must be complete before we start the transaction to + * modify the inode. This needs to be done before joining the inode to + * the transaction because the inode cannot be unlocked once it is a + * part of the transaction. + * + * Start with zeroing any data block beyond EOF that we may expose on + * file extension. */ if (newsize > oldsize) { - /* - * Do the first part of growing a file: zero any data in the - * last block that is beyond the old EOF. We need to do this - * before the inode is joined to the transaction to modify - * i_size. - */ - error = xfs_zero_eof(ip, newsize, oldsize); + error = xfs_zero_eof(ip, newsize, oldsize, &did_zeroing); if (error) return error; } @@ -822,75 +820,42 @@ xfs_setattr_size( * any previous writes that are beyond the on disk EOF and the new * EOF that have not been written out need to be written here. If we * do not write the data out, we expose ourselves to the null files - * problem. - * - * Only flush from the on disk size to the smaller of the in memory - * file size or the new size as that's the range we really care about - * here and prevents waiting for other data not within the range we - * care about here. + * problem. Note that this includes any block zeroing we did above; + * otherwise those blocks may not be zeroed after a crash. */ - if (oldsize != ip->i_d.di_size && newsize > ip->i_d.di_size) { + if (newsize > ip->i_d.di_size && + (oldsize != ip->i_d.di_size || did_zeroing)) { error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping, ip->i_d.di_size, newsize); if (error) return error; } - /* - * Wait for all direct I/O to complete. - */ + /* Now wait for all direct I/O to complete. */ inode_dio_wait(inode); /* - * Do all the page cache truncate work outside the transaction context - * as the "lock" order is page lock->log space reservation. i.e. - * locking pages inside the transaction can ABBA deadlock with - * writeback. We have to do the VFS inode size update before we truncate - * the pagecache, however, to avoid racing with page faults beyond the - * new EOF they are not serialised against truncate operations except by - * page locks and size updates. + * We've already locked out new page faults, so now we can safely remove + * pages from the page cache knowing they won't get refaulted until we + * drop the XFS_MMAP_EXCL lock after the extent manipulations are + * complete. The truncate_setsize() call also cleans partial EOF page + * PTEs on extending truncates and hence ensures sub-page block size + * filesystems are correctly handled, too. * - * Hence we are in a situation where a truncate can fail with ENOMEM - * from xfs_trans_reserve(), but having already truncated the in-memory - * version of the file (i.e. made user visible changes). There's not - * much we can do about this, except to hope that the caller sees ENOMEM - * and retries the truncate operation. + * We have to do all the page cache truncate work outside the + * transaction context as the "lock" order is page lock->log space + * reservation as defined by extent allocation in the writeback path. + * Hence a truncate can fail with ENOMEM from xfs_trans_reserve(), but + * having already truncated the in-memory version of the file (i.e. made + * user visible changes). There's not much we can do about this, except + * to hope that the caller sees ENOMEM and retries the truncate + * operation. */ error = block_truncate_page(inode->i_mapping, newsize, xfs_get_blocks); if (error) return error; truncate_setsize(inode, newsize); - /* - * The "we can't serialise against page faults" pain gets worse. - * - * If the file is mapped then we have to clean the page at the old EOF - * when extending the file. Extending the file can expose changes the - * underlying page mapping (e.g. from beyond EOF to a hole or - * unwritten), and so on the next attempt to write to that page we need - * to remap it for write. i.e. we need .page_mkwrite() to be called. - * Hence we need to clean the page to clean the pte and so a new write - * fault will be triggered appropriately. - * - * If we do it before we change the inode size, then we can race with a - * page fault that maps the page with exactly the same problem. If we do - * it after we change the file size, then a new page fault can come in - * and allocate space before we've run the rest of the truncate - * transaction. That's kinda grotesque, but it's better than have data - * over a hole, and so that's the lesser evil that has been chosen here. - * - * The real solution, however, is to have some mechanism for locking out - * page faults while a truncate is in progress. - */ - if (newsize > oldsize && mapping_mapped(VFS_I(ip)->i_mapping)) { - error = filemap_write_and_wait_range( - VFS_I(ip)->i_mapping, - round_down(oldsize, PAGE_CACHE_SIZE), - round_up(oldsize, PAGE_CACHE_SIZE) - 1); - if (error) - return error; - } - tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_SIZE); error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0); if (error) @@ -989,8 +954,12 @@ xfs_vn_setattr( xfs_ilock(ip, iolock); error = xfs_break_layouts(dentry->d_inode, &iolock); - if (!error) + if (!error) { + xfs_ilock(ip, XFS_MMAPLOCK_EXCL); + iolock |= XFS_MMAPLOCK_EXCL; + error = xfs_setattr_size(ip, iattr); + } xfs_iunlock(ip, iolock); } else { error = xfs_setattr_nonsize(ip, iattr, 0); diff --git a/fs/xfs/xfs_linux.h b/fs/xfs/xfs_linux.h index c31d2c2eadc4..7c7842c85a08 100644 --- a/fs/xfs/xfs_linux.h +++ b/fs/xfs/xfs_linux.h @@ -116,15 +116,6 @@ typedef __uint64_t __psunsigned_t; #undef XFS_NATIVE_HOST #endif -/* - * Feature macros (disable/enable) - */ -#ifdef CONFIG_SMP -#define HAVE_PERCPU_SB /* per cpu superblock counters are a 2.6 feature */ -#else -#undef HAVE_PERCPU_SB /* per cpu superblock counters are a 2.6 feature */ -#endif - #define irix_sgid_inherit xfs_params.sgid_inherit.val #define irix_symlink_mode xfs_params.symlink_mode.val #define xfs_panic_mask xfs_params.panic_mask.val diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index a5a945fc3bdc..4f5784f85a5b 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -4463,10 +4463,10 @@ xlog_do_recover( xfs_sb_from_disk(sbp, XFS_BUF_TO_SBP(bp)); ASSERT(sbp->sb_magicnum == XFS_SB_MAGIC); ASSERT(xfs_sb_good_version(sbp)); + xfs_reinit_percpu_counters(log->l_mp); + xfs_buf_relse(bp); - /* We've re-read the superblock so re-initialize per-cpu counters */ - xfs_icsb_reinit_counters(log->l_mp); xlog_recover_check_summary(log); diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 4fa80e63eea2..2ce7ee3b4ec1 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -43,18 +43,6 @@ #include "xfs_sysfs.h" -#ifdef HAVE_PERCPU_SB -STATIC void xfs_icsb_balance_counter(xfs_mount_t *, xfs_sb_field_t, - int); -STATIC void xfs_icsb_balance_counter_locked(xfs_mount_t *, xfs_sb_field_t, - int); -STATIC void xfs_icsb_disable_counter(xfs_mount_t *, xfs_sb_field_t); -#else - -#define xfs_icsb_balance_counter(mp, a, b) do { } while (0) -#define xfs_icsb_balance_counter_locked(mp, a, b) do { } while (0) -#endif - static DEFINE_MUTEX(xfs_uuid_table_mutex); static int xfs_uuid_table_size; static uuid_t *xfs_uuid_table; @@ -347,8 +335,7 @@ reread: goto reread; } - /* Initialize per-cpu counters */ - xfs_icsb_reinit_counters(mp); + xfs_reinit_percpu_counters(mp); /* no need to be quiet anymore, so reset the buf ops */ bp->b_ops = &xfs_sb_buf_ops; @@ -1087,8 +1074,6 @@ xfs_log_sbcount(xfs_mount_t *mp) if (!xfs_fs_writable(mp, SB_FREEZE_COMPLETE)) return 0; - xfs_icsb_sync_counters(mp, 0); - /* * we don't need to do this if we are updating the superblock * counters on every modification. @@ -1099,253 +1084,136 @@ xfs_log_sbcount(xfs_mount_t *mp) return xfs_sync_sb(mp, true); } -/* - * xfs_mod_incore_sb_unlocked() is a utility routine commonly used to apply - * a delta to a specified field in the in-core superblock. Simply - * switch on the field indicated and apply the delta to that field. - * Fields are not allowed to dip below zero, so if the delta would - * do this do not apply it and return EINVAL. - * - * The m_sb_lock must be held when this routine is called. - */ -STATIC int -xfs_mod_incore_sb_unlocked( - xfs_mount_t *mp, - xfs_sb_field_t field, - int64_t delta, - int rsvd) +int +xfs_mod_icount( + struct xfs_mount *mp, + int64_t delta) { - int scounter; /* short counter for 32 bit fields */ - long long lcounter; /* long counter for 64 bit fields */ - long long res_used, rem; - - /* - * With the in-core superblock spin lock held, switch - * on the indicated field. Apply the delta to the - * proper field. If the fields value would dip below - * 0, then do not apply the delta and return EINVAL. - */ - switch (field) { - case XFS_SBS_ICOUNT: - lcounter = (long long)mp->m_sb.sb_icount; - lcounter += delta; - if (lcounter < 0) { - ASSERT(0); - return -EINVAL; - } - mp->m_sb.sb_icount = lcounter; - return 0; - case XFS_SBS_IFREE: - lcounter = (long long)mp->m_sb.sb_ifree; - lcounter += delta; - if (lcounter < 0) { - ASSERT(0); - return -EINVAL; - } - mp->m_sb.sb_ifree = lcounter; - return 0; - case XFS_SBS_FDBLOCKS: - lcounter = (long long) - mp->m_sb.sb_fdblocks - XFS_ALLOC_SET_ASIDE(mp); - res_used = (long long)(mp->m_resblks - mp->m_resblks_avail); - - if (delta > 0) { /* Putting blocks back */ - if (res_used > delta) { - mp->m_resblks_avail += delta; - } else { - rem = delta - res_used; - mp->m_resblks_avail = mp->m_resblks; - lcounter += rem; - } - } else { /* Taking blocks away */ - lcounter += delta; - if (lcounter >= 0) { - mp->m_sb.sb_fdblocks = lcounter + - XFS_ALLOC_SET_ASIDE(mp); - return 0; - } - - /* - * We are out of blocks, use any available reserved - * blocks if were allowed to. - */ - if (!rsvd) - return -ENOSPC; - - lcounter = (long long)mp->m_resblks_avail + delta; - if (lcounter >= 0) { - mp->m_resblks_avail = lcounter; - return 0; - } - printk_once(KERN_WARNING - "Filesystem \"%s\": reserve blocks depleted! " - "Consider increasing reserve pool size.", - mp->m_fsname); - return -ENOSPC; - } - - mp->m_sb.sb_fdblocks = lcounter + XFS_ALLOC_SET_ASIDE(mp); - return 0; - case XFS_SBS_FREXTENTS: - lcounter = (long long)mp->m_sb.sb_frextents; - lcounter += delta; - if (lcounter < 0) { - return -ENOSPC; - } - mp->m_sb.sb_frextents = lcounter; - return 0; - case XFS_SBS_DBLOCKS: - lcounter = (long long)mp->m_sb.sb_dblocks; - lcounter += delta; - if (lcounter < 0) { - ASSERT(0); - return -EINVAL; - } - mp->m_sb.sb_dblocks = lcounter; - return 0; - case XFS_SBS_AGCOUNT: - scounter = mp->m_sb.sb_agcount; - scounter += delta; - if (scounter < 0) { - ASSERT(0); - return -EINVAL; - } - mp->m_sb.sb_agcount = scounter; - return 0; - case XFS_SBS_IMAX_PCT: - scounter = mp->m_sb.sb_imax_pct; - scounter += delta; - if (scounter < 0) { - ASSERT(0); - return -EINVAL; - } - mp->m_sb.sb_imax_pct = scounter; - return 0; - case XFS_SBS_REXTSIZE: - scounter = mp->m_sb.sb_rextsize; - scounter += delta; - if (scounter < 0) { - ASSERT(0); - return -EINVAL; - } - mp->m_sb.sb_rextsize = scounter; - return 0; - case XFS_SBS_RBMBLOCKS: - scounter = mp->m_sb.sb_rbmblocks; - scounter += delta; - if (scounter < 0) { - ASSERT(0); - return -EINVAL; - } - mp->m_sb.sb_rbmblocks = scounter; - return 0; - case XFS_SBS_RBLOCKS: - lcounter = (long long)mp->m_sb.sb_rblocks; - lcounter += delta; - if (lcounter < 0) { - ASSERT(0); - return -EINVAL; - } - mp->m_sb.sb_rblocks = lcounter; - return 0; - case XFS_SBS_REXTENTS: - lcounter = (long long)mp->m_sb.sb_rextents; - lcounter += delta; - if (lcounter < 0) { - ASSERT(0); - return -EINVAL; - } - mp->m_sb.sb_rextents = lcounter; - return 0; - case XFS_SBS_REXTSLOG: - scounter = mp->m_sb.sb_rextslog; - scounter += delta; - if (scounter < 0) { - ASSERT(0); - return -EINVAL; - } - mp->m_sb.sb_rextslog = scounter; - return 0; - default: + /* deltas are +/-64, hence the large batch size of 128. */ + __percpu_counter_add(&mp->m_icount, delta, 128); + if (percpu_counter_compare(&mp->m_icount, 0) < 0) { ASSERT(0); + percpu_counter_add(&mp->m_icount, -delta); return -EINVAL; } + return 0; } -/* - * xfs_mod_incore_sb() is used to change a field in the in-core - * superblock structure by the specified delta. This modification - * is protected by the m_sb_lock. Just use the xfs_mod_incore_sb_unlocked() - * routine to do the work. - */ int -xfs_mod_incore_sb( +xfs_mod_ifree( struct xfs_mount *mp, - xfs_sb_field_t field, - int64_t delta, - int rsvd) + int64_t delta) { - int status; - -#ifdef HAVE_PERCPU_SB - ASSERT(field < XFS_SBS_ICOUNT || field > XFS_SBS_FDBLOCKS); -#endif - spin_lock(&mp->m_sb_lock); - status = xfs_mod_incore_sb_unlocked(mp, field, delta, rsvd); - spin_unlock(&mp->m_sb_lock); - - return status; + percpu_counter_add(&mp->m_ifree, delta); + if (percpu_counter_compare(&mp->m_ifree, 0) < 0) { + ASSERT(0); + percpu_counter_add(&mp->m_ifree, -delta); + return -EINVAL; + } + return 0; } -/* - * Change more than one field in the in-core superblock structure at a time. - * - * The fields and changes to those fields are specified in the array of - * xfs_mod_sb structures passed in. Either all of the specified deltas - * will be applied or none of them will. If any modified field dips below 0, - * then all modifications will be backed out and EINVAL will be returned. - * - * Note that this function may not be used for the superblock values that - * are tracked with the in-memory per-cpu counters - a direct call to - * xfs_icsb_modify_counters is required for these. - */ int -xfs_mod_incore_sb_batch( +xfs_mod_fdblocks( struct xfs_mount *mp, - xfs_mod_sb_t *msb, - uint nmsb, - int rsvd) + int64_t delta, + bool rsvd) { - xfs_mod_sb_t *msbp; - int error = 0; + int64_t lcounter; + long long res_used; + s32 batch; + + if (delta > 0) { + /* + * If the reserve pool is depleted, put blocks back into it + * first. Most of the time the pool is full. + */ + if (likely(mp->m_resblks == mp->m_resblks_avail)) { + percpu_counter_add(&mp->m_fdblocks, delta); + return 0; + } + + spin_lock(&mp->m_sb_lock); + res_used = (long long)(mp->m_resblks - mp->m_resblks_avail); + + if (res_used > delta) { + mp->m_resblks_avail += delta; + } else { + delta -= res_used; + mp->m_resblks_avail = mp->m_resblks; + percpu_counter_add(&mp->m_fdblocks, delta); + } + spin_unlock(&mp->m_sb_lock); + return 0; + } /* - * Loop through the array of mod structures and apply each individually. - * If any fail, then back out all those which have already been applied. - * Do all of this within the scope of the m_sb_lock so that all of the - * changes will be atomic. + * Taking blocks away, need to be more accurate the closer we + * are to zero. + * + * batch size is set to a maximum of 1024 blocks - if we are + * allocating of freeing extents larger than this then we aren't + * going to be hammering the counter lock so a lock per update + * is not a problem. + * + * If the counter has a value of less than 2 * max batch size, + * then make everything serialise as we are real close to + * ENOSPC. + */ +#define __BATCH 1024 + if (percpu_counter_compare(&mp->m_fdblocks, 2 * __BATCH) < 0) + batch = 1; + else + batch = __BATCH; +#undef __BATCH + + __percpu_counter_add(&mp->m_fdblocks, delta, batch); + if (percpu_counter_compare(&mp->m_fdblocks, + XFS_ALLOC_SET_ASIDE(mp)) >= 0) { + /* we had space! */ + return 0; + } + + /* + * lock up the sb for dipping into reserves before releasing the space + * that took us to ENOSPC. */ spin_lock(&mp->m_sb_lock); - for (msbp = msb; msbp < (msb + nmsb); msbp++) { - ASSERT(msbp->msb_field < XFS_SBS_ICOUNT || - msbp->msb_field > XFS_SBS_FDBLOCKS); + percpu_counter_add(&mp->m_fdblocks, -delta); + if (!rsvd) + goto fdblocks_enospc; - error = xfs_mod_incore_sb_unlocked(mp, msbp->msb_field, - msbp->msb_delta, rsvd); - if (error) - goto unwind; + lcounter = (long long)mp->m_resblks_avail + delta; + if (lcounter >= 0) { + mp->m_resblks_avail = lcounter; + spin_unlock(&mp->m_sb_lock); + return 0; } + printk_once(KERN_WARNING + "Filesystem \"%s\": reserve blocks depleted! " + "Consider increasing reserve pool size.", + mp->m_fsname); +fdblocks_enospc: spin_unlock(&mp->m_sb_lock); - return 0; + return -ENOSPC; +} -unwind: - while (--msbp >= msb) { - error = xfs_mod_incore_sb_unlocked(mp, msbp->msb_field, - -msbp->msb_delta, rsvd); - ASSERT(error == 0); - } +int +xfs_mod_frextents( + struct xfs_mount *mp, + int64_t delta) +{ + int64_t lcounter; + int ret = 0; + + spin_lock(&mp->m_sb_lock); + lcounter = mp->m_sb.sb_frextents + delta; + if (lcounter < 0) + ret = -ENOSPC; + else + mp->m_sb.sb_frextents = lcounter; spin_unlock(&mp->m_sb_lock); - return error; + return ret; } /* @@ -1407,573 +1275,3 @@ xfs_dev_is_read_only( } return 0; } - -#ifdef HAVE_PERCPU_SB -/* - * Per-cpu incore superblock counters - * - * Simple concept, difficult implementation - * - * Basically, replace the incore superblock counters with a distributed per cpu - * counter for contended fields (e.g. free block count). - * - * Difficulties arise in that the incore sb is used for ENOSPC checking, and - * hence needs to be accurately read when we are running low on space. Hence - * there is a method to enable and disable the per-cpu counters based on how - * much "stuff" is available in them. - * - * Basically, a counter is enabled if there is enough free resource to justify - * running a per-cpu fast-path. If the per-cpu counter runs out (i.e. a local - * ENOSPC), then we disable the counters to synchronise all callers and - * re-distribute the available resources. - * - * If, once we redistributed the available resources, we still get a failure, - * we disable the per-cpu counter and go through the slow path. - * - * The slow path is the current xfs_mod_incore_sb() function. This means that - * when we disable a per-cpu counter, we need to drain its resources back to - * the global superblock. We do this after disabling the counter to prevent - * more threads from queueing up on the counter. - * - * Essentially, this means that we still need a lock in the fast path to enable - * synchronisation between the global counters and the per-cpu counters. This - * is not a problem because the lock will be local to a CPU almost all the time - * and have little contention except when we get to ENOSPC conditions. - * - * Basically, this lock becomes a barrier that enables us to lock out the fast - * path while we do things like enabling and disabling counters and - * synchronising the counters. - * - * Locking rules: - * - * 1. m_sb_lock before picking up per-cpu locks - * 2. per-cpu locks always picked up via for_each_online_cpu() order - * 3. accurate counter sync requires m_sb_lock + per cpu locks - * 4. modifying per-cpu counters requires holding per-cpu lock - * 5. modifying global counters requires holding m_sb_lock - * 6. enabling or disabling a counter requires holding the m_sb_lock - * and _none_ of the per-cpu locks. - * - * Disabled counters are only ever re-enabled by a balance operation - * that results in more free resources per CPU than a given threshold. - * To ensure counters don't remain disabled, they are rebalanced when - * the global resource goes above a higher threshold (i.e. some hysteresis - * is present to prevent thrashing). - */ - -#ifdef CONFIG_HOTPLUG_CPU -/* - * hot-plug CPU notifier support. - * - * We need a notifier per filesystem as we need to be able to identify - * the filesystem to balance the counters out. This is achieved by - * having a notifier block embedded in the xfs_mount_t and doing pointer - * magic to get the mount pointer from the notifier block address. - */ -STATIC int -xfs_icsb_cpu_notify( - struct notifier_block *nfb, - unsigned long action, - void *hcpu) -{ - xfs_icsb_cnts_t *cntp; - xfs_mount_t *mp; - - mp = (xfs_mount_t *)container_of(nfb, xfs_mount_t, m_icsb_notifier); - cntp = (xfs_icsb_cnts_t *) - per_cpu_ptr(mp->m_sb_cnts, (unsigned long)hcpu); - switch (action) { - case CPU_UP_PREPARE: - case CPU_UP_PREPARE_FROZEN: - /* Easy Case - initialize the area and locks, and - * then rebalance when online does everything else for us. */ - memset(cntp, 0, sizeof(xfs_icsb_cnts_t)); - break; - case CPU_ONLINE: - case CPU_ONLINE_FROZEN: - xfs_icsb_lock(mp); - xfs_icsb_balance_counter(mp, XFS_SBS_ICOUNT, 0); - xfs_icsb_balance_counter(mp, XFS_SBS_IFREE, 0); - xfs_icsb_balance_counter(mp, XFS_SBS_FDBLOCKS, 0); - xfs_icsb_unlock(mp); - break; - case CPU_DEAD: - case CPU_DEAD_FROZEN: - /* Disable all the counters, then fold the dead cpu's - * count into the total on the global superblock and - * re-enable the counters. */ - xfs_icsb_lock(mp); - spin_lock(&mp->m_sb_lock); - xfs_icsb_disable_counter(mp, XFS_SBS_ICOUNT); - xfs_icsb_disable_counter(mp, XFS_SBS_IFREE); - xfs_icsb_disable_counter(mp, XFS_SBS_FDBLOCKS); - - mp->m_sb.sb_icount += cntp->icsb_icount; - mp->m_sb.sb_ifree += cntp->icsb_ifree; - mp->m_sb.sb_fdblocks += cntp->icsb_fdblocks; - - memset(cntp, 0, sizeof(xfs_icsb_cnts_t)); - - xfs_icsb_balance_counter_locked(mp, XFS_SBS_ICOUNT, 0); - xfs_icsb_balance_counter_locked(mp, XFS_SBS_IFREE, 0); - xfs_icsb_balance_counter_locked(mp, XFS_SBS_FDBLOCKS, 0); - spin_unlock(&mp->m_sb_lock); - xfs_icsb_unlock(mp); - break; - } - - return NOTIFY_OK; -} -#endif /* CONFIG_HOTPLUG_CPU */ - -int -xfs_icsb_init_counters( - xfs_mount_t *mp) -{ - xfs_icsb_cnts_t *cntp; - int i; - - mp->m_sb_cnts = alloc_percpu(xfs_icsb_cnts_t); - if (mp->m_sb_cnts == NULL) - return -ENOMEM; - - for_each_online_cpu(i) { - cntp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, i); - memset(cntp, 0, sizeof(xfs_icsb_cnts_t)); - } - - mutex_init(&mp->m_icsb_mutex); - - /* - * start with all counters disabled so that the - * initial balance kicks us off correctly - */ - mp->m_icsb_counters = -1; - -#ifdef CONFIG_HOTPLUG_CPU - mp->m_icsb_notifier.notifier_call = xfs_icsb_cpu_notify; - mp->m_icsb_notifier.priority = 0; - register_hotcpu_notifier(&mp->m_icsb_notifier); -#endif /* CONFIG_HOTPLUG_CPU */ - - return 0; -} - -void -xfs_icsb_reinit_counters( - xfs_mount_t *mp) -{ - xfs_icsb_lock(mp); - /* - * start with all counters disabled so that the - * initial balance kicks us off correctly - */ - mp->m_icsb_counters = -1; - xfs_icsb_balance_counter(mp, XFS_SBS_ICOUNT, 0); - xfs_icsb_balance_counter(mp, XFS_SBS_IFREE, 0); - xfs_icsb_balance_counter(mp, XFS_SBS_FDBLOCKS, 0); - xfs_icsb_unlock(mp); -} - -void -xfs_icsb_destroy_counters( - xfs_mount_t *mp) -{ - if (mp->m_sb_cnts) { - unregister_hotcpu_notifier(&mp->m_icsb_notifier); - free_percpu(mp->m_sb_cnts); - } - mutex_destroy(&mp->m_icsb_mutex); -} - -STATIC void -xfs_icsb_lock_cntr( - xfs_icsb_cnts_t *icsbp) -{ - while (test_and_set_bit(XFS_ICSB_FLAG_LOCK, &icsbp->icsb_flags)) { - ndelay(1000); - } -} - -STATIC void -xfs_icsb_unlock_cntr( - xfs_icsb_cnts_t *icsbp) -{ - clear_bit(XFS_ICSB_FLAG_LOCK, &icsbp->icsb_flags); -} - - -STATIC void -xfs_icsb_lock_all_counters( - xfs_mount_t *mp) -{ - xfs_icsb_cnts_t *cntp; - int i; - - for_each_online_cpu(i) { - cntp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, i); - xfs_icsb_lock_cntr(cntp); - } -} - -STATIC void -xfs_icsb_unlock_all_counters( - xfs_mount_t *mp) -{ - xfs_icsb_cnts_t *cntp; - int i; - - for_each_online_cpu(i) { - cntp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, i); - xfs_icsb_unlock_cntr(cntp); - } -} - -STATIC void -xfs_icsb_count( - xfs_mount_t *mp, - xfs_icsb_cnts_t *cnt, - int flags) -{ - xfs_icsb_cnts_t *cntp; - int i; - - memset(cnt, 0, sizeof(xfs_icsb_cnts_t)); - - if (!(flags & XFS_ICSB_LAZY_COUNT)) - xfs_icsb_lock_all_counters(mp); - - for_each_online_cpu(i) { - cntp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, i); - cnt->icsb_icount += cntp->icsb_icount; - cnt->icsb_ifree += cntp->icsb_ifree; - cnt->icsb_fdblocks += cntp->icsb_fdblocks; - } - - if (!(flags & XFS_ICSB_LAZY_COUNT)) - xfs_icsb_unlock_all_counters(mp); -} - -STATIC int -xfs_icsb_counter_disabled( - xfs_mount_t *mp, - xfs_sb_field_t field) -{ - ASSERT((field >= XFS_SBS_ICOUNT) && (field <= XFS_SBS_FDBLOCKS)); - return test_bit(field, &mp->m_icsb_counters); -} - -STATIC void -xfs_icsb_disable_counter( - xfs_mount_t *mp, - xfs_sb_field_t field) -{ - xfs_icsb_cnts_t cnt; - - ASSERT((field >= XFS_SBS_ICOUNT) && (field <= XFS_SBS_FDBLOCKS)); - - /* - * If we are already disabled, then there is nothing to do - * here. We check before locking all the counters to avoid - * the expensive lock operation when being called in the - * slow path and the counter is already disabled. This is - * safe because the only time we set or clear this state is under - * the m_icsb_mutex. - */ - if (xfs_icsb_counter_disabled(mp, field)) - return; - - xfs_icsb_lock_all_counters(mp); - if (!test_and_set_bit(field, &mp->m_icsb_counters)) { - /* drain back to superblock */ - - xfs_icsb_count(mp, &cnt, XFS_ICSB_LAZY_COUNT); - switch(field) { - case XFS_SBS_ICOUNT: - mp->m_sb.sb_icount = cnt.icsb_icount; - break; - case XFS_SBS_IFREE: - mp->m_sb.sb_ifree = cnt.icsb_ifree; - break; - case XFS_SBS_FDBLOCKS: - mp->m_sb.sb_fdblocks = cnt.icsb_fdblocks; - break; - default: - BUG(); - } - } - - xfs_icsb_unlock_all_counters(mp); -} - -STATIC void -xfs_icsb_enable_counter( - xfs_mount_t *mp, - xfs_sb_field_t field, - uint64_t count, - uint64_t resid) -{ - xfs_icsb_cnts_t *cntp; - int i; - - ASSERT((field >= XFS_SBS_ICOUNT) && (field <= XFS_SBS_FDBLOCKS)); - - xfs_icsb_lock_all_counters(mp); - for_each_online_cpu(i) { - cntp = per_cpu_ptr(mp->m_sb_cnts, i); - switch (field) { - case XFS_SBS_ICOUNT: - cntp->icsb_icount = count + resid; - break; - case XFS_SBS_IFREE: - cntp->icsb_ifree = count + resid; - break; - case XFS_SBS_FDBLOCKS: - cntp->icsb_fdblocks = count + resid; - break; - default: - BUG(); - break; - } - resid = 0; - } - clear_bit(field, &mp->m_icsb_counters); - xfs_icsb_unlock_all_counters(mp); -} - -void -xfs_icsb_sync_counters_locked( - xfs_mount_t *mp, - int flags) -{ - xfs_icsb_cnts_t cnt; - - xfs_icsb_count(mp, &cnt, flags); - - if (!xfs_icsb_counter_disabled(mp, XFS_SBS_ICOUNT)) - mp->m_sb.sb_icount = cnt.icsb_icount; - if (!xfs_icsb_counter_disabled(mp, XFS_SBS_IFREE)) - mp->m_sb.sb_ifree = cnt.icsb_ifree; - if (!xfs_icsb_counter_disabled(mp, XFS_SBS_FDBLOCKS)) - mp->m_sb.sb_fdblocks = cnt.icsb_fdblocks; -} - -/* - * Accurate update of per-cpu counters to incore superblock - */ -void -xfs_icsb_sync_counters( - xfs_mount_t *mp, - int flags) -{ - spin_lock(&mp->m_sb_lock); - xfs_icsb_sync_counters_locked(mp, flags); - spin_unlock(&mp->m_sb_lock); -} - -/* - * Balance and enable/disable counters as necessary. - * - * Thresholds for re-enabling counters are somewhat magic. inode counts are - * chosen to be the same number as single on disk allocation chunk per CPU, and - * free blocks is something far enough zero that we aren't going thrash when we - * get near ENOSPC. We also need to supply a minimum we require per cpu to - * prevent looping endlessly when xfs_alloc_space asks for more than will - * be distributed to a single CPU but each CPU has enough blocks to be - * reenabled. - * - * Note that we can be called when counters are already disabled. - * xfs_icsb_disable_counter() optimises the counter locking in this case to - * prevent locking every per-cpu counter needlessly. - */ - -#define XFS_ICSB_INO_CNTR_REENABLE (uint64_t)64 -#define XFS_ICSB_FDBLK_CNTR_REENABLE(mp) \ - (uint64_t)(512 + XFS_ALLOC_SET_ASIDE(mp)) -STATIC void -xfs_icsb_balance_counter_locked( - xfs_mount_t *mp, - xfs_sb_field_t field, - int min_per_cpu) -{ - uint64_t count, resid; - int weight = num_online_cpus(); - uint64_t min = (uint64_t)min_per_cpu; - - /* disable counter and sync counter */ - xfs_icsb_disable_counter(mp, field); - - /* update counters - first CPU gets residual*/ - switch (field) { - case XFS_SBS_ICOUNT: - count = mp->m_sb.sb_icount; - resid = do_div(count, weight); - if (count < max(min, XFS_ICSB_INO_CNTR_REENABLE)) - return; - break; - case XFS_SBS_IFREE: - count = mp->m_sb.sb_ifree; - resid = do_div(count, weight); - if (count < max(min, XFS_ICSB_INO_CNTR_REENABLE)) - return; - break; - case XFS_SBS_FDBLOCKS: - count = mp->m_sb.sb_fdblocks; - resid = do_div(count, weight); - if (count < max(min, XFS_ICSB_FDBLK_CNTR_REENABLE(mp))) - return; - break; - default: - BUG(); - count = resid = 0; /* quiet, gcc */ - break; - } - - xfs_icsb_enable_counter(mp, field, count, resid); -} - -STATIC void -xfs_icsb_balance_counter( - xfs_mount_t *mp, - xfs_sb_field_t fields, - int min_per_cpu) -{ - spin_lock(&mp->m_sb_lock); - xfs_icsb_balance_counter_locked(mp, fields, min_per_cpu); - spin_unlock(&mp->m_sb_lock); -} - -int -xfs_icsb_modify_counters( - xfs_mount_t *mp, - xfs_sb_field_t field, - int64_t delta, - int rsvd) -{ - xfs_icsb_cnts_t *icsbp; - long long lcounter; /* long counter for 64 bit fields */ - int ret = 0; - - might_sleep(); -again: - preempt_disable(); - icsbp = this_cpu_ptr(mp->m_sb_cnts); - - /* - * if the counter is disabled, go to slow path - */ - if (unlikely(xfs_icsb_counter_disabled(mp, field))) - goto slow_path; - xfs_icsb_lock_cntr(icsbp); - if (unlikely(xfs_icsb_counter_disabled(mp, field))) { - xfs_icsb_unlock_cntr(icsbp); - goto slow_path; - } - - switch (field) { - case XFS_SBS_ICOUNT: - lcounter = icsbp->icsb_icount; - lcounter += delta; - if (unlikely(lcounter < 0)) - goto balance_counter; - icsbp->icsb_icount = lcounter; - break; - - case XFS_SBS_IFREE: - lcounter = icsbp->icsb_ifree; - lcounter += delta; - if (unlikely(lcounter < 0)) - goto balance_counter; - icsbp->icsb_ifree = lcounter; - break; - - case XFS_SBS_FDBLOCKS: - BUG_ON((mp->m_resblks - mp->m_resblks_avail) != 0); - - lcounter = icsbp->icsb_fdblocks - XFS_ALLOC_SET_ASIDE(mp); - lcounter += delta; - if (unlikely(lcounter < 0)) - goto balance_counter; - icsbp->icsb_fdblocks = lcounter + XFS_ALLOC_SET_ASIDE(mp); - break; - default: - BUG(); - break; - } - xfs_icsb_unlock_cntr(icsbp); - preempt_enable(); - return 0; - -slow_path: - preempt_enable(); - - /* - * serialise with a mutex so we don't burn lots of cpu on - * the superblock lock. We still need to hold the superblock - * lock, however, when we modify the global structures. - */ - xfs_icsb_lock(mp); - - /* - * Now running atomically. - * - * If the counter is enabled, someone has beaten us to rebalancing. - * Drop the lock and try again in the fast path.... - */ - if (!(xfs_icsb_counter_disabled(mp, field))) { - xfs_icsb_unlock(mp); - goto again; - } - - /* - * The counter is currently disabled. Because we are - * running atomically here, we know a rebalance cannot - * be in progress. Hence we can go straight to operating - * on the global superblock. We do not call xfs_mod_incore_sb() - * here even though we need to get the m_sb_lock. Doing so - * will cause us to re-enter this function and deadlock. - * Hence we get the m_sb_lock ourselves and then call - * xfs_mod_incore_sb_unlocked() as the unlocked path operates - * directly on the global counters. - */ - spin_lock(&mp->m_sb_lock); - ret = xfs_mod_incore_sb_unlocked(mp, field, delta, rsvd); - spin_unlock(&mp->m_sb_lock); - - /* - * Now that we've modified the global superblock, we - * may be able to re-enable the distributed counters - * (e.g. lots of space just got freed). After that - * we are done. - */ - if (ret != -ENOSPC) - xfs_icsb_balance_counter(mp, field, 0); - xfs_icsb_unlock(mp); - return ret; - -balance_counter: - xfs_icsb_unlock_cntr(icsbp); - preempt_enable(); - - /* - * We may have multiple threads here if multiple per-cpu - * counters run dry at the same time. This will mean we can - * do more balances than strictly necessary but it is not - * the common slowpath case. - */ - xfs_icsb_lock(mp); - - /* - * running atomically. - * - * This will leave the counter in the correct state for future - * accesses. After the rebalance, we simply try again and our retry - * will either succeed through the fast path or slow path without - * another balance operation being required. - */ - xfs_icsb_balance_counter(mp, field, delta); - xfs_icsb_unlock(mp); - goto again; -} - -#endif diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 0d8abd6364d9..8c995a2ccb6f 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -18,8 +18,6 @@ #ifndef __XFS_MOUNT_H__ #define __XFS_MOUNT_H__ -#ifdef __KERNEL__ - struct xlog; struct xfs_inode; struct xfs_mru_cache; @@ -29,44 +27,6 @@ struct xfs_quotainfo; struct xfs_dir_ops; struct xfs_da_geometry; -#ifdef HAVE_PERCPU_SB - -/* - * Valid per-cpu incore superblock counters. Note that if you add new counters, - * you may need to define new counter disabled bit field descriptors as there - * are more possible fields in the superblock that can fit in a bitfield on a - * 32 bit platform. The XFS_SBS_* values for the current current counters just - * fit. - */ -typedef struct xfs_icsb_cnts { - uint64_t icsb_fdblocks; - uint64_t icsb_ifree; - uint64_t icsb_icount; - unsigned long icsb_flags; -} xfs_icsb_cnts_t; - -#define XFS_ICSB_FLAG_LOCK (1 << 0) /* counter lock bit */ - -#define XFS_ICSB_LAZY_COUNT (1 << 1) /* accuracy not needed */ - -extern int xfs_icsb_init_counters(struct xfs_mount *); -extern void xfs_icsb_reinit_counters(struct xfs_mount *); -extern void xfs_icsb_destroy_counters(struct xfs_mount *); -extern void xfs_icsb_sync_counters(struct xfs_mount *, int); -extern void xfs_icsb_sync_counters_locked(struct xfs_mount *, int); -extern int xfs_icsb_modify_counters(struct xfs_mount *, xfs_sb_field_t, - int64_t, int); - -#else -#define xfs_icsb_init_counters(mp) (0) -#define xfs_icsb_destroy_counters(mp) do { } while (0) -#define xfs_icsb_reinit_counters(mp) do { } while (0) -#define xfs_icsb_sync_counters(mp, flags) do { } while (0) -#define xfs_icsb_sync_counters_locked(mp, flags) do { } while (0) -#define xfs_icsb_modify_counters(mp, field, delta, rsvd) \ - xfs_mod_incore_sb(mp, field, delta, rsvd) -#endif - /* dynamic preallocation free space thresholds, 5% down to 1% */ enum { XFS_LOWSP_1_PCNT = 0, @@ -81,8 +41,13 @@ typedef struct xfs_mount { struct super_block *m_super; xfs_tid_t m_tid; /* next unused tid for fs */ struct xfs_ail *m_ail; /* fs active log item list */ - xfs_sb_t m_sb; /* copy of fs superblock */ + + struct xfs_sb m_sb; /* copy of fs superblock */ spinlock_t m_sb_lock; /* sb counter lock */ + struct percpu_counter m_icount; /* allocated inodes counter */ + struct percpu_counter m_ifree; /* free inodes counter */ + struct percpu_counter m_fdblocks; /* free block counter */ + struct xfs_buf *m_sb_bp; /* buffer for superblock */ char *m_fsname; /* filesystem name */ int m_fsname_len; /* strlen of fs name */ @@ -152,12 +117,6 @@ typedef struct xfs_mount { const struct xfs_dir_ops *m_nondir_inode_ops; /* !dir inode ops */ uint m_chsize; /* size of next field */ atomic_t m_active_trans; /* number trans frozen */ -#ifdef HAVE_PERCPU_SB - xfs_icsb_cnts_t __percpu *m_sb_cnts; /* per-cpu superblock counters */ - unsigned long m_icsb_counters; /* disabled per-cpu counters */ - struct notifier_block m_icsb_notifier; /* hotplug cpu notifier */ - struct mutex m_icsb_mutex; /* balancer sync lock */ -#endif struct xfs_mru_cache *m_filestream; /* per-mount filestream data */ struct delayed_work m_reclaim_work; /* background inode reclaim */ struct delayed_work m_eofblocks_work; /* background eof blocks @@ -301,35 +260,6 @@ xfs_daddr_to_agbno(struct xfs_mount *mp, xfs_daddr_t d) } /* - * Per-cpu superblock locking functions - */ -#ifdef HAVE_PERCPU_SB -static inline void -xfs_icsb_lock(xfs_mount_t *mp) -{ - mutex_lock(&mp->m_icsb_mutex); -} - -static inline void -xfs_icsb_unlock(xfs_mount_t *mp) -{ - mutex_unlock(&mp->m_icsb_mutex); -} -#else -#define xfs_icsb_lock(mp) -#define xfs_icsb_unlock(mp) -#endif - -/* - * This structure is for use by the xfs_mod_incore_sb_batch() routine. - * xfs_growfs can specify a few fields which are more than int limit - */ -typedef struct xfs_mod_sb { - xfs_sb_field_t msb_field; /* Field to modify, see below */ - int64_t msb_delta; /* Change to make to specified field */ -} xfs_mod_sb_t; - -/* * Per-ag incore structure, copies of information in agf and agi, to improve the * performance of allocation group selection. */ @@ -383,11 +313,14 @@ extern __uint64_t xfs_default_resblks(xfs_mount_t *mp); extern int xfs_mountfs(xfs_mount_t *mp); extern int xfs_initialize_perag(xfs_mount_t *mp, xfs_agnumber_t agcount, xfs_agnumber_t *maxagi); - extern void xfs_unmountfs(xfs_mount_t *); -extern int xfs_mod_incore_sb(xfs_mount_t *, xfs_sb_field_t, int64_t, int); -extern int xfs_mod_incore_sb_batch(xfs_mount_t *, xfs_mod_sb_t *, - uint, int); + +extern int xfs_mod_icount(struct xfs_mount *mp, int64_t delta); +extern int xfs_mod_ifree(struct xfs_mount *mp, int64_t delta); +extern int xfs_mod_fdblocks(struct xfs_mount *mp, int64_t delta, + bool reserved); +extern int xfs_mod_frextents(struct xfs_mount *mp, int64_t delta); + extern int xfs_mount_log_sb(xfs_mount_t *); extern struct xfs_buf *xfs_getsb(xfs_mount_t *, int); extern int xfs_readsb(xfs_mount_t *, int); @@ -399,6 +332,4 @@ extern int xfs_dev_is_read_only(struct xfs_mount *, char *); extern void xfs_set_low_space_thresholds(struct xfs_mount *); -#endif /* __KERNEL__ */ - #endif /* __XFS_MOUNT_H__ */ diff --git a/fs/xfs/xfs_mru_cache.c b/fs/xfs/xfs_mru_cache.c index 30ecca3037e3..f8a674d7f092 100644 --- a/fs/xfs/xfs_mru_cache.c +++ b/fs/xfs/xfs_mru_cache.c @@ -437,7 +437,7 @@ xfs_mru_cache_insert( if (!mru || !mru->lists) return -EINVAL; - if (radix_tree_preload(GFP_KERNEL)) + if (radix_tree_preload(GFP_NOFS)) return -ENOMEM; INIT_LIST_HEAD(&elem->list_node); diff --git a/fs/xfs/xfs_pnfs.c b/fs/xfs/xfs_pnfs.c index 4b33ef112400..365dd57ea760 100644 --- a/fs/xfs/xfs_pnfs.c +++ b/fs/xfs/xfs_pnfs.c @@ -300,8 +300,10 @@ xfs_fs_commit_blocks( tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE); error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ichange, 0, 0); - if (error) + if (error) { + xfs_trans_cancel(tp, 0); goto out_drop_iolock; + } xfs_ilock(ip, XFS_ILOCK_EXCL); xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c index c6b22e1e77ed..5538468c7f63 100644 --- a/fs/xfs/xfs_qm.c +++ b/fs/xfs/xfs_qm.c @@ -841,6 +841,11 @@ xfs_qm_reset_dqcounts( */ xfs_dqcheck(mp, ddq, id+j, type, XFS_QMOPT_DQREPAIR, "xfs_quotacheck"); + /* + * Reset type in case we are reusing group quota file for + * project quotas or vice versa + */ + ddq->d_flags = type; ddq->d_bcount = 0; ddq->d_icount = 0; ddq->d_rtbcount = 0; diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 58453e3255f8..8782b36517b5 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -966,6 +966,8 @@ xfs_fs_inode_init_once( atomic_set(&ip->i_pincount, 0); spin_lock_init(&ip->i_flags_lock); + mrlock_init(&ip->i_mmaplock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER, + "xfsino", ip->i_ino); mrlock_init(&ip->i_lock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER, "xfsino", ip->i_ino); } @@ -1013,24 +1015,6 @@ xfs_free_fsname( kfree(mp->m_logname); } -STATIC void -xfs_fs_put_super( - struct super_block *sb) -{ - struct xfs_mount *mp = XFS_M(sb); - - xfs_notice(mp, "Unmounting Filesystem"); - xfs_filestream_unmount(mp); - xfs_unmountfs(mp); - - xfs_freesb(mp); - xfs_icsb_destroy_counters(mp); - xfs_destroy_mount_workqueues(mp); - xfs_close_devices(mp); - xfs_free_fsname(mp); - kfree(mp); -} - STATIC int xfs_fs_sync_fs( struct super_block *sb, @@ -1066,6 +1050,9 @@ xfs_fs_statfs( xfs_sb_t *sbp = &mp->m_sb; struct xfs_inode *ip = XFS_I(dentry->d_inode); __uint64_t fakeinos, id; + __uint64_t icount; + __uint64_t ifree; + __uint64_t fdblocks; xfs_extlen_t lsize; __int64_t ffree; @@ -1076,17 +1063,21 @@ xfs_fs_statfs( statp->f_fsid.val[0] = (u32)id; statp->f_fsid.val[1] = (u32)(id >> 32); - xfs_icsb_sync_counters(mp, XFS_ICSB_LAZY_COUNT); + icount = percpu_counter_sum(&mp->m_icount); + ifree = percpu_counter_sum(&mp->m_ifree); + fdblocks = percpu_counter_sum(&mp->m_fdblocks); spin_lock(&mp->m_sb_lock); statp->f_bsize = sbp->sb_blocksize; lsize = sbp->sb_logstart ? sbp->sb_logblocks : 0; statp->f_blocks = sbp->sb_dblocks - lsize; - statp->f_bfree = statp->f_bavail = - sbp->sb_fdblocks - XFS_ALLOC_SET_ASIDE(mp); + spin_unlock(&mp->m_sb_lock); + + statp->f_bfree = fdblocks - XFS_ALLOC_SET_ASIDE(mp); + statp->f_bavail = statp->f_bfree; + fakeinos = statp->f_bfree << sbp->sb_inopblog; - statp->f_files = - MIN(sbp->sb_icount + fakeinos, (__uint64_t)XFS_MAXINUMBER); + statp->f_files = MIN(icount + fakeinos, (__uint64_t)XFS_MAXINUMBER); if (mp->m_maxicount) statp->f_files = min_t(typeof(statp->f_files), statp->f_files, @@ -1098,10 +1089,9 @@ xfs_fs_statfs( sbp->sb_icount); /* make sure statp->f_ffree does not underflow */ - ffree = statp->f_files - (sbp->sb_icount - sbp->sb_ifree); + ffree = statp->f_files - (icount - ifree); statp->f_ffree = max_t(__int64_t, ffree, 0); - spin_unlock(&mp->m_sb_lock); if ((ip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) && ((mp->m_qflags & (XFS_PQUOTA_ACCT|XFS_PQUOTA_ENFD))) == @@ -1382,6 +1372,51 @@ xfs_finish_flags( return 0; } +static int +xfs_init_percpu_counters( + struct xfs_mount *mp) +{ + int error; + + error = percpu_counter_init(&mp->m_icount, 0, GFP_KERNEL); + if (error) + return -ENOMEM; + + error = percpu_counter_init(&mp->m_ifree, 0, GFP_KERNEL); + if (error) + goto free_icount; + + error = percpu_counter_init(&mp->m_fdblocks, 0, GFP_KERNEL); + if (error) + goto free_ifree; + + return 0; + +free_ifree: + percpu_counter_destroy(&mp->m_ifree); +free_icount: + percpu_counter_destroy(&mp->m_icount); + return -ENOMEM; +} + +void +xfs_reinit_percpu_counters( + struct xfs_mount *mp) +{ + percpu_counter_set(&mp->m_icount, mp->m_sb.sb_icount); + percpu_counter_set(&mp->m_ifree, mp->m_sb.sb_ifree); + percpu_counter_set(&mp->m_fdblocks, mp->m_sb.sb_fdblocks); +} + +static void +xfs_destroy_percpu_counters( + struct xfs_mount *mp) +{ + percpu_counter_destroy(&mp->m_icount); + percpu_counter_destroy(&mp->m_ifree); + percpu_counter_destroy(&mp->m_fdblocks); +} + STATIC int xfs_fs_fill_super( struct super_block *sb, @@ -1430,7 +1465,7 @@ xfs_fs_fill_super( if (error) goto out_close_devices; - error = xfs_icsb_init_counters(mp); + error = xfs_init_percpu_counters(mp); if (error) goto out_destroy_workqueues; @@ -1488,7 +1523,7 @@ xfs_fs_fill_super( out_free_sb: xfs_freesb(mp); out_destroy_counters: - xfs_icsb_destroy_counters(mp); + xfs_destroy_percpu_counters(mp); out_destroy_workqueues: xfs_destroy_mount_workqueues(mp); out_close_devices: @@ -1505,6 +1540,24 @@ out_destroy_workqueues: goto out_free_sb; } +STATIC void +xfs_fs_put_super( + struct super_block *sb) +{ + struct xfs_mount *mp = XFS_M(sb); + + xfs_notice(mp, "Unmounting Filesystem"); + xfs_filestream_unmount(mp); + xfs_unmountfs(mp); + + xfs_freesb(mp); + xfs_destroy_percpu_counters(mp); + xfs_destroy_mount_workqueues(mp); + xfs_close_devices(mp); + xfs_free_fsname(mp); + kfree(mp); +} + STATIC struct dentry * xfs_fs_mount( struct file_system_type *fs_type, diff --git a/fs/xfs/xfs_super.h b/fs/xfs/xfs_super.h index 2b830c2f322e..499058fea303 100644 --- a/fs/xfs/xfs_super.h +++ b/fs/xfs/xfs_super.h @@ -72,6 +72,8 @@ extern const struct export_operations xfs_export_operations; extern const struct xattr_handler *xfs_xattr_handlers[]; extern const struct quotactl_ops xfs_quotactl_operations; +extern void xfs_reinit_percpu_counters(struct xfs_mount *mp); + #define XFS_M(sb) ((struct xfs_mount *)((sb)->s_fs_info)) #endif /* __XFS_SUPER_H__ */ diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index 7e45fa155ea8..b2a45cc9eceb 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -115,7 +115,7 @@ DECLARE_EVENT_CLASS(xfs_perag_class, __entry->refcount = refcount; __entry->caller_ip = caller_ip; ), - TP_printk("dev %d:%d agno %u refcount %d caller %pf", + TP_printk("dev %d:%d agno %u refcount %d caller %ps", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno, __entry->refcount, @@ -239,7 +239,7 @@ TRACE_EVENT(xfs_iext_insert, __entry->caller_ip = caller_ip; ), TP_printk("dev %d:%d ino 0x%llx state %s idx %ld " - "offset %lld block %lld count %lld flag %d caller %pf", + "offset %lld block %lld count %lld flag %d caller %ps", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->ino, __print_flags(__entry->bmap_state, "|", XFS_BMAP_EXT_FLAGS), @@ -283,7 +283,7 @@ DECLARE_EVENT_CLASS(xfs_bmap_class, __entry->caller_ip = caller_ip; ), TP_printk("dev %d:%d ino 0x%llx state %s idx %ld " - "offset %lld block %lld count %lld flag %d caller %pf", + "offset %lld block %lld count %lld flag %d caller %ps", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->ino, __print_flags(__entry->bmap_state, "|", XFS_BMAP_EXT_FLAGS), @@ -329,7 +329,7 @@ DECLARE_EVENT_CLASS(xfs_buf_class, __entry->caller_ip = caller_ip; ), TP_printk("dev %d:%d bno 0x%llx nblks 0x%x hold %d pincount %d " - "lock %d flags %s caller %pf", + "lock %d flags %s caller %ps", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->bno, __entry->nblks, @@ -402,7 +402,7 @@ DECLARE_EVENT_CLASS(xfs_buf_flags_class, __entry->caller_ip = caller_ip; ), TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d " - "lock %d flags %s caller %pf", + "lock %d flags %s caller %ps", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->bno, __entry->buffer_length, @@ -447,7 +447,7 @@ TRACE_EVENT(xfs_buf_ioerror, __entry->caller_ip = caller_ip; ), TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d " - "lock %d error %d flags %s caller %pf", + "lock %d error %d flags %s caller %ps", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->bno, __entry->buffer_length, @@ -613,7 +613,7 @@ DECLARE_EVENT_CLASS(xfs_lock_class, __entry->lock_flags = lock_flags; __entry->caller_ip = caller_ip; ), - TP_printk("dev %d:%d ino 0x%llx flags %s caller %pf", + TP_printk("dev %d:%d ino 0x%llx flags %s caller %ps", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->ino, __print_flags(__entry->lock_flags, "|", XFS_LOCK_FLAGS), @@ -686,6 +686,9 @@ DEFINE_INODE_EVENT(xfs_inode_set_eofblocks_tag); DEFINE_INODE_EVENT(xfs_inode_clear_eofblocks_tag); DEFINE_INODE_EVENT(xfs_inode_free_eofblocks_invalid); +DEFINE_INODE_EVENT(xfs_filemap_fault); +DEFINE_INODE_EVENT(xfs_filemap_page_mkwrite); + DECLARE_EVENT_CLASS(xfs_iref_class, TP_PROTO(struct xfs_inode *ip, unsigned long caller_ip), TP_ARGS(ip, caller_ip), @@ -703,7 +706,7 @@ DECLARE_EVENT_CLASS(xfs_iref_class, __entry->pincount = atomic_read(&ip->i_pincount); __entry->caller_ip = caller_ip; ), - TP_printk("dev %d:%d ino 0x%llx count %d pincount %d caller %pf", + TP_printk("dev %d:%d ino 0x%llx count %d pincount %d caller %ps", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->ino, __entry->count, @@ -1334,7 +1337,7 @@ TRACE_EVENT(xfs_bunmap, __entry->flags = flags; ), TP_printk("dev %d:%d ino 0x%llx size 0x%llx bno 0x%llx len 0x%llx" - "flags %s caller %pf", + "flags %s caller %ps", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->ino, __entry->size, @@ -1467,7 +1470,7 @@ TRACE_EVENT(xfs_agf, ), TP_printk("dev %d:%d agno %u flags %s length %u roots b %u c %u " "levels b %u c %u flfirst %u fllast %u flcount %u " - "freeblks %u longest %u caller %pf", + "freeblks %u longest %u caller %ps", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno, __print_flags(__entry->flags, "|", XFS_AGF_FLAGS), diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index eb90cd59a0ec..220ef2c906b2 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -173,7 +173,7 @@ xfs_trans_reserve( uint rtextents) { int error = 0; - int rsvd = (tp->t_flags & XFS_TRANS_RESERVE) != 0; + bool rsvd = (tp->t_flags & XFS_TRANS_RESERVE) != 0; /* Mark this thread as being in a transaction */ current_set_flags_nested(&tp->t_pflags, PF_FSTRANS); @@ -184,8 +184,7 @@ xfs_trans_reserve( * fail if the count would go below zero. */ if (blocks > 0) { - error = xfs_icsb_modify_counters(tp->t_mountp, XFS_SBS_FDBLOCKS, - -((int64_t)blocks), rsvd); + error = xfs_mod_fdblocks(tp->t_mountp, -((int64_t)blocks), rsvd); if (error != 0) { current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS); return -ENOSPC; @@ -236,8 +235,7 @@ xfs_trans_reserve( * fail if the count would go below zero. */ if (rtextents > 0) { - error = xfs_mod_incore_sb(tp->t_mountp, XFS_SBS_FREXTENTS, - -((int64_t)rtextents), rsvd); + error = xfs_mod_frextents(tp->t_mountp, -((int64_t)rtextents)); if (error) { error = -ENOSPC; goto undo_log; @@ -268,8 +266,7 @@ undo_log: undo_blocks: if (blocks > 0) { - xfs_icsb_modify_counters(tp->t_mountp, XFS_SBS_FDBLOCKS, - (int64_t)blocks, rsvd); + xfs_mod_fdblocks(tp->t_mountp, -((int64_t)blocks), rsvd); tp->t_blk_res = 0; } @@ -488,6 +485,54 @@ xfs_trans_apply_sb_deltas( sizeof(sbp->sb_frextents) - 1); } +STATIC int +xfs_sb_mod8( + uint8_t *field, + int8_t delta) +{ + int8_t counter = *field; + + counter += delta; + if (counter < 0) { + ASSERT(0); + return -EINVAL; + } + *field = counter; + return 0; +} + +STATIC int +xfs_sb_mod32( + uint32_t *field, + int32_t delta) +{ + int32_t counter = *field; + + counter += delta; + if (counter < 0) { + ASSERT(0); + return -EINVAL; + } + *field = counter; + return 0; +} + +STATIC int +xfs_sb_mod64( + uint64_t *field, + int64_t delta) +{ + int64_t counter = *field; + + counter += delta; + if (counter < 0) { + ASSERT(0); + return -EINVAL; + } + *field = counter; + return 0; +} + /* * xfs_trans_unreserve_and_mod_sb() is called to release unused reservations * and apply superblock counter changes to the in-core superblock. The @@ -495,13 +540,6 @@ xfs_trans_apply_sb_deltas( * applied to the in-core superblock. The idea is that that has already been * done. * - * This is done efficiently with a single call to xfs_mod_incore_sb_batch(). - * However, we have to ensure that we only modify each superblock field only - * once because the application of the delta values may not be atomic. That can - * lead to ENOSPC races occurring if we have two separate modifcations of the - * free space counter to put back the entire reservation and then take away - * what we used. - * * If we are not logging superblock counters, then the inode allocated/free and * used block counts are not updated in the on disk superblock. In this case, * XFS_TRANS_SB_DIRTY will not be set when the transaction is updated but we @@ -509,21 +547,15 @@ xfs_trans_apply_sb_deltas( */ void xfs_trans_unreserve_and_mod_sb( - xfs_trans_t *tp) + struct xfs_trans *tp) { - xfs_mod_sb_t msb[9]; /* If you add cases, add entries */ - xfs_mod_sb_t *msbp; - xfs_mount_t *mp = tp->t_mountp; - /* REFERENCED */ - int error; - int rsvd; - int64_t blkdelta = 0; - int64_t rtxdelta = 0; - int64_t idelta = 0; - int64_t ifreedelta = 0; - - msbp = msb; - rsvd = (tp->t_flags & XFS_TRANS_RESERVE) != 0; + struct xfs_mount *mp = tp->t_mountp; + bool rsvd = (tp->t_flags & XFS_TRANS_RESERVE) != 0; + int64_t blkdelta = 0; + int64_t rtxdelta = 0; + int64_t idelta = 0; + int64_t ifreedelta = 0; + int error; /* calculate deltas */ if (tp->t_blk_res > 0) @@ -547,97 +579,115 @@ xfs_trans_unreserve_and_mod_sb( /* apply the per-cpu counters */ if (blkdelta) { - error = xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, - blkdelta, rsvd); + error = xfs_mod_fdblocks(mp, blkdelta, rsvd); if (error) goto out; } if (idelta) { - error = xfs_icsb_modify_counters(mp, XFS_SBS_ICOUNT, - idelta, rsvd); + error = xfs_mod_icount(mp, idelta); if (error) goto out_undo_fdblocks; } if (ifreedelta) { - error = xfs_icsb_modify_counters(mp, XFS_SBS_IFREE, - ifreedelta, rsvd); + error = xfs_mod_ifree(mp, ifreedelta); if (error) goto out_undo_icount; } + if (rtxdelta == 0 && !(tp->t_flags & XFS_TRANS_SB_DIRTY)) + return; + /* apply remaining deltas */ - if (rtxdelta != 0) { - msbp->msb_field = XFS_SBS_FREXTENTS; - msbp->msb_delta = rtxdelta; - msbp++; + spin_lock(&mp->m_sb_lock); + if (rtxdelta) { + error = xfs_sb_mod64(&mp->m_sb.sb_frextents, rtxdelta); + if (error) + goto out_undo_ifree; } - if (tp->t_flags & XFS_TRANS_SB_DIRTY) { - if (tp->t_dblocks_delta != 0) { - msbp->msb_field = XFS_SBS_DBLOCKS; - msbp->msb_delta = tp->t_dblocks_delta; - msbp++; - } - if (tp->t_agcount_delta != 0) { - msbp->msb_field = XFS_SBS_AGCOUNT; - msbp->msb_delta = tp->t_agcount_delta; - msbp++; - } - if (tp->t_imaxpct_delta != 0) { - msbp->msb_field = XFS_SBS_IMAX_PCT; - msbp->msb_delta = tp->t_imaxpct_delta; - msbp++; - } - if (tp->t_rextsize_delta != 0) { - msbp->msb_field = XFS_SBS_REXTSIZE; - msbp->msb_delta = tp->t_rextsize_delta; - msbp++; - } - if (tp->t_rbmblocks_delta != 0) { - msbp->msb_field = XFS_SBS_RBMBLOCKS; - msbp->msb_delta = tp->t_rbmblocks_delta; - msbp++; - } - if (tp->t_rblocks_delta != 0) { - msbp->msb_field = XFS_SBS_RBLOCKS; - msbp->msb_delta = tp->t_rblocks_delta; - msbp++; - } - if (tp->t_rextents_delta != 0) { - msbp->msb_field = XFS_SBS_REXTENTS; - msbp->msb_delta = tp->t_rextents_delta; - msbp++; - } - if (tp->t_rextslog_delta != 0) { - msbp->msb_field = XFS_SBS_REXTSLOG; - msbp->msb_delta = tp->t_rextslog_delta; - msbp++; - } + if (tp->t_dblocks_delta != 0) { + error = xfs_sb_mod64(&mp->m_sb.sb_dblocks, tp->t_dblocks_delta); + if (error) + goto out_undo_frextents; } - - /* - * If we need to change anything, do it. - */ - if (msbp > msb) { - error = xfs_mod_incore_sb_batch(tp->t_mountp, msb, - (uint)(msbp - msb), rsvd); + if (tp->t_agcount_delta != 0) { + error = xfs_sb_mod32(&mp->m_sb.sb_agcount, tp->t_agcount_delta); if (error) - goto out_undo_ifreecount; + goto out_undo_dblocks; } - + if (tp->t_imaxpct_delta != 0) { + error = xfs_sb_mod8(&mp->m_sb.sb_imax_pct, tp->t_imaxpct_delta); + if (error) + goto out_undo_agcount; + } + if (tp->t_rextsize_delta != 0) { + error = xfs_sb_mod32(&mp->m_sb.sb_rextsize, + tp->t_rextsize_delta); + if (error) + goto out_undo_imaxpct; + } + if (tp->t_rbmblocks_delta != 0) { + error = xfs_sb_mod32(&mp->m_sb.sb_rbmblocks, + tp->t_rbmblocks_delta); + if (error) + goto out_undo_rextsize; + } + if (tp->t_rblocks_delta != 0) { + error = xfs_sb_mod64(&mp->m_sb.sb_rblocks, tp->t_rblocks_delta); + if (error) + goto out_undo_rbmblocks; + } + if (tp->t_rextents_delta != 0) { + error = xfs_sb_mod64(&mp->m_sb.sb_rextents, + tp->t_rextents_delta); + if (error) + goto out_undo_rblocks; + } + if (tp->t_rextslog_delta != 0) { + error = xfs_sb_mod8(&mp->m_sb.sb_rextslog, + tp->t_rextslog_delta); + if (error) + goto out_undo_rextents; + } + spin_unlock(&mp->m_sb_lock); return; -out_undo_ifreecount: +out_undo_rextents: + if (tp->t_rextents_delta) + xfs_sb_mod64(&mp->m_sb.sb_rextents, -tp->t_rextents_delta); +out_undo_rblocks: + if (tp->t_rblocks_delta) + xfs_sb_mod64(&mp->m_sb.sb_rblocks, -tp->t_rblocks_delta); +out_undo_rbmblocks: + if (tp->t_rbmblocks_delta) + xfs_sb_mod32(&mp->m_sb.sb_rbmblocks, -tp->t_rbmblocks_delta); +out_undo_rextsize: + if (tp->t_rextsize_delta) + xfs_sb_mod32(&mp->m_sb.sb_rextsize, -tp->t_rextsize_delta); +out_undo_imaxpct: + if (tp->t_rextsize_delta) + xfs_sb_mod8(&mp->m_sb.sb_imax_pct, -tp->t_imaxpct_delta); +out_undo_agcount: + if (tp->t_agcount_delta) + xfs_sb_mod32(&mp->m_sb.sb_agcount, -tp->t_agcount_delta); +out_undo_dblocks: + if (tp->t_dblocks_delta) + xfs_sb_mod64(&mp->m_sb.sb_dblocks, -tp->t_dblocks_delta); +out_undo_frextents: + if (rtxdelta) + xfs_sb_mod64(&mp->m_sb.sb_frextents, -rtxdelta); +out_undo_ifree: + spin_unlock(&mp->m_sb_lock); if (ifreedelta) - xfs_icsb_modify_counters(mp, XFS_SBS_IFREE, -ifreedelta, rsvd); + xfs_mod_ifree(mp, -ifreedelta); out_undo_icount: if (idelta) - xfs_icsb_modify_counters(mp, XFS_SBS_ICOUNT, -idelta, rsvd); + xfs_mod_icount(mp, -idelta); out_undo_fdblocks: if (blkdelta) - xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, -blkdelta, rsvd); + xfs_mod_fdblocks(mp, -blkdelta, rsvd); out: ASSERT(error == 0); return; |