diff options
| author | Sage Weil <sage@inktank.com> | 2013-08-15 11:11:45 -0700 | 
|---|---|---|
| committer | Sage Weil <sage@inktank.com> | 2013-08-15 11:11:45 -0700 | 
| commit | ee3e542fec6e69bc9fb668698889a37d93950ddf (patch) | |
| tree | e74ee766a4764769ef1d3d45d266b4dea64101d3 /fs/xfs | |
| parent | fe2a801b50c0bb8039d627e5ae1fec249d10ff39 (diff) | |
| parent | f1d6e17f540af37bb1891480143669ba7636c4cf (diff) | |
Merge remote-tracking branch 'linus/master' into testing
Diffstat (limited to 'fs/xfs')
57 files changed, 1689 insertions, 854 deletions
| diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile index 6313b69b6644..4a4508023a3c 100644 --- a/fs/xfs/Makefile +++ b/fs/xfs/Makefile @@ -71,6 +71,7 @@ xfs-y				+= xfs_alloc.o \  				   xfs_dir2_sf.o \  				   xfs_ialloc.o \  				   xfs_ialloc_btree.o \ +				   xfs_icreate_item.o \  				   xfs_inode.o \  				   xfs_log_recover.o \  				   xfs_mount.o \ diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c index 5673bcfda2f0..71596e57283a 100644 --- a/fs/xfs/xfs_alloc.c +++ b/fs/xfs/xfs_alloc.c @@ -175,6 +175,7 @@ xfs_alloc_compute_diff(  	xfs_agblock_t	wantbno,	/* target starting block */  	xfs_extlen_t	wantlen,	/* target length */  	xfs_extlen_t	alignment,	/* target alignment */ +	char		userdata,	/* are we allocating data? */  	xfs_agblock_t	freebno,	/* freespace's starting block */  	xfs_extlen_t	freelen,	/* freespace's length */  	xfs_agblock_t	*newbnop)	/* result: best start block from free */ @@ -189,7 +190,14 @@ xfs_alloc_compute_diff(  	ASSERT(freelen >= wantlen);  	freeend = freebno + freelen;  	wantend = wantbno + wantlen; -	if (freebno >= wantbno) { +	/* +	 * We want to allocate from the start of a free extent if it is past +	 * the desired block or if we are allocating user data and the free +	 * extent is before desired block. The second case is there to allow +	 * for contiguous allocation from the remaining free space if the file +	 * grows in the short term. 
+	 */ +	if (freebno >= wantbno || (userdata && freeend < wantend)) {  		if ((newbno1 = roundup(freebno, alignment)) >= freeend)  			newbno1 = NULLAGBLOCK;  	} else if (freeend >= wantend && alignment > 1) { @@ -805,7 +813,8 @@ xfs_alloc_find_best_extent(  			xfs_alloc_fix_len(args);  			sdiff = xfs_alloc_compute_diff(args->agbno, args->len, -						       args->alignment, *sbnoa, +						       args->alignment, +						       args->userdata, *sbnoa,  						       *slena, &new);  			/* @@ -976,7 +985,8 @@ restart:  			if (args->len < blen)  				continue;  			ltdiff = xfs_alloc_compute_diff(args->agbno, args->len, -				args->alignment, ltbnoa, ltlena, &ltnew); +				args->alignment, args->userdata, ltbnoa, +				ltlena, &ltnew);  			if (ltnew != NULLAGBLOCK &&  			    (args->len > blen || ltdiff < bdiff)) {  				bdiff = ltdiff; @@ -1128,7 +1138,8 @@ restart:  			args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);  			xfs_alloc_fix_len(args);  			ltdiff = xfs_alloc_compute_diff(args->agbno, args->len, -				args->alignment, ltbnoa, ltlena, &ltnew); +				args->alignment, args->userdata, ltbnoa, +				ltlena, &ltnew);  			error = xfs_alloc_find_best_extent(args,  						&bno_cur_lt, &bno_cur_gt, @@ -1144,7 +1155,8 @@ restart:  			args->len = XFS_EXTLEN_MIN(gtlena, args->maxlen);  			xfs_alloc_fix_len(args);  			gtdiff = xfs_alloc_compute_diff(args->agbno, args->len, -				args->alignment, gtbnoa, gtlena, &gtnew); +				args->alignment, args->userdata, gtbnoa, +				gtlena, &gtnew);  			error = xfs_alloc_find_best_extent(args,  						&bno_cur_gt, &bno_cur_lt, @@ -1203,7 +1215,7 @@ restart:  	}  	rlen = args->len;  	(void)xfs_alloc_compute_diff(args->agbno, rlen, args->alignment, -				     ltbnoa, ltlena, &ltnew); +				     args->userdata, ltbnoa, ltlena, &ltnew);  	ASSERT(ltnew >= ltbno);  	ASSERT(ltnew + rlen <= ltbnoa + ltlena);  	ASSERT(ltnew + rlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length)); diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 41a695048be7..596ec71da00e 100644 
--- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -843,10 +843,12 @@ xfs_cluster_write(  STATIC void  xfs_vm_invalidatepage(  	struct page		*page, -	unsigned long		offset) +	unsigned int		offset, +	unsigned int		length)  { -	trace_xfs_invalidatepage(page->mapping->host, page, offset); -	block_invalidatepage(page, offset); +	trace_xfs_invalidatepage(page->mapping->host, page, offset, +				 length); +	block_invalidatepage(page, offset, length);  }  /* @@ -910,7 +912,7 @@ next_buffer:  	xfs_iunlock(ip, XFS_ILOCK_EXCL);  out_invalidate: -	xfs_vm_invalidatepage(page, 0); +	xfs_vm_invalidatepage(page, 0, PAGE_CACHE_SIZE);  	return;  } @@ -940,7 +942,7 @@ xfs_vm_writepage(  	int			count = 0;  	int			nonblocking = 0; -	trace_xfs_writepage(inode, page, 0); +	trace_xfs_writepage(inode, page, 0, 0);  	ASSERT(page_has_buffers(page)); @@ -1171,7 +1173,7 @@ xfs_vm_releasepage(  {  	int			delalloc, unwritten; -	trace_xfs_releasepage(page->mapping->host, page, 0); +	trace_xfs_releasepage(page->mapping->host, page, 0, 0);  	xfs_count_page_state(page, &delalloc, &unwritten); diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c index 31d3cd129269..b800fbcafc7f 100644 --- a/fs/xfs/xfs_attr_leaf.c +++ b/fs/xfs/xfs_attr_leaf.c @@ -690,6 +690,8 @@ xfs_attr_shortform_to_leaf(xfs_da_args_t *args)  	sf = (xfs_attr_shortform_t *)tmpbuffer;  	xfs_idata_realloc(dp, -size, XFS_ATTR_FORK); +	xfs_bmap_local_to_extents_empty(dp, XFS_ATTR_FORK); +  	bp = NULL;  	error = xfs_da_grow_inode(args, &blkno);  	if (error) { diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c index 89042848f9ec..05c698ccb238 100644 --- a/fs/xfs/xfs_bmap.c +++ b/fs/xfs/xfs_bmap.c @@ -1161,6 +1161,24 @@ xfs_bmap_extents_to_btree(   * since the file data needs to get logged so things will stay consistent.   * (The bmap-level manipulations are ok, though).   
*/ +void +xfs_bmap_local_to_extents_empty( +	struct xfs_inode	*ip, +	int			whichfork) +{ +	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork); + +	ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL); +	ASSERT(ifp->if_bytes == 0); +	ASSERT(XFS_IFORK_NEXTENTS(ip, whichfork) == 0); + +	xfs_bmap_forkoff_reset(ip->i_mount, ip, whichfork); +	ifp->if_flags &= ~XFS_IFINLINE; +	ifp->if_flags |= XFS_IFEXTENTS; +	XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_EXTENTS); +} + +  STATIC int				/* error */  xfs_bmap_local_to_extents(  	xfs_trans_t	*tp,		/* transaction pointer */ @@ -1174,9 +1192,12 @@ xfs_bmap_local_to_extents(  				   struct xfs_inode *ip,  				   struct xfs_ifork *ifp))  { -	int		error;		/* error return value */ +	int		error = 0;  	int		flags;		/* logging flags returned */  	xfs_ifork_t	*ifp;		/* inode fork pointer */ +	xfs_alloc_arg_t	args;		/* allocation arguments */ +	xfs_buf_t	*bp;		/* buffer for extent block */ +	xfs_bmbt_rec_host_t *ep;	/* extent record pointer */  	/*  	 * We don't want to deal with the case of keeping inode data inline yet. @@ -1185,68 +1206,65 @@ xfs_bmap_local_to_extents(  	ASSERT(!(S_ISREG(ip->i_d.di_mode) && whichfork == XFS_DATA_FORK));  	ifp = XFS_IFORK_PTR(ip, whichfork);  	ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL); + +	if (!ifp->if_bytes) { +		xfs_bmap_local_to_extents_empty(ip, whichfork); +		flags = XFS_ILOG_CORE; +		goto done; +	} +  	flags = 0;  	error = 0; -	if (ifp->if_bytes) { -		xfs_alloc_arg_t	args;	/* allocation arguments */ -		xfs_buf_t	*bp;	/* buffer for extent block */ -		xfs_bmbt_rec_host_t *ep;/* extent record pointer */ - -		ASSERT((ifp->if_flags & -			(XFS_IFINLINE|XFS_IFEXTENTS|XFS_IFEXTIREC)) == XFS_IFINLINE); -		memset(&args, 0, sizeof(args)); -		args.tp = tp; -		args.mp = ip->i_mount; -		args.firstblock = *firstblock; -		/* -		 * Allocate a block.  We know we need only one, since the -		 * file currently fits in an inode. 
-		 */ -		if (*firstblock == NULLFSBLOCK) { -			args.fsbno = XFS_INO_TO_FSB(args.mp, ip->i_ino); -			args.type = XFS_ALLOCTYPE_START_BNO; -		} else { -			args.fsbno = *firstblock; -			args.type = XFS_ALLOCTYPE_NEAR_BNO; -		} -		args.total = total; -		args.minlen = args.maxlen = args.prod = 1; -		error = xfs_alloc_vextent(&args); -		if (error) -			goto done; +	ASSERT((ifp->if_flags & (XFS_IFINLINE|XFS_IFEXTENTS|XFS_IFEXTIREC)) == +								XFS_IFINLINE); +	memset(&args, 0, sizeof(args)); +	args.tp = tp; +	args.mp = ip->i_mount; +	args.firstblock = *firstblock; +	/* +	 * Allocate a block.  We know we need only one, since the +	 * file currently fits in an inode. +	 */ +	if (*firstblock == NULLFSBLOCK) { +		args.fsbno = XFS_INO_TO_FSB(args.mp, ip->i_ino); +		args.type = XFS_ALLOCTYPE_START_BNO; +	} else { +		args.fsbno = *firstblock; +		args.type = XFS_ALLOCTYPE_NEAR_BNO; +	} +	args.total = total; +	args.minlen = args.maxlen = args.prod = 1; +	error = xfs_alloc_vextent(&args); +	if (error) +		goto done; -		/* Can't fail, the space was reserved. */ -		ASSERT(args.fsbno != NULLFSBLOCK); -		ASSERT(args.len == 1); -		*firstblock = args.fsbno; -		bp = xfs_btree_get_bufl(args.mp, tp, args.fsbno, 0); +	/* Can't fail, the space was reserved. */ +	ASSERT(args.fsbno != NULLFSBLOCK); +	ASSERT(args.len == 1); +	*firstblock = args.fsbno; +	bp = xfs_btree_get_bufl(args.mp, tp, args.fsbno, 0); -		/* initialise the block and copy the data */ -		init_fn(tp, bp, ip, ifp); +	/* initialise the block and copy the data */ +	init_fn(tp, bp, ip, ifp); -		/* account for the change in fork size and log everything */ -		xfs_trans_log_buf(tp, bp, 0, ifp->if_bytes - 1); -		xfs_bmap_forkoff_reset(args.mp, ip, whichfork); -		xfs_idata_realloc(ip, -ifp->if_bytes, whichfork); -		xfs_iext_add(ifp, 0, 1); -		ep = xfs_iext_get_ext(ifp, 0); -		xfs_bmbt_set_allf(ep, 0, args.fsbno, 1, XFS_EXT_NORM); -		trace_xfs_bmap_post_update(ip, 0, -				whichfork == XFS_ATTR_FORK ? 
BMAP_ATTRFORK : 0, -				_THIS_IP_); -		XFS_IFORK_NEXT_SET(ip, whichfork, 1); -		ip->i_d.di_nblocks = 1; -		xfs_trans_mod_dquot_byino(tp, ip, -			XFS_TRANS_DQ_BCOUNT, 1L); -		flags |= xfs_ilog_fext(whichfork); -	} else { -		ASSERT(XFS_IFORK_NEXTENTS(ip, whichfork) == 0); -		xfs_bmap_forkoff_reset(ip->i_mount, ip, whichfork); -	} -	ifp->if_flags &= ~XFS_IFINLINE; -	ifp->if_flags |= XFS_IFEXTENTS; -	XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_EXTENTS); +	/* account for the change in fork size and log everything */ +	xfs_trans_log_buf(tp, bp, 0, ifp->if_bytes - 1); +	xfs_idata_realloc(ip, -ifp->if_bytes, whichfork); +	xfs_bmap_local_to_extents_empty(ip, whichfork);  	flags |= XFS_ILOG_CORE; + +	xfs_iext_add(ifp, 0, 1); +	ep = xfs_iext_get_ext(ifp, 0); +	xfs_bmbt_set_allf(ep, 0, args.fsbno, 1, XFS_EXT_NORM); +	trace_xfs_bmap_post_update(ip, 0, +			whichfork == XFS_ATTR_FORK ? BMAP_ATTRFORK : 0, +			_THIS_IP_); +	XFS_IFORK_NEXT_SET(ip, whichfork, 1); +	ip->i_d.di_nblocks = 1; +	xfs_trans_mod_dquot_byino(tp, ip, +		XFS_TRANS_DQ_BCOUNT, 1L); +	flags |= xfs_ilog_fext(whichfork); +  done:  	*logflagsp = flags;  	return error; @@ -1323,25 +1341,6 @@ xfs_bmap_add_attrfork_extents(  }  /* - * Block initialisation function for local to extent format conversion. - * - * This shouldn't actually be called by anyone, so make sure debug kernels cause - * a noticable failure. - */ -STATIC void -xfs_bmap_local_to_extents_init_fn( -	struct xfs_trans	*tp, -	struct xfs_buf		*bp, -	struct xfs_inode	*ip, -	struct xfs_ifork	*ifp) -{ -	ASSERT(0); -	bp->b_ops = &xfs_bmbt_buf_ops; -	memcpy(bp->b_addr, ifp->if_u1.if_data, ifp->if_bytes); -	xfs_trans_buf_set_type(tp, bp, XFS_BLFT_BTREE_BUF); -} - -/*   * Called from xfs_bmap_add_attrfork to handle local format files. Each   * different data fork content type needs a different callout to do the   * conversion. 
Some are basic and only require special block initialisation @@ -1381,9 +1380,9 @@ xfs_bmap_add_attrfork_local(  						 flags, XFS_DATA_FORK,  						 xfs_symlink_local_to_remote); -	return xfs_bmap_local_to_extents(tp, ip, firstblock, 1, flags, -					 XFS_DATA_FORK, -					 xfs_bmap_local_to_extents_init_fn); +	/* should only be called for types that support local format data */ +	ASSERT(0); +	return EFSCORRUPTED;  }  /* @@ -4907,20 +4906,19 @@ xfs_bmapi_write(  	orig_mval = mval;  	orig_nmap = *nmap;  #endif +	whichfork = (flags & XFS_BMAPI_ATTRFORK) ? +		XFS_ATTR_FORK : XFS_DATA_FORK;  	ASSERT(*nmap >= 1);  	ASSERT(*nmap <= XFS_BMAP_MAX_NMAP);  	ASSERT(!(flags & XFS_BMAPI_IGSTATE));  	ASSERT(tp != NULL);  	ASSERT(len > 0); - -	whichfork = (flags & XFS_BMAPI_ATTRFORK) ? -		XFS_ATTR_FORK : XFS_DATA_FORK; +	ASSERT(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_LOCAL);  	if (unlikely(XFS_TEST_ERROR(  	    (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS && -	     XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE && -	     XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_LOCAL), +	     XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE),  	     mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) {  		XFS_ERROR_REPORT("xfs_bmapi_write", XFS_ERRLEVEL_LOW, mp);  		return XFS_ERROR(EFSCORRUPTED); @@ -4933,37 +4931,6 @@ xfs_bmapi_write(  	XFS_STATS_INC(xs_blk_mapw); -	if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) { -		/* -		 * XXX (dgc): This assumes we are only called for inodes that -		 * contain content neutral data in local format. Anything that -		 * contains caller-specific data in local format that needs -		 * transformation to move to a block format needs to do the -		 * conversion to extent format itself. 
-		 * -		 * Directory data forks and attribute forks handle this -		 * themselves, but with the addition of metadata verifiers every -		 * data fork in local format now contains caller specific data -		 * and as such conversion through this function is likely to be -		 * broken. -		 * -		 * The only likely user of this branch is for remote symlinks, -		 * but we cannot overwrite the data fork contents of the symlink -		 * (EEXIST occurs higher up the stack) and so it will never go -		 * from local format to extent format here. Hence I don't think -		 * this branch is ever executed intentionally and we should -		 * consider removing it and asserting that xfs_bmapi_write() -		 * cannot be called directly on local format forks. i.e. callers -		 * are completely responsible for local to extent format -		 * conversion, not xfs_bmapi_write(). -		 */ -		error = xfs_bmap_local_to_extents(tp, ip, firstblock, total, -					&bma.logflags, whichfork, -					xfs_bmap_local_to_extents_init_fn); -		if (error) -			goto error0; -	} -  	if (*firstblock == NULLFSBLOCK) {  		if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE)  			bma.minleft = be16_to_cpu(ifp->if_broot->bb_level) + 1; diff --git a/fs/xfs/xfs_bmap.h b/fs/xfs/xfs_bmap.h index 5f469c3516eb..1cf1292d29b7 100644 --- a/fs/xfs/xfs_bmap.h +++ b/fs/xfs/xfs_bmap.h @@ -172,6 +172,7 @@ void	xfs_bmap_trace_exlist(struct xfs_inode *ip, xfs_extnum_t cnt,  #endif  int	xfs_bmap_add_attrfork(struct xfs_inode *ip, int size, int rsvd); +void	xfs_bmap_local_to_extents_empty(struct xfs_inode *ip, int whichfork);  void	xfs_bmap_add_free(xfs_fsblock_t bno, xfs_filblks_t len,  		struct xfs_bmap_free *flist, struct xfs_mount *mp);  void	xfs_bmap_cancel(struct xfs_bmap_free *flist); diff --git a/fs/xfs/xfs_bmap_btree.h b/fs/xfs/xfs_bmap_btree.h index 70c43d9f72c1..1b726d626941 100644 --- a/fs/xfs/xfs_bmap_btree.h +++ b/fs/xfs/xfs_bmap_btree.h @@ -196,6 +196,8 @@ typedef __be64 xfs_bmbt_ptr_t, xfs_bmdr_ptr_t;  #define 
XFS_BMDR_SPACE_CALC(nrecs) \  	(int)(sizeof(xfs_bmdr_block_t) + \  	       ((nrecs) * (sizeof(xfs_bmbt_key_t) + sizeof(xfs_bmbt_ptr_t)))) +#define XFS_BMAP_BMDR_SPACE(bb) \ +	(XFS_BMDR_SPACE_CALC(be16_to_cpu((bb)->bb_numrecs)))  /*   * Maximum number of bmap btree levels. diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index 4ec431777048..bfc4e0c26fd3 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c @@ -140,6 +140,16 @@ xfs_buf_item_size(  	ASSERT(bip->bli_flags & XFS_BLI_LOGGED); +	if (bip->bli_flags & XFS_BLI_ORDERED) { +		/* +		 * The buffer has been logged just to order it. +		 * It is not being included in the transaction +		 * commit, so no vectors are used at all. +		 */ +		trace_xfs_buf_item_size_ordered(bip); +		return XFS_LOG_VEC_ORDERED; +	} +  	/*  	 * the vector count is based on the number of buffer vectors we have  	 * dirty bits in. This will only be greater than one when we have a @@ -212,6 +222,7 @@ xfs_buf_item_format_segment(  		goto out;  	} +  	/*  	 * Fill in an iovec for each set of contiguous chunks.  	 */ @@ -299,18 +310,36 @@ xfs_buf_item_format(  	/*  	 * If it is an inode buffer, transfer the in-memory state to the -	 * format flags and clear the in-memory state. We do not transfer +	 * format flags and clear the in-memory state. +	 * +	 * For buffer based inode allocation, we do not transfer  	 * this state if the inode buffer allocation has not yet been committed  	 * to the log as setting the XFS_BLI_INODE_BUF flag will prevent  	 * correct replay of the inode allocation. +	 * +	 * For icreate item based inode allocation, the buffers aren't written +	 * to the journal during allocation, and hence we should always tag the +	 * buffer as an inode buffer so that the correct unlinked list replay +	 * occurs during recovery.  	 
*/  	if (bip->bli_flags & XFS_BLI_INODE_BUF) { -		if (!((bip->bli_flags & XFS_BLI_INODE_ALLOC_BUF) && +		if (xfs_sb_version_hascrc(&lip->li_mountp->m_sb) || +		    !((bip->bli_flags & XFS_BLI_INODE_ALLOC_BUF) &&  		      xfs_log_item_in_current_chkpt(lip)))  			bip->__bli_format.blf_flags |= XFS_BLF_INODE_BUF;  		bip->bli_flags &= ~XFS_BLI_INODE_BUF;  	} +	if ((bip->bli_flags & (XFS_BLI_ORDERED|XFS_BLI_STALE)) == +							XFS_BLI_ORDERED) { +		/* +		 * The buffer has been logged just to order it.  It is not being +		 * included in the transaction commit, so don't format it. +		 */ +		trace_xfs_buf_item_format_ordered(bip); +		return; +	} +  	for (i = 0; i < bip->bli_format_count; i++) {  		vecp = xfs_buf_item_format_segment(bip, vecp, offset,  						&bip->bli_formats[i]); @@ -340,6 +369,7 @@ xfs_buf_item_pin(  	ASSERT(atomic_read(&bip->bli_refcount) > 0);  	ASSERT((bip->bli_flags & XFS_BLI_LOGGED) || +	       (bip->bli_flags & XFS_BLI_ORDERED) ||  	       (bip->bli_flags & XFS_BLI_STALE));  	trace_xfs_buf_item_pin(bip); @@ -512,8 +542,9 @@ xfs_buf_item_unlock(  {  	struct xfs_buf_log_item	*bip = BUF_ITEM(lip);  	struct xfs_buf		*bp = bip->bli_buf; -	int			aborted, clean, i; -	uint			hold; +	bool			clean; +	bool			aborted; +	int			flags;  	/* Clear the buffer's association with this transaction. */  	bp->b_transp = NULL; @@ -524,23 +555,21 @@ xfs_buf_item_unlock(  	 * (cancelled) buffers at unpin time, but we'll never go through the  	 * pin/unpin cycle if we abort inside commit.  	 */ -	aborted = (lip->li_flags & XFS_LI_ABORTED) != 0; - +	aborted = (lip->li_flags & XFS_LI_ABORTED) ? true : false;  	/* -	 * Before possibly freeing the buf item, determine if we should -	 * release the buffer at the end of this routine. +	 * Before possibly freeing the buf item, copy the per-transaction state +	 * so we can reference it safely later after clearing it from the +	 * buffer log item.  	 */ -	hold = bip->bli_flags & XFS_BLI_HOLD; - -	/* Clear the per transaction state. 
*/ -	bip->bli_flags &= ~(XFS_BLI_LOGGED | XFS_BLI_HOLD); +	flags = bip->bli_flags; +	bip->bli_flags &= ~(XFS_BLI_LOGGED | XFS_BLI_HOLD | XFS_BLI_ORDERED);  	/*  	 * If the buf item is marked stale, then don't do anything.  We'll  	 * unlock the buffer and free the buf item when the buffer is unpinned  	 * for the last time.  	 */ -	if (bip->bli_flags & XFS_BLI_STALE) { +	if (flags & XFS_BLI_STALE) {  		trace_xfs_buf_item_unlock_stale(bip);  		ASSERT(bip->__bli_format.blf_flags & XFS_BLF_CANCEL);  		if (!aborted) { @@ -557,13 +586,19 @@ xfs_buf_item_unlock(  	 * be the only reference to the buf item, so we free it anyway  	 * regardless of whether it is dirty or not. A dirty abort implies a  	 * shutdown, anyway. +	 * +	 * Ordered buffers are dirty but may have no recorded changes, so ensure +	 * we only release clean items here.  	 */ -	clean = 1; -	for (i = 0; i < bip->bli_format_count; i++) { -		if (!xfs_bitmap_empty(bip->bli_formats[i].blf_data_map, -			     bip->bli_formats[i].blf_map_size)) { -			clean = 0; -			break; +	clean = (flags & XFS_BLI_DIRTY) ? false : true; +	if (clean) { +		int i; +		for (i = 0; i < bip->bli_format_count; i++) { +			if (!xfs_bitmap_empty(bip->bli_formats[i].blf_data_map, +				     bip->bli_formats[i].blf_map_size)) { +				clean = false; +				break; +			}  		}  	}  	if (clean) @@ -576,7 +611,7 @@ xfs_buf_item_unlock(  	} else  		atomic_dec(&bip->bli_refcount); -	if (!hold) +	if (!(flags & XFS_BLI_HOLD))  		xfs_buf_relse(bp);  } @@ -842,12 +877,6 @@ xfs_buf_item_log(  	struct xfs_buf		*bp = bip->bli_buf;  	/* -	 * Mark the item as having some dirty data for -	 * quick reference in xfs_buf_item_dirty. -	 */ -	bip->bli_flags |= XFS_BLI_DIRTY; - -	/*  	 * walk each buffer segment and mark them dirty appropriately.  	 
*/  	start = 0; @@ -873,7 +902,7 @@ xfs_buf_item_log(  /* - * Return 1 if the buffer has some data that has been logged (at any + * Return 1 if the buffer has been logged or ordered in a transaction (at any   * point, not just the current transaction) and 0 if not.   */  uint @@ -907,11 +936,11 @@ void  xfs_buf_item_relse(  	xfs_buf_t	*bp)  { -	xfs_buf_log_item_t	*bip; +	xfs_buf_log_item_t	*bip = bp->b_fspriv;  	trace_xfs_buf_item_relse(bp, _RET_IP_); +	ASSERT(!(bip->bli_item.li_flags & XFS_LI_IN_AIL)); -	bip = bp->b_fspriv;  	bp->b_fspriv = bip->bli_item.li_bio_list;  	if (bp->b_fspriv == NULL)  		bp->b_iodone = NULL; diff --git a/fs/xfs/xfs_buf_item.h b/fs/xfs/xfs_buf_item.h index 2573d2a75fc8..0f1c247dc680 100644 --- a/fs/xfs/xfs_buf_item.h +++ b/fs/xfs/xfs_buf_item.h @@ -120,6 +120,7 @@ xfs_blft_from_flags(struct xfs_buf_log_format *blf)  #define	XFS_BLI_INODE_ALLOC_BUF	0x10  #define XFS_BLI_STALE_INODE	0x20  #define	XFS_BLI_INODE_BUF	0x40 +#define	XFS_BLI_ORDERED		0x80  #define XFS_BLI_FLAGS \  	{ XFS_BLI_HOLD,		"HOLD" }, \ @@ -128,7 +129,8 @@ xfs_blft_from_flags(struct xfs_buf_log_format *blf)  	{ XFS_BLI_LOGGED,	"LOGGED" }, \  	{ XFS_BLI_INODE_ALLOC_BUF, "INODE_ALLOC" }, \  	{ XFS_BLI_STALE_INODE,	"STALE_INODE" }, \ -	{ XFS_BLI_INODE_BUF,	"INODE_BUF" } +	{ XFS_BLI_INODE_BUF,	"INODE_BUF" }, \ +	{ XFS_BLI_ORDERED,	"ORDERED" }  #ifdef __KERNEL__ diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c index c407e1ccff43..e36445ceaf80 100644 --- a/fs/xfs/xfs_dfrag.c +++ b/fs/xfs/xfs_dfrag.c @@ -24,6 +24,9 @@  #include "xfs_ag.h"  #include "xfs_mount.h"  #include "xfs_bmap_btree.h" +#include "xfs_alloc_btree.h" +#include "xfs_ialloc_btree.h" +#include "xfs_btree.h"  #include "xfs_dinode.h"  #include "xfs_inode.h"  #include "xfs_inode_item.h" @@ -182,7 +185,7 @@ xfs_swap_extents_check_format(  	 */  	if (tip->i_d.di_format == XFS_DINODE_FMT_BTREE) {  		if (XFS_IFORK_BOFF(ip) && -		    tip->i_df.if_broot_bytes > XFS_IFORK_BOFF(ip)) +		    
XFS_BMAP_BMDR_SPACE(tip->i_df.if_broot) > XFS_IFORK_BOFF(ip))  			return EINVAL;  		if (XFS_IFORK_NEXTENTS(tip, XFS_DATA_FORK) <=  		    XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK)) @@ -192,9 +195,8 @@ xfs_swap_extents_check_format(  	/* Reciprocal target->temp btree format checks */  	if (ip->i_d.di_format == XFS_DINODE_FMT_BTREE) {  		if (XFS_IFORK_BOFF(tip) && -		    ip->i_df.if_broot_bytes > XFS_IFORK_BOFF(tip)) +		    XFS_BMAP_BMDR_SPACE(ip->i_df.if_broot) > XFS_IFORK_BOFF(tip))  			return EINVAL; -  		if (XFS_IFORK_NEXTENTS(ip, XFS_DATA_FORK) <=  		    XFS_IFORK_MAXEXT(tip, XFS_DATA_FORK))  			return EINVAL; diff --git a/fs/xfs/xfs_dinode.h b/fs/xfs/xfs_dinode.h index f7a0e95d197a..e5869b50dc41 100644 --- a/fs/xfs/xfs_dinode.h +++ b/fs/xfs/xfs_dinode.h @@ -39,6 +39,9 @@ typedef struct xfs_timestamp {   * There is a very similar struct icdinode in xfs_inode which matches the   * layout of the first 96 bytes of this structure, but is kept in native   * format instead of big endian. + * + * Note: di_flushiter is only used by v1/2 inodes - it's effectively a zeroed + * padding field for v3 inodes.   */  typedef struct xfs_dinode {  	__be16		di_magic;	/* inode magic # = XFS_DINODE_MAGIC */ @@ -132,9 +135,6 @@ typedef enum xfs_dinode_fmt {  #define XFS_LITINO(mp, version) \  	((int)(((mp)->m_sb.sb_inodesize) - xfs_dinode_size(version))) -#define XFS_BROOT_SIZE_ADJ(ip) \ -	(XFS_BMBT_BLOCK_LEN((ip)->i_mount) - sizeof(xfs_bmdr_block_t)) -  /*   * Inode data & attribute fork sizes, per inode.   
*/ diff --git a/fs/xfs/xfs_dir2.c b/fs/xfs/xfs_dir2.c index b26a50f9921d..8f023dee404d 100644 --- a/fs/xfs/xfs_dir2.c +++ b/fs/xfs/xfs_dir2.c @@ -368,10 +368,8 @@ xfs_dir_removename(  int  xfs_readdir(  	xfs_inode_t	*dp, -	void		*dirent, -	size_t		bufsize, -	xfs_off_t	*offset, -	filldir_t	filldir) +	struct dir_context *ctx, +	size_t		bufsize)  {  	int		rval;		/* return value */  	int		v;		/* type-checking value */ @@ -385,14 +383,13 @@ xfs_readdir(  	XFS_STATS_INC(xs_dir_getdents);  	if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) -		rval = xfs_dir2_sf_getdents(dp, dirent, offset, filldir); +		rval = xfs_dir2_sf_getdents(dp, ctx);  	else if ((rval = xfs_dir2_isblock(NULL, dp, &v)))  		;  	else if (v) -		rval = xfs_dir2_block_getdents(dp, dirent, offset, filldir); +		rval = xfs_dir2_block_getdents(dp, ctx);  	else -		rval = xfs_dir2_leaf_getdents(dp, dirent, bufsize, offset, -					      filldir); +		rval = xfs_dir2_leaf_getdents(dp, ctx, bufsize);  	return rval;  } diff --git a/fs/xfs/xfs_dir2_block.c b/fs/xfs/xfs_dir2_block.c index e59f5fc816fe..5e7fbd72cf52 100644 --- a/fs/xfs/xfs_dir2_block.c +++ b/fs/xfs/xfs_dir2_block.c @@ -29,6 +29,7 @@  #include "xfs_dinode.h"  #include "xfs_inode.h"  #include "xfs_inode_item.h" +#include "xfs_bmap.h"  #include "xfs_buf_item.h"  #include "xfs_dir2.h"  #include "xfs_dir2_format.h" @@ -569,9 +570,7 @@ xfs_dir2_block_addname(  int						/* error */  xfs_dir2_block_getdents(  	xfs_inode_t		*dp,		/* incore inode */ -	void			*dirent, -	xfs_off_t		*offset, -	filldir_t		filldir) +	struct dir_context	*ctx)  {  	xfs_dir2_data_hdr_t	*hdr;		/* block header */  	struct xfs_buf		*bp;		/* buffer for block */ @@ -589,7 +588,7 @@ xfs_dir2_block_getdents(  	/*  	 * If the block number in the offset is out of range, we're done.  	 
*/ -	if (xfs_dir2_dataptr_to_db(mp, *offset) > mp->m_dirdatablk) +	if (xfs_dir2_dataptr_to_db(mp, ctx->pos) > mp->m_dirdatablk)  		return 0;  	error = xfs_dir3_block_read(NULL, dp, &bp); @@ -600,7 +599,7 @@ xfs_dir2_block_getdents(  	 * Extract the byte offset we start at from the seek pointer.  	 * We'll skip entries before this.  	 */ -	wantoff = xfs_dir2_dataptr_to_off(mp, *offset); +	wantoff = xfs_dir2_dataptr_to_off(mp, ctx->pos);  	hdr = bp->b_addr;  	xfs_dir3_data_check(dp, bp);  	/* @@ -639,13 +638,12 @@ xfs_dir2_block_getdents(  		cook = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk,  					    (char *)dep - (char *)hdr); +		ctx->pos = cook & 0x7fffffff;  		/*  		 * If it didn't fit, set the final offset to here & return.  		 */ -		if (filldir(dirent, (char *)dep->name, dep->namelen, -			    cook & 0x7fffffff, be64_to_cpu(dep->inumber), -			    DT_UNKNOWN)) { -			*offset = cook & 0x7fffffff; +		if (!dir_emit(ctx, (char *)dep->name, dep->namelen, +			    be64_to_cpu(dep->inumber), DT_UNKNOWN)) {  			xfs_trans_brelse(NULL, bp);  			return 0;  		} @@ -655,7 +653,7 @@ xfs_dir2_block_getdents(  	 * Reached the end of the block.  	 * Set the offset to a non-existent block 1 and return.  	 */ -	*offset = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk + 1, 0) & +	ctx->pos = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk + 1, 0) &  			0x7fffffff;  	xfs_trans_brelse(NULL, bp);  	return 0; @@ -1167,13 +1165,15 @@ xfs_dir2_sf_to_block(  	__be16			*tagp;		/* end of data entry */  	xfs_trans_t		*tp;		/* transaction pointer */  	struct xfs_name		name; +	struct xfs_ifork	*ifp;  	trace_xfs_dir2_sf_to_block(args);  	dp = args->dp;  	tp = args->trans;  	mp = dp->i_mount; -	ASSERT(dp->i_df.if_flags & XFS_IFINLINE); +	ifp = XFS_IFORK_PTR(dp, XFS_DATA_FORK); +	ASSERT(ifp->if_flags & XFS_IFINLINE);  	/*  	 * Bomb out if the shortform directory is way too short.  	 
*/ @@ -1182,22 +1182,23 @@ xfs_dir2_sf_to_block(  		return XFS_ERROR(EIO);  	} -	oldsfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; +	oldsfp = (xfs_dir2_sf_hdr_t *)ifp->if_u1.if_data; -	ASSERT(dp->i_df.if_bytes == dp->i_d.di_size); -	ASSERT(dp->i_df.if_u1.if_data != NULL); +	ASSERT(ifp->if_bytes == dp->i_d.di_size); +	ASSERT(ifp->if_u1.if_data != NULL);  	ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(oldsfp->i8count)); +	ASSERT(dp->i_d.di_nextents == 0);  	/*  	 * Copy the directory into a temporary buffer.  	 * Then pitch the incore inode data so we can make extents.  	 */ -	sfp = kmem_alloc(dp->i_df.if_bytes, KM_SLEEP); -	memcpy(sfp, oldsfp, dp->i_df.if_bytes); +	sfp = kmem_alloc(ifp->if_bytes, KM_SLEEP); +	memcpy(sfp, oldsfp, ifp->if_bytes); -	xfs_idata_realloc(dp, -dp->i_df.if_bytes, XFS_DATA_FORK); +	xfs_idata_realloc(dp, -ifp->if_bytes, XFS_DATA_FORK); +	xfs_bmap_local_to_extents_empty(dp, XFS_DATA_FORK);  	dp->i_d.di_size = 0; -	xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);  	/*  	 * Add block 0 to the inode. diff --git a/fs/xfs/xfs_dir2_leaf.c b/fs/xfs/xfs_dir2_leaf.c index da71a1819d78..2aed25cae04d 100644 --- a/fs/xfs/xfs_dir2_leaf.c +++ b/fs/xfs/xfs_dir2_leaf.c @@ -1108,6 +1108,7 @@ xfs_dir2_leaf_readbuf(  	struct xfs_mount	*mp = dp->i_mount;  	struct xfs_buf		*bp = *bpp;  	struct xfs_bmbt_irec	*map = mip->map; +	struct blk_plug		plug;  	int			error = 0;  	int			length;  	int			i; @@ -1236,6 +1237,7 @@ xfs_dir2_leaf_readbuf(  	/*  	 * Do we need more readahead?  	 
*/ +	blk_start_plug(&plug);  	for (mip->ra_index = mip->ra_offset = i = 0;  	     mip->ra_want > mip->ra_current && i < mip->map_blocks;  	     i += mp->m_dirblkfsbs) { @@ -1287,6 +1289,7 @@ xfs_dir2_leaf_readbuf(  			}  		}  	} +	blk_finish_plug(&plug);  out:  	*bpp = bp; @@ -1300,10 +1303,8 @@ out:  int						/* error */  xfs_dir2_leaf_getdents(  	xfs_inode_t		*dp,		/* incore directory inode */ -	void			*dirent, -	size_t			bufsize, -	xfs_off_t		*offset, -	filldir_t		filldir) +	struct dir_context	*ctx, +	size_t			bufsize)  {  	struct xfs_buf		*bp = NULL;	/* data block buffer */  	xfs_dir2_data_hdr_t	*hdr;		/* data block header */ @@ -1322,7 +1323,7 @@ xfs_dir2_leaf_getdents(  	 * If the offset is at or past the largest allowed value,  	 * give up right away.  	 */ -	if (*offset >= XFS_DIR2_MAX_DATAPTR) +	if (ctx->pos >= XFS_DIR2_MAX_DATAPTR)  		return 0;  	mp = dp->i_mount; @@ -1343,7 +1344,7 @@ xfs_dir2_leaf_getdents(  	 * Inside the loop we keep the main offset value as a byte offset  	 * in the directory file.  	 */ -	curoff = xfs_dir2_dataptr_to_byte(mp, *offset); +	curoff = xfs_dir2_dataptr_to_byte(mp, ctx->pos);  	/*  	 * Force this conversion through db so we truncate the offset @@ -1444,8 +1445,8 @@ xfs_dir2_leaf_getdents(  		dep = (xfs_dir2_data_entry_t *)ptr;  		length = xfs_dir2_data_entsize(dep->namelen); -		if (filldir(dirent, (char *)dep->name, dep->namelen, -			    xfs_dir2_byte_to_dataptr(mp, curoff) & 0x7fffffff, +		ctx->pos = xfs_dir2_byte_to_dataptr(mp, curoff) & 0x7fffffff; +		if (!dir_emit(ctx, (char *)dep->name, dep->namelen,  			    be64_to_cpu(dep->inumber), DT_UNKNOWN))  			break; @@ -1462,9 +1463,9 @@ xfs_dir2_leaf_getdents(  	 * All done.  Set output offset value to current offset.  	 
*/  	if (curoff > xfs_dir2_dataptr_to_byte(mp, XFS_DIR2_MAX_DATAPTR)) -		*offset = XFS_DIR2_MAX_DATAPTR & 0x7fffffff; +		ctx->pos = XFS_DIR2_MAX_DATAPTR & 0x7fffffff;  	else -		*offset = xfs_dir2_byte_to_dataptr(mp, curoff) & 0x7fffffff; +		ctx->pos = xfs_dir2_byte_to_dataptr(mp, curoff) & 0x7fffffff;  	kmem_free(map_info);  	if (bp)  		xfs_trans_brelse(NULL, bp); diff --git a/fs/xfs/xfs_dir2_priv.h b/fs/xfs/xfs_dir2_priv.h index 7cf573c88aad..0511cda4a712 100644 --- a/fs/xfs/xfs_dir2_priv.h +++ b/fs/xfs/xfs_dir2_priv.h @@ -33,8 +33,8 @@ extern int xfs_dir_cilookup_result(struct xfs_da_args *args,  extern const struct xfs_buf_ops xfs_dir3_block_buf_ops;  extern int xfs_dir2_block_addname(struct xfs_da_args *args); -extern int xfs_dir2_block_getdents(struct xfs_inode *dp, void *dirent, -		xfs_off_t *offset, filldir_t filldir); +extern int xfs_dir2_block_getdents(struct xfs_inode *dp, +		struct dir_context *ctx);  extern int xfs_dir2_block_lookup(struct xfs_da_args *args);  extern int xfs_dir2_block_removename(struct xfs_da_args *args);  extern int xfs_dir2_block_replace(struct xfs_da_args *args); @@ -91,8 +91,8 @@ extern void xfs_dir3_leaf_compact(struct xfs_da_args *args,  extern void xfs_dir3_leaf_compact_x1(struct xfs_dir3_icleaf_hdr *leafhdr,  		struct xfs_dir2_leaf_entry *ents, int *indexp,  		int *lowstalep, int *highstalep, int *lowlogp, int *highlogp); -extern int xfs_dir2_leaf_getdents(struct xfs_inode *dp, void *dirent, -		size_t bufsize, xfs_off_t *offset, filldir_t filldir); +extern int xfs_dir2_leaf_getdents(struct xfs_inode *dp, struct dir_context *ctx, +		size_t bufsize);  extern int xfs_dir3_leaf_get_buf(struct xfs_da_args *args, xfs_dir2_db_t bno,  		struct xfs_buf **bpp, __uint16_t magic);  extern void xfs_dir3_leaf_log_ents(struct xfs_trans *tp, struct xfs_buf *bp, @@ -153,8 +153,7 @@ extern int xfs_dir2_block_to_sf(struct xfs_da_args *args, struct xfs_buf *bp,  		int size, xfs_dir2_sf_hdr_t *sfhp);  extern int xfs_dir2_sf_addname(struct 
xfs_da_args *args);  extern int xfs_dir2_sf_create(struct xfs_da_args *args, xfs_ino_t pino); -extern int xfs_dir2_sf_getdents(struct xfs_inode *dp, void *dirent, -		xfs_off_t *offset, filldir_t filldir); +extern int xfs_dir2_sf_getdents(struct xfs_inode *dp, struct dir_context *ctx);  extern int xfs_dir2_sf_lookup(struct xfs_da_args *args);  extern int xfs_dir2_sf_removename(struct xfs_da_args *args);  extern int xfs_dir2_sf_replace(struct xfs_da_args *args); diff --git a/fs/xfs/xfs_dir2_sf.c b/fs/xfs/xfs_dir2_sf.c index 6157424dbf8f..97676a347da1 100644 --- a/fs/xfs/xfs_dir2_sf.c +++ b/fs/xfs/xfs_dir2_sf.c @@ -768,9 +768,7 @@ xfs_dir2_sf_create(  int						/* error */  xfs_dir2_sf_getdents(  	xfs_inode_t		*dp,		/* incore directory inode */ -	void			*dirent, -	xfs_off_t		*offset, -	filldir_t		filldir) +	struct dir_context	*ctx)  {  	int			i;		/* shortform entry number */  	xfs_mount_t		*mp;		/* filesystem mount point */ @@ -802,7 +800,7 @@ xfs_dir2_sf_getdents(  	/*  	 * If the block number in the offset is out of range, we're done.  	 */ -	if (xfs_dir2_dataptr_to_db(mp, *offset) > mp->m_dirdatablk) +	if (xfs_dir2_dataptr_to_db(mp, ctx->pos) > mp->m_dirdatablk)  		return 0;  	/* @@ -819,22 +817,20 @@ xfs_dir2_sf_getdents(  	/*  	 * Put . entry unless we're starting past it.  	 */ -	if (*offset <= dot_offset) { -		if (filldir(dirent, ".", 1, dot_offset & 0x7fffffff, dp->i_ino, DT_DIR)) { -			*offset = dot_offset & 0x7fffffff; +	if (ctx->pos <= dot_offset) { +		ctx->pos = dot_offset & 0x7fffffff; +		if (!dir_emit(ctx, ".", 1, dp->i_ino, DT_DIR))  			return 0; -		}  	}  	/*  	 * Put .. entry unless we're starting past it.  	 
*/ -	if (*offset <= dotdot_offset) { +	if (ctx->pos <= dotdot_offset) {  		ino = xfs_dir2_sf_get_parent_ino(sfp); -		if (filldir(dirent, "..", 2, dotdot_offset & 0x7fffffff, ino, DT_DIR)) { -			*offset = dotdot_offset & 0x7fffffff; +		ctx->pos = dotdot_offset & 0x7fffffff; +		if (!dir_emit(ctx, "..", 2, ino, DT_DIR))  			return 0; -		}  	}  	/* @@ -845,21 +841,20 @@ xfs_dir2_sf_getdents(  		off = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk,  				xfs_dir2_sf_get_offset(sfep)); -		if (*offset > off) { +		if (ctx->pos > off) {  			sfep = xfs_dir2_sf_nextentry(sfp, sfep);  			continue;  		}  		ino = xfs_dir2_sfe_get_ino(sfp, sfep); -		if (filldir(dirent, (char *)sfep->name, sfep->namelen, -			    off & 0x7fffffff, ino, DT_UNKNOWN)) { -			*offset = off & 0x7fffffff; +		ctx->pos = off & 0x7fffffff; +		if (!dir_emit(ctx, (char *)sfep->name, sfep->namelen, +			    ino, DT_UNKNOWN))  			return 0; -		}  		sfep = xfs_dir2_sf_nextentry(sfp, sfep);  	} -	*offset = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk + 1, 0) & +	ctx->pos = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk + 1, 0) &  			0x7fffffff;  	return 0;  } diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c index 044e97a33c8d..0adf27ecf3f1 100644 --- a/fs/xfs/xfs_dquot.c +++ b/fs/xfs/xfs_dquot.c @@ -570,13 +570,13 @@ xfs_qm_dqtobp(  	xfs_buf_t		**O_bpp,  	uint			flags)  { -	xfs_bmbt_irec_t map; -	int		nmaps = 1, error; -	xfs_buf_t	*bp; -	xfs_inode_t	*quotip = XFS_DQ_TO_QIP(dqp); -	xfs_mount_t	*mp = dqp->q_mount; -	xfs_dqid_t	id = be32_to_cpu(dqp->q_core.d_id); -	xfs_trans_t	*tp = (tpp ? *tpp : NULL); +	struct xfs_bmbt_irec	map; +	int			nmaps = 1, error; +	struct xfs_buf		*bp; +	struct xfs_inode	*quotip = xfs_dq_to_quota_inode(dqp); +	struct xfs_mount	*mp = dqp->q_mount; +	xfs_dqid_t		id = be32_to_cpu(dqp->q_core.d_id); +	struct xfs_trans	*tp = (tpp ? 
*tpp : NULL);  	dqp->q_fileoffset = (xfs_fileoff_t)id / mp->m_quotainfo->qi_dqperchunk; @@ -804,7 +804,7 @@ xfs_qm_dqget(  	xfs_dquot_t	**O_dqpp) /* OUT : locked incore dquot */  {  	struct xfs_quotainfo	*qi = mp->m_quotainfo; -	struct radix_tree_root *tree = XFS_DQUOT_TREE(qi, type); +	struct radix_tree_root *tree = xfs_dquot_tree(qi, type);  	struct xfs_dquot	*dqp;  	int			error; @@ -936,6 +936,7 @@ xfs_qm_dqput_final(  {  	struct xfs_quotainfo	*qi = dqp->q_mount->m_quotainfo;  	struct xfs_dquot	*gdqp; +	struct xfs_dquot	*pdqp;  	trace_xfs_dqput_free(dqp); @@ -949,21 +950,29 @@ xfs_qm_dqput_final(  	/*  	 * If we just added a udquot to the freelist, then we want to release -	 * the gdquot reference that it (probably) has. Otherwise it'll keep -	 * the gdquot from getting reclaimed. +	 * the gdquot/pdquot reference that it (probably) has. Otherwise it'll +	 * keep the gdquot/pdquot from getting reclaimed.  	 */  	gdqp = dqp->q_gdquot;  	if (gdqp) {  		xfs_dqlock(gdqp);  		dqp->q_gdquot = NULL;  	} + +	pdqp = dqp->q_pdquot; +	if (pdqp) { +		xfs_dqlock(pdqp); +		dqp->q_pdquot = NULL; +	}  	xfs_dqunlock(dqp);  	/* -	 * If we had a group quota hint, release it now. +	 * If we had a group/project quota hint, release it now.  	 
*/  	if (gdqp)  		xfs_qm_dqput(gdqp); +	if (pdqp) +		xfs_qm_dqput(pdqp);  }  /* diff --git a/fs/xfs/xfs_dquot.h b/fs/xfs/xfs_dquot.h index 4f0ebfc43cc9..55abbca2883d 100644 --- a/fs/xfs/xfs_dquot.h +++ b/fs/xfs/xfs_dquot.h @@ -53,6 +53,7 @@ typedef struct xfs_dquot {  	xfs_fileoff_t	 q_fileoffset;	/* offset in quotas file */  	struct xfs_dquot*q_gdquot;	/* group dquot, hint only */ +	struct xfs_dquot*q_pdquot;	/* project dquot, hint only */  	xfs_disk_dquot_t q_core;	/* actual usage & quotas */  	xfs_dq_logitem_t q_logitem;	/* dquot log item */  	xfs_qcnt_t	 q_res_bcount;	/* total regular nblks used+reserved */ @@ -118,8 +119,9 @@ static inline int xfs_this_quota_on(struct xfs_mount *mp, int type)  	case XFS_DQ_USER:  		return XFS_IS_UQUOTA_ON(mp);  	case XFS_DQ_GROUP: +		return XFS_IS_GQUOTA_ON(mp);  	case XFS_DQ_PROJ: -		return XFS_IS_OQUOTA_ON(mp); +		return XFS_IS_PQUOTA_ON(mp);  	default:  		return 0;  	} @@ -131,8 +133,9 @@ static inline xfs_dquot_t *xfs_inode_dquot(struct xfs_inode *ip, int type)  	case XFS_DQ_USER:  		return ip->i_udquot;  	case XFS_DQ_GROUP: -	case XFS_DQ_PROJ:  		return ip->i_gdquot; +	case XFS_DQ_PROJ: +		return ip->i_pdquot;  	default:  		return NULL;  	} @@ -143,10 +146,6 @@ static inline xfs_dquot_t *xfs_inode_dquot(struct xfs_inode *ip, int type)  #define XFS_QM_ISUDQ(dqp)	((dqp)->dq_flags & XFS_DQ_USER)  #define XFS_QM_ISPDQ(dqp)	((dqp)->dq_flags & XFS_DQ_PROJ)  #define XFS_QM_ISGDQ(dqp)	((dqp)->dq_flags & XFS_DQ_GROUP) -#define XFS_DQ_TO_QINF(dqp)	((dqp)->q_mount->m_quotainfo) -#define XFS_DQ_TO_QIP(dqp)	(XFS_QM_ISUDQ(dqp) ? 
\ -				 XFS_DQ_TO_QINF(dqp)->qi_uquotaip : \ -				 XFS_DQ_TO_QINF(dqp)->qi_gquotaip)  extern int		xfs_qm_dqread(struct xfs_mount *, xfs_dqid_t, uint,  					uint, struct xfs_dquot	**); diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index a5f2042aec8b..de3dc98f4e8f 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -906,11 +906,10 @@ xfs_file_release(  STATIC int  xfs_file_readdir( -	struct file	*filp, -	void		*dirent, -	filldir_t	filldir) +	struct file	*file, +	struct dir_context *ctx)  { -	struct inode	*inode = file_inode(filp); +	struct inode	*inode = file_inode(file);  	xfs_inode_t	*ip = XFS_I(inode);  	int		error;  	size_t		bufsize; @@ -929,8 +928,7 @@ xfs_file_readdir(  	 */  	bufsize = (size_t)min_t(loff_t, 32768, ip->i_d.di_size); -	error = xfs_readdir(ip, dirent, bufsize, -				(xfs_off_t *)&filp->f_pos, filldir); +	error = xfs_readdir(ip, ctx, bufsize);  	if (error)  		return -error;  	return 0; @@ -1270,8 +1268,7 @@ xfs_seek_data(  	}  out: -	if (offset != file->f_pos) -		file->f_pos = offset; +	offset = vfs_setpos(file, offset, inode->i_sb->s_maxbytes);  out_unlock:  	xfs_iunlock_map_shared(ip, lock); @@ -1379,8 +1376,7 @@ out:  	 * situation in particular.  	 
*/  	offset = min_t(loff_t, offset, isize); -	if (offset != file->f_pos) -		file->f_pos = offset; +	offset = vfs_setpos(file, offset, inode->i_sb->s_maxbytes);  out_unlock:  	xfs_iunlock_map_shared(ip, lock); @@ -1432,7 +1428,7 @@ const struct file_operations xfs_file_operations = {  const struct file_operations xfs_dir_file_operations = {  	.open		= xfs_dir_open,  	.read		= generic_read_dir, -	.readdir	= xfs_file_readdir, +	.iterate	= xfs_file_readdir,  	.llseek		= generic_file_llseek,  	.unlocked_ioctl	= xfs_file_ioctl,  #ifdef CONFIG_COMPAT diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index 3c3644ea825b..614eb0cc3608 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c @@ -176,7 +176,7 @@ xfs_growfs_data_private(  	if (!bp)  		return EIO;  	if (bp->b_error) { -		int	error = bp->b_error; +		error = bp->b_error;  		xfs_buf_relse(bp);  		return error;  	} diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c index c8f5ae1debf2..7a0c17d7ec09 100644 --- a/fs/xfs/xfs_ialloc.c +++ b/fs/xfs/xfs_ialloc.c @@ -38,6 +38,7 @@  #include "xfs_bmap.h"  #include "xfs_cksum.h"  #include "xfs_buf_item.h" +#include "xfs_icreate_item.h"  /* @@ -150,12 +151,16 @@ xfs_check_agi_freecount(  #endif  /* - * Initialise a new set of inodes. + * Initialise a new set of inodes. When called without a transaction context + * (e.g. from recovery) we initiate a delayed write of the inode buffers rather + * than logging them (which in a transaction context puts them into the AIL + * for writeback rather than the xfsbufd queue).   */ -STATIC int +int  xfs_ialloc_inode_init(  	struct xfs_mount	*mp,  	struct xfs_trans	*tp, +	struct list_head	*buffer_list,  	xfs_agnumber_t		agno,  	xfs_agblock_t		agbno,  	xfs_agblock_t		length, @@ -208,6 +213,18 @@ xfs_ialloc_inode_init(  		version = 3;  		ino = XFS_AGINO_TO_INO(mp, agno,  				       XFS_OFFBNO_TO_AGINO(mp, agbno, 0)); + +		/* +		 * log the initialisation that is about to take place as an +		 * logical operation. 
This means the transaction does not +		 * need to log the physical changes to the inode buffers as log +		 * recovery will know what initialisation is actually needed. +		 * Hence we only need to log the buffers as "ordered" buffers so +		 * they track in the AIL as if they were physically logged. +		 */ +		if (tp) +			xfs_icreate_log(tp, agno, agbno, XFS_IALLOC_INODES(mp), +					mp->m_sb.sb_inodesize, length, gen);  	} else if (xfs_sb_version_hasnlink(&mp->m_sb))  		version = 2;  	else @@ -223,13 +240,8 @@ xfs_ialloc_inode_init(  					 XBF_UNMAPPED);  		if (!fbuf)  			return ENOMEM; -		/* -		 * Initialize all inodes in this buffer and then log them. -		 * -		 * XXX: It would be much better if we had just one transaction -		 *	to log a whole cluster of inodes instead of all the -		 *	individual transactions causing a lot of log traffic. -		 */ + +		/* Initialize the inode buffers and log them appropriately. */  		fbuf->b_ops = &xfs_inode_buf_ops;  		xfs_buf_zero(fbuf, 0, BBTOB(fbuf->b_length));  		for (i = 0; i < ninodes; i++) { @@ -247,18 +259,39 @@ xfs_ialloc_inode_init(  				ino++;  				uuid_copy(&free->di_uuid, &mp->m_sb.sb_uuid);  				xfs_dinode_calc_crc(mp, free); -			} else { +			} else if (tp) {  				/* just log the inode core */  				xfs_trans_log_buf(tp, fbuf, ioffset,  						  ioffset + isize - 1);  			}  		} -		if (version == 3) { -			/* need to log the entire buffer */ -			xfs_trans_log_buf(tp, fbuf, 0, -					  BBTOB(fbuf->b_length) - 1); + +		if (tp) { +			/* +			 * Mark the buffer as an inode allocation buffer so it +			 * sticks in AIL at the point of this allocation +			 * transaction. This ensures the they are on disk before +			 * the tail of the log can be moved past this +			 * transaction (i.e. by preventing relogging from moving +			 * it forward in the log). 
+			 */ +			xfs_trans_inode_alloc_buf(tp, fbuf); +			if (version == 3) { +				/* +				 * Mark the buffer as ordered so that they are +				 * not physically logged in the transaction but +				 * still tracked in the AIL as part of the +				 * transaction and pin the log appropriately. +				 */ +				xfs_trans_ordered_buf(tp, fbuf); +				xfs_trans_log_buf(tp, fbuf, 0, +						  BBTOB(fbuf->b_length) - 1); +			} +		} else { +			fbuf->b_flags |= XBF_DONE; +			xfs_buf_delwri_queue(fbuf, buffer_list); +			xfs_buf_relse(fbuf);  		} -		xfs_trans_inode_alloc_buf(tp, fbuf);  	}  	return 0;  } @@ -303,7 +336,7 @@ xfs_ialloc_ag_alloc(  	 * First try to allocate inodes contiguous with the last-allocated  	 * chunk of inodes.  If the filesystem is striped, this will fill  	 * an entire stripe unit with inodes. - 	 */ +	 */  	agi = XFS_BUF_TO_AGI(agbp);  	newino = be32_to_cpu(agi->agi_newino);  	agno = be32_to_cpu(agi->agi_seqno); @@ -402,7 +435,7 @@ xfs_ialloc_ag_alloc(  	 * rather than a linear progression to prevent the next generation  	 * number from being easily guessable.  	 
*/ -	error = xfs_ialloc_inode_init(args.mp, tp, agno, args.agbno, +	error = xfs_ialloc_inode_init(args.mp, tp, NULL, agno, args.agbno,  			args.len, prandom_u32());  	if (error) @@ -615,8 +648,7 @@ xfs_ialloc_get_rec(  	struct xfs_btree_cur	*cur,  	xfs_agino_t		agino,  	xfs_inobt_rec_incore_t	*rec, -	int			*done, -	int			left) +	int			*done)  {  	int                     error;  	int			i; @@ -724,12 +756,12 @@ xfs_dialloc_ag(  		    pag->pagl_leftrec != NULLAGINO &&  		    pag->pagl_rightrec != NULLAGINO) {  			error = xfs_ialloc_get_rec(tcur, pag->pagl_leftrec, -						   &trec, &doneleft, 1); +						   &trec, &doneleft);  			if (error)  				goto error1;  			error = xfs_ialloc_get_rec(cur, pag->pagl_rightrec, -						   &rec, &doneright, 0); +						   &rec, &doneright);  			if (error)  				goto error1;  		} else { diff --git a/fs/xfs/xfs_ialloc.h b/fs/xfs/xfs_ialloc.h index c8da3df271e6..68c07320f096 100644 --- a/fs/xfs/xfs_ialloc.h +++ b/fs/xfs/xfs_ialloc.h @@ -150,6 +150,14 @@ int xfs_inobt_lookup(struct xfs_btree_cur *cur, xfs_agino_t ino,  int xfs_inobt_get_rec(struct xfs_btree_cur *cur,  		xfs_inobt_rec_incore_t *rec, int *stat); +/* + * Inode chunk initialisation routine + */ +int xfs_ialloc_inode_init(struct xfs_mount *mp, struct xfs_trans *tp, +			  struct list_head *buffer_list, +			  xfs_agnumber_t agno, xfs_agblock_t agbno, +			  xfs_agblock_t length, unsigned int gen); +  extern const struct xfs_buf_ops xfs_agi_buf_ops;  #endif	/* __XFS_IALLOC_H__ */ diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index 96e344e3e927..3f90e1ceb8d6 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c @@ -335,7 +335,9 @@ xfs_iget_cache_miss(  	iflags = XFS_INEW;  	if (flags & XFS_IGET_DONTCACHE)  		iflags |= XFS_IDONTCACHE; -	ip->i_udquot = ip->i_gdquot = NULL; +	ip->i_udquot = NULL; +	ip->i_gdquot = NULL; +	ip->i_pdquot = NULL;  	xfs_iflags_set(ip, iflags);  	/* insert the new inode */ diff --git a/fs/xfs/xfs_icache.h b/fs/xfs/xfs_icache.h index 
e0f138c70a2f..a01afbb3909a 100644 --- a/fs/xfs/xfs_icache.h +++ b/fs/xfs/xfs_icache.h @@ -40,7 +40,6 @@ void xfs_inode_clear_eofblocks_tag(struct xfs_inode *ip);  int xfs_icache_free_eofblocks(struct xfs_mount *, struct xfs_eofblocks *);  void xfs_eofblocks_worker(struct work_struct *); -int xfs_sync_inode_grab(struct xfs_inode *ip);  int xfs_inode_ag_iterator(struct xfs_mount *mp,  	int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag,  		int flags, void *args), diff --git a/fs/xfs/xfs_icreate_item.c b/fs/xfs/xfs_icreate_item.c new file mode 100644 index 000000000000..7716a4e7375e --- /dev/null +++ b/fs/xfs/xfs_icreate_item.c @@ -0,0 +1,195 @@ +/* + * Copyright (c) 2008-2010, 2013 Dave Chinner + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_types.h" +#include "xfs_bit.h" +#include "xfs_log.h" +#include "xfs_inum.h" +#include "xfs_trans.h" +#include "xfs_buf_item.h" +#include "xfs_sb.h" +#include "xfs_ag.h" +#include "xfs_dir2.h" +#include "xfs_mount.h" +#include "xfs_trans_priv.h" +#include "xfs_bmap_btree.h" +#include "xfs_alloc_btree.h" +#include "xfs_ialloc_btree.h" +#include "xfs_attr_sf.h" +#include "xfs_dinode.h" +#include "xfs_inode.h" +#include "xfs_inode_item.h" +#include "xfs_btree.h" +#include "xfs_ialloc.h" +#include "xfs_error.h" +#include "xfs_icreate_item.h" + +kmem_zone_t	*xfs_icreate_zone;		/* inode create item zone */ + +static inline struct xfs_icreate_item *ICR_ITEM(struct xfs_log_item *lip) +{ +	return container_of(lip, struct xfs_icreate_item, ic_item); +} + +/* + * This returns the number of iovecs needed to log the given inode item. + * + * We only need one iovec for the icreate log structure. + */ +STATIC uint +xfs_icreate_item_size( +	struct xfs_log_item	*lip) +{ +	return 1; +} + +/* + * This is called to fill in the vector of log iovecs for the + * given inode create log item. + */ +STATIC void +xfs_icreate_item_format( +	struct xfs_log_item	*lip, +	struct xfs_log_iovec	*log_vector) +{ +	struct xfs_icreate_item	*icp = ICR_ITEM(lip); + +	log_vector->i_addr = (xfs_caddr_t)&icp->ic_format; +	log_vector->i_len  = sizeof(struct xfs_icreate_log); +	log_vector->i_type = XLOG_REG_TYPE_ICREATE; +} + + +/* Pinning has no meaning for the create item, so just return. */ +STATIC void +xfs_icreate_item_pin( +	struct xfs_log_item	*lip) +{ +} + + +/* pinning has no meaning for the create item, so just return. 
*/ +STATIC void +xfs_icreate_item_unpin( +	struct xfs_log_item	*lip, +	int			remove) +{ +} + +STATIC void +xfs_icreate_item_unlock( +	struct xfs_log_item	*lip) +{ +	struct xfs_icreate_item	*icp = ICR_ITEM(lip); + +	if (icp->ic_item.li_flags & XFS_LI_ABORTED) +		kmem_zone_free(xfs_icreate_zone, icp); +	return; +} + +/* + * Because we have ordered buffers being tracked in the AIL for the inode + * creation, we don't need the create item after this. Hence we can free + * the log item and return -1 to tell the caller we're done with the item. + */ +STATIC xfs_lsn_t +xfs_icreate_item_committed( +	struct xfs_log_item	*lip, +	xfs_lsn_t		lsn) +{ +	struct xfs_icreate_item	*icp = ICR_ITEM(lip); + +	kmem_zone_free(xfs_icreate_zone, icp); +	return (xfs_lsn_t)-1; +} + +/* item can never get into the AIL */ +STATIC uint +xfs_icreate_item_push( +	struct xfs_log_item	*lip, +	struct list_head	*buffer_list) +{ +	ASSERT(0); +	return XFS_ITEM_SUCCESS; +} + +/* Ordered buffers do the dependency tracking here, so this does nothing. */ +STATIC void +xfs_icreate_item_committing( +	struct xfs_log_item	*lip, +	xfs_lsn_t		lsn) +{ +} + +/* + * This is the ops vector shared by all buf log items. + */ +static struct xfs_item_ops xfs_icreate_item_ops = { +	.iop_size	= xfs_icreate_item_size, +	.iop_format	= xfs_icreate_item_format, +	.iop_pin	= xfs_icreate_item_pin, +	.iop_unpin	= xfs_icreate_item_unpin, +	.iop_push	= xfs_icreate_item_push, +	.iop_unlock	= xfs_icreate_item_unlock, +	.iop_committed	= xfs_icreate_item_committed, +	.iop_committing = xfs_icreate_item_committing, +}; + + +/* + * Initialize the inode log item for a newly allocated (in-core) inode. + * + * Inode extents can only reside within an AG. Hence specify the starting + * block for the inode chunk by offset within an AG as well as the + * length of the allocated extent. 
+ * + * This joins the item to the transaction and marks it dirty so + * that we don't need a separate call to do this, nor does the + * caller need to know anything about the icreate item. + */ +void +xfs_icreate_log( +	struct xfs_trans	*tp, +	xfs_agnumber_t		agno, +	xfs_agblock_t		agbno, +	unsigned int		count, +	unsigned int		inode_size, +	xfs_agblock_t		length, +	unsigned int		generation) +{ +	struct xfs_icreate_item	*icp; + +	icp = kmem_zone_zalloc(xfs_icreate_zone, KM_SLEEP); + +	xfs_log_item_init(tp->t_mountp, &icp->ic_item, XFS_LI_ICREATE, +			  &xfs_icreate_item_ops); + +	icp->ic_format.icl_type = XFS_LI_ICREATE; +	icp->ic_format.icl_size = 1;	/* single vector */ +	icp->ic_format.icl_ag = cpu_to_be32(agno); +	icp->ic_format.icl_agbno = cpu_to_be32(agbno); +	icp->ic_format.icl_count = cpu_to_be32(count); +	icp->ic_format.icl_isize = cpu_to_be32(inode_size); +	icp->ic_format.icl_length = cpu_to_be32(length); +	icp->ic_format.icl_gen = cpu_to_be32(generation); + +	xfs_trans_add_item(tp, &icp->ic_item); +	tp->t_flags |= XFS_TRANS_DIRTY; +	icp->ic_item.li_desc->lid_flags |= XFS_LID_DIRTY; +} diff --git a/fs/xfs/xfs_icreate_item.h b/fs/xfs/xfs_icreate_item.h new file mode 100644 index 000000000000..88ba8aa0bc41 --- /dev/null +++ b/fs/xfs/xfs_icreate_item.h @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2008-2010, Dave Chinner + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA + */ +#ifndef XFS_ICREATE_ITEM_H +#define XFS_ICREATE_ITEM_H	1 + +/* + * on disk log item structure + * + * Log recovery assumes the first two entries are the type and size and they fit + * in 32 bits. Also in host order (ugh) so they have to be 32 bit aligned so + * decoding can be done correctly. + */ +struct xfs_icreate_log { +	__uint16_t	icl_type;	/* type of log format structure */ +	__uint16_t	icl_size;	/* size of log format structure */ +	__be32		icl_ag;		/* ag being allocated in */ +	__be32		icl_agbno;	/* start block of inode range */ +	__be32		icl_count;	/* number of inodes to initialise */ +	__be32		icl_isize;	/* size of inodes */ +	__be32		icl_length;	/* length of extent to initialise */ +	__be32		icl_gen;	/* inode generation number to use */ +}; + +/* in memory log item structure */ +struct xfs_icreate_item { +	struct xfs_log_item	ic_item; +	struct xfs_icreate_log	ic_format; +}; + +extern kmem_zone_t *xfs_icreate_zone;	/* inode create item zone */ + +void xfs_icreate_log(struct xfs_trans *tp, xfs_agnumber_t agno, +			xfs_agblock_t agbno, unsigned int count, +			unsigned int inode_size, xfs_agblock_t length, +			unsigned int generation); + +#endif	/* XFS_ICREATE_ITEM_H */ diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 7f7be5f98f52..bb262c25c8de 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -896,7 +896,6 @@ xfs_dinode_to_disk(  	to->di_projid_lo = cpu_to_be16(from->di_projid_lo);  	to->di_projid_hi = cpu_to_be16(from->di_projid_hi);  	memcpy(to->di_pad, from->di_pad, sizeof(to->di_pad)); -	to->di_flushiter = cpu_to_be16(from->di_flushiter);  	to->di_atime.t_sec = cpu_to_be32(from->di_atime.t_sec);  	to->di_atime.t_nsec = cpu_to_be32(from->di_atime.t_nsec);  	to->di_mtime.t_sec = cpu_to_be32(from->di_mtime.t_sec); @@ 
-924,6 +923,9 @@ xfs_dinode_to_disk(  		to->di_lsn = cpu_to_be64(from->di_lsn);  		memcpy(to->di_pad2, from->di_pad2, sizeof(to->di_pad2));  		uuid_copy(&to->di_uuid, &from->di_uuid); +		to->di_flushiter = 0; +	} else { +		to->di_flushiter = cpu_to_be16(from->di_flushiter);  	}  } @@ -1028,6 +1030,15 @@ xfs_dinode_calc_crc(  /*   * Read the disk inode attributes into the in-core inode structure. + * + * For version 5 superblocks, if we are initialising a new inode and we are not + * utilising the XFS_MOUNT_IKEEP inode cluster mode, we can simple build the new + * inode core with a random generation number. If we are keeping inodes around, + * we need to read the inode cluster to get the existing generation number off + * disk. Further, if we are using version 4 superblocks (i.e. v1/v2 inode + * format) then log recovery is dependent on the di_flushiter field being + * initialised from the current on-disk value and hence we must also read the + * inode off disk.   */  int  xfs_iread( @@ -1047,6 +1058,23 @@ xfs_iread(  	if (error)  		return error; +	/* shortcut IO on inode allocation if possible */ +	if ((iget_flags & XFS_IGET_CREATE) && +	    xfs_sb_version_hascrc(&mp->m_sb) && +	    !(mp->m_flags & XFS_MOUNT_IKEEP)) { +		/* initialise the on-disk inode core */ +		memset(&ip->i_d, 0, sizeof(ip->i_d)); +		ip->i_d.di_magic = XFS_DINODE_MAGIC; +		ip->i_d.di_gen = prandom_u32(); +		if (xfs_sb_version_hascrc(&mp->m_sb)) { +			ip->i_d.di_version = 3; +			ip->i_d.di_ino = ip->i_ino; +			uuid_copy(&ip->i_d.di_uuid, &mp->m_sb.sb_uuid); +		} else +			ip->i_d.di_version = 2; +		return 0; +	} +  	/*  	 * Get pointers to the on-disk inode and the buffer containing it.  	 */ @@ -1133,17 +1161,16 @@ xfs_iread(  	xfs_buf_set_ref(bp, XFS_INO_REF);  	/* -	 * Use xfs_trans_brelse() to release the buffer containing the -	 * on-disk inode, because it was acquired with xfs_trans_read_buf() -	 * in xfs_imap_to_bp() above.  
If tp is NULL, this is just a normal +	 * Use xfs_trans_brelse() to release the buffer containing the on-disk +	 * inode, because it was acquired with xfs_trans_read_buf() in +	 * xfs_imap_to_bp() above.  If tp is NULL, this is just a normal  	 * brelse().  If we're within a transaction, then xfs_trans_brelse()  	 * will only release the buffer if it is not dirty within the  	 * transaction.  It will be OK to release the buffer in this case, -	 * because inodes on disk are never destroyed and we will be -	 * locking the new in-core inode before putting it in the hash -	 * table where other processes can find it.  Thus we don't have -	 * to worry about the inode being changed just because we released -	 * the buffer. +	 * because inodes on disk are never destroyed and we will be locking the +	 * new in-core inode before putting it in the cache where other +	 * processes can find it.  Thus we don't have to worry about the inode +	 * being changed just because we released the buffer.  	 */   out_brelse:  	xfs_trans_brelse(tp, bp); @@ -2028,8 +2055,6 @@ xfs_ifree(  	int			error;  	int			delete;  	xfs_ino_t		first_ino; -	xfs_dinode_t    	*dip; -	xfs_buf_t       	*ibp;  	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));  	ASSERT(ip->i_d.di_nlink == 0); @@ -2042,14 +2067,13 @@ xfs_ifree(  	 * Pull the on-disk inode from the AGI unlinked list.  	 */  	error = xfs_iunlink_remove(tp, ip); -	if (error != 0) { +	if (error)  		return error; -	}  	error = xfs_difree(tp, ip->i_ino, flist, &delete, &first_ino); -	if (error != 0) { +	if (error)  		return error; -	} +  	ip->i_d.di_mode = 0;		/* mark incore inode as free */  	ip->i_d.di_flags = 0;  	ip->i_d.di_dmevmask = 0; @@ -2061,31 +2085,10 @@ xfs_ifree(  	 * by reincarnations of this inode.  	 */  	ip->i_d.di_gen++; -  	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); -	error = xfs_imap_to_bp(ip->i_mount, tp, &ip->i_imap, &dip, &ibp, -			       0, 0); -	if (error) -		return error; - -        /* -	* Clear the on-disk di_mode. 
This is to prevent xfs_bulkstat -	* from picking up this inode when it is reclaimed (its incore state -	* initialzed but not flushed to disk yet). The in-core di_mode is -	* already cleared  and a corresponding transaction logged. -	* The hack here just synchronizes the in-core to on-disk -	* di_mode value in advance before the actual inode sync to disk. -	* This is OK because the inode is already unlinked and would never -	* change its di_mode again for this inode generation. -	* This is a temporary hack that would require a proper fix -	* in the future. -	*/ -	dip->di_mode = 0; - -	if (delete) { +	if (delete)  		error = xfs_ifree_cluster(ip, tp, first_ino); -	}  	return error;  } @@ -2160,8 +2163,8 @@ xfs_iroot_realloc(  		np = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1,  						     (int)new_size);  		ifp->if_broot_bytes = (int)new_size; -		ASSERT(ifp->if_broot_bytes <= -			XFS_IFORK_SIZE(ip, whichfork) + XFS_BROOT_SIZE_ADJ(ip)); +		ASSERT(XFS_BMAP_BMDR_SPACE(ifp->if_broot) <= +			XFS_IFORK_SIZE(ip, whichfork));  		memmove(np, op, cur_max * (uint)sizeof(xfs_dfsbno_t));  		return;  	} @@ -2214,8 +2217,9 @@ xfs_iroot_realloc(  	kmem_free(ifp->if_broot);  	ifp->if_broot = new_broot;  	ifp->if_broot_bytes = (int)new_size; -	ASSERT(ifp->if_broot_bytes <= -		XFS_IFORK_SIZE(ip, whichfork) + XFS_BROOT_SIZE_ADJ(ip)); +	if (ifp->if_broot) +		ASSERT(XFS_BMAP_BMDR_SPACE(ifp->if_broot) <= +			XFS_IFORK_SIZE(ip, whichfork));  	return;  } @@ -2526,9 +2530,8 @@ xfs_iflush_fork(  		if ((iip->ili_fields & brootflag[whichfork]) &&  		    (ifp->if_broot_bytes > 0)) {  			ASSERT(ifp->if_broot != NULL); -			ASSERT(ifp->if_broot_bytes <= -			       (XFS_IFORK_SIZE(ip, whichfork) + -				XFS_BROOT_SIZE_ADJ(ip))); +			ASSERT(XFS_BMAP_BMDR_SPACE(ifp->if_broot) <= +			        XFS_IFORK_SIZE(ip, whichfork));  			xfs_bmbt_to_bmdr(mp, ifp->if_broot, ifp->if_broot_bytes,  				(xfs_bmdr_block_t *)cp,  				XFS_DFORK_SIZE(dip, mp, whichfork)); @@ -2886,12 +2889,18 @@ xfs_iflush_int(  			
__func__, ip->i_ino, ip->i_d.di_forkoff, ip);  		goto corrupt_out;  	} +  	/* -	 * bump the flush iteration count, used to detect flushes which -	 * postdate a log record during recovery. This is redundant as we now -	 * log every change and hence this can't happen. Still, it doesn't hurt. +	 * Inode item log recovery for v1/v2 inodes are dependent on the +	 * di_flushiter count for correct sequencing. We bump the flush +	 * iteration count so we can detect flushes which postdate a log record +	 * during recovery. This is redundant as we now log every change and +	 * hence this can't happen but we need to still do it to ensure +	 * backwards compatibility with old kernels that predate logging all +	 * inode changes.  	 */ -	ip->i_d.di_flushiter++; +	if (ip->i_d.di_version < 3) +		ip->i_d.di_flushiter++;  	/*  	 * Copy the dirty parts of the inode into the on-disk diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 91129794aaec..b55fd347ab5b 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -250,6 +250,7 @@ typedef struct xfs_inode {  	struct xfs_mount	*i_mount;	/* fs mount struct ptr */  	struct xfs_dquot	*i_udquot;	/* user dquot */  	struct xfs_dquot	*i_gdquot;	/* group dquot */ +	struct xfs_dquot	*i_pdquot;	/* project dquot */  	/* Inode location stuff */  	xfs_ino_t		i_ino;		/* inode number (agno/agino)*/ diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index 5e999680094a..6e2bca5d44d6 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -248,7 +248,7 @@ xfs_open_by_handle(  		goto out_dput;  	} -	fd = get_unused_fd(); +	fd = get_unused_fd_flags(0);  	if (fd < 0) {  		error = fd;  		goto out_dput; @@ -928,7 +928,7 @@ xfs_ioctl_setattr(  	struct xfs_trans	*tp;  	unsigned int		lock_flags = 0;  	struct xfs_dquot	*udqp = NULL; -	struct xfs_dquot	*gdqp = NULL; +	struct xfs_dquot	*pdqp = NULL;  	struct xfs_dquot	*olddquot = NULL;  	int			code; @@ -957,7 +957,7 @@ xfs_ioctl_setattr(  	if (XFS_IS_QUOTA_ON(mp) && (mask & FSX_PROJID)) {  		code 
= xfs_qm_vop_dqalloc(ip, ip->i_d.di_uid,  					 ip->i_d.di_gid, fa->fsx_projid, -					 XFS_QMOPT_PQUOTA, &udqp, &gdqp); +					 XFS_QMOPT_PQUOTA, &udqp, NULL, &pdqp);  		if (code)  			return code;  	} @@ -994,8 +994,8 @@ xfs_ioctl_setattr(  		    XFS_IS_PQUOTA_ON(mp) &&  		    xfs_get_projid(ip) != fa->fsx_projid) {  			ASSERT(tp); -			code = xfs_qm_vop_chown_reserve(tp, ip, udqp, gdqp, -						capable(CAP_FOWNER) ? +			code = xfs_qm_vop_chown_reserve(tp, ip, udqp, NULL, +						pdqp, capable(CAP_FOWNER) ?  						XFS_QMOPT_FORCE_RES : 0);  			if (code)	/* out of quota */  				goto error_return; @@ -1113,7 +1113,7 @@ xfs_ioctl_setattr(  		if (xfs_get_projid(ip) != fa->fsx_projid) {  			if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_PQUOTA_ON(mp)) {  				olddquot = xfs_qm_vop_chown(tp, ip, -							&ip->i_gdquot, gdqp); +							&ip->i_pdquot, pdqp);  			}  			xfs_set_projid(ip, fa->fsx_projid); @@ -1160,13 +1160,13 @@ xfs_ioctl_setattr(  	 */  	xfs_qm_dqrele(olddquot);  	xfs_qm_dqrele(udqp); -	xfs_qm_dqrele(gdqp); +	xfs_qm_dqrele(pdqp);  	return code;   error_return:  	xfs_qm_dqrele(udqp); -	xfs_qm_dqrele(gdqp); +	xfs_qm_dqrele(pdqp);  	xfs_trans_cancel(tp, 0);  	if (lock_flags)  		xfs_iunlock(ip, lock_flags); diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 8f8aaee7f379..6a7096422295 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -284,6 +284,15 @@ xfs_iomap_eof_want_preallocate(  		return 0;  	/* +	 * If the file is smaller than the minimum prealloc and we are using +	 * dynamic preallocation, don't do any preallocation at all as it is +	 * likely this is the only write to the file that is going to be done. +	 */ +	if (!(mp->m_flags & XFS_MOUNT_DFLT_IOSIZE) && +	    XFS_ISIZE(ip) < XFS_FSB_TO_B(mp, mp->m_writeio_blocks)) +		return 0; + +	/*  	 * If there are any real blocks past eof, then don't  	 * do any speculative allocation.  	 
*/ @@ -345,6 +354,10 @@ xfs_iomap_eof_prealloc_initial_size(  	if (mp->m_flags & XFS_MOUNT_DFLT_IOSIZE)  		return 0; +	/* If the file is small, then use the minimum prealloc */ +	if (XFS_ISIZE(ip) < XFS_FSB_TO_B(mp, mp->m_dalign)) +		return 0; +  	/*  	 * As we write multiple pages, the offset will always align to the  	 * start of a page and hence point to a hole at EOF. i.e. if the size is diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index ca9ecaa81112..96dda62d497b 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -467,9 +467,6 @@ xfs_setattr_mode(  	ASSERT(tp);  	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); -	if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID)) -		mode &= ~S_ISGID; -  	ip->i_d.di_mode &= S_IFMT;  	ip->i_d.di_mode |= mode & ~S_IFMT; @@ -495,15 +492,18 @@ xfs_setattr_nonsize(  	trace_xfs_setattr(ip); -	if (mp->m_flags & XFS_MOUNT_RDONLY) -		return XFS_ERROR(EROFS); +	/* If acls are being inherited, we already have this checked */ +	if (!(flags & XFS_ATTR_NOACL)) { +		if (mp->m_flags & XFS_MOUNT_RDONLY) +			return XFS_ERROR(EROFS); -	if (XFS_FORCED_SHUTDOWN(mp)) -		return XFS_ERROR(EIO); +		if (XFS_FORCED_SHUTDOWN(mp)) +			return XFS_ERROR(EIO); -	error = -inode_change_ok(inode, iattr); -	if (error) -		return XFS_ERROR(error); +		error = -inode_change_ok(inode, iattr); +		if (error) +			return XFS_ERROR(error); +	}  	ASSERT((mask & ATTR_SIZE) == 0); @@ -539,7 +539,7 @@ xfs_setattr_nonsize(  		ASSERT(udqp == NULL);  		ASSERT(gdqp == NULL);  		error = xfs_qm_vop_dqalloc(ip, uid, gid, xfs_get_projid(ip), -					 qflags, &udqp, &gdqp); +					 qflags, &udqp, &gdqp, NULL);  		if (error)  			return error;  	} @@ -575,7 +575,7 @@ xfs_setattr_nonsize(  		     (XFS_IS_GQUOTA_ON(mp) && igid != gid))) {  			ASSERT(tp);  			error = xfs_qm_vop_chown_reserve(tp, ip, udqp, gdqp, -						capable(CAP_FOWNER) ? +						NULL, capable(CAP_FOWNER) ?  						
XFS_QMOPT_FORCE_RES : 0);  			if (error)	/* out of quota */  				goto out_trans_cancel; @@ -987,7 +987,8 @@ xfs_fiemap_format(  	if (bmv->bmv_oflags & BMV_OF_PREALLOC)  		fiemap_flags |= FIEMAP_EXTENT_UNWRITTEN;  	else if (bmv->bmv_oflags & BMV_OF_DELALLOC) { -		fiemap_flags |= FIEMAP_EXTENT_DELALLOC; +		fiemap_flags |= (FIEMAP_EXTENT_DELALLOC | +				 FIEMAP_EXTENT_UNKNOWN);  		physical = 0;   /* no block yet */  	}  	if (bmv->bmv_oflags & BMV_OF_LAST) diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c index 2ea7d402188d..b93e14b86754 100644 --- a/fs/xfs/xfs_itable.c +++ b/fs/xfs/xfs_itable.c @@ -43,7 +43,7 @@ xfs_internal_inum(  {  	return (ino == mp->m_sb.sb_rbmino || ino == mp->m_sb.sb_rsumino ||  		(xfs_sb_version_hasquota(&mp->m_sb) && -		 (ino == mp->m_sb.sb_uquotino || ino == mp->m_sb.sb_gquotino))); +		 xfs_is_quota_inode(&mp->m_sb, ino)));  }  /* @@ -221,7 +221,6 @@ xfs_bulkstat(  	char			__user *ubufp;	/* pointer into user's buffer */  	int			ubelem;	/* spaces used in user's buffer */  	int			ubused;	/* bytes used by formatter */ -	xfs_buf_t		*bp;	/* ptr to on-disk inode cluster buf */  	/*  	 * Get the last inode value, see if there's nothing to do. @@ -263,7 +262,6 @@ xfs_bulkstat(  	rval = 0;  	while (XFS_BULKSTAT_UBLEFT(ubleft) && agno < mp->m_sb.sb_agcount) {  		cond_resched(); -		bp = NULL;  		error = xfs_ialloc_read_agi(mp, NULL, agno, &agbp);  		if (error) {  			/* @@ -383,11 +381,13 @@ xfs_bulkstat(  			 * Also start read-ahead now for this chunk.  			 */  			if (r.ir_freecount < XFS_INODES_PER_CHUNK) { +				struct blk_plug	plug;  				/*  				 * Loop over all clusters in the next chunk.  				 * Do a readahead if there are any allocated  				 * inodes in that cluster.  				 
*/ +				blk_start_plug(&plug);  				agbno = XFS_AGINO_TO_AGBNO(mp, r.ir_startino);  				for (chunkidx = 0;  				     chunkidx < XFS_INODES_PER_CHUNK; @@ -399,6 +399,7 @@ xfs_bulkstat(  							agbno, nbcluster,  							&xfs_inode_buf_ops);  				} +				blk_finish_plug(&plug);  				irbp->ir_startino = r.ir_startino;  				irbp->ir_freecount = r.ir_freecount;  				irbp->ir_free = r.ir_free; @@ -433,27 +434,7 @@ xfs_bulkstat(  				irbp->ir_freecount < XFS_INODES_PER_CHUNK;  			     chunkidx++, clustidx++, agino++) {  				ASSERT(chunkidx < XFS_INODES_PER_CHUNK); -				/* -				 * Recompute agbno if this is the -				 * first inode of the cluster. -				 * -				 * Careful with clustidx.   There can be -				 * multiple clusters per chunk, a single -				 * cluster per chunk or a cluster that has -				 * inodes represented from several different -				 * chunks (if blocksize is large). -				 * -				 * Because of this, the starting clustidx is -				 * initialized to zero in this loop but must -				 * later be reset after reading in the cluster -				 * buffer. -				 */ -				if ((chunkidx & (nicluster - 1)) == 0) { -					agbno = XFS_AGINO_TO_AGBNO(mp, -							irbp->ir_startino) + -						((chunkidx & nimask) >> -						 mp->m_sb.sb_inopblog); -				} +  				ino = XFS_AGINO_TO_INO(mp, agno, agino);  				/*  				 * Skip if this inode is free. @@ -499,10 +480,6 @@ xfs_bulkstat(  			cond_resched();  		} - -		if (bp) -			xfs_buf_relse(bp); -  		/*  		 * Set up for the next loop iteration.  		 
*/ diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index b345a7c85153..d852a2b3e1fd 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -1963,6 +1963,10 @@ xlog_write_calc_vec_length(  		headers++;  	for (lv = log_vector; lv; lv = lv->lv_next) { +		/* we don't write ordered log vectors */ +		if (lv->lv_buf_len == XFS_LOG_VEC_ORDERED) +			continue; +  		headers += lv->lv_niovecs;  		for (i = 0; i < lv->lv_niovecs; i++) { @@ -2216,7 +2220,7 @@ xlog_write(  	index = 0;  	lv = log_vector;  	vecp = lv->lv_iovecp; -	while (lv && index < lv->lv_niovecs) { +	while (lv && (!lv->lv_niovecs || index < lv->lv_niovecs)) {  		void		*ptr;  		int		log_offset; @@ -2236,13 +2240,22 @@ xlog_write(  		 * This loop writes out as many regions as can fit in the amount  		 * of space which was allocated by xlog_state_get_iclog_space().  		 */ -		while (lv && index < lv->lv_niovecs) { -			struct xfs_log_iovec	*reg = &vecp[index]; +		while (lv && (!lv->lv_niovecs || index < lv->lv_niovecs)) { +			struct xfs_log_iovec	*reg;  			struct xlog_op_header	*ophdr;  			int			start_rec_copy;  			int			copy_len;  			int			copy_off; +			bool			ordered = false; + +			/* ordered log vectors have no regions to write */ +			if (lv->lv_buf_len == XFS_LOG_VEC_ORDERED) { +				ASSERT(lv->lv_niovecs == 0); +				ordered = true; +				goto next_lv; +			} +			reg = &vecp[index];  			ASSERT(reg->i_len % sizeof(__int32_t) == 0);  			ASSERT((unsigned long)ptr % sizeof(__int32_t) == 0); @@ -2302,12 +2315,13 @@ xlog_write(  				break;  			if (++index == lv->lv_niovecs) { +next_lv:  				lv = lv->lv_next;  				index = 0;  				if (lv)  					vecp = lv->lv_iovecp;  			} -			if (record_cnt == 0) { +			if (record_cnt == 0 && ordered == false) {  				if (!lv)  					return 0;  				break; diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h index 5caee96059df..fb630e496c12 100644 --- a/fs/xfs/xfs_log.h +++ b/fs/xfs/xfs_log.h @@ -88,7 +88,8 @@ static inline xfs_lsn_t	_lsn_cmp(xfs_lsn_t lsn1, xfs_lsn_t lsn2)  #define 
XLOG_REG_TYPE_UNMOUNT		17  #define XLOG_REG_TYPE_COMMIT		18  #define XLOG_REG_TYPE_TRANSHDR		19 -#define XLOG_REG_TYPE_MAX		19 +#define XLOG_REG_TYPE_ICREATE		20 +#define XLOG_REG_TYPE_MAX		20  typedef struct xfs_log_iovec {  	void		*i_addr;	/* beginning address of region */ @@ -105,6 +106,8 @@ struct xfs_log_vec {  	int			lv_buf_len;	/* size of formatted buffer */  }; +#define XFS_LOG_VEC_ORDERED	(-1) +  /*   * Structure used to pass callback function and the function's argument   * to the log manager. diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c index d0833b54e55d..02b9cf3f8252 100644 --- a/fs/xfs/xfs_log_cil.c +++ b/fs/xfs/xfs_log_cil.c @@ -127,6 +127,7 @@ xlog_cil_prepare_log_vecs(  		int	index;  		int	len = 0;  		uint	niovecs; +		bool	ordered = false;  		/* Skip items which aren't dirty in this transaction. */  		if (!(lidp->lid_flags & XFS_LID_DIRTY)) @@ -137,14 +138,30 @@ xlog_cil_prepare_log_vecs(  		if (!niovecs)  			continue; +		/* +		 * Ordered items need to be tracked but we do not wish to write +		 * them. We need a logvec to track the object, but we do not +		 * need an iovec or buffer to be allocated for copying data. +		 */ +		if (niovecs == XFS_LOG_VEC_ORDERED) { +			ordered = true; +			niovecs = 0; +		} +  		new_lv = kmem_zalloc(sizeof(*new_lv) +  				niovecs * sizeof(struct xfs_log_iovec),  				KM_SLEEP|KM_NOFS); +		new_lv->lv_item = lidp->lid_item; +		new_lv->lv_niovecs = niovecs; +		if (ordered) { +			/* track as an ordered logvec */ +			new_lv->lv_buf_len = XFS_LOG_VEC_ORDERED; +			goto next; +		} +  		/* The allocated iovec region lies beyond the log vector. 
*/  		new_lv->lv_iovecp = (struct xfs_log_iovec *)&new_lv[1]; -		new_lv->lv_niovecs = niovecs; -		new_lv->lv_item = lidp->lid_item;  		/* build the vector array and calculate it's length */  		IOP_FORMAT(new_lv->lv_item, new_lv->lv_iovecp); @@ -165,6 +182,7 @@ xlog_cil_prepare_log_vecs(  		}  		ASSERT(ptr == new_lv->lv_buf + new_lv->lv_buf_len); +next:  		if (!ret_lv)  			ret_lv = new_lv;  		else @@ -191,8 +209,18 @@ xfs_cil_prepare_item(  	if (old) {  		/* existing lv on log item, space used is a delta */ -		ASSERT(!list_empty(&lv->lv_item->li_cil)); -		ASSERT(old->lv_buf && old->lv_buf_len && old->lv_niovecs); +		ASSERT((old->lv_buf && old->lv_buf_len && old->lv_niovecs) || +			old->lv_buf_len == XFS_LOG_VEC_ORDERED); + +		/* +		 * If the new item is ordered, keep the old one that is already +		 * tracking dirty or ordered regions +		 */ +		if (lv->lv_buf_len == XFS_LOG_VEC_ORDERED) { +			ASSERT(!lv->lv_buf); +			kmem_free(lv); +			return; +		}  		*len += lv->lv_buf_len - old->lv_buf_len;  		*diff_iovecs += lv->lv_niovecs - old->lv_niovecs; @@ -201,10 +229,11 @@ xfs_cil_prepare_item(  	} else {  		/* new lv, must pin the log item */  		ASSERT(!lv->lv_item->li_lv); -		ASSERT(list_empty(&lv->lv_item->li_cil)); -		*len += lv->lv_buf_len; -		*diff_iovecs += lv->lv_niovecs; +		if (lv->lv_buf_len != XFS_LOG_VEC_ORDERED) { +			*len += lv->lv_buf_len; +			*diff_iovecs += lv->lv_niovecs; +		}  		IOP_PIN(lv->lv_item);  	} @@ -259,18 +288,24 @@ xlog_cil_insert_items(  	 * We can do this safely because the context can't checkpoint until we  	 * are done so it doesn't matter exactly how we update the CIL.  	 
*/ -	for (lv = log_vector; lv; lv = lv->lv_next) -		xfs_cil_prepare_item(log, lv, &len, &diff_iovecs); - -	/* account for space used by new iovec headers  */ -	len += diff_iovecs * sizeof(xlog_op_header_t); -  	spin_lock(&cil->xc_cil_lock); +	for (lv = log_vector; lv; ) { +		struct xfs_log_vec *next = lv->lv_next; -	/* move the items to the tail of the CIL */ -	for (lv = log_vector; lv; lv = lv->lv_next) +		ASSERT(lv->lv_item->li_lv || list_empty(&lv->lv_item->li_cil)); +		lv->lv_next = NULL; + +		/* +		 * xfs_cil_prepare_item() may free the lv, so move the item on +		 * the CIL first. +		 */  		list_move_tail(&lv->lv_item->li_cil, &cil->xc_cil); +		xfs_cil_prepare_item(log, lv, &len, &diff_iovecs); +		lv = next; +	} +	/* account for space used by new iovec headers  */ +	len += diff_iovecs * sizeof(xlog_op_header_t);  	ctx->nvecs += diff_iovecs;  	/* @@ -381,9 +416,7 @@ xlog_cil_push(  	struct xfs_cil_ctx	*new_ctx;  	struct xlog_in_core	*commit_iclog;  	struct xlog_ticket	*tic; -	int			num_lv;  	int			num_iovecs; -	int			len;  	int			error = 0;  	struct xfs_trans_header thdr;  	struct xfs_log_iovec	lhdr; @@ -428,12 +461,9 @@ xlog_cil_push(  	 * side which is currently locked out by the flush lock.  	 
*/  	lv = NULL; -	num_lv = 0;  	num_iovecs = 0; -	len = 0;  	while (!list_empty(&cil->xc_cil)) {  		struct xfs_log_item	*item; -		int			i;  		item = list_first_entry(&cil->xc_cil,  					struct xfs_log_item, li_cil); @@ -444,11 +474,7 @@ xlog_cil_push(  			lv->lv_next = item->li_lv;  		lv = item->li_lv;  		item->li_lv = NULL; - -		num_lv++;  		num_iovecs += lv->lv_niovecs; -		for (i = 0; i < lv->lv_niovecs; i++) -			len += lv->lv_iovecp[i].i_len;  	}  	/* @@ -701,6 +727,7 @@ xfs_log_commit_cil(  	if (commit_lsn)  		*commit_lsn = log->l_cilp->xc_ctx->sequence; +	/* xlog_cil_insert_items() destroys log_vector list */  	xlog_cil_insert_items(log, log_vector, tp->t_ticket);  	/* check we didn't blow the reservation */ diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 7cf5e4eafe28..7681b19aa5dc 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -45,6 +45,7 @@  #include "xfs_cksum.h"  #include "xfs_trace.h"  #include "xfs_icache.h" +#include "xfs_icreate_item.h"  /* Need all the magic numbers and buffer ops structures from these headers */  #include "xfs_symlink.h" @@ -1617,7 +1618,10 @@ xlog_recover_add_to_trans(   *	   form the cancelled buffer table. Hence they have tobe done last.   *   *	3. Inode allocation buffers must be replayed before inode items that - *	   read the buffer and replay changes into it. + *	   read the buffer and replay changes into it. For filesystems using the + *	   ICREATE transactions, this means XFS_LI_ICREATE objects need to get + *	   treated the same as inode allocation buffers as they create and + *	   initialise the buffers directly.   *   *	4. Inode unlink buffers must be replayed after inode items are replayed.   *	   This ensures that inodes are completely flushed to the inode buffer @@ -1632,10 +1636,17 @@ xlog_recover_add_to_trans(   * from all the other buffers and move them to last.   
*   * Hence, 4 lists, in order from head to tail: - * 	- buffer_list for all buffers except cancelled/inode unlink buffers - * 	- item_list for all non-buffer items - * 	- inode_buffer_list for inode unlink buffers - * 	- cancel_list for the cancelled buffers + *	- buffer_list for all buffers except cancelled/inode unlink buffers + *	- item_list for all non-buffer items + *	- inode_buffer_list for inode unlink buffers + *	- cancel_list for the cancelled buffers + * + * Note that we add objects to the tail of the lists so that first-to-last + * ordering is preserved within the lists. Adding objects to the head of the + * list means when we traverse from the head we walk them in last-to-first + * order. For cancelled buffers and inode unlink buffers this doesn't matter, + * but for all other items there may be specific ordering that we need to + * preserve.   */  STATIC int  xlog_recover_reorder_trans( @@ -1655,6 +1666,9 @@ xlog_recover_reorder_trans(  		xfs_buf_log_format_t	*buf_f = item->ri_buf[0].i_addr;  		switch (ITEM_TYPE(item)) { +		case XFS_LI_ICREATE: +			list_move_tail(&item->ri_list, &buffer_list); +			break;  		case XFS_LI_BUF:  			if (buf_f->blf_flags & XFS_BLF_CANCEL) {  				trace_xfs_log_recover_item_reorder_head(log, @@ -2578,8 +2592,16 @@ xlog_recover_inode_pass2(  		goto error;  	} -	/* Skip replay when the on disk inode is newer than the log one */ -	if (dicp->di_flushiter < be16_to_cpu(dip->di_flushiter)) { +	/* +	 * di_flushiter is only valid for v1/2 inodes. All changes for v3 inodes +	 * are transactional and if ordering is necessary we can determine that +	 * more accurately by the LSN field in the V3 inode core. 
Don't trust +	 * the inode versions we might be changing them here - use the +	 * superblock flag to determine whether we need to look at di_flushiter +	 * to skip replay when the on disk inode is newer than the log one +	 */ +	if (!xfs_sb_version_hascrc(&mp->m_sb) && +	    dicp->di_flushiter < be16_to_cpu(dip->di_flushiter)) {  		/*  		 * Deal with the wrap case, DI_MAX_FLUSH is less  		 * than smaller numbers @@ -2594,6 +2616,7 @@ xlog_recover_inode_pass2(  			goto error;  		}  	} +  	/* Take the opportunity to reset the flush iteration count */  	dicp->di_flushiter = 0; @@ -2982,6 +3005,93 @@ xlog_recover_efd_pass2(  }  /* + * This routine is called when an inode create format structure is found in a + * committed transaction in the log.  It's purpose is to initialise the inodes + * being allocated on disk. This requires us to get inode cluster buffers that + * match the range to be intialised, stamped with inode templates and written + * by delayed write so that subsequent modifications will hit the cached buffer + * and only need writing out at the end of recovery. 
+ */ +STATIC int +xlog_recover_do_icreate_pass2( +	struct xlog		*log, +	struct list_head	*buffer_list, +	xlog_recover_item_t	*item) +{ +	struct xfs_mount	*mp = log->l_mp; +	struct xfs_icreate_log	*icl; +	xfs_agnumber_t		agno; +	xfs_agblock_t		agbno; +	unsigned int		count; +	unsigned int		isize; +	xfs_agblock_t		length; + +	icl = (struct xfs_icreate_log *)item->ri_buf[0].i_addr; +	if (icl->icl_type != XFS_LI_ICREATE) { +		xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad type"); +		return EINVAL; +	} + +	if (icl->icl_size != 1) { +		xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad icl size"); +		return EINVAL; +	} + +	agno = be32_to_cpu(icl->icl_ag); +	if (agno >= mp->m_sb.sb_agcount) { +		xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad agno"); +		return EINVAL; +	} +	agbno = be32_to_cpu(icl->icl_agbno); +	if (!agbno || agbno == NULLAGBLOCK || agbno >= mp->m_sb.sb_agblocks) { +		xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad agbno"); +		return EINVAL; +	} +	isize = be32_to_cpu(icl->icl_isize); +	if (isize != mp->m_sb.sb_inodesize) { +		xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad isize"); +		return EINVAL; +	} +	count = be32_to_cpu(icl->icl_count); +	if (!count) { +		xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad count"); +		return EINVAL; +	} +	length = be32_to_cpu(icl->icl_length); +	if (!length || length >= mp->m_sb.sb_agblocks) { +		xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad length"); +		return EINVAL; +	} + +	/* existing allocation is fixed value */ +	ASSERT(count == XFS_IALLOC_INODES(mp)); +	ASSERT(length == XFS_IALLOC_BLOCKS(mp)); +	if (count != XFS_IALLOC_INODES(mp) || +	     length != XFS_IALLOC_BLOCKS(mp)) { +		xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad count 2"); +		return EINVAL; +	} + +	/* +	 * Inode buffers can be freed. Do not replay the inode initialisation as +	 * we could be overwriting something written after this inode buffer was +	 * cancelled. 
+	 * +	 * XXX: we need to iterate all buffers and only init those that are not +	 * cancelled. I think that a more fine grained factoring of +	 * xfs_ialloc_inode_init may be appropriate here to enable this to be +	 * done easily. +	 */ +	if (xlog_check_buffer_cancelled(log, +			XFS_AGB_TO_DADDR(mp, agno, agbno), length, 0)) +		return 0; + +	xfs_ialloc_inode_init(mp, NULL, buffer_list, agno, agbno, length, +					be32_to_cpu(icl->icl_gen)); +	return 0; +} + +/*   * Free up any resources allocated by the transaction   *   * Remember that EFIs, EFDs, and IUNLINKs are handled later. @@ -3023,6 +3133,7 @@ xlog_recover_commit_pass1(  	case XFS_LI_EFI:  	case XFS_LI_EFD:  	case XFS_LI_DQUOT: +	case XFS_LI_ICREATE:  		/* nothing to do in pass 1 */  		return 0;  	default: @@ -3053,6 +3164,8 @@ xlog_recover_commit_pass2(  		return xlog_recover_efd_pass2(log, item);  	case XFS_LI_DQUOT:  		return xlog_recover_dquot_pass2(log, buffer_list, item); +	case XFS_LI_ICREATE: +		return xlog_recover_do_icreate_pass2(log, buffer_list, item);  	case XFS_LI_QUOTAOFF:  		/* nothing to do in pass2 */  		return 0; diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index e8e310c05097..2b0ba3581656 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -336,6 +336,14 @@ xfs_mount_validate_sb(  		return XFS_ERROR(EWRONGFS);  	} +	if ((sbp->sb_qflags & (XFS_OQUOTA_ENFD | XFS_OQUOTA_CHKD)) && +			(sbp->sb_qflags & (XFS_PQUOTA_ENFD | XFS_GQUOTA_ENFD | +				XFS_PQUOTA_CHKD | XFS_GQUOTA_CHKD))) { +		xfs_notice(mp, +"Super block has XFS_OQUOTA bits along with XFS_PQUOTA and/or XFS_GQUOTA bits.\n"); +		return XFS_ERROR(EFSCORRUPTED); +	} +  	/*  	 * Version 5 superblock feature mask validation. Reject combinations the  	 * kernel cannot support up front before checking anything else. For @@ -561,6 +569,18 @@ out_unwind:  	return error;  } +static void +xfs_sb_quota_from_disk(struct xfs_sb *sbp) +{ +	if (sbp->sb_qflags & XFS_OQUOTA_ENFD) +		sbp->sb_qflags |= (sbp->sb_qflags & XFS_PQUOTA_ACCT) ? 
+					XFS_PQUOTA_ENFD : XFS_GQUOTA_ENFD; +	if (sbp->sb_qflags & XFS_OQUOTA_CHKD) +		sbp->sb_qflags |= (sbp->sb_qflags & XFS_PQUOTA_ACCT) ? +					XFS_PQUOTA_CHKD : XFS_GQUOTA_CHKD; +	sbp->sb_qflags &= ~(XFS_OQUOTA_ENFD | XFS_OQUOTA_CHKD); +} +  void  xfs_sb_from_disk(  	struct xfs_sb	*to, @@ -622,6 +642,35 @@ xfs_sb_from_disk(  	to->sb_lsn = be64_to_cpu(from->sb_lsn);  } +static inline void +xfs_sb_quota_to_disk( +	xfs_dsb_t	*to, +	xfs_sb_t	*from, +	__int64_t	*fields) +{ +	__uint16_t	qflags = from->sb_qflags; + +	if (*fields & XFS_SB_QFLAGS) { +		/* +		 * The in-core version of sb_qflags do not have +		 * XFS_OQUOTA_* flags, whereas the on-disk version +		 * does.  So, convert incore XFS_{PG}QUOTA_* flags +		 * to on-disk XFS_OQUOTA_* flags. +		 */ +		qflags &= ~(XFS_PQUOTA_ENFD | XFS_PQUOTA_CHKD | +				XFS_GQUOTA_ENFD | XFS_GQUOTA_CHKD); + +		if (from->sb_qflags & +				(XFS_PQUOTA_ENFD | XFS_GQUOTA_ENFD)) +			qflags |= XFS_OQUOTA_ENFD; +		if (from->sb_qflags & +				(XFS_PQUOTA_CHKD | XFS_GQUOTA_CHKD)) +			qflags |= XFS_OQUOTA_CHKD; +		to->sb_qflags = cpu_to_be16(qflags); +		*fields &= ~XFS_SB_QFLAGS; +	} +} +  /*   * Copy in core superblock to ondisk one.   * @@ -643,6 +692,7 @@ xfs_sb_to_disk(  	if (!fields)  		return; +	xfs_sb_quota_to_disk(to, from, &fields);  	while (fields) {  		f = (xfs_sb_field_t)xfs_lowbit64((__uint64_t)fields);  		first = xfs_sb_info[f].offset; @@ -835,6 +885,7 @@ reread:  	 */  	xfs_sb_from_disk(&mp->m_sb, XFS_BUF_TO_SBP(bp)); +	xfs_sb_quota_from_disk(&mp->m_sb);  	/*  	 * We must be able to do sector-sized and sector-aligned IO.  	 */ @@ -987,42 +1038,27 @@ xfs_update_alignment(xfs_mount_t *mp)  		 */  		if ((BBTOB(mp->m_dalign) & mp->m_blockmask) ||  		    (BBTOB(mp->m_swidth) & mp->m_blockmask)) { -			if (mp->m_flags & XFS_MOUNT_RETERR) { -				xfs_warn(mp, "alignment check failed: " -					 "(sunit/swidth vs. 
blocksize)"); -				return XFS_ERROR(EINVAL); -			} -			mp->m_dalign = mp->m_swidth = 0; +			xfs_warn(mp, +		"alignment check failed: sunit/swidth vs. blocksize(%d)", +				sbp->sb_blocksize); +			return XFS_ERROR(EINVAL);  		} else {  			/*  			 * Convert the stripe unit and width to FSBs.  			 */  			mp->m_dalign = XFS_BB_TO_FSBT(mp, mp->m_dalign);  			if (mp->m_dalign && (sbp->sb_agblocks % mp->m_dalign)) { -				if (mp->m_flags & XFS_MOUNT_RETERR) { -					xfs_warn(mp, "alignment check failed: " -						 "(sunit/swidth vs. ag size)"); -					return XFS_ERROR(EINVAL); -				}  				xfs_warn(mp, -		"stripe alignment turned off: sunit(%d)/swidth(%d) " -		"incompatible with agsize(%d)", -					mp->m_dalign, mp->m_swidth, -					sbp->sb_agblocks); - -				mp->m_dalign = 0; -				mp->m_swidth = 0; +			"alignment check failed: sunit/swidth vs. agsize(%d)", +					 sbp->sb_agblocks); +				return XFS_ERROR(EINVAL);  			} else if (mp->m_dalign) {  				mp->m_swidth = XFS_BB_TO_FSBT(mp, mp->m_swidth);  			} else { -				if (mp->m_flags & XFS_MOUNT_RETERR) { -					xfs_warn(mp, "alignment check failed: " -						"sunit(%d) less than bsize(%d)", -						mp->m_dalign, -						mp->m_blockmask +1); -					return XFS_ERROR(EINVAL); -				} -				mp->m_swidth = 0; +				xfs_warn(mp, +			"alignment check failed: sunit(%d) less than bsize(%d)", +					 mp->m_dalign, sbp->sb_blocksize); +				return XFS_ERROR(EINVAL);  			}  		} @@ -1039,6 +1075,10 @@ xfs_update_alignment(xfs_mount_t *mp)  				sbp->sb_width = mp->m_swidth;  				mp->m_update_flags |= XFS_SB_WIDTH;  			} +		} else { +			xfs_warn(mp, +	"cannot change alignment: superblock does not support data alignment"); +			return XFS_ERROR(EINVAL);  		}  	} else if ((mp->m_flags & XFS_MOUNT_NOALIGN) != XFS_MOUNT_NOALIGN &&  		    xfs_sb_version_hasdalign(&mp->m_sb)) { diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index b004cecdfb04..4e374d4a9189 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -192,8 +192,6 @@ typedef struct xfs_mount {  
	xfs_dablk_t		m_dirleafblk;	/* blockno of dir non-data v2 */  	xfs_dablk_t		m_dirfreeblk;	/* blockno of dirfreeindex v2 */  	uint			m_chsize;	/* size of next field */ -	struct xfs_chash	*m_chash;	/* fs private inode per-cluster -						 * hash table */  	atomic_t		m_active_trans;	/* number trans frozen */  #ifdef HAVE_PERCPU_SB  	xfs_icsb_cnts_t __percpu *m_sb_cnts;	/* per-cpu superblock counters */ @@ -229,8 +227,6 @@ typedef struct xfs_mount {  						   operations, typically for  						   disk errors in metadata */  #define XFS_MOUNT_DISCARD	(1ULL << 5)	/* discard unused blocks */ -#define XFS_MOUNT_RETERR	(1ULL << 6)     /* return alignment errors to -						   user */  #define XFS_MOUNT_NOALIGN	(1ULL << 7)	/* turn off stripe alignment  						   allocations */  #define XFS_MOUNT_ATTR2		(1ULL << 8)	/* allow use of attr2 format */ diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c index b75c9bb6e71e..d320794d03ce 100644 --- a/fs/xfs/xfs_qm.c +++ b/fs/xfs/xfs_qm.c @@ -70,7 +70,7 @@ xfs_qm_dquot_walk(  	void			*data)  {  	struct xfs_quotainfo	*qi = mp->m_quotainfo; -	struct radix_tree_root	*tree = XFS_DQUOT_TREE(qi, type); +	struct radix_tree_root	*tree = xfs_dquot_tree(qi, type);  	uint32_t		next_index;  	int			last_error = 0;  	int			skipped; @@ -137,6 +137,7 @@ xfs_qm_dqpurge(  	struct xfs_mount	*mp = dqp->q_mount;  	struct xfs_quotainfo	*qi = mp->m_quotainfo;  	struct xfs_dquot	*gdqp = NULL; +	struct xfs_dquot	*pdqp = NULL;  	xfs_dqlock(dqp);  	if ((dqp->dq_flags & XFS_DQ_FREEING) || dqp->q_nrefs != 0) { @@ -145,8 +146,7 @@ xfs_qm_dqpurge(  	}  	/* -	 * If this quota has a group hint attached, prepare for releasing it -	 * now. +	 * If this quota has a hint attached, prepare for releasing it now.  	 
*/  	gdqp = dqp->q_gdquot;  	if (gdqp) { @@ -154,6 +154,12 @@ xfs_qm_dqpurge(  		dqp->q_gdquot = NULL;  	} +	pdqp = dqp->q_pdquot; +	if (pdqp) { +		xfs_dqlock(pdqp); +		dqp->q_pdquot = NULL; +	} +  	dqp->dq_flags |= XFS_DQ_FREEING;  	xfs_dqflock(dqp); @@ -189,7 +195,7 @@ xfs_qm_dqpurge(  	xfs_dqfunlock(dqp);  	xfs_dqunlock(dqp); -	radix_tree_delete(XFS_DQUOT_TREE(qi, dqp->q_core.d_flags), +	radix_tree_delete(xfs_dquot_tree(qi, dqp->q_core.d_flags),  			  be32_to_cpu(dqp->q_core.d_id));  	qi->qi_dquots--; @@ -208,6 +214,8 @@ xfs_qm_dqpurge(  	if (gdqp)  		xfs_qm_dqput(gdqp); +	if (pdqp) +		xfs_qm_dqput(pdqp);  	return 0;  } @@ -299,8 +307,10 @@ xfs_qm_mount_quotas(  	 */  	if (!XFS_IS_UQUOTA_ON(mp))  		mp->m_qflags &= ~XFS_UQUOTA_CHKD; -	if (!(XFS_IS_GQUOTA_ON(mp) || XFS_IS_PQUOTA_ON(mp))) -		mp->m_qflags &= ~XFS_OQUOTA_CHKD; +	if (!XFS_IS_GQUOTA_ON(mp)) +		mp->m_qflags &= ~XFS_GQUOTA_CHKD; +	if (!XFS_IS_PQUOTA_ON(mp)) +		mp->m_qflags &= ~XFS_PQUOTA_CHKD;   write_changes:  	/* @@ -362,6 +372,10 @@ xfs_qm_unmount_quotas(  			IRELE(mp->m_quotainfo->qi_gquotaip);  			mp->m_quotainfo->qi_gquotaip = NULL;  		} +		if (mp->m_quotainfo->qi_pquotaip) { +			IRELE(mp->m_quotainfo->qi_pquotaip); +			mp->m_quotainfo->qi_pquotaip = NULL; +		}  	}  } @@ -408,7 +422,10 @@ xfs_qm_dqattach_one(  		 * be reclaimed as long as we have a ref from inode and we  		 * hold the ilock.  		 */ -		dqp = udqhint->q_gdquot; +		if (type == XFS_DQ_GROUP) +			dqp = udqhint->q_gdquot; +		else +			dqp = udqhint->q_pdquot;  		if (dqp && be32_to_cpu(dqp->q_core.d_id) == id) {  			ASSERT(*IO_idqpp == NULL); @@ -451,28 +468,42 @@ xfs_qm_dqattach_one(  /* - * Given a udquot and gdquot, attach a ptr to the group dquot in the - * udquot as a hint for future lookups. + * Given a udquot and group/project type, attach the group/project + * dquot pointer to the udquot as a hint for future lookups.   
*/  STATIC void -xfs_qm_dqattach_grouphint( -	xfs_dquot_t	*udq, -	xfs_dquot_t	*gdq) +xfs_qm_dqattach_hint( +	struct xfs_inode	*ip, +	int			type)  { -	xfs_dquot_t	*tmp; +	struct xfs_dquot **dqhintp; +	struct xfs_dquot *dqp; +	struct xfs_dquot *udq = ip->i_udquot; + +	ASSERT(type == XFS_DQ_GROUP || type == XFS_DQ_PROJ);  	xfs_dqlock(udq); -	tmp = udq->q_gdquot; -	if (tmp) { -		if (tmp == gdq) +	if (type == XFS_DQ_GROUP) { +		dqp = ip->i_gdquot; +		dqhintp = &udq->q_gdquot; +	} else { +		dqp = ip->i_pdquot; +		dqhintp = &udq->q_pdquot; +	} + +	if (*dqhintp) { +		struct xfs_dquot *tmp; + +		if (*dqhintp == dqp)  			goto done; -		udq->q_gdquot = NULL; +		tmp = *dqhintp; +		*dqhintp = NULL;  		xfs_qm_dqrele(tmp);  	} -	udq->q_gdquot = xfs_qm_dqhold(gdq); +	*dqhintp = xfs_qm_dqhold(dqp);  done:  	xfs_dqunlock(udq);  } @@ -489,8 +520,7 @@ xfs_qm_need_dqattach(  		return false;  	if (!XFS_NOT_DQATTACHED(mp, ip))  		return false; -	if (ip->i_ino == mp->m_sb.sb_uquotino || -	    ip->i_ino == mp->m_sb.sb_gquotino) +	if (xfs_is_quota_inode(&mp->m_sb, ip->i_ino))  		return false;  	return true;  } @@ -526,12 +556,8 @@ xfs_qm_dqattach_locked(  	}  	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); -	if (XFS_IS_OQUOTA_ON(mp)) { -		error = XFS_IS_GQUOTA_ON(mp) ? -			xfs_qm_dqattach_one(ip, ip->i_d.di_gid, XFS_DQ_GROUP, -						flags & XFS_QMOPT_DQALLOC, -						ip->i_udquot, &ip->i_gdquot) : -			xfs_qm_dqattach_one(ip, xfs_get_projid(ip), XFS_DQ_PROJ, +	if (XFS_IS_GQUOTA_ON(mp)) { +		error = xfs_qm_dqattach_one(ip, ip->i_d.di_gid, XFS_DQ_GROUP,  						flags & XFS_QMOPT_DQALLOC,  						ip->i_udquot, &ip->i_gdquot);  		/* @@ -543,14 +569,28 @@ xfs_qm_dqattach_locked(  		nquotas++;  	} +	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); +	if (XFS_IS_PQUOTA_ON(mp)) { +		error = xfs_qm_dqattach_one(ip, xfs_get_projid(ip), XFS_DQ_PROJ, +						flags & XFS_QMOPT_DQALLOC, +						ip->i_udquot, &ip->i_pdquot); +		/* +		 * Don't worry about the udquot that we may have +		 * attached above. 
It'll get detached, if not already. +		 */ +		if (error) +			goto done; +		nquotas++; +	} +  	/* -	 * Attach this group quota to the user quota as a hint. +	 * Attach this group/project quota to the user quota as a hint.  	 * This WON'T, in general, result in a thrash.  	 */ -	if (nquotas == 2) { +	if (nquotas > 1 && ip->i_udquot) {  		ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); -		ASSERT(ip->i_udquot); -		ASSERT(ip->i_gdquot); +		ASSERT(ip->i_gdquot || !XFS_IS_GQUOTA_ON(mp)); +		ASSERT(ip->i_pdquot || !XFS_IS_PQUOTA_ON(mp));  		/*  		 * We do not have i_udquot locked at this point, but this check @@ -559,7 +599,10 @@ xfs_qm_dqattach_locked(  		 * succeed in general.  		 */  		if (ip->i_udquot->q_gdquot != ip->i_gdquot) -			xfs_qm_dqattach_grouphint(ip->i_udquot, ip->i_gdquot); +			xfs_qm_dqattach_hint(ip, XFS_DQ_GROUP); + +		if (ip->i_udquot->q_pdquot != ip->i_pdquot) +			xfs_qm_dqattach_hint(ip, XFS_DQ_PROJ);  	}   done: @@ -567,8 +610,10 @@ xfs_qm_dqattach_locked(  	if (!error) {  		if (XFS_IS_UQUOTA_ON(mp))  			ASSERT(ip->i_udquot); -		if (XFS_IS_OQUOTA_ON(mp)) +		if (XFS_IS_GQUOTA_ON(mp))  			ASSERT(ip->i_gdquot); +		if (XFS_IS_PQUOTA_ON(mp)) +			ASSERT(ip->i_pdquot);  	}  	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));  #endif @@ -601,13 +646,12 @@ void  xfs_qm_dqdetach(  	xfs_inode_t	*ip)  { -	if (!(ip->i_udquot || ip->i_gdquot)) +	if (!(ip->i_udquot || ip->i_gdquot || ip->i_pdquot))  		return;  	trace_xfs_dquot_dqdetach(ip); -	ASSERT(ip->i_ino != ip->i_mount->m_sb.sb_uquotino); -	ASSERT(ip->i_ino != ip->i_mount->m_sb.sb_gquotino); +	ASSERT(!xfs_is_quota_inode(&ip->i_mount->m_sb, ip->i_ino));  	if (ip->i_udquot) {  		xfs_qm_dqrele(ip->i_udquot);  		ip->i_udquot = NULL; @@ -616,6 +660,10 @@ xfs_qm_dqdetach(  		xfs_qm_dqrele(ip->i_gdquot);  		ip->i_gdquot = NULL;  	} +	if (ip->i_pdquot) { +		xfs_qm_dqrele(ip->i_pdquot); +		ip->i_pdquot = NULL; +	}  }  int @@ -660,6 +708,7 @@ xfs_qm_init_quotainfo(  	INIT_RADIX_TREE(&qinf->qi_uquota_tree, GFP_NOFS);  	
INIT_RADIX_TREE(&qinf->qi_gquota_tree, GFP_NOFS); +	INIT_RADIX_TREE(&qinf->qi_pquota_tree, GFP_NOFS);  	mutex_init(&qinf->qi_tree_lock);  	INIT_LIST_HEAD(&qinf->qi_lru_list); @@ -761,6 +810,10 @@ xfs_qm_destroy_quotainfo(  		IRELE(qi->qi_gquotaip);  		qi->qi_gquotaip = NULL;  	} +	if (qi->qi_pquotaip) { +		IRELE(qi->qi_pquotaip); +		qi->qi_pquotaip = NULL; +	}  	mutex_destroy(&qi->qi_quotaofflock);  	kmem_free(qi);  	mp->m_quotainfo = NULL; @@ -1152,7 +1205,7 @@ xfs_qm_dqusage_adjust(  	 * rootino must have its resources accounted for, not so with the quota  	 * inodes.  	 */ -	if (ino == mp->m_sb.sb_uquotino || ino == mp->m_sb.sb_gquotino) { +	if (xfs_is_quota_inode(&mp->m_sb, ino)) {  		*res = BULKSTAT_RV_NOTHING;  		return XFS_ERROR(EINVAL);  	} @@ -1262,19 +1315,21 @@ int  xfs_qm_quotacheck(  	xfs_mount_t	*mp)  { -	int		done, count, error, error2; -	xfs_ino_t	lastino; -	size_t		structsz; -	xfs_inode_t	*uip, *gip; -	uint		flags; -	LIST_HEAD	(buffer_list); +	int			done, count, error, error2; +	xfs_ino_t		lastino; +	size_t			structsz; +	uint			flags; +	LIST_HEAD		(buffer_list); +	struct xfs_inode	*uip = mp->m_quotainfo->qi_uquotaip; +	struct xfs_inode	*gip = mp->m_quotainfo->qi_gquotaip; +	struct xfs_inode	*pip = mp->m_quotainfo->qi_pquotaip;  	count = INT_MAX;  	structsz = 1;  	lastino = 0;  	flags = 0; -	ASSERT(mp->m_quotainfo->qi_uquotaip || mp->m_quotainfo->qi_gquotaip); +	ASSERT(uip || gip || pip);  	ASSERT(XFS_IS_QUOTA_RUNNING(mp));  	xfs_notice(mp, "Quotacheck needed: Please wait."); @@ -1284,7 +1339,6 @@ xfs_qm_quotacheck(  	 * their counters to zero. We need a clean slate.  	 * We don't log our changes till later.  	 */ -	uip = mp->m_quotainfo->qi_uquotaip;  	if (uip) {  		error = xfs_qm_dqiterate(mp, uip, XFS_QMOPT_UQUOTA,  					 &buffer_list); @@ -1293,14 +1347,20 @@ xfs_qm_quotacheck(  		flags |= XFS_UQUOTA_CHKD;  	} -	gip = mp->m_quotainfo->qi_gquotaip;  	if (gip) { -		error = xfs_qm_dqiterate(mp, gip, XFS_IS_GQUOTA_ON(mp) ? 
-					 XFS_QMOPT_GQUOTA : XFS_QMOPT_PQUOTA, +		error = xfs_qm_dqiterate(mp, gip, XFS_QMOPT_GQUOTA,  					 &buffer_list);  		if (error)  			goto error_return; -		flags |= XFS_OQUOTA_CHKD; +		flags |= XFS_GQUOTA_CHKD; +	} + +	if (pip) { +		error = xfs_qm_dqiterate(mp, pip, XFS_QMOPT_PQUOTA, +					 &buffer_list); +		if (error) +			goto error_return; +		flags |= XFS_PQUOTA_CHKD;  	}  	do { @@ -1395,15 +1455,14 @@ STATIC int  xfs_qm_init_quotainos(  	xfs_mount_t	*mp)  { -	xfs_inode_t	*uip, *gip; -	int		error; -	__int64_t	sbflags; -	uint		flags; +	struct xfs_inode	*uip = NULL; +	struct xfs_inode	*gip = NULL; +	struct xfs_inode	*pip = NULL; +	int			error; +	__int64_t		sbflags = 0; +	uint			flags = 0;  	ASSERT(mp->m_quotainfo); -	uip = gip = NULL; -	sbflags = 0; -	flags = 0;  	/*  	 * Get the uquota and gquota inodes @@ -1412,19 +1471,27 @@ xfs_qm_init_quotainos(  		if (XFS_IS_UQUOTA_ON(mp) &&  		    mp->m_sb.sb_uquotino != NULLFSINO) {  			ASSERT(mp->m_sb.sb_uquotino > 0); -			if ((error = xfs_iget(mp, NULL, mp->m_sb.sb_uquotino, -					     0, 0, &uip))) +			error = xfs_iget(mp, NULL, mp->m_sb.sb_uquotino, +					     0, 0, &uip); +			if (error)  				return XFS_ERROR(error);  		} -		if (XFS_IS_OQUOTA_ON(mp) && +		if (XFS_IS_GQUOTA_ON(mp) &&  		    mp->m_sb.sb_gquotino != NULLFSINO) {  			ASSERT(mp->m_sb.sb_gquotino > 0); -			if ((error = xfs_iget(mp, NULL, mp->m_sb.sb_gquotino, -					     0, 0, &gip))) { -				if (uip) -					IRELE(uip); -				return XFS_ERROR(error); -			} +			error = xfs_iget(mp, NULL, mp->m_sb.sb_gquotino, +					     0, 0, &gip); +			if (error) +				goto error_rele; +		} +		/* XXX: Use gquotino for now */ +		if (XFS_IS_PQUOTA_ON(mp) && +		    mp->m_sb.sb_gquotino != NULLFSINO) { +			ASSERT(mp->m_sb.sb_gquotino > 0); +			error = xfs_iget(mp, NULL, mp->m_sb.sb_gquotino, +					     0, 0, &pip); +			if (error) +				goto error_rele;  		}  	} else {  		flags |= XFS_QMOPT_SBVERSION; @@ -1433,36 +1500,52 @@ xfs_qm_init_quotainos(  	}  	/* -	 * Create the two 
inodes, if they don't exist already. The changes +	 * Create the three inodes, if they don't exist already. The changes  	 * made above will get added to a transaction and logged in one of  	 * the qino_alloc calls below.  If the device is readonly,  	 * temporarily switch to read-write to do this.  	 */  	if (XFS_IS_UQUOTA_ON(mp) && uip == NULL) { -		if ((error = xfs_qm_qino_alloc(mp, &uip, +		error = xfs_qm_qino_alloc(mp, &uip,  					      sbflags | XFS_SB_UQUOTINO, -					      flags | XFS_QMOPT_UQUOTA))) -			return XFS_ERROR(error); +					      flags | XFS_QMOPT_UQUOTA); +		if (error) +			goto error_rele;  		flags &= ~XFS_QMOPT_SBVERSION;  	} -	if (XFS_IS_OQUOTA_ON(mp) && gip == NULL) { -		flags |= (XFS_IS_GQUOTA_ON(mp) ? -				XFS_QMOPT_GQUOTA : XFS_QMOPT_PQUOTA); +	if (XFS_IS_GQUOTA_ON(mp) && gip == NULL) {  		error = xfs_qm_qino_alloc(mp, &gip, -					  sbflags | XFS_SB_GQUOTINO, flags); -		if (error) { -			if (uip) -				IRELE(uip); +					  sbflags | XFS_SB_GQUOTINO, +					  flags | XFS_QMOPT_GQUOTA); +		if (error) +			goto error_rele; -			return XFS_ERROR(error); -		} +		flags &= ~XFS_QMOPT_SBVERSION; +	} +	if (XFS_IS_PQUOTA_ON(mp) && pip == NULL) { +		/* XXX: Use XFS_SB_GQUOTINO for now */ +		error = xfs_qm_qino_alloc(mp, &pip, +					  sbflags | XFS_SB_GQUOTINO, +					  flags | XFS_QMOPT_PQUOTA); +		if (error) +			goto error_rele;  	}  	mp->m_quotainfo->qi_uquotaip = uip;  	mp->m_quotainfo->qi_gquotaip = gip; +	mp->m_quotainfo->qi_pquotaip = pip;  	return 0; + +error_rele: +	if (uip) +		IRELE(uip); +	if (gip) +		IRELE(gip); +	if (pip) +		IRELE(pip); +	return XFS_ERROR(error);  }  STATIC void @@ -1473,7 +1556,7 @@ xfs_qm_dqfree_one(  	struct xfs_quotainfo	*qi = mp->m_quotainfo;  	mutex_lock(&qi->qi_tree_lock); -	radix_tree_delete(XFS_DQUOT_TREE(qi, dqp->q_core.d_flags), +	radix_tree_delete(xfs_dquot_tree(qi, dqp->q_core.d_flags),  			  be32_to_cpu(dqp->q_core.d_id));  	qi->qi_dquots--; @@ -1656,10 +1739,13 @@ xfs_qm_vop_dqalloc(  	prid_t			prid,  	uint			
flags,  	struct xfs_dquot	**O_udqpp, -	struct xfs_dquot	**O_gdqpp) +	struct xfs_dquot	**O_gdqpp, +	struct xfs_dquot	**O_pdqpp)  {  	struct xfs_mount	*mp = ip->i_mount; -	struct xfs_dquot	*uq, *gq; +	struct xfs_dquot	*uq = NULL; +	struct xfs_dquot	*gq = NULL; +	struct xfs_dquot	*pq = NULL;  	int			error;  	uint			lockflags; @@ -1684,7 +1770,6 @@ xfs_qm_vop_dqalloc(  		}  	} -	uq = gq = NULL;  	if ((flags & XFS_QMOPT_UQUOTA) && XFS_IS_UQUOTA_ON(mp)) {  		if (ip->i_d.di_uid != uid) {  			/* @@ -1697,11 +1782,12 @@ xfs_qm_vop_dqalloc(  			 * holding ilock.  			 */  			xfs_iunlock(ip, lockflags); -			if ((error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t) uid, +			error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t) uid,  						 XFS_DQ_USER,  						 XFS_QMOPT_DQALLOC |  						 XFS_QMOPT_DOWARN, -						 &uq))) { +						 &uq); +			if (error) {  				ASSERT(error != ENOENT);  				return error;  			} @@ -1723,15 +1809,14 @@ xfs_qm_vop_dqalloc(  	if ((flags & XFS_QMOPT_GQUOTA) && XFS_IS_GQUOTA_ON(mp)) {  		if (ip->i_d.di_gid != gid) {  			xfs_iunlock(ip, lockflags); -			if ((error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t)gid, +			error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t)gid,  						 XFS_DQ_GROUP,  						 XFS_QMOPT_DQALLOC |  						 XFS_QMOPT_DOWARN, -						 &gq))) { -				if (uq) -					xfs_qm_dqrele(uq); +						 &gq); +			if (error) {  				ASSERT(error != ENOENT); -				return error; +				goto error_rele;  			}  			xfs_dqunlock(gq);  			lockflags = XFS_ILOCK_SHARED; @@ -1740,25 +1825,25 @@ xfs_qm_vop_dqalloc(  			ASSERT(ip->i_gdquot);  			gq = xfs_qm_dqhold(ip->i_gdquot);  		} -	} else if ((flags & XFS_QMOPT_PQUOTA) && XFS_IS_PQUOTA_ON(mp)) { +	} +	if ((flags & XFS_QMOPT_PQUOTA) && XFS_IS_PQUOTA_ON(mp)) {  		if (xfs_get_projid(ip) != prid) {  			xfs_iunlock(ip, lockflags); -			if ((error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t)prid, +			error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t)prid,  						 XFS_DQ_PROJ,  						 XFS_QMOPT_DQALLOC |  						 XFS_QMOPT_DOWARN, -						 &gq))) { -				if (uq) -					
xfs_qm_dqrele(uq); +						 &pq); +			if (error) {  				ASSERT(error != ENOENT); -				return (error); +				goto error_rele;  			} -			xfs_dqunlock(gq); +			xfs_dqunlock(pq);  			lockflags = XFS_ILOCK_SHARED;  			xfs_ilock(ip, lockflags);  		} else { -			ASSERT(ip->i_gdquot); -			gq = xfs_qm_dqhold(ip->i_gdquot); +			ASSERT(ip->i_pdquot); +			pq = xfs_qm_dqhold(ip->i_pdquot);  		}  	}  	if (uq) @@ -1773,7 +1858,18 @@ xfs_qm_vop_dqalloc(  		*O_gdqpp = gq;  	else if (gq)  		xfs_qm_dqrele(gq); +	if (O_pdqpp) +		*O_pdqpp = pq; +	else if (pq) +		xfs_qm_dqrele(pq);  	return 0; + +error_rele: +	if (gq) +		xfs_qm_dqrele(gq); +	if (uq) +		xfs_qm_dqrele(uq); +	return error;  }  /* @@ -1821,29 +1917,34 @@ xfs_qm_vop_chown(   */  int  xfs_qm_vop_chown_reserve( -	xfs_trans_t	*tp, -	xfs_inode_t	*ip, -	xfs_dquot_t	*udqp, -	xfs_dquot_t	*gdqp, -	uint		flags) +	struct xfs_trans	*tp, +	struct xfs_inode	*ip, +	struct xfs_dquot	*udqp, +	struct xfs_dquot	*gdqp, +	struct xfs_dquot	*pdqp, +	uint			flags)  { -	xfs_mount_t	*mp = ip->i_mount; -	uint		delblks, blkflags, prjflags = 0; -	xfs_dquot_t	*unresudq, *unresgdq, *delblksudq, *delblksgdq; -	int		error; +	struct xfs_mount	*mp = ip->i_mount; +	uint			delblks, blkflags, prjflags = 0; +	struct xfs_dquot	*udq_unres = NULL; +	struct xfs_dquot	*gdq_unres = NULL; +	struct xfs_dquot	*pdq_unres = NULL; +	struct xfs_dquot	*udq_delblks = NULL; +	struct xfs_dquot	*gdq_delblks = NULL; +	struct xfs_dquot	*pdq_delblks = NULL; +	int			error;  	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));  	ASSERT(XFS_IS_QUOTA_RUNNING(mp));  	delblks = ip->i_delayed_blks; -	delblksudq = delblksgdq = unresudq = unresgdq = NULL;  	blkflags = XFS_IS_REALTIME_INODE(ip) ?  			
XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS;  	if (XFS_IS_UQUOTA_ON(mp) && udqp &&  	    ip->i_d.di_uid != (uid_t)be32_to_cpu(udqp->q_core.d_id)) { -		delblksudq = udqp; +		udq_delblks = udqp;  		/*  		 * If there are delayed allocation blocks, then we have to  		 * unreserve those from the old dquot, and add them to the @@ -1851,29 +1952,34 @@ xfs_qm_vop_chown_reserve(  		 */  		if (delblks) {  			ASSERT(ip->i_udquot); -			unresudq = ip->i_udquot; +			udq_unres = ip->i_udquot; +		} +	} +	if (XFS_IS_GQUOTA_ON(ip->i_mount) && gdqp && +	    ip->i_d.di_gid != be32_to_cpu(gdqp->q_core.d_id)) { +		gdq_delblks = gdqp; +		if (delblks) { +			ASSERT(ip->i_gdquot); +			gdq_unres = ip->i_gdquot;  		}  	} -	if (XFS_IS_OQUOTA_ON(ip->i_mount) && gdqp) { -		if (XFS_IS_PQUOTA_ON(ip->i_mount) && -		     xfs_get_projid(ip) != be32_to_cpu(gdqp->q_core.d_id)) -			prjflags = XFS_QMOPT_ENOSPC; -		if (prjflags || -		    (XFS_IS_GQUOTA_ON(ip->i_mount) && -		     ip->i_d.di_gid != be32_to_cpu(gdqp->q_core.d_id))) { -			delblksgdq = gdqp; -			if (delblks) { -				ASSERT(ip->i_gdquot); -				unresgdq = ip->i_gdquot; -			} +	if (XFS_IS_PQUOTA_ON(ip->i_mount) && pdqp && +	    xfs_get_projid(ip) != be32_to_cpu(pdqp->q_core.d_id)) { +		prjflags = XFS_QMOPT_ENOSPC; +		pdq_delblks = pdqp; +		if (delblks) { +			ASSERT(ip->i_pdquot); +			pdq_unres = ip->i_pdquot;  		}  	} -	if ((error = xfs_trans_reserve_quota_bydquots(tp, ip->i_mount, -				delblksudq, delblksgdq, ip->i_d.di_nblocks, 1, -				flags | blkflags | prjflags))) -		return (error); +	error = xfs_trans_reserve_quota_bydquots(tp, ip->i_mount, +				udq_delblks, gdq_delblks, pdq_delblks, +				ip->i_d.di_nblocks, 1, +				flags | blkflags | prjflags); +	if (error) +		return error;  	/*  	 * Do the delayed blks reservations/unreservations now. Since, these @@ -1885,15 +1991,17 @@ xfs_qm_vop_chown_reserve(  		/*  		 * Do the reservations first. Unreservation can't fail.  		 
*/ -		ASSERT(delblksudq || delblksgdq); -		ASSERT(unresudq || unresgdq); -		if ((error = xfs_trans_reserve_quota_bydquots(NULL, ip->i_mount, -				delblksudq, delblksgdq, (xfs_qcnt_t)delblks, 0, -				flags | blkflags | prjflags))) -			return (error); +		ASSERT(udq_delblks || gdq_delblks || pdq_delblks); +		ASSERT(udq_unres || gdq_unres || pdq_unres); +		error = xfs_trans_reserve_quota_bydquots(NULL, ip->i_mount, +			    udq_delblks, gdq_delblks, pdq_delblks, +			    (xfs_qcnt_t)delblks, 0, +			    flags | blkflags | prjflags); +		if (error) +			return error;  		xfs_trans_reserve_quota_bydquots(NULL, ip->i_mount, -				unresudq, unresgdq, -((xfs_qcnt_t)delblks), 0, -				blkflags); +				udq_unres, gdq_unres, pdq_unres, +				-((xfs_qcnt_t)delblks), 0, blkflags);  	}  	return (0); @@ -1932,7 +2040,8 @@ xfs_qm_vop_create_dqattach(  	struct xfs_trans	*tp,  	struct xfs_inode	*ip,  	struct xfs_dquot	*udqp, -	struct xfs_dquot	*gdqp) +	struct xfs_dquot	*gdqp, +	struct xfs_dquot	*pdqp)  {  	struct xfs_mount	*mp = tp->t_mountp; @@ -1952,13 +2061,18 @@ xfs_qm_vop_create_dqattach(  	}  	if (gdqp) {  		ASSERT(ip->i_gdquot == NULL); -		ASSERT(XFS_IS_OQUOTA_ON(mp)); -		ASSERT((XFS_IS_GQUOTA_ON(mp) ? 
-			ip->i_d.di_gid : xfs_get_projid(ip)) == -				be32_to_cpu(gdqp->q_core.d_id)); - +		ASSERT(XFS_IS_GQUOTA_ON(mp)); +		ASSERT(ip->i_d.di_gid == be32_to_cpu(gdqp->q_core.d_id));  		ip->i_gdquot = xfs_qm_dqhold(gdqp);  		xfs_trans_mod_dquot(tp, gdqp, XFS_TRANS_DQ_ICOUNT, 1);  	} +	if (pdqp) { +		ASSERT(ip->i_pdquot == NULL); +		ASSERT(XFS_IS_PQUOTA_ON(mp)); +		ASSERT(xfs_get_projid(ip) == be32_to_cpu(pdqp->q_core.d_id)); + +		ip->i_pdquot = xfs_qm_dqhold(pdqp); +		xfs_trans_mod_dquot(tp, pdqp, XFS_TRANS_DQ_ICOUNT, 1); +	}  } diff --git a/fs/xfs/xfs_qm.h b/fs/xfs/xfs_qm.h index 5d16a6e6900f..579d6a02a5b6 100644 --- a/fs/xfs/xfs_qm.h +++ b/fs/xfs/xfs_qm.h @@ -44,9 +44,11 @@ extern struct kmem_zone	*xfs_qm_dqtrxzone;  typedef struct xfs_quotainfo {  	struct radix_tree_root qi_uquota_tree;  	struct radix_tree_root qi_gquota_tree; +	struct radix_tree_root qi_pquota_tree;  	struct mutex qi_tree_lock; -	xfs_inode_t	*qi_uquotaip;	 /* user quota inode */ -	xfs_inode_t	*qi_gquotaip;	 /* group quota inode */ +	struct xfs_inode	*qi_uquotaip;	/* user quota inode */ +	struct xfs_inode	*qi_gquotaip;	/* group quota inode */ +	struct xfs_inode	*qi_pquotaip;	/* project quota inode */  	struct list_head qi_lru_list;  	struct mutex	 qi_lru_lock;  	int		 qi_lru_count; @@ -69,30 +71,66 @@ typedef struct xfs_quotainfo {  	struct shrinker  qi_shrinker;  } xfs_quotainfo_t; -#define XFS_DQUOT_TREE(qi, type) \ -	((type & XFS_DQ_USER) ? 
\ -	 &((qi)->qi_uquota_tree) : \ -	 &((qi)->qi_gquota_tree)) +static inline struct radix_tree_root * +xfs_dquot_tree( +	struct xfs_quotainfo	*qi, +	int			type) +{ +	switch (type) { +	case XFS_DQ_USER: +		return &qi->qi_uquota_tree; +	case XFS_DQ_GROUP: +		return &qi->qi_gquota_tree; +	case XFS_DQ_PROJ: +		return &qi->qi_pquota_tree; +	default: +		ASSERT(0); +	} +	return NULL; +} +static inline struct xfs_inode * +xfs_dq_to_quota_inode(struct xfs_dquot *dqp) +{ +	switch (dqp->dq_flags & XFS_DQ_ALLTYPES) { +	case XFS_DQ_USER: +		return dqp->q_mount->m_quotainfo->qi_uquotaip; +	case XFS_DQ_GROUP: +		return dqp->q_mount->m_quotainfo->qi_gquotaip; +	case XFS_DQ_PROJ: +		return dqp->q_mount->m_quotainfo->qi_pquotaip; +	default: +		ASSERT(0); +	} +	return NULL; +}  extern int	xfs_qm_calc_dquots_per_chunk(struct xfs_mount *mp,  					     unsigned int nbblks); -extern void	xfs_trans_mod_dquot(xfs_trans_t *, xfs_dquot_t *, uint, long); -extern int	xfs_trans_reserve_quota_bydquots(xfs_trans_t *, xfs_mount_t *, -			xfs_dquot_t *, xfs_dquot_t *, long, long, uint); -extern void	xfs_trans_dqjoin(xfs_trans_t *, xfs_dquot_t *); -extern void	xfs_trans_log_dquot(xfs_trans_t *, xfs_dquot_t *); +extern void	xfs_trans_mod_dquot(struct xfs_trans *, +					struct xfs_dquot *, uint, long); +extern int	xfs_trans_reserve_quota_bydquots(struct xfs_trans *, +			struct xfs_mount *, struct xfs_dquot *, +			struct xfs_dquot *, struct xfs_dquot *, +			long, long, uint); +extern void	xfs_trans_dqjoin(struct xfs_trans *, struct xfs_dquot *); +extern void	xfs_trans_log_dquot(struct xfs_trans *, struct xfs_dquot *);  /* - * We keep the usr and grp dquots separately so that locking will be easier - * to do at commit time. All transactions that we know of at this point + * We keep the usr, grp, and prj dquots separately so that locking will be + * easier to do at commit time. All transactions that we know of at this point   * affect no more than two dquots of one type. Hence, the TRANS_MAXDQS value.   
*/ +enum { +	XFS_QM_TRANS_USR = 0, +	XFS_QM_TRANS_GRP, +	XFS_QM_TRANS_PRJ, +	XFS_QM_TRANS_DQTYPES +};  #define XFS_QM_TRANS_MAXDQS		2 -typedef struct xfs_dquot_acct { -	xfs_dqtrx_t	dqa_usrdquots[XFS_QM_TRANS_MAXDQS]; -	xfs_dqtrx_t	dqa_grpdquots[XFS_QM_TRANS_MAXDQS]; -} xfs_dquot_acct_t; +struct xfs_dquot_acct { +	struct xfs_dqtrx	dqs[XFS_QM_TRANS_DQTYPES][XFS_QM_TRANS_MAXDQS]; +};  /*   * Users are allowed to have a usage exceeding their softlimit for @@ -106,22 +144,23 @@ typedef struct xfs_dquot_acct {  #define XFS_QM_IWARNLIMIT	5  #define XFS_QM_RTBWARNLIMIT	5 -extern void		xfs_qm_destroy_quotainfo(xfs_mount_t *); -extern int		xfs_qm_quotacheck(xfs_mount_t *); -extern int		xfs_qm_write_sb_changes(xfs_mount_t *, __int64_t); +extern void		xfs_qm_destroy_quotainfo(struct xfs_mount *); +extern int		xfs_qm_quotacheck(struct xfs_mount *); +extern int		xfs_qm_write_sb_changes(struct xfs_mount *, __int64_t);  /* dquot stuff */ -extern void		xfs_qm_dqpurge_all(xfs_mount_t *, uint); -extern void		xfs_qm_dqrele_all_inodes(xfs_mount_t *, uint); +extern void		xfs_qm_dqpurge_all(struct xfs_mount *, uint); +extern void		xfs_qm_dqrele_all_inodes(struct xfs_mount *, uint);  /* quota ops */ -extern int		xfs_qm_scall_trunc_qfiles(xfs_mount_t *, uint); -extern int		xfs_qm_scall_getquota(xfs_mount_t *, xfs_dqid_t, uint, -					fs_disk_quota_t *); +extern int		xfs_qm_scall_trunc_qfiles(struct xfs_mount *, uint); +extern int		xfs_qm_scall_getquota(struct xfs_mount *, xfs_dqid_t, +					uint, struct fs_disk_quota *);  extern int		xfs_qm_scall_setqlim(struct xfs_mount *, xfs_dqid_t, uint, -					fs_disk_quota_t *); -extern int		xfs_qm_scall_getqstat(xfs_mount_t *, fs_quota_stat_t *); -extern int		xfs_qm_scall_quotaon(xfs_mount_t *, uint); -extern int		xfs_qm_scall_quotaoff(xfs_mount_t *, uint); +					struct fs_disk_quota *); +extern int		xfs_qm_scall_getqstat(struct xfs_mount *, +					struct fs_quota_stat *); +extern int		xfs_qm_scall_quotaon(struct xfs_mount *, uint); +extern int		
xfs_qm_scall_quotaoff(struct xfs_mount *, uint);  #endif /* __XFS_QM_H__ */ diff --git a/fs/xfs/xfs_qm_bhv.c b/fs/xfs/xfs_qm_bhv.c index 2d02eac1c9a8..437a52d91f6d 100644 --- a/fs/xfs/xfs_qm_bhv.c +++ b/fs/xfs/xfs_qm_bhv.c @@ -112,16 +112,16 @@ xfs_qm_newmount(  	if (((uquotaondisk && !XFS_IS_UQUOTA_ON(mp)) ||  	    (!uquotaondisk &&  XFS_IS_UQUOTA_ON(mp)) || -	     (pquotaondisk && !XFS_IS_PQUOTA_ON(mp)) || -	    (!pquotaondisk &&  XFS_IS_PQUOTA_ON(mp)) ||  	     (gquotaondisk && !XFS_IS_GQUOTA_ON(mp)) || -	    (!gquotaondisk &&  XFS_IS_OQUOTA_ON(mp)))  && +	    (!gquotaondisk &&  XFS_IS_GQUOTA_ON(mp)) || +	     (pquotaondisk && !XFS_IS_PQUOTA_ON(mp)) || +	    (!pquotaondisk &&  XFS_IS_PQUOTA_ON(mp)))  &&  	    xfs_dev_is_read_only(mp, "changing quota state")) {  		xfs_warn(mp, "please mount with%s%s%s%s.",  			(!quotaondisk ? "out quota" : ""),  			(uquotaondisk ? " usrquota" : ""), -			(pquotaondisk ? " prjquota" : ""), -			(gquotaondisk ? " grpquota" : "")); +			(gquotaondisk ? " grpquota" : ""), +			(pquotaondisk ? " prjquota" : ""));  		return XFS_ERROR(EPERM);  	} diff --git a/fs/xfs/xfs_qm_syscalls.c b/fs/xfs/xfs_qm_syscalls.c index 6cdf6ffc36a1..e4f8b2d6f38b 100644 --- a/fs/xfs/xfs_qm_syscalls.c +++ b/fs/xfs/xfs_qm_syscalls.c @@ -117,11 +117,12 @@ xfs_qm_scall_quotaoff(  	}  	if (flags & XFS_GQUOTA_ACCT) {  		dqtype |= XFS_QMOPT_GQUOTA; -		flags |= (XFS_OQUOTA_CHKD | XFS_OQUOTA_ENFD); +		flags |= (XFS_GQUOTA_CHKD | XFS_GQUOTA_ENFD);  		inactivate_flags |= XFS_GQUOTA_ACTIVE; -	} else if (flags & XFS_PQUOTA_ACCT) { +	} +	if (flags & XFS_PQUOTA_ACCT) {  		dqtype |= XFS_QMOPT_PQUOTA; -		flags |= (XFS_OQUOTA_CHKD | XFS_OQUOTA_ENFD); +		flags |= (XFS_PQUOTA_CHKD | XFS_PQUOTA_ENFD);  		inactivate_flags |= XFS_PQUOTA_ACTIVE;  	} @@ -198,10 +199,9 @@ xfs_qm_scall_quotaoff(  	}  	/* -	 * If quotas is completely disabled, close shop. +	 * If all quotas are completely turned off, close shop.  	 
*/ -	if (((flags & XFS_MOUNT_QUOTA_ALL) == XFS_MOUNT_QUOTA_SET1) || -	    ((flags & XFS_MOUNT_QUOTA_ALL) == XFS_MOUNT_QUOTA_SET2)) { +	if (mp->m_qflags == 0) {  		mutex_unlock(&q->qi_quotaofflock);  		xfs_qm_destroy_quotainfo(mp);  		return (0); @@ -214,10 +214,14 @@ xfs_qm_scall_quotaoff(  		IRELE(q->qi_uquotaip);  		q->qi_uquotaip = NULL;  	} -	if ((dqtype & (XFS_QMOPT_GQUOTA|XFS_QMOPT_PQUOTA)) && q->qi_gquotaip) { +	if ((dqtype & XFS_QMOPT_GQUOTA) && q->qi_gquotaip) {  		IRELE(q->qi_gquotaip);  		q->qi_gquotaip = NULL;  	} +	if ((dqtype & XFS_QMOPT_PQUOTA) && q->qi_pquotaip) { +		IRELE(q->qi_pquotaip); +		q->qi_pquotaip = NULL; +	}  out_unlock:  	mutex_unlock(&q->qi_quotaofflock); @@ -335,14 +339,14 @@ xfs_qm_scall_quotaon(  	 * quota acct on ondisk without m_qflags' knowing.  	 */  	if (((flags & XFS_UQUOTA_ACCT) == 0 && -	    (mp->m_sb.sb_qflags & XFS_UQUOTA_ACCT) == 0 && -	    (flags & XFS_UQUOTA_ENFD)) -	    || +	     (mp->m_sb.sb_qflags & XFS_UQUOTA_ACCT) == 0 && +	     (flags & XFS_UQUOTA_ENFD)) || +	    ((flags & XFS_GQUOTA_ACCT) == 0 && +	     (mp->m_sb.sb_qflags & XFS_GQUOTA_ACCT) == 0 && +	     (flags & XFS_GQUOTA_ENFD)) ||  	    ((flags & XFS_PQUOTA_ACCT) == 0 && -	    (mp->m_sb.sb_qflags & XFS_PQUOTA_ACCT) == 0 && -	    (flags & XFS_GQUOTA_ACCT) == 0 && -	    (mp->m_sb.sb_qflags & XFS_GQUOTA_ACCT) == 0 && -	    (flags & XFS_OQUOTA_ENFD))) { +	     (mp->m_sb.sb_qflags & XFS_PQUOTA_ACCT) == 0 && +	     (flags & XFS_PQUOTA_ENFD))) {  		xfs_debug(mp,  			"%s: Can't enforce without acct, flags=%x sbflags=%x\n",  			__func__, flags, mp->m_sb.sb_qflags); @@ -407,11 +411,11 @@ xfs_qm_scall_getqstat(  	struct fs_quota_stat	*out)  {  	struct xfs_quotainfo	*q = mp->m_quotainfo; -	struct xfs_inode	*uip, *gip; -	bool                    tempuqip, tempgqip; +	struct xfs_inode	*uip = NULL; +	struct xfs_inode	*gip = NULL; +	bool                    tempuqip = false; +	bool                    tempgqip = false; -	uip = gip = NULL; -	tempuqip = tempgqip = false;  	
memset(out, 0, sizeof(fs_quota_stat_t));  	out->qs_version = FS_QSTAT_VERSION; @@ -776,9 +780,12 @@ xfs_qm_scall_getquota(  	 * gets turned off. No need to confuse the user level code,  	 * so return zeroes in that case.  	 */ -	if ((!XFS_IS_UQUOTA_ENFORCED(mp) && dqp->q_core.d_flags == XFS_DQ_USER) || -	    (!XFS_IS_OQUOTA_ENFORCED(mp) && -			(dqp->q_core.d_flags & (XFS_DQ_PROJ | XFS_DQ_GROUP)))) { +	if ((!XFS_IS_UQUOTA_ENFORCED(mp) && +	     dqp->q_core.d_flags == XFS_DQ_USER) || +	    (!XFS_IS_GQUOTA_ENFORCED(mp) && +	     dqp->q_core.d_flags == XFS_DQ_GROUP) || +	    (!XFS_IS_PQUOTA_ENFORCED(mp) && +	     dqp->q_core.d_flags == XFS_DQ_PROJ)) {  		dst->d_btimer = 0;  		dst->d_itimer = 0;  		dst->d_rtbtimer = 0; @@ -786,8 +793,8 @@ xfs_qm_scall_getquota(  #ifdef DEBUG  	if (((XFS_IS_UQUOTA_ENFORCED(mp) && dst->d_flags == FS_USER_QUOTA) || -	     (XFS_IS_OQUOTA_ENFORCED(mp) && -			(dst->d_flags & (FS_PROJ_QUOTA | FS_GROUP_QUOTA)))) && +	     (XFS_IS_GQUOTA_ENFORCED(mp) && dst->d_flags == FS_GROUP_QUOTA) || +	     (XFS_IS_PQUOTA_ENFORCED(mp) && dst->d_flags == FS_PROJ_QUOTA)) &&  	    dst->d_id != 0) {  		if ((dst->d_bcount > dst->d_blk_softlimit) &&  		    (dst->d_blk_softlimit > 0)) { @@ -833,16 +840,16 @@ xfs_qm_export_flags(  	uflags = 0;  	if (flags & XFS_UQUOTA_ACCT)  		uflags |= FS_QUOTA_UDQ_ACCT; -	if (flags & XFS_PQUOTA_ACCT) -		uflags |= FS_QUOTA_PDQ_ACCT;  	if (flags & XFS_GQUOTA_ACCT)  		uflags |= FS_QUOTA_GDQ_ACCT; +	if (flags & XFS_PQUOTA_ACCT) +		uflags |= FS_QUOTA_PDQ_ACCT;  	if (flags & XFS_UQUOTA_ENFD)  		uflags |= FS_QUOTA_UDQ_ENFD; -	if (flags & (XFS_OQUOTA_ENFD)) { -		uflags |= (flags & XFS_GQUOTA_ACCT) ? 
-			FS_QUOTA_GDQ_ENFD : FS_QUOTA_PDQ_ENFD; -	} +	if (flags & XFS_GQUOTA_ENFD) +		uflags |= FS_QUOTA_GDQ_ENFD; +	if (flags & XFS_PQUOTA_ENFD) +		uflags |= FS_QUOTA_PDQ_ENFD;  	return (uflags);  } @@ -856,9 +863,11 @@ xfs_dqrele_inode(  {  	/* skip quota inodes */  	if (ip == ip->i_mount->m_quotainfo->qi_uquotaip || -	    ip == ip->i_mount->m_quotainfo->qi_gquotaip) { +	    ip == ip->i_mount->m_quotainfo->qi_gquotaip || +	    ip == ip->i_mount->m_quotainfo->qi_pquotaip) {  		ASSERT(ip->i_udquot == NULL);  		ASSERT(ip->i_gdquot == NULL); +		ASSERT(ip->i_pdquot == NULL);  		return 0;  	} @@ -867,10 +876,14 @@ xfs_dqrele_inode(  		xfs_qm_dqrele(ip->i_udquot);  		ip->i_udquot = NULL;  	} -	if (flags & (XFS_PQUOTA_ACCT|XFS_GQUOTA_ACCT) && ip->i_gdquot) { +	if ((flags & XFS_GQUOTA_ACCT) && ip->i_gdquot) {  		xfs_qm_dqrele(ip->i_gdquot);  		ip->i_gdquot = NULL;  	} +	if ((flags & XFS_PQUOTA_ACCT) && ip->i_pdquot) { +		xfs_qm_dqrele(ip->i_pdquot); +		ip->i_pdquot = NULL; +	}  	xfs_iunlock(ip, XFS_ILOCK_EXCL);  	return 0;  } diff --git a/fs/xfs/xfs_quota.h b/fs/xfs/xfs_quota.h index c38068f26c55..b14f42c714b6 100644 --- a/fs/xfs/xfs_quota.h +++ b/fs/xfs/xfs_quota.h @@ -108,11 +108,28 @@ typedef struct xfs_dqblk {  	{ XFS_DQ_FREEING,	"FREEING" }  /* - * In the worst case, when both user and group quotas are on, - * we can have a max of three dquots changing in a single transaction. + * We have the possibility of all three quota types being active at once, and + * hence free space modification requires modification of all three current + * dquots in a single transaction. For this case we need to have a reservation + * of at least 3 dquots. + * + * However, a chmod operation can change both UID and GID in a single + * transaction, resulting in requiring {old, new} x {uid, gid} dquots to be + * modified. Hence for this case we need to reserve space for at least 4 dquots. 
+ * + * And in the worst case, there's a rename operation that can be modifying up to + * 4 inodes with dquots attached to them. In reality, the only inodes that can + * have their dquots modified are the source and destination directory inodes + * due to directory name creation and removal. That can require space allocation + * and/or freeing on both directory inodes, and hence all three dquots on each + * inode can be modified. And if the directories are world writeable, all the + * dquots can be unique and so 6 dquots can be modified.... + * + * And, of course, we also need to take into account the dquot log format item + * used to describe each dquot.   */ -#define XFS_DQUOT_LOGRES(mp)	(sizeof(xfs_disk_dquot_t) * 3) - +#define XFS_DQUOT_LOGRES(mp)	\ +	((sizeof(struct xfs_dq_logformat) + sizeof(struct xfs_disk_dquot)) * 6)  /*   * These are the structures used to lay out dquots and quotaoff @@ -161,30 +178,42 @@ typedef struct xfs_qoff_logformat {  #define XFS_GQUOTA_ACCT	0x0040  /* group quota accounting ON */  /* + * Conversion to and from the combined OQUOTA flag (if necessary) + * is done only in xfs_sb_qflags_to_disk() and xfs_sb_qflags_from_disk() + */ +#define XFS_GQUOTA_ENFD	0x0080  /* group quota limits enforced */ +#define XFS_GQUOTA_CHKD	0x0100  /* quotacheck run on group quotas */ +#define XFS_PQUOTA_ENFD	0x0200  /* project quota limits enforced */ +#define XFS_PQUOTA_CHKD	0x0400  /* quotacheck run on project quotas */ + +/*   * Quota Accounting/Enforcement flags   */  #define XFS_ALL_QUOTA_ACCT	\  		(XFS_UQUOTA_ACCT | XFS_GQUOTA_ACCT | XFS_PQUOTA_ACCT) -#define XFS_ALL_QUOTA_ENFD	(XFS_UQUOTA_ENFD | XFS_OQUOTA_ENFD) -#define XFS_ALL_QUOTA_CHKD	(XFS_UQUOTA_CHKD | XFS_OQUOTA_CHKD) +#define XFS_ALL_QUOTA_ENFD	\ +		(XFS_UQUOTA_ENFD | XFS_GQUOTA_ENFD | XFS_PQUOTA_ENFD) +#define XFS_ALL_QUOTA_CHKD	\ +		(XFS_UQUOTA_CHKD | XFS_GQUOTA_CHKD | XFS_PQUOTA_CHKD)  #define XFS_IS_QUOTA_RUNNING(mp)	((mp)->m_qflags & XFS_ALL_QUOTA_ACCT)  #define 
XFS_IS_UQUOTA_RUNNING(mp)	((mp)->m_qflags & XFS_UQUOTA_ACCT)  #define XFS_IS_PQUOTA_RUNNING(mp)	((mp)->m_qflags & XFS_PQUOTA_ACCT)  #define XFS_IS_GQUOTA_RUNNING(mp)	((mp)->m_qflags & XFS_GQUOTA_ACCT)  #define XFS_IS_UQUOTA_ENFORCED(mp)	((mp)->m_qflags & XFS_UQUOTA_ENFD) -#define XFS_IS_OQUOTA_ENFORCED(mp)	((mp)->m_qflags & XFS_OQUOTA_ENFD) +#define XFS_IS_GQUOTA_ENFORCED(mp)	((mp)->m_qflags & XFS_GQUOTA_ENFD) +#define XFS_IS_PQUOTA_ENFORCED(mp)	((mp)->m_qflags & XFS_PQUOTA_ENFD)  /*   * Incore only flags for quotaoff - these bits get cleared when quota(s)   * are in the process of getting turned off. These flags are in m_qflags but   * never in sb_qflags.   */ -#define XFS_UQUOTA_ACTIVE	0x0100  /* uquotas are being turned off */ -#define XFS_PQUOTA_ACTIVE	0x0200  /* pquotas are being turned off */ -#define XFS_GQUOTA_ACTIVE	0x0400  /* gquotas are being turned off */ +#define XFS_UQUOTA_ACTIVE	0x1000  /* uquotas are being turned off */ +#define XFS_GQUOTA_ACTIVE	0x2000  /* gquotas are being turned off */ +#define XFS_PQUOTA_ACTIVE	0x4000  /* pquotas are being turned off */  #define XFS_ALL_QUOTA_ACTIVE	\ -	(XFS_UQUOTA_ACTIVE | XFS_PQUOTA_ACTIVE | XFS_GQUOTA_ACTIVE) +	(XFS_UQUOTA_ACTIVE | XFS_GQUOTA_ACTIVE | XFS_PQUOTA_ACTIVE)  /*   * Checking XFS_IS_*QUOTA_ON() while holding any inode lock guarantees @@ -259,33 +288,24 @@ typedef struct xfs_qoff_logformat {   * we didn't have the inode locked, the appropriate dquot(s) will be   * attached atomically.   
*/ -#define XFS_NOT_DQATTACHED(mp, ip) ((XFS_IS_UQUOTA_ON(mp) &&\ -				     (ip)->i_udquot == NULL) || \ -				    (XFS_IS_OQUOTA_ON(mp) && \ -				     (ip)->i_gdquot == NULL)) +#define XFS_NOT_DQATTACHED(mp, ip) \ +	((XFS_IS_UQUOTA_ON(mp) && (ip)->i_udquot == NULL) || \ +	 (XFS_IS_GQUOTA_ON(mp) && (ip)->i_gdquot == NULL) || \ +	 (XFS_IS_PQUOTA_ON(mp) && (ip)->i_pdquot == NULL))  #define XFS_QM_NEED_QUOTACHECK(mp) \  	((XFS_IS_UQUOTA_ON(mp) && \  		(mp->m_sb.sb_qflags & XFS_UQUOTA_CHKD) == 0) || \  	 (XFS_IS_GQUOTA_ON(mp) && \ -		((mp->m_sb.sb_qflags & XFS_OQUOTA_CHKD) == 0 || \ -		 (mp->m_sb.sb_qflags & XFS_PQUOTA_ACCT))) || \ +		(mp->m_sb.sb_qflags & XFS_GQUOTA_CHKD) == 0) || \  	 (XFS_IS_PQUOTA_ON(mp) && \ -		((mp->m_sb.sb_qflags & XFS_OQUOTA_CHKD) == 0 || \ -		 (mp->m_sb.sb_qflags & XFS_GQUOTA_ACCT)))) - -#define XFS_MOUNT_QUOTA_SET1	(XFS_UQUOTA_ACCT|XFS_UQUOTA_ENFD|\ -				 XFS_UQUOTA_CHKD|XFS_PQUOTA_ACCT|\ -				 XFS_OQUOTA_ENFD|XFS_OQUOTA_CHKD) - -#define XFS_MOUNT_QUOTA_SET2	(XFS_UQUOTA_ACCT|XFS_UQUOTA_ENFD|\ -				 XFS_UQUOTA_CHKD|XFS_GQUOTA_ACCT|\ -				 XFS_OQUOTA_ENFD|XFS_OQUOTA_CHKD) +		(mp->m_sb.sb_qflags & XFS_PQUOTA_CHKD) == 0))  #define XFS_MOUNT_QUOTA_ALL	(XFS_UQUOTA_ACCT|XFS_UQUOTA_ENFD|\ -				 XFS_UQUOTA_CHKD|XFS_PQUOTA_ACCT|\ -				 XFS_OQUOTA_ENFD|XFS_OQUOTA_CHKD|\ -				 XFS_GQUOTA_ACCT) +				 XFS_UQUOTA_CHKD|XFS_GQUOTA_ACCT|\ +				 XFS_GQUOTA_ENFD|XFS_GQUOTA_CHKD|\ +				 XFS_PQUOTA_ACCT|XFS_PQUOTA_ENFD|\ +				 XFS_PQUOTA_CHKD)  /* @@ -318,17 +338,18 @@ extern int xfs_trans_reserve_quota_nblks(struct xfs_trans *,  		struct xfs_inode *, long, long, uint);  extern int xfs_trans_reserve_quota_bydquots(struct xfs_trans *,  		struct xfs_mount *, struct xfs_dquot *, -		struct xfs_dquot *, long, long, uint); +		struct xfs_dquot *, struct xfs_dquot *, long, long, uint);  extern int xfs_qm_vop_dqalloc(struct xfs_inode *, uid_t, gid_t, prid_t, uint, -		struct xfs_dquot **, struct xfs_dquot **); +		struct xfs_dquot **, struct xfs_dquot **, struct xfs_dquot 
**);  extern void xfs_qm_vop_create_dqattach(struct xfs_trans *, struct xfs_inode *, -		struct xfs_dquot *, struct xfs_dquot *); +		struct xfs_dquot *, struct xfs_dquot *, struct xfs_dquot *);  extern int xfs_qm_vop_rename_dqattach(struct xfs_inode **);  extern struct xfs_dquot *xfs_qm_vop_chown(struct xfs_trans *,  		struct xfs_inode *, struct xfs_dquot **, struct xfs_dquot *);  extern int xfs_qm_vop_chown_reserve(struct xfs_trans *, struct xfs_inode *, -		struct xfs_dquot *, struct xfs_dquot *, uint); +		struct xfs_dquot *, struct xfs_dquot *, +		struct xfs_dquot *, uint);  extern int xfs_qm_dqattach(struct xfs_inode *, uint);  extern int xfs_qm_dqattach_locked(struct xfs_inode *, uint);  extern void xfs_qm_dqdetach(struct xfs_inode *); @@ -342,10 +363,12 @@ extern void xfs_qm_unmount_quotas(struct xfs_mount *);  #else  static inline int  xfs_qm_vop_dqalloc(struct xfs_inode *ip, uid_t uid, gid_t gid, prid_t prid, -		uint flags, struct xfs_dquot **udqp, struct xfs_dquot **gdqp) +		uint flags, struct xfs_dquot **udqp, struct xfs_dquot **gdqp, +		struct xfs_dquot **pdqp)  {  	*udqp = NULL;  	*gdqp = NULL; +	*pdqp = NULL;  	return 0;  }  #define xfs_trans_dup_dqinfo(tp, tp2) @@ -360,14 +383,15 @@ static inline int xfs_trans_reserve_quota_nblks(struct xfs_trans *tp,  }  static inline int xfs_trans_reserve_quota_bydquots(struct xfs_trans *tp,  		struct xfs_mount *mp, struct xfs_dquot *udqp, -		struct xfs_dquot *gdqp, long nblks, long nions, uint flags) +		struct xfs_dquot *gdqp, struct xfs_dquot *pdqp, +		long nblks, long nions, uint flags)  {  	return 0;  } -#define xfs_qm_vop_create_dqattach(tp, ip, u, g) +#define xfs_qm_vop_create_dqattach(tp, ip, u, g, p)  #define xfs_qm_vop_rename_dqattach(it)					(0)  #define xfs_qm_vop_chown(tp, ip, old, new)				(NULL) -#define xfs_qm_vop_chown_reserve(tp, ip, u, g, fl)			(0) +#define xfs_qm_vop_chown_reserve(tp, ip, u, g, p, fl)			(0)  #define xfs_qm_dqattach(ip, fl)						(0)  #define xfs_qm_dqattach_locked(ip, fl)					(0)  
#define xfs_qm_dqdetach(ip) @@ -381,8 +405,8 @@ static inline int xfs_trans_reserve_quota_bydquots(struct xfs_trans *tp,  #define xfs_trans_unreserve_quota_nblks(tp, ip, nblks, ninos, flags) \  	xfs_trans_reserve_quota_nblks(tp, ip, -(nblks), -(ninos), flags) -#define xfs_trans_reserve_quota(tp, mp, ud, gd, nb, ni, f) \ -	xfs_trans_reserve_quota_bydquots(tp, mp, ud, gd, nb, ni, \ +#define xfs_trans_reserve_quota(tp, mp, ud, gd, pd, nb, ni, f) \ +	xfs_trans_reserve_quota_bydquots(tp, mp, ud, gd, pd, nb, ni, \  				f | XFS_QMOPT_RES_REGBLKS)  extern int xfs_qm_dqcheck(struct xfs_mount *, xfs_disk_dquot_t *, diff --git a/fs/xfs/xfs_quotaops.c b/fs/xfs/xfs_quotaops.c index 71926d630527..20e30f93b0c7 100644 --- a/fs/xfs/xfs_quotaops.c +++ b/fs/xfs/xfs_quotaops.c @@ -75,8 +75,10 @@ xfs_fs_set_xstate(  		flags |= XFS_GQUOTA_ACCT;  	if (uflags & FS_QUOTA_UDQ_ENFD)  		flags |= XFS_UQUOTA_ENFD; -	if (uflags & (FS_QUOTA_PDQ_ENFD|FS_QUOTA_GDQ_ENFD)) -		flags |= XFS_OQUOTA_ENFD; +	if (uflags & FS_QUOTA_GDQ_ENFD) +		flags |= XFS_GQUOTA_ENFD; +	if (uflags & FS_QUOTA_PDQ_ENFD) +		flags |= XFS_PQUOTA_ENFD;  	switch (op) {  	case Q_XQUOTAON: diff --git a/fs/xfs/xfs_sb.h b/fs/xfs/xfs_sb.h index 2de58a85833c..78f9e70b80c7 100644 --- a/fs/xfs/xfs_sb.h +++ b/fs/xfs/xfs_sb.h @@ -618,6 +618,12 @@ xfs_sb_has_incompat_log_feature(  	return (sbp->sb_features_log_incompat & feature) != 0;  } +static inline bool +xfs_is_quota_inode(struct xfs_sb *sbp, xfs_ino_t ino) +{ +	return (ino == sbp->sb_uquotino || ino == sbp->sb_gquotino); +} +  /*   * end of superblock version macros   */ diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 3033ba5e9762..1d68ffcdeaa7 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -51,6 +51,7 @@  #include "xfs_inode_item.h"  #include "xfs_icache.h"  #include "xfs_trace.h" +#include "xfs_icreate_item.h"  #include <linux/namei.h>  #include <linux/init.h> @@ -359,17 +360,17 @@ xfs_parseargs(  		} else if (!strcmp(this_char, MNTOPT_PQUOTA) ||  			   
!strcmp(this_char, MNTOPT_PRJQUOTA)) {  			mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE | -					 XFS_OQUOTA_ENFD); +					 XFS_PQUOTA_ENFD);  		} else if (!strcmp(this_char, MNTOPT_PQUOTANOENF)) {  			mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE); -			mp->m_qflags &= ~XFS_OQUOTA_ENFD; +			mp->m_qflags &= ~XFS_PQUOTA_ENFD;  		} else if (!strcmp(this_char, MNTOPT_GQUOTA) ||  			   !strcmp(this_char, MNTOPT_GRPQUOTA)) {  			mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE | -					 XFS_OQUOTA_ENFD); +					 XFS_GQUOTA_ENFD);  		} else if (!strcmp(this_char, MNTOPT_GQUOTANOENF)) {  			mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE); -			mp->m_qflags &= ~XFS_OQUOTA_ENFD; +			mp->m_qflags &= ~XFS_GQUOTA_ENFD;  		} else if (!strcmp(this_char, MNTOPT_DELAYLOG)) {  			xfs_warn(mp,  	"delaylog is the default now, option is deprecated."); @@ -439,20 +440,15 @@ xfs_parseargs(  	}  done: -	if (!(mp->m_flags & XFS_MOUNT_NOALIGN)) { +	if (dsunit && !(mp->m_flags & XFS_MOUNT_NOALIGN)) {  		/*  		 * At this point the superblock has not been read  		 * in, therefore we do not know the block size.  		 * Before the mount call ends we will convert  		 * these to FSBs.  		 
*/ -		if (dsunit) { -			mp->m_dalign = dsunit; -			mp->m_flags |= XFS_MOUNT_RETERR; -		} - -		if (dswidth) -			mp->m_swidth = dswidth; +		mp->m_dalign = dsunit; +		mp->m_swidth = dswidth;  	}  	if (mp->m_logbufs != -1 && @@ -563,12 +559,12 @@ xfs_showargs(  	/* Either project or group quotas can be active, not both */  	if (mp->m_qflags & XFS_PQUOTA_ACCT) { -		if (mp->m_qflags & XFS_OQUOTA_ENFD) +		if (mp->m_qflags & XFS_PQUOTA_ENFD)  			seq_puts(m, "," MNTOPT_PRJQUOTA);  		else  			seq_puts(m, "," MNTOPT_PQUOTANOENF);  	} else if (mp->m_qflags & XFS_GQUOTA_ACCT) { -		if (mp->m_qflags & XFS_OQUOTA_ENFD) +		if (mp->m_qflags & XFS_GQUOTA_ENFD)  			seq_puts(m, "," MNTOPT_GRPQUOTA);  		else  			seq_puts(m, "," MNTOPT_GQUOTANOENF); @@ -1136,8 +1132,8 @@ xfs_fs_statfs(  	spin_unlock(&mp->m_sb_lock);  	if ((ip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) && -	    ((mp->m_qflags & (XFS_PQUOTA_ACCT|XFS_OQUOTA_ENFD))) == -			      (XFS_PQUOTA_ACCT|XFS_OQUOTA_ENFD)) +	    ((mp->m_qflags & (XFS_PQUOTA_ACCT|XFS_PQUOTA_ENFD))) == +			      (XFS_PQUOTA_ACCT|XFS_PQUOTA_ENFD))  		xfs_qm_statvfs(ip, statp);  	return 0;  } @@ -1481,6 +1477,10 @@ xfs_fs_fill_super(  	sb->s_time_gran = 1;  	set_posix_acl_flag(sb); +	/* version 5 superblocks support inode version counters. */ +	if (XFS_SB_VERSION_NUM(&mp->m_sb) == XFS_SB_VERSION_5) +		sb->s_flags |= MS_I_VERSION; +  	error = xfs_mountfs(mp);  	if (error)  		goto out_filestream_unmount; @@ -1655,9 +1655,15 @@ xfs_init_zones(void)  					KM_ZONE_SPREAD, NULL);  	if (!xfs_ili_zone)  		goto out_destroy_inode_zone; +	xfs_icreate_zone = kmem_zone_init(sizeof(struct xfs_icreate_item), +					"xfs_icr"); +	if (!xfs_icreate_zone) +		goto out_destroy_ili_zone;  	return 0; + out_destroy_ili_zone: +	kmem_zone_destroy(xfs_ili_zone);   out_destroy_inode_zone:  	kmem_zone_destroy(xfs_inode_zone);   out_destroy_efi_zone: @@ -1696,6 +1702,7 @@ xfs_destroy_zones(void)  	 * destroy caches.  	 
*/  	rcu_barrier(); +	kmem_zone_destroy(xfs_icreate_zone);  	kmem_zone_destroy(xfs_ili_zone);  	kmem_zone_destroy(xfs_inode_zone);  	kmem_zone_destroy(xfs_efi_zone); diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c index 195a403e1522..f4895b662fcb 100644 --- a/fs/xfs/xfs_symlink.c +++ b/fs/xfs/xfs_symlink.c @@ -358,7 +358,9 @@ xfs_symlink(  	int			n;  	xfs_buf_t		*bp;  	prid_t			prid; -	struct xfs_dquot	*udqp, *gdqp; +	struct xfs_dquot	*udqp = NULL; +	struct xfs_dquot	*gdqp = NULL; +	struct xfs_dquot	*pdqp = NULL;  	uint			resblks;  	*ipp = NULL; @@ -385,7 +387,7 @@ xfs_symlink(  	 * Make sure that we have allocated dquot(s) on disk.  	 */  	error = xfs_qm_vop_dqalloc(dp, current_fsuid(), current_fsgid(), prid, -			XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, &udqp, &gdqp); +		XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, &udqp, &gdqp, &pdqp);  	if (error)  		goto std_return; @@ -426,7 +428,8 @@ xfs_symlink(  	/*  	 * Reserve disk quota : blocks and inode.  	 */ -	error = xfs_trans_reserve_quota(tp, mp, udqp, gdqp, resblks, 1, 0); +	error = xfs_trans_reserve_quota(tp, mp, udqp, gdqp, +						pdqp, resblks, 1, 0);  	if (error)  		goto error_return; @@ -464,7 +467,7 @@ xfs_symlink(  	/*  	 * Also attach the dquot(s) to it, if applicable.  	 */ -	xfs_qm_vop_create_dqattach(tp, ip, udqp, gdqp); +	xfs_qm_vop_create_dqattach(tp, ip, udqp, gdqp, pdqp);  	if (resblks)  		resblks -= XFS_IALLOC_SPACE_RES(mp); @@ -562,6 +565,7 @@ xfs_symlink(  	error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);  	xfs_qm_dqrele(udqp);  	xfs_qm_dqrele(gdqp); +	xfs_qm_dqrele(pdqp);  	*ipp = ip;  	return 0; @@ -575,6 +579,7 @@ xfs_symlink(  	xfs_trans_cancel(tp, cancel_flags);  	xfs_qm_dqrele(udqp);  	xfs_qm_dqrele(gdqp); +	xfs_qm_dqrele(pdqp);  	if (unlock_dp_on_error)  		xfs_iunlock(dp, XFS_ILOCK_EXCL); @@ -585,7 +590,7 @@ xfs_symlink(  /*   * Free a symlink that has blocks associated with it.   
*/ -int +STATIC int  xfs_inactive_symlink_rmt(  	xfs_inode_t	*ip,  	xfs_trans_t	**tpp) @@ -606,7 +611,7 @@ xfs_inactive_symlink_rmt(  	tp = *tpp;  	mp = ip->i_mount; -	ASSERT(ip->i_d.di_size > XFS_IFORK_DSIZE(ip)); +	ASSERT(ip->i_df.if_flags & XFS_IFEXTENTS);  	/*  	 * We're freeing a symlink that has some  	 * blocks allocated to it.  Free the @@ -720,3 +725,47 @@ xfs_inactive_symlink_rmt(   error0:  	return error;  } + +/* + * xfs_inactive_symlink - free a symlink + */ +int +xfs_inactive_symlink( +	struct xfs_inode	*ip, +	struct xfs_trans	**tp) +{ +	struct xfs_mount	*mp = ip->i_mount; +	int			pathlen; + +	trace_xfs_inactive_symlink(ip); + +	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); + +	if (XFS_FORCED_SHUTDOWN(mp)) +		return XFS_ERROR(EIO); + +	/* +	 * Zero length symlinks _can_ exist. +	 */ +	pathlen = (int)ip->i_d.di_size; +	if (!pathlen) +		return 0; + +	if (pathlen < 0 || pathlen > MAXPATHLEN) { +		xfs_alert(mp, "%s: inode (0x%llx) bad symlink length (%d)", +			 __func__, (unsigned long long)ip->i_ino, pathlen); +		ASSERT(0); +		return XFS_ERROR(EFSCORRUPTED); +	} + +	if (ip->i_df.if_flags & XFS_IFINLINE) { +		if (ip->i_df.if_bytes > 0) +			xfs_idata_realloc(ip, -(ip->i_df.if_bytes), +					  XFS_DATA_FORK); +		ASSERT(ip->i_df.if_bytes == 0); +		return 0; +	} + +	/* remove the remote symlink */ +	return xfs_inactive_symlink_rmt(ip, tp); +} diff --git a/fs/xfs/xfs_symlink.h b/fs/xfs/xfs_symlink.h index b39398d2097c..374394880c01 100644 --- a/fs/xfs/xfs_symlink.h +++ b/fs/xfs/xfs_symlink.h @@ -60,7 +60,7 @@ extern const struct xfs_buf_ops xfs_symlink_buf_ops;  int xfs_symlink(struct xfs_inode *dp, struct xfs_name *link_name,  		const char *target_path, umode_t mode, struct xfs_inode **ipp);  int xfs_readlink(struct xfs_inode *ip, char *link); -int xfs_inactive_symlink_rmt(struct xfs_inode *ip, struct xfs_trans **tpp); +int xfs_inactive_symlink(struct xfs_inode *ip, struct xfs_trans **tpp);  #endif /* __KERNEL__ */  #endif /* __XFS_SYMLINK_H */ diff --git 
a/fs/xfs/xfs_sysctl.c b/fs/xfs/xfs_sysctl.c index 2801b5ce6cdb..1743b9f8e23d 100644 --- a/fs/xfs/xfs_sysctl.c +++ b/fs/xfs/xfs_sysctl.c @@ -25,11 +25,11 @@ static struct ctl_table_header *xfs_table_header;  #ifdef CONFIG_PROC_FS  STATIC int  xfs_stats_clear_proc_handler( -	ctl_table	*ctl, -	int		write, -	void		__user *buffer, -	size_t		*lenp, -	loff_t		*ppos) +	struct ctl_table	*ctl, +	int			write, +	void			__user *buffer, +	size_t			*lenp, +	loff_t			*ppos)  {  	int		c, ret, *valp = ctl->data;  	__uint32_t	vn_active; @@ -55,11 +55,11 @@ xfs_stats_clear_proc_handler(  STATIC int  xfs_panic_mask_proc_handler( -	ctl_table	*ctl, -	int		write, -	void		__user *buffer, -	size_t		*lenp, -	loff_t		*ppos) +	struct ctl_table	*ctl, +	int			write, +	void			__user *buffer, +	size_t			*lenp, +	loff_t			*ppos)  {  	int		ret, *valp = ctl->data; @@ -74,7 +74,7 @@ xfs_panic_mask_proc_handler(  }  #endif /* CONFIG_PROC_FS */ -static ctl_table xfs_table[] = { +static struct ctl_table xfs_table[] = {  	{  		.procname	= "irix_sgid_inherit",  		.data		= &xfs_params.sgid_inherit.val, @@ -227,7 +227,7 @@ static ctl_table xfs_table[] = {  	{}  }; -static ctl_table xfs_dir_table[] = { +static struct ctl_table xfs_dir_table[] = {  	{  		.procname	= "xfs",  		.mode		= 0555, @@ -236,7 +236,7 @@ static ctl_table xfs_dir_table[] = {  	{}  }; -static ctl_table xfs_root_table[] = { +static struct ctl_table xfs_root_table[] = {  	{  		.procname	= "fs",  		.mode		= 0555, diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index aa4db3307d36..47910e638c18 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -486,9 +486,12 @@ DEFINE_EVENT(xfs_buf_item_class, name, \  	TP_PROTO(struct xfs_buf_log_item *bip), \  	TP_ARGS(bip))  DEFINE_BUF_ITEM_EVENT(xfs_buf_item_size); +DEFINE_BUF_ITEM_EVENT(xfs_buf_item_size_ordered);  DEFINE_BUF_ITEM_EVENT(xfs_buf_item_size_stale);  DEFINE_BUF_ITEM_EVENT(xfs_buf_item_format); +DEFINE_BUF_ITEM_EVENT(xfs_buf_item_format_ordered);  
DEFINE_BUF_ITEM_EVENT(xfs_buf_item_format_stale); +DEFINE_BUF_ITEM_EVENT(xfs_buf_item_ordered);  DEFINE_BUF_ITEM_EVENT(xfs_buf_item_pin);  DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unpin);  DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unpin_stale); @@ -508,6 +511,7 @@ DEFINE_BUF_ITEM_EVENT(xfs_trans_bjoin);  DEFINE_BUF_ITEM_EVENT(xfs_trans_bhold);  DEFINE_BUF_ITEM_EVENT(xfs_trans_bhold_release);  DEFINE_BUF_ITEM_EVENT(xfs_trans_binval); +DEFINE_BUF_ITEM_EVENT(xfs_trans_buf_ordered);  DECLARE_EVENT_CLASS(xfs_lock_class,  	TP_PROTO(struct xfs_inode *ip, unsigned lock_flags, @@ -571,6 +575,7 @@ DEFINE_INODE_EVENT(xfs_iget_miss);  DEFINE_INODE_EVENT(xfs_getattr);  DEFINE_INODE_EVENT(xfs_setattr);  DEFINE_INODE_EVENT(xfs_readlink); +DEFINE_INODE_EVENT(xfs_inactive_symlink);  DEFINE_INODE_EVENT(xfs_alloc_file_space);  DEFINE_INODE_EVENT(xfs_free_file_space);  DEFINE_INODE_EVENT(xfs_readdir); @@ -974,14 +979,16 @@ DEFINE_RW_EVENT(xfs_file_splice_read);  DEFINE_RW_EVENT(xfs_file_splice_write);  DECLARE_EVENT_CLASS(xfs_page_class, -	TP_PROTO(struct inode *inode, struct page *page, unsigned long off), -	TP_ARGS(inode, page, off), +	TP_PROTO(struct inode *inode, struct page *page, unsigned long off, +		 unsigned int len), +	TP_ARGS(inode, page, off, len),  	TP_STRUCT__entry(  		__field(dev_t, dev)  		__field(xfs_ino_t, ino)  		__field(pgoff_t, pgoff)  		__field(loff_t, size)  		__field(unsigned long, offset) +		__field(unsigned int, length)  		__field(int, delalloc)  		__field(int, unwritten)  	), @@ -995,24 +1002,27 @@ DECLARE_EVENT_CLASS(xfs_page_class,  		__entry->pgoff = page_offset(page);  		__entry->size = i_size_read(inode);  		__entry->offset = off; +		__entry->length = len;  		__entry->delalloc = delalloc;  		__entry->unwritten = unwritten;  	),  	TP_printk("dev %d:%d ino 0x%llx pgoff 0x%lx size 0x%llx offset %lx " -		  "delalloc %d unwritten %d", +		  "length %x delalloc %d unwritten %d",  		  MAJOR(__entry->dev), MINOR(__entry->dev),  		  __entry->ino,  		  __entry->pgoff,  		  
__entry->size,  		  __entry->offset, +		  __entry->length,  		  __entry->delalloc,  		  __entry->unwritten)  )  #define DEFINE_PAGE_EVENT(name)		\  DEFINE_EVENT(xfs_page_class, name,	\ -	TP_PROTO(struct inode *inode, struct page *page, unsigned long off),	\ -	TP_ARGS(inode, page, off)) +	TP_PROTO(struct inode *inode, struct page *page, unsigned long off, \ +		 unsigned int len),	\ +	TP_ARGS(inode, page, off, len))  DEFINE_PAGE_EVENT(xfs_writepage);  DEFINE_PAGE_EVENT(xfs_releasepage);  DEFINE_PAGE_EVENT(xfs_invalidatepage); diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index 2fd7c1ff1d21..35a229981354 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -234,71 +234,93 @@ xfs_calc_remove_reservation(  }  /* - * For symlink we can modify: + * For create, break it in to the two cases that the transaction + * covers. We start with the modify case - allocation done by modification + * of the state of existing inodes - and the allocation case. + */ + +/* + * For create we can modify:   *    the parent directory inode: inode size   *    the new inode: inode size - *    the inode btree entry: 1 block + *    the inode btree entry: block size + *    the superblock for the nlink flag: sector size   *    the directory btree: (max depth + v2) * dir block size   *    the directory inode's bmap btree: (max depth + v2) * block size - *    the blocks for the symlink: 1 kB - * Or in the first xact we allocate some inodes giving: + */ +STATIC uint +xfs_calc_create_resv_modify( +	struct xfs_mount	*mp) +{ +	return xfs_calc_buf_res(2, mp->m_sb.sb_inodesize) + +		xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) + +		(uint)XFS_FSB_TO_B(mp, 1) + +		xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp), XFS_FSB_TO_B(mp, 1)); +} + +/* + * For create we can allocate some inodes giving:   *    the agi and agf of the ag getting the new inodes: 2 * sectorsize + *    the superblock for the nlink flag: sector size   *    the inode blocks allocated: XFS_IALLOC_BLOCKS * blocksize   *    the inode btree: 
max depth * blocksize - *    the allocation btrees: 2 trees * (2 * max depth - 1) * block size + *    the allocation btrees: 2 trees * (max depth - 1) * block size   */  STATIC uint -xfs_calc_symlink_reservation( +xfs_calc_create_resv_alloc( +	struct xfs_mount	*mp) +{ +	return xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) + +		mp->m_sb.sb_sectsize + +		xfs_calc_buf_res(XFS_IALLOC_BLOCKS(mp), XFS_FSB_TO_B(mp, 1)) + +		xfs_calc_buf_res(mp->m_in_maxlevels, XFS_FSB_TO_B(mp, 1)) + +		xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1), +				 XFS_FSB_TO_B(mp, 1)); +} + +STATIC uint +__xfs_calc_create_reservation(  	struct xfs_mount	*mp)  {  	return XFS_DQUOT_LOGRES(mp) + -		MAX((xfs_calc_buf_res(2, mp->m_sb.sb_inodesize) + -		     xfs_calc_buf_res(1, XFS_FSB_TO_B(mp, 1)) + -		     xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp), -				      XFS_FSB_TO_B(mp, 1)) + -		     xfs_calc_buf_res(1, 1024)), -		    (xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) + -		     xfs_calc_buf_res(XFS_IALLOC_BLOCKS(mp), -				      XFS_FSB_TO_B(mp, 1)) + -		     xfs_calc_buf_res(mp->m_in_maxlevels, -				      XFS_FSB_TO_B(mp, 1)) + -		     xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1), -				      XFS_FSB_TO_B(mp, 1)))); +		MAX(xfs_calc_create_resv_alloc(mp), +		    xfs_calc_create_resv_modify(mp));  }  /* - * For create we can modify: - *    the parent directory inode: inode size - *    the new inode: inode size - *    the inode btree entry: block size - *    the superblock for the nlink flag: sector size - *    the directory btree: (max depth + v2) * dir block size - *    the directory inode's bmap btree: (max depth + v2) * block size - * Or in the first xact we allocate some inodes giving: + * For icreate we can allocate some inodes giving:   *    the agi and agf of the ag getting the new inodes: 2 * sectorsize   *    the superblock for the nlink flag: sector size - *    the inode blocks allocated: XFS_IALLOC_BLOCKS * blocksize   *    the inode btree: max depth * blocksize   *    the allocation btrees: 2 
trees * (max depth - 1) * block size   */  STATIC uint -xfs_calc_create_reservation( +xfs_calc_icreate_resv_alloc(  	struct xfs_mount	*mp)  { +	return xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) + +		mp->m_sb.sb_sectsize + +		xfs_calc_buf_res(mp->m_in_maxlevels, XFS_FSB_TO_B(mp, 1)) + +		xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1), +				 XFS_FSB_TO_B(mp, 1)); +} + +STATIC uint +xfs_calc_icreate_reservation(xfs_mount_t *mp) +{  	return XFS_DQUOT_LOGRES(mp) + -		MAX((xfs_calc_buf_res(2, mp->m_sb.sb_inodesize) + -		     xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) + -		     (uint)XFS_FSB_TO_B(mp, 1) + -		     xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp), -				      XFS_FSB_TO_B(mp, 1))), -		    (xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) + -		     mp->m_sb.sb_sectsize + -		     xfs_calc_buf_res(XFS_IALLOC_BLOCKS(mp), -				      XFS_FSB_TO_B(mp, 1)) + -		     xfs_calc_buf_res(mp->m_in_maxlevels, -				      XFS_FSB_TO_B(mp, 1)) + -		     xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1), -				      XFS_FSB_TO_B(mp, 1)))); +		MAX(xfs_calc_icreate_resv_alloc(mp), +		    xfs_calc_create_resv_modify(mp)); +} + +STATIC uint +xfs_calc_create_reservation( +	struct xfs_mount	*mp) +{ +	if (xfs_sb_version_hascrc(&mp->m_sb)) +		return xfs_calc_icreate_reservation(mp); +	return __xfs_calc_create_reservation(mp); +  }  /* @@ -311,6 +333,20 @@ xfs_calc_mkdir_reservation(  	return xfs_calc_create_reservation(mp);  } + +/* + * Making a new symplink is the same as creating a new file, but + * with the added blocks for remote symlink data which can be up to 1kB in + * length (MAXPATHLEN). 
+ */ +STATIC uint +xfs_calc_symlink_reservation( +	struct xfs_mount	*mp) +{ +	return xfs_calc_create_reservation(mp) + +	       xfs_calc_buf_res(1, MAXPATHLEN); +} +  /*   * In freeing an inode we can modify:   *    the inode being freed: inode size diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h index a44dba5b2cdb..2b4946393e30 100644 --- a/fs/xfs/xfs_trans.h +++ b/fs/xfs/xfs_trans.h @@ -48,6 +48,7 @@ typedef struct xfs_trans_header {  #define	XFS_LI_BUF		0x123c	/* v2 bufs, variable sized inode bufs */  #define	XFS_LI_DQUOT		0x123d  #define	XFS_LI_QUOTAOFF		0x123e +#define	XFS_LI_ICREATE		0x123f  #define XFS_LI_TYPE_DESC \  	{ XFS_LI_EFI,		"XFS_LI_EFI" }, \ @@ -107,7 +108,8 @@ typedef struct xfs_trans_header {  #define	XFS_TRANS_SWAPEXT		40  #define	XFS_TRANS_SB_COUNT		41  #define	XFS_TRANS_CHECKPOINT		42 -#define	XFS_TRANS_TYPE_MAX		42 +#define	XFS_TRANS_ICREATE		43 +#define	XFS_TRANS_TYPE_MAX		43  /* new transaction types need to be reflected in xfs_logprint(8) */  #define XFS_TRANS_TYPES \ @@ -210,23 +212,18 @@ struct xfs_log_item_desc {  /*   * Per-extent log reservation for the allocation btree changes   * involved in freeing or allocating an extent. - * 2 trees * (2 blocks/level * max depth - 1) * block size + * 2 trees * (2 blocks/level * max depth - 1)   */ -#define	XFS_ALLOCFREE_LOG_RES(mp,nx) \ -	((nx) * (2 * XFS_FSB_TO_B((mp), 2 * XFS_AG_MAXLEVELS(mp) - 1)))  #define	XFS_ALLOCFREE_LOG_COUNT(mp,nx) \  	((nx) * (2 * (2 * XFS_AG_MAXLEVELS(mp) - 1)))  /*   * Per-directory log reservation for any directory change. - * dir blocks: (1 btree block per level + data block + free block) * dblock size - * bmap btree: (levels + 2) * max depth * block size + * dir blocks: (1 btree block per level + data block + free block) + * bmap btree: (levels + 2) * max depth   * v2 directory blocks can be fragmented below the dirblksize down to the fsb   * size, so account for that in the DAENTER macros.   
*/ -#define	XFS_DIROP_LOG_RES(mp)	\ -	(XFS_FSB_TO_B(mp, XFS_DAENTER_BLOCKS(mp, XFS_DATA_FORK)) + \ -	 (XFS_FSB_TO_B(mp, XFS_DAENTER_BMAPS(mp, XFS_DATA_FORK) + 1)))  #define	XFS_DIROP_LOG_COUNT(mp)	\  	(XFS_DAENTER_BLOCKS(mp, XFS_DATA_FORK) + \  	 XFS_DAENTER_BMAPS(mp, XFS_DATA_FORK) + 1) @@ -503,6 +500,7 @@ void		xfs_trans_bhold_release(xfs_trans_t *, struct xfs_buf *);  void		xfs_trans_binval(xfs_trans_t *, struct xfs_buf *);  void		xfs_trans_inode_buf(xfs_trans_t *, struct xfs_buf *);  void		xfs_trans_stale_inode_buf(xfs_trans_t *, struct xfs_buf *); +void		xfs_trans_ordered_buf(xfs_trans_t *, struct xfs_buf *);  void		xfs_trans_dquot_buf(xfs_trans_t *, struct xfs_buf *, uint);  void		xfs_trans_inode_alloc_buf(xfs_trans_t *, struct xfs_buf *);  void		xfs_trans_ichgtime(struct xfs_trans *, struct xfs_inode *, int); diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c index 73a5fa457e16..aa5a04b844d6 100644 --- a/fs/xfs/xfs_trans_buf.c +++ b/fs/xfs/xfs_trans_buf.c @@ -397,7 +397,6 @@ shutdown_abort:  	return XFS_ERROR(EIO);  } -  /*   * Release the buffer bp which was previously acquired with one of the   * xfs_trans_... buffer allocation routines if the buffer has not @@ -603,8 +602,14 @@ xfs_trans_log_buf(xfs_trans_t	*tp,  	tp->t_flags |= XFS_TRANS_DIRTY;  	bip->bli_item.li_desc->lid_flags |= XFS_LID_DIRTY; -	bip->bli_flags |= XFS_BLI_LOGGED; -	xfs_buf_item_log(bip, first, last); + +	/* +	 * If we have an ordered buffer we are not logging any dirty range but +	 * it still needs to be marked dirty and that it has been logged. +	 */ +	bip->bli_flags |= XFS_BLI_DIRTY | XFS_BLI_LOGGED; +	if (!(bip->bli_flags & XFS_BLI_ORDERED)) +		xfs_buf_item_log(bip, first, last);  } @@ -757,6 +762,29 @@ xfs_trans_inode_alloc_buf(  }  /* + * Mark the buffer as ordered for this transaction. This means + * that the contents of the buffer are not recorded in the transaction + * but it is tracked in the AIL as though it was. 
This allows us + * to record logical changes in transactions rather than the physical + * changes we make to the buffer without changing writeback ordering + * constraints of metadata buffers. + */ +void +xfs_trans_ordered_buf( +	struct xfs_trans	*tp, +	struct xfs_buf		*bp) +{ +	struct xfs_buf_log_item	*bip = bp->b_fspriv; + +	ASSERT(bp->b_transp == tp); +	ASSERT(bip != NULL); +	ASSERT(atomic_read(&bip->bli_refcount) > 0); + +	bip->bli_flags |= XFS_BLI_ORDERED; +	trace_xfs_buf_item_ordered(bip); +} + +/*   * Set the type of the buffer for log recovery so that it can correctly identify   * and hence attach the correct buffer ops to the buffer after replay.   */ diff --git a/fs/xfs/xfs_trans_dquot.c b/fs/xfs/xfs_trans_dquot.c index fec75d023703..61407a847b86 100644 --- a/fs/xfs/xfs_trans_dquot.c +++ b/fs/xfs/xfs_trans_dquot.c @@ -103,8 +103,6 @@ xfs_trans_dup_dqinfo(  		return;  	xfs_trans_alloc_dqinfo(ntp); -	oqa = otp->t_dqinfo->dqa_usrdquots; -	nqa = ntp->t_dqinfo->dqa_usrdquots;  	/*  	 * Because the quota blk reservation is carried forward, @@ -113,7 +111,9 @@ xfs_trans_dup_dqinfo(  	if(otp->t_flags & XFS_TRANS_DQ_DIRTY)  		ntp->t_flags |= XFS_TRANS_DQ_DIRTY; -	for (j = 0; j < 2; j++) { +	for (j = 0; j < XFS_QM_TRANS_DQTYPES; j++) { +		oqa = otp->t_dqinfo->dqs[j]; +		nqa = ntp->t_dqinfo->dqs[j];  		for (i = 0; i < XFS_QM_TRANS_MAXDQS; i++) {  			if (oqa[i].qt_dquot == NULL)  				break; @@ -138,8 +138,6 @@ xfs_trans_dup_dqinfo(  			oq->qt_ino_res = oq->qt_ino_res_used;  		} -		oqa = otp->t_dqinfo->dqa_grpdquots; -		nqa = ntp->t_dqinfo->dqa_grpdquots;  	}  } @@ -157,8 +155,7 @@ xfs_trans_mod_dquot_byino(  	if (!XFS_IS_QUOTA_RUNNING(mp) ||  	    !XFS_IS_QUOTA_ON(mp) || -	    ip->i_ino == mp->m_sb.sb_uquotino || -	    ip->i_ino == mp->m_sb.sb_gquotino) +	    xfs_is_quota_inode(&mp->m_sb, ip->i_ino))  		return;  	if (tp->t_dqinfo == NULL) @@ -166,20 +163,28 @@ xfs_trans_mod_dquot_byino(  	if (XFS_IS_UQUOTA_ON(mp) && ip->i_udquot)  		(void) xfs_trans_mod_dquot(tp, 
ip->i_udquot, field, delta); -	if (XFS_IS_OQUOTA_ON(mp) && ip->i_gdquot) +	if (XFS_IS_GQUOTA_ON(mp) && ip->i_gdquot)  		(void) xfs_trans_mod_dquot(tp, ip->i_gdquot, field, delta); +	if (XFS_IS_PQUOTA_ON(mp) && ip->i_pdquot) +		(void) xfs_trans_mod_dquot(tp, ip->i_pdquot, field, delta);  } -STATIC xfs_dqtrx_t * +STATIC struct xfs_dqtrx *  xfs_trans_get_dqtrx( -	xfs_trans_t	*tp, -	xfs_dquot_t	*dqp) +	struct xfs_trans	*tp, +	struct xfs_dquot	*dqp)  { -	int		i; -	xfs_dqtrx_t	*qa; +	int			i; +	struct xfs_dqtrx	*qa; -	qa = XFS_QM_ISUDQ(dqp) ? -		tp->t_dqinfo->dqa_usrdquots : tp->t_dqinfo->dqa_grpdquots; +	if (XFS_QM_ISUDQ(dqp)) +		qa = tp->t_dqinfo->dqs[XFS_QM_TRANS_USR]; +	else if (XFS_QM_ISGDQ(dqp)) +		qa = tp->t_dqinfo->dqs[XFS_QM_TRANS_GRP]; +	else if (XFS_QM_ISPDQ(dqp)) +		qa = tp->t_dqinfo->dqs[XFS_QM_TRANS_PRJ]; +	else +		return NULL;  	for (i = 0; i < XFS_QM_TRANS_MAXDQS; i++) {  		if (qa[i].qt_dquot == NULL || @@ -292,11 +297,10 @@ xfs_trans_mod_dquot(  /* - * Given an array of dqtrx structures, lock all the dquots associated - * and join them to the transaction, provided they have been modified. - * We know that the highest number of dquots (of one type - usr OR grp), - * involved in a transaction is 2 and that both usr and grp combined - 3. - * So, we don't attempt to make this very generic. + * Given an array of dqtrx structures, lock all the dquots associated and join + * them to the transaction, provided they have been modified.  We know that the + * highest number of dquots of one type - usr, grp OR prj - involved in a + * transaction is 2 so we don't need to make this very generic.   
*/  STATIC void  xfs_trans_dqlockedjoin( @@ -339,12 +343,10 @@ xfs_trans_apply_dquot_deltas(  		return;  	ASSERT(tp->t_dqinfo); -	qa = tp->t_dqinfo->dqa_usrdquots; -	for (j = 0; j < 2; j++) { -		if (qa[0].qt_dquot == NULL) { -			qa = tp->t_dqinfo->dqa_grpdquots; +	for (j = 0; j < XFS_QM_TRANS_DQTYPES; j++) { +		qa = tp->t_dqinfo->dqs[j]; +		if (qa[0].qt_dquot == NULL)  			continue; -		}  		/*  		 * Lock all of the dquots and join them to the transaction. @@ -495,10 +497,6 @@ xfs_trans_apply_dquot_deltas(  			ASSERT(dqp->q_res_rtbcount >=  				be64_to_cpu(dqp->q_core.d_rtbcount));  		} -		/* -		 * Do the group quotas next -		 */ -		qa = tp->t_dqinfo->dqa_grpdquots;  	}  } @@ -521,9 +519,9 @@ xfs_trans_unreserve_and_mod_dquots(  	if (!tp->t_dqinfo || !(tp->t_flags & XFS_TRANS_DQ_DIRTY))  		return; -	qa = tp->t_dqinfo->dqa_usrdquots; +	for (j = 0; j < XFS_QM_TRANS_DQTYPES; j++) { +		qa = tp->t_dqinfo->dqs[j]; -	for (j = 0; j < 2; j++) {  		for (i = 0; i < XFS_QM_TRANS_MAXDQS; i++) {  			qtrx = &qa[i];  			/* @@ -565,7 +563,6 @@ xfs_trans_unreserve_and_mod_dquots(  				xfs_dqunlock(dqp);  		} -		qa = tp->t_dqinfo->dqa_grpdquots;  	}  } @@ -640,8 +637,8 @@ xfs_trans_dqresv(  	if ((flags & XFS_QMOPT_FORCE_RES) == 0 &&  	    dqp->q_core.d_id &&  	    ((XFS_IS_UQUOTA_ENFORCED(dqp->q_mount) && XFS_QM_ISUDQ(dqp)) || -	     (XFS_IS_OQUOTA_ENFORCED(dqp->q_mount) && -	      (XFS_QM_ISPDQ(dqp) || XFS_QM_ISGDQ(dqp))))) { +	     (XFS_IS_GQUOTA_ENFORCED(dqp->q_mount) && XFS_QM_ISGDQ(dqp)) || +	     (XFS_IS_PQUOTA_ENFORCED(dqp->q_mount) && XFS_QM_ISPDQ(dqp)))) {  		if (nblks > 0) {  			/*  			 * dquot is locked already. See if we'd go over the @@ -736,8 +733,8 @@ error_return:  /*   * Given dquot(s), make disk block and/or inode reservations against them. 
- * The fact that this does the reservation against both the usr and - * grp/prj quotas is important, because this follows a both-or-nothing + * The fact that this does the reservation against user, group and + * project quotas is important, because this follows a all-or-nothing   * approach.   *   * flags = XFS_QMOPT_FORCE_RES evades limit enforcement. Used by chown. @@ -748,15 +745,16 @@ error_return:   */  int  xfs_trans_reserve_quota_bydquots( -	xfs_trans_t	*tp, -	xfs_mount_t	*mp, -	xfs_dquot_t	*udqp, -	xfs_dquot_t	*gdqp, -	long		nblks, -	long		ninos, -	uint		flags) +	struct xfs_trans	*tp, +	struct xfs_mount	*mp, +	struct xfs_dquot	*udqp, +	struct xfs_dquot	*gdqp, +	struct xfs_dquot	*pdqp, +	long			nblks, +	long			ninos, +	uint			flags)  { -	int		resvd = 0, error; +	int		error;  	if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))  		return 0; @@ -771,28 +769,34 @@ xfs_trans_reserve_quota_bydquots(  					(flags & ~XFS_QMOPT_ENOSPC));  		if (error)  			return error; -		resvd = 1;  	}  	if (gdqp) {  		error = xfs_trans_dqresv(tp, mp, gdqp, nblks, ninos, flags); -		if (error) { -			/* -			 * can't do it, so backout previous reservation -			 */ -			if (resvd) { -				flags |= XFS_QMOPT_FORCE_RES; -				xfs_trans_dqresv(tp, mp, udqp, -						 -nblks, -ninos, flags); -			} -			return error; -		} +		if (error) +			goto unwind_usr; +	} + +	if (pdqp) { +		error = xfs_trans_dqresv(tp, mp, pdqp, nblks, ninos, flags); +		if (error) +			goto unwind_grp;  	}  	/*  	 * Didn't change anything critical, so, no need to log  	 */  	return 0; + +unwind_grp: +	flags |= XFS_QMOPT_FORCE_RES; +	if (gdqp) +		xfs_trans_dqresv(tp, mp, gdqp, -nblks, -ninos, flags); +unwind_usr: +	flags |= XFS_QMOPT_FORCE_RES; +	if (udqp) +		xfs_trans_dqresv(tp, mp, udqp, -nblks, -ninos, flags); +	return error;  } @@ -816,8 +820,7 @@ xfs_trans_reserve_quota_nblks(  	if (XFS_IS_PQUOTA_ON(mp))  		flags |= XFS_QMOPT_ENOSPC; -	ASSERT(ip->i_ino != mp->m_sb.sb_uquotino); -	ASSERT(ip->i_ino != 
mp->m_sb.sb_gquotino); +	ASSERT(!xfs_is_quota_inode(&mp->m_sb, ip->i_ino));  	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));  	ASSERT((flags & ~(XFS_QMOPT_FORCE_RES | XFS_QMOPT_ENOSPC)) == @@ -830,6 +833,7 @@ xfs_trans_reserve_quota_nblks(  	 */  	return xfs_trans_reserve_quota_bydquots(tp, mp,  						ip->i_udquot, ip->i_gdquot, +						ip->i_pdquot,  						nblks, ninos, flags);  } diff --git a/fs/xfs/xfs_trans_inode.c b/fs/xfs/xfs_trans_inode.c index ac6d567704db..53dfe46f3680 100644 --- a/fs/xfs/xfs_trans_inode.c +++ b/fs/xfs/xfs_trans_inode.c @@ -112,6 +112,17 @@ xfs_trans_log_inode(  	ASSERT(ip->i_itemp != NULL);  	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); +	/* +	 * First time we log the inode in a transaction, bump the inode change +	 * counter if it is configured for this to occur. +	 */ +	if (!(ip->i_itemp->ili_item.li_desc->lid_flags & XFS_LID_DIRTY) && +	    IS_I_VERSION(VFS_I(ip))) { +		inode_inc_iversion(VFS_I(ip)); +		ip->i_d.di_changecount = VFS_I(ip)->i_version; +		flags |= XFS_ILOG_CORE; +	} +  	tp->t_flags |= XFS_TRANS_DIRTY;  	ip->i_itemp->ili_item.li_desc->lid_flags |= XFS_LID_DIRTY; diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index 0176bb21f09a..dc730ac272be 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c @@ -322,18 +322,9 @@ xfs_inactive(  	xfs_trans_ijoin(tp, ip, 0);  	if (S_ISLNK(ip->i_d.di_mode)) { -		/* -		 * Zero length symlinks _can_ exist. 
-		 */ -		if (ip->i_d.di_size > XFS_IFORK_DSIZE(ip)) { -			error = xfs_inactive_symlink_rmt(ip, &tp); -			if (error) -				goto out_cancel; -		} else if (ip->i_df.if_bytes > 0) { -			xfs_idata_realloc(ip, -(ip->i_df.if_bytes), -					  XFS_DATA_FORK); -			ASSERT(ip->i_df.if_bytes == 0); -		} +		error = xfs_inactive_symlink(ip, &tp); +		if (error) +			goto out_cancel;  	} else if (truncate) {  		ip->i_d.di_size = 0;  		xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); @@ -498,6 +489,7 @@ xfs_create(  	prid_t			prid;  	struct xfs_dquot	*udqp = NULL;  	struct xfs_dquot	*gdqp = NULL; +	struct xfs_dquot	*pdqp = NULL;  	uint			resblks;  	uint			log_res;  	uint			log_count; @@ -516,7 +508,8 @@ xfs_create(  	 * Make sure that we have allocated dquot(s) on disk.  	 */  	error = xfs_qm_vop_dqalloc(dp, current_fsuid(), current_fsgid(), prid, -			XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, &udqp, &gdqp); +					XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, +					&udqp, &gdqp, &pdqp);  	if (error)  		return error; @@ -568,7 +561,8 @@ xfs_create(  	/*  	 * Reserve disk quota and the inode.  	 */ -	error = xfs_trans_reserve_quota(tp, mp, udqp, gdqp, resblks, 1, 0); +	error = xfs_trans_reserve_quota(tp, mp, udqp, gdqp, +						pdqp, resblks, 1, 0);  	if (error)  		goto out_trans_cancel; @@ -632,7 +626,7 @@ xfs_create(  	 * These ids of the inode couldn't have changed since the new  	 * inode has been locked ever since it was created.  	 
*/ -	xfs_qm_vop_create_dqattach(tp, ip, udqp, gdqp); +	xfs_qm_vop_create_dqattach(tp, ip, udqp, gdqp, pdqp);  	error = xfs_bmap_finish(&tp, &free_list, &committed);  	if (error) @@ -644,6 +638,7 @@ xfs_create(  	xfs_qm_dqrele(udqp);  	xfs_qm_dqrele(gdqp); +	xfs_qm_dqrele(pdqp);  	*ipp = ip;  	return 0; @@ -665,6 +660,7 @@ xfs_create(  	xfs_qm_dqrele(udqp);  	xfs_qm_dqrele(gdqp); +	xfs_qm_dqrele(pdqp);  	if (unlock_dp_on_error)  		xfs_iunlock(dp, XFS_ILOCK_EXCL); @@ -1577,7 +1573,7 @@ xfs_free_file_space(  		}  		xfs_ilock(ip, XFS_ILOCK_EXCL);  		error = xfs_trans_reserve_quota(tp, mp, -				ip->i_udquot, ip->i_gdquot, +				ip->i_udquot, ip->i_gdquot, ip->i_pdquot,  				resblks, 0, XFS_QMOPT_RES_REGBLKS);  		if (error)  			goto error1; diff --git a/fs/xfs/xfs_vnodeops.h b/fs/xfs/xfs_vnodeops.h index 5163022d9808..38c67c34d73f 100644 --- a/fs/xfs/xfs_vnodeops.h +++ b/fs/xfs/xfs_vnodeops.h @@ -31,8 +31,7 @@ int xfs_remove(struct xfs_inode *dp, struct xfs_name *name,  		struct xfs_inode *ip);  int xfs_link(struct xfs_inode *tdp, struct xfs_inode *sip,  		struct xfs_name *target_name); -int xfs_readdir(struct xfs_inode	*dp, void *dirent, size_t bufsize, -		       xfs_off_t *offset, filldir_t filldir); +int xfs_readdir(struct xfs_inode *dp, struct dir_context *ctx, size_t bufsize);  int xfs_symlink(struct xfs_inode *dp, struct xfs_name *link_name,  		const char *target_path, umode_t mode, struct xfs_inode **ipp);  int xfs_set_dmattrs(struct xfs_inode *ip, u_int evmask, u_int16_t state); | 
