diff options
Diffstat (limited to 'fs/ext4/fast_commit.c')
| -rw-r--r-- | fs/ext4/fast_commit.c | 133 | 
1 files changed, 78 insertions, 55 deletions
diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c index 5ae8026a0c56..7964ee34e322 100644 --- a/fs/ext4/fast_commit.c +++ b/fs/ext4/fast_commit.c @@ -300,18 +300,32 @@ restart:  }  /* - * Mark file system as fast commit ineligible. This means that next commit - * operation would result in a full jbd2 commit. + * Mark file system as fast commit ineligible, and record latest + * ineligible transaction tid. This means until the recorded + * transaction, commit operation would result in a full jbd2 commit.   */ -void ext4_fc_mark_ineligible(struct super_block *sb, int reason) +void ext4_fc_mark_ineligible(struct super_block *sb, int reason, handle_t *handle)  {  	struct ext4_sb_info *sbi = EXT4_SB(sb); +	tid_t tid;  	if (!test_opt2(sb, JOURNAL_FAST_COMMIT) ||  	    (EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY))  		return;  	ext4_set_mount_flag(sb, EXT4_MF_FC_INELIGIBLE); +	if (handle && !IS_ERR(handle)) +		tid = handle->h_transaction->t_tid; +	else { +		read_lock(&sbi->s_journal->j_state_lock); +		tid = sbi->s_journal->j_running_transaction ? +				sbi->s_journal->j_running_transaction->t_tid : 0; +		read_unlock(&sbi->s_journal->j_state_lock); +	} +	spin_lock(&sbi->s_fc_lock); +	if (sbi->s_fc_ineligible_tid < tid) +		sbi->s_fc_ineligible_tid = tid; +	spin_unlock(&sbi->s_fc_lock);  	WARN_ON(reason >= EXT4_FC_REASON_MAX);  	sbi->s_fc_stats.fc_ineligible_reason_count[reason]++;  } @@ -361,7 +375,8 @@ static int ext4_fc_track_template(  	spin_lock(&sbi->s_fc_lock);  	if (list_empty(&EXT4_I(inode)->i_fc_list))  		list_add_tail(&EXT4_I(inode)->i_fc_list, -				(ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_COMMITTING)) ? +				(sbi->s_journal->j_flags & JBD2_FULL_COMMIT_ONGOING || +				 sbi->s_journal->j_flags & JBD2_FAST_COMMIT_ONGOING) ?  				&sbi->s_fc_q[FC_Q_STAGING] :  				&sbi->s_fc_q[FC_Q_MAIN]);  	spin_unlock(&sbi->s_fc_lock); @@ -387,7 +402,7 @@ static int __track_dentry_update(struct inode *inode, void *arg, bool update)  	mutex_unlock(&ei->i_fc_lock);  	node = kmem_cache_alloc(ext4_fc_dentry_cachep, GFP_NOFS);  	if (!node) { -		ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_NOMEM); +		ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_NOMEM, NULL);  		mutex_lock(&ei->i_fc_lock);  		return -ENOMEM;  	} @@ -400,7 +415,7 @@ static int __track_dentry_update(struct inode *inode, void *arg, bool update)  		if (!node->fcd_name.name) {  			kmem_cache_free(ext4_fc_dentry_cachep, node);  			ext4_fc_mark_ineligible(inode->i_sb, -				EXT4_FC_REASON_NOMEM); +				EXT4_FC_REASON_NOMEM, NULL);  			mutex_lock(&ei->i_fc_lock);  			return -ENOMEM;  		} @@ -414,7 +429,8 @@ static int __track_dentry_update(struct inode *inode, void *arg, bool update)  	node->fcd_name.len = dentry->d_name.len;  	spin_lock(&sbi->s_fc_lock); -	if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_COMMITTING)) +	if (sbi->s_journal->j_flags & JBD2_FULL_COMMIT_ONGOING || +		sbi->s_journal->j_flags & JBD2_FAST_COMMIT_ONGOING)  		list_add_tail(&node->fcd_list,  				&sbi->s_fc_dentry_q[FC_Q_STAGING]);  	else @@ -502,7 +518,7 @@ void ext4_fc_track_inode(handle_t *handle, struct inode *inode)  	if (ext4_should_journal_data(inode)) {  		ext4_fc_mark_ineligible(inode->i_sb, -					EXT4_FC_REASON_INODE_JOURNAL_DATA); +					EXT4_FC_REASON_INODE_JOURNAL_DATA, handle);  		return;  	} @@ -879,7 +895,6 @@ static int ext4_fc_submit_inode_data_all(journal_t *journal)  	int ret = 0;  	spin_lock(&sbi->s_fc_lock); -	ext4_set_mount_flag(sb, EXT4_MF_FC_COMMITTING);  	list_for_each_entry(ei, &sbi->s_fc_q[FC_Q_MAIN], i_fc_list) {  		ext4_set_inode_state(&ei->vfs_inode, EXT4_STATE_FC_COMMITTING);  		while (atomic_read(&ei->i_fc_updates)) { @@ -1179,7 +1194,7 @@ fallback:   * Fast commit cleanup routine. This is called after every fast commit and   * full commit. full is true if we are called after a full commit.   */ -static void ext4_fc_cleanup(journal_t *journal, int full) +static void ext4_fc_cleanup(journal_t *journal, int full, tid_t tid)  {  	struct super_block *sb = journal->j_private;  	struct ext4_sb_info *sbi = EXT4_SB(sb); @@ -1197,7 +1212,8 @@ static void ext4_fc_cleanup(journal_t *journal, int full)  		list_del_init(&iter->i_fc_list);  		ext4_clear_inode_state(&iter->vfs_inode,  				       EXT4_STATE_FC_COMMITTING); -		ext4_fc_reset_inode(&iter->vfs_inode); +		if (iter->i_sync_tid <= tid) +			ext4_fc_reset_inode(&iter->vfs_inode);  		/* Make sure EXT4_STATE_FC_COMMITTING bit is clear */  		smp_mb();  #if (BITS_PER_LONG < 64) @@ -1226,8 +1242,10 @@ static void ext4_fc_cleanup(journal_t *journal, int full)  	list_splice_init(&sbi->s_fc_q[FC_Q_STAGING],  				&sbi->s_fc_q[FC_Q_MAIN]); -	ext4_clear_mount_flag(sb, EXT4_MF_FC_COMMITTING); -	ext4_clear_mount_flag(sb, EXT4_MF_FC_INELIGIBLE); +	if (tid >= sbi->s_fc_ineligible_tid) { +		sbi->s_fc_ineligible_tid = 0; +		ext4_clear_mount_flag(sb, EXT4_MF_FC_INELIGIBLE); +	}  	if (full)  		sbi->s_fc_bytes = 0; @@ -1392,14 +1410,15 @@ static int ext4_fc_record_modified_inode(struct super_block *sb, int ino)  		if (state->fc_modified_inodes[i] == ino)  			return 0;  	if (state->fc_modified_inodes_used == state->fc_modified_inodes_size) { -		state->fc_modified_inodes_size += -			EXT4_FC_REPLAY_REALLOC_INCREMENT;  		state->fc_modified_inodes = krealloc( -					state->fc_modified_inodes, sizeof(int) * -					state->fc_modified_inodes_size, -					GFP_KERNEL); +				state->fc_modified_inodes, +				sizeof(int) * (state->fc_modified_inodes_size + +				EXT4_FC_REPLAY_REALLOC_INCREMENT), +				GFP_KERNEL);  		if (!state->fc_modified_inodes)  			return -ENOMEM; +		state->fc_modified_inodes_size += +			EXT4_FC_REPLAY_REALLOC_INCREMENT;  	}  	state->fc_modified_inodes[state->fc_modified_inodes_used++] = ino;  	return 0; @@ -1431,7 +1450,9 @@ static int ext4_fc_replay_inode(struct super_block *sb, struct ext4_fc_tl *tl,  	}  	inode = NULL; -	ext4_fc_record_modified_inode(sb, ino); +	ret = ext4_fc_record_modified_inode(sb, ino); +	if (ret) +		goto out;  	raw_fc_inode = (struct ext4_inode *)  		(val + offsetof(struct ext4_fc_inode, fc_raw_inode)); @@ -1563,16 +1584,23 @@ out:  }  /* - * Record physical disk regions which are in use as per fast commit area. Our - * simple replay phase allocator excludes these regions from allocation. + * Record physical disk regions which are in use as per fast commit area, + * and used by inodes during replay phase. Our simple replay phase + * allocator excludes these regions from allocation.   */ -static int ext4_fc_record_regions(struct super_block *sb, int ino, -		ext4_lblk_t lblk, ext4_fsblk_t pblk, int len) +int ext4_fc_record_regions(struct super_block *sb, int ino, +		ext4_lblk_t lblk, ext4_fsblk_t pblk, int len, int replay)  {  	struct ext4_fc_replay_state *state;  	struct ext4_fc_alloc_region *region;  	state = &EXT4_SB(sb)->s_fc_replay_state; +	/* +	 * during replay phase, the fc_regions_valid may not same as +	 * fc_regions_used, update it when do new additions. +	 */ +	if (replay && state->fc_regions_used != state->fc_regions_valid) +		state->fc_regions_used = state->fc_regions_valid;  	if (state->fc_regions_used == state->fc_regions_size) {  		state->fc_regions_size +=  			EXT4_FC_REPLAY_REALLOC_INCREMENT; @@ -1590,6 +1618,9 @@ static int ext4_fc_record_regions(struct super_block *sb, int ino,  	region->pblk = pblk;  	region->len = len; +	if (replay) +		state->fc_regions_valid++; +  	return 0;  } @@ -1621,6 +1652,8 @@ static int ext4_fc_replay_add_range(struct super_block *sb,  	}  	ret = ext4_fc_record_modified_inode(sb, inode->i_ino); +	if (ret) +		goto out;  	start = le32_to_cpu(ex->ee_block);  	start_pblk = ext4_ext_pblock(ex); @@ -1638,18 +1671,14 @@ static int ext4_fc_replay_add_range(struct super_block *sb,  		map.m_pblk = 0;  		ret = ext4_map_blocks(NULL, inode, &map, 0); -		if (ret < 0) { -			iput(inode); -			return 0; -		} +		if (ret < 0) +			goto out;  		if (ret == 0) {  			/* Range is not mapped */  			path = ext4_find_extent(inode, cur, NULL, 0); -			if (IS_ERR(path)) { -				iput(inode); -				return 0; -			} +			if (IS_ERR(path)) +				goto out;  			memset(&newex, 0, sizeof(newex));  			newex.ee_block = cpu_to_le32(cur);  			ext4_ext_store_pblock( @@ -1663,10 +1692,8 @@ static int ext4_fc_replay_add_range(struct super_block *sb,  			up_write((&EXT4_I(inode)->i_data_sem));  			ext4_ext_drop_refs(path);  			kfree(path); -			if (ret) { -				iput(inode); -				return 0; -			} +			if (ret) +				goto out;  			goto next;  		} @@ -1679,10 +1706,8 @@ static int ext4_fc_replay_add_range(struct super_block *sb,  			ret = ext4_ext_replay_update_ex(inode, cur, map.m_len,  					ext4_ext_is_unwritten(ex),  					start_pblk + cur - start); -			if (ret) { -				iput(inode); -				return 0; -			} +			if (ret) +				goto out;  			/*  			 * Mark the old blocks as free since they aren't used  			 * anymore. We maintain an array of all the modified @@ -1702,10 +1727,8 @@ static int ext4_fc_replay_add_range(struct super_block *sb,  			ext4_ext_is_unwritten(ex), map.m_pblk);  		ret = ext4_ext_replay_update_ex(inode, cur, map.m_len,  					ext4_ext_is_unwritten(ex), map.m_pblk); -		if (ret) { -			iput(inode); -			return 0; -		} +		if (ret) +			goto out;  		/*  		 * We may have split the extent tree while toggling the state.  		 * Try to shrink the extent tree now. @@ -1717,6 +1740,7 @@ next:  	}  	ext4_ext_replay_shrink_inode(inode, i_size_read(inode) >>  					sb->s_blocksize_bits); +out:  	iput(inode);  	return 0;  } @@ -1746,6 +1770,8 @@ ext4_fc_replay_del_range(struct super_block *sb, struct ext4_fc_tl *tl,  	}  	ret = ext4_fc_record_modified_inode(sb, inode->i_ino); +	if (ret) +		goto out;  	jbd_debug(1, "DEL_RANGE, inode %ld, lblk %d, len %d\n",  			inode->i_ino, le32_to_cpu(lrange.fc_lblk), @@ -1755,10 +1781,8 @@ ext4_fc_replay_del_range(struct super_block *sb, struct ext4_fc_tl *tl,  		map.m_len = remaining;  		ret = ext4_map_blocks(NULL, inode, &map, 0); -		if (ret < 0) { -			iput(inode); -			return 0; -		} +		if (ret < 0) +			goto out;  		if (ret > 0) {  			remaining -= ret;  			cur += ret; @@ -1770,18 +1794,17 @@ ext4_fc_replay_del_range(struct super_block *sb, struct ext4_fc_tl *tl,  	}  	down_write(&EXT4_I(inode)->i_data_sem); -	ret = ext4_ext_remove_space(inode, lrange.fc_lblk, -				lrange.fc_lblk + lrange.fc_len - 1); +	ret = ext4_ext_remove_space(inode, le32_to_cpu(lrange.fc_lblk), +				le32_to_cpu(lrange.fc_lblk) + +				le32_to_cpu(lrange.fc_len) - 1);  	up_write(&EXT4_I(inode)->i_data_sem); -	if (ret) { -		iput(inode); -		return 0; -	} +	if (ret) +		goto out;  	ext4_ext_replay_shrink_inode(inode,  		i_size_read(inode) >> sb->s_blocksize_bits);  	ext4_mark_inode_dirty(NULL, inode); +out:  	iput(inode); -  	return 0;  } @@ -1937,7 +1960,7 @@ static int ext4_fc_replay_scan(journal_t *journal,  			ret = ext4_fc_record_regions(sb,  				le32_to_cpu(ext.fc_ino),  				le32_to_cpu(ex->ee_block), ext4_ext_pblock(ex), -				ext4_ext_get_actual_len(ex)); +				ext4_ext_get_actual_len(ex), 0);  			if (ret < 0)  				break;  			ret = JBD2_FC_REPLAY_CONTINUE;  | 
