From 746e2403927efbd7c7f2e796314e3cfb3cfabaa4 Mon Sep 17 00:00:00 2001 From: Yunlei He Date: Tue, 20 Dec 2016 11:11:35 +0800 Subject: f2fs: add a case of no need to read a page in write begin If the range we write cover the whole valid data in the last page, we do not need to read it. Signed-off-by: Yunlei He [Jaegeuk Kim: nullify the remaining area (fix: xfstests/f2fs/001)] Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'fs/f2fs/data.c') diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 9ac262564fa6..2c5df1dc1479 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1715,6 +1715,11 @@ repeat: if (len == PAGE_SIZE || PageUptodate(page)) return 0; + if (!(pos & (PAGE_SIZE - 1)) && (pos + len) >= i_size_read(inode)) { + zero_user_segment(page, len, PAGE_SIZE); + return 0; + } + if (blkaddr == NEW_ADDR) { zero_user_segment(page, 0, PAGE_SIZE); SetPageUptodate(page); @@ -1768,7 +1773,7 @@ static int f2fs_write_end(struct file *file, * let generic_perform_write() try to copy data again through copied=0. */ if (!PageUptodate(page)) { - if (unlikely(copied != PAGE_SIZE)) + if (unlikely(copied != len)) copied = 0; else SetPageUptodate(page); -- cgit v1.2.3-70-g09d2 From 554b5125f5cfca6653461fd52bad24d4ef35ec29 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 21 Dec 2016 12:13:03 -0800 Subject: f2fs: add submit_bio tracepoint This patch adds final submit_bio() tracepoint. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 12 ++++++++---- include/trace/events/f2fs.h | 45 ++++++++++++++++++++++++++++++--------------- 2 files changed, 38 insertions(+), 19 deletions(-) (limited to 'fs/f2fs/data.c') diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 2c5df1dc1479..a06b2d187aec 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -175,6 +175,10 @@ static inline void __submit_bio(struct f2fs_sb_info *sbi, current->plug && (type == DATA || type == NODE)) blk_finish_plug(current->plug); } + if (is_read_io(bio_op(bio))) + trace_f2fs_submit_read_bio(sbi->sb, type, bio); + else + trace_f2fs_submit_write_bio(sbi->sb, type, bio); submit_bio(bio); } @@ -185,12 +189,12 @@ static void __submit_merged_bio(struct f2fs_bio_info *io) if (!io->bio) return; + bio_set_op_attrs(io->bio, fio->op, fio->op_flags); + if (is_read_io(fio->op)) - trace_f2fs_submit_read_bio(io->sbi->sb, fio, io->bio); + trace_f2fs_prepare_read_bio(io->sbi->sb, fio->type, io->bio); else - trace_f2fs_submit_write_bio(io->sbi->sb, fio, io->bio); - - bio_set_op_attrs(io->bio, fio->op, fio->op_flags); + trace_f2fs_prepare_write_bio(io->sbi->sb, fio->type, io->bio); __submit_bio(io->sbi, io->bio, fio->type); io->bio = NULL; diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index 4c942599581b..04c527410ecc 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -784,12 +784,11 @@ DEFINE_EVENT_CONDITION(f2fs__submit_page_bio, f2fs_submit_page_mbio, TP_CONDITION(page->mapping) ); -DECLARE_EVENT_CLASS(f2fs__submit_bio, +DECLARE_EVENT_CLASS(f2fs__bio, - TP_PROTO(struct super_block *sb, struct f2fs_io_info *fio, - struct bio *bio), + TP_PROTO(struct super_block *sb, int type, struct bio *bio), - TP_ARGS(sb, fio, bio), + TP_ARGS(sb, type, bio), TP_STRUCT__entry( __field(dev_t, dev) @@ -802,9 +801,9 @@ DECLARE_EVENT_CLASS(f2fs__submit_bio, TP_fast_assign( __entry->dev = sb->s_dev; - __entry->op = fio->op; - __entry->op_flags = fio->op_flags; - __entry->type = fio->type; + __entry->op = bio_op(bio); + __entry->op_flags = bio->bi_opf; + __entry->type = type; __entry->sector = bio->bi_iter.bi_sector; __entry->size = bio->bi_iter.bi_size; ), @@ -817,22 +816,38 @@ DECLARE_EVENT_CLASS(f2fs__submit_bio, __entry->size) ); -DEFINE_EVENT_CONDITION(f2fs__submit_bio, f2fs_submit_write_bio, +DEFINE_EVENT_CONDITION(f2fs__bio, f2fs_prepare_write_bio, + + TP_PROTO(struct super_block *sb, int type, struct bio *bio), + + TP_ARGS(sb, type, bio), + + TP_CONDITION(bio) +); + +DEFINE_EVENT_CONDITION(f2fs__bio, f2fs_prepare_read_bio, + + TP_PROTO(struct super_block *sb, int type, struct bio *bio), + + TP_ARGS(sb, type, bio), + + TP_CONDITION(bio) +); + +DEFINE_EVENT_CONDITION(f2fs__bio, f2fs_submit_read_bio, - TP_PROTO(struct super_block *sb, struct f2fs_io_info *fio, - struct bio *bio), + TP_PROTO(struct super_block *sb, int type, struct bio *bio), - TP_ARGS(sb, fio, bio), + TP_ARGS(sb, type, bio), TP_CONDITION(bio) ); -DEFINE_EVENT_CONDITION(f2fs__submit_bio, f2fs_submit_read_bio, +DEFINE_EVENT_CONDITION(f2fs__bio, f2fs_submit_write_bio, - TP_PROTO(struct super_block *sb, struct f2fs_io_info *fio, - struct bio *bio), + TP_PROTO(struct super_block *sb, int type, struct bio *bio), - TP_ARGS(sb, fio, bio), + TP_ARGS(sb, type, bio), TP_CONDITION(bio) ); -- cgit v1.2.3-70-g09d2 From 0a595ebaaa6b53a2226d3fee2a2fd616ea5ba378 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 14 Dec 2016 10:12:56 -0800 Subject: f2fs: support IO alignment for DATA and NODE writes This patch implements IO alignment by filling dummy blocks in DATA and NODE write bios. If we can guarantee, for example, 32KB or 64KB for such the IOs, we can eliminate underlying dummy page problem which FTL conducts in order to close MLC or TLC partial written pages. Note that, - it requires "-o mode=lfs". - IO size should be power of 2, not exceed BIO_MAX_PAGES, 256. - read IO is still 4KB. - do checkpoint at fsync, if dummy NODE page was written. Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 55 +++++++++++++++++++++++++++++++++++++++++++++++-- fs/f2fs/f2fs.h | 4 +++- fs/f2fs/segment.c | 9 ++++++-- fs/f2fs/segment.h | 3 +++ fs/f2fs/super.c | 13 +++++++++++- include/linux/f2fs_fs.h | 6 ++++++ 6 files changed, 84 insertions(+), 6 deletions(-) (limited to 'fs/f2fs/data.c') diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index a06b2d187aec..12d235f2c771 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -93,6 +93,17 @@ static void f2fs_write_end_io(struct bio *bio) struct page *page = bvec->bv_page; enum count_type type = WB_DATA_TYPE(page); + if (IS_DUMMY_WRITTEN_PAGE(page)) { + set_page_private(page, (unsigned long)NULL); + ClearPagePrivate(page); + unlock_page(page); + mempool_free(page, sbi->write_io_dummy); + + if (unlikely(bio->bi_error)) + f2fs_stop_checkpoint(sbi, true); + continue; + } + fscrypt_pullback_bio_page(&page, true); if (unlikely(bio->bi_error)) { @@ -171,10 +182,42 @@ static inline void __submit_bio(struct f2fs_sb_info *sbi, struct bio *bio, enum page_type type) { if (!is_read_io(bio_op(bio))) { + unsigned int start; + if (f2fs_sb_mounted_blkzoned(sbi->sb) && current->plug && (type == DATA || type == NODE)) blk_finish_plug(current->plug); + + if (type != DATA && type != NODE) + goto submit_io; + + start = bio->bi_iter.bi_size >> F2FS_BLKSIZE_BITS; + start %= F2FS_IO_SIZE(sbi); + + if (start == 0) + goto submit_io; + + /* fill dummy pages */ + for (; start < F2FS_IO_SIZE(sbi); start++) { + struct page *page = + mempool_alloc(sbi->write_io_dummy, + GFP_NOIO | __GFP_ZERO | __GFP_NOFAIL); + f2fs_bug_on(sbi, !page); + + SetPagePrivate(page); + set_page_private(page, (unsigned long)DUMMY_WRITTEN_PAGE); + lock_page(page); + if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) + f2fs_bug_on(sbi, 1); + } + /* + * In the NODE case, we lose next block address chain. So, we + * need to do checkpoint in f2fs_sync_file. + */ + if (type == NODE) + set_sbi_flag(sbi, SBI_NEED_CP); } +submit_io: if (is_read_io(bio_op(bio))) trace_f2fs_submit_read_bio(sbi->sb, type, bio); else @@ -319,13 +362,14 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio) return 0; } -void f2fs_submit_page_mbio(struct f2fs_io_info *fio) +int f2fs_submit_page_mbio(struct f2fs_io_info *fio) { struct f2fs_sb_info *sbi = fio->sbi; enum page_type btype = PAGE_TYPE_OF_BIO(fio->type); struct f2fs_bio_info *io; bool is_read = is_read_io(fio->op); struct page *bio_page; + int err = 0; io = is_read ? &sbi->read_io : &sbi->write_io[btype]; @@ -346,6 +390,12 @@ void f2fs_submit_page_mbio(struct f2fs_io_info *fio) __submit_merged_bio(io); alloc_new: if (io->bio == NULL) { + if ((fio->type == DATA || fio->type == NODE) && + fio->new_blkaddr & F2FS_IO_SIZE_MASK(sbi)) { + err = -EAGAIN; + dec_page_count(sbi, WB_DATA_TYPE(bio_page)); + goto out_fail; + } io->bio = __bio_alloc(sbi, fio->new_blkaddr, BIO_MAX_PAGES, is_read); io->fio = *fio; @@ -359,9 +409,10 @@ alloc_new: io->last_block_in_bio = fio->new_blkaddr; f2fs_trace_ios(fio, 0); - +out_fail: up_write(&io->io_rwsem); trace_f2fs_submit_page_mbio(fio->page, fio); + return err; } static void __set_data_blkaddr(struct dnode_of_data *dn) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 2da8c3aa0ce5..0d7eaddef4a0 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -792,6 +792,8 @@ struct f2fs_sb_info { struct f2fs_bio_info read_io; /* for read bios */ struct f2fs_bio_info write_io[NR_PAGE_TYPE]; /* for write bios */ struct mutex wio_mutex[NODE + 1]; /* bio ordering for NODE/DATA */ + int write_io_size_bits; /* Write IO size bits */ + mempool_t *write_io_dummy; /* Dummy pages */ /* for checkpoint */ struct f2fs_checkpoint *ckpt; /* raw checkpoint pointer */ @@ -2174,7 +2176,7 @@ void f2fs_submit_merged_bio_cond(struct f2fs_sb_info *, struct inode *, struct page *, nid_t, enum page_type, int); void f2fs_flush_merged_bios(struct f2fs_sb_info *); int f2fs_submit_page_bio(struct f2fs_io_info *); -void f2fs_submit_page_mbio(struct f2fs_io_info *); +int f2fs_submit_page_mbio(struct f2fs_io_info *); struct block_device *f2fs_target_device(struct f2fs_sb_info *, block_t, struct bio *); int f2fs_target_device_index(struct f2fs_sb_info *, block_t); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index b6bb6490a640..2e8d12e9cae9 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1604,15 +1604,20 @@ void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio) { int type = __get_segment_type(fio->page, fio->type); + int err; if (fio->type == NODE || fio->type == DATA) mutex_lock(&fio->sbi->wio_mutex[fio->type]); - +reallocate: allocate_data_block(fio->sbi, fio->page, fio->old_blkaddr, &fio->new_blkaddr, sum, type); /* writeout dirty page into bdev */ - f2fs_submit_page_mbio(fio); + err = f2fs_submit_page_mbio(fio); + if (err == -EAGAIN) { + fio->old_blkaddr = fio->new_blkaddr; + goto reallocate; + } if (fio->type == NODE || fio->type == DATA) mutex_unlock(&fio->sbi->wio_mutex[fio->type]); diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index 9d44ce83acb2..08f1455c812c 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -186,9 +186,12 @@ struct segment_allocation { * the page is atomically written, and it is in inmem_pages list. */ #define ATOMIC_WRITTEN_PAGE ((unsigned long)-1) +#define DUMMY_WRITTEN_PAGE ((unsigned long)-2) #define IS_ATOMIC_WRITTEN_PAGE(page) \ (page_private(page) == (unsigned long)ATOMIC_WRITTEN_PAGE) +#define IS_DUMMY_WRITTEN_PAGE(page) \ + (page_private(page) == (unsigned long)DUMMY_WRITTEN_PAGE) struct inmem_pages { struct list_head list; diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 46fd30d8af77..00fc36e49368 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1763,6 +1763,8 @@ static int f2fs_scan_devices(struct f2fs_sb_info *sbi) FDEV(i).total_segments, FDEV(i).start_blk, FDEV(i).end_blk); } + f2fs_msg(sbi->sb, KERN_INFO, + "IO Block Size: %8d KB", F2FS_IO_SIZE_KB(sbi)); return 0; } @@ -1880,12 +1882,19 @@ try_onemore: if (err) goto free_options; + if (F2FS_IO_SIZE(sbi) > 1) { + sbi->write_io_dummy = + mempool_create_page_pool(F2FS_IO_SIZE(sbi) - 1, 0); + if (!sbi->write_io_dummy) + goto free_options; + } + /* get an inode for meta space */ sbi->meta_inode = f2fs_iget(sb, F2FS_META_INO(sbi)); if (IS_ERR(sbi->meta_inode)) { f2fs_msg(sb, KERN_ERR, "Failed to read F2FS meta data inode"); err = PTR_ERR(sbi->meta_inode); - goto free_options; + goto free_io_dummy; } err = get_valid_checkpoint(sbi); @@ -2103,6 +2112,8 @@ free_devices: free_meta_inode: make_bad_inode(sbi->meta_inode); iput(sbi->meta_inode); +free_io_dummy: + mempool_destroy(sbi->write_io_dummy); free_options: destroy_percpu_info(sbi); kfree(options); diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index cea41a124a80..f0748524ca8c 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -36,6 +36,12 @@ #define F2FS_NODE_INO(sbi) (sbi->node_ino_num) #define F2FS_META_INO(sbi) (sbi->meta_ino_num) +#define F2FS_IO_SIZE(sbi) (1 << (sbi)->write_io_size_bits) /* Blocks */ +#define F2FS_IO_SIZE_KB(sbi) (1 << ((sbi)->write_io_size_bits + 2)) /* KB */ +#define F2FS_IO_SIZE_BYTES(sbi) (1 << ((sbi)->write_io_size_bits + 12)) /* B */ +#define F2FS_IO_SIZE_BITS(sbi) ((sbi)->write_io_size_bits) /* power of 2 */ +#define F2FS_IO_SIZE_MASK(sbi) (F2FS_IO_SIZE(sbi) - 1) + /* This flag is used by node and meta inodes, and by recovery */ #define GFP_F2FS_ZERO (GFP_NOFS | __GFP_ZERO) #define GFP_F2FS_HIGH_ZERO (GFP_NOFS | __GFP_ZERO | __GFP_HIGHMEM) -- cgit v1.2.3-70-g09d2 From 939afa943c5290a3b92f01612a792af17bc98115 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sat, 7 Jan 2017 18:49:42 +0800 Subject: f2fs: clean up with list_{first, last}_entry Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 4 ++-- fs/f2fs/data.c | 4 ++-- fs/f2fs/node.h | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) (limited to 'fs/f2fs/data.c') diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index ccea40763d9d..fb8cdfcaece6 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -891,7 +891,7 @@ retry: F2FS_DIRTY_DENTS : F2FS_DIRTY_DATA)); return 0; } - fi = list_entry(head->next, struct f2fs_inode_info, dirty_list); + fi = list_first_entry(head, struct f2fs_inode_info, dirty_list); inode = igrab(&fi->vfs_inode); spin_unlock(&sbi->inode_lock[type]); if (inode) { @@ -924,7 +924,7 @@ int f2fs_sync_inode_meta(struct f2fs_sb_info *sbi) spin_unlock(&sbi->inode_lock[DIRTY_META]); return 0; } - fi = list_entry(head->next, struct f2fs_inode_info, + fi = list_first_entry(head, struct f2fs_inode_info, gdirty_list); inode = igrab(&fi->vfs_inode); spin_unlock(&sbi->inode_lock[DIRTY_META]); diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 12d235f2c771..ab2008dea921 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1143,7 +1143,7 @@ static int f2fs_mpage_readpages(struct address_space *mapping, prefetchw(&page->flags); if (pages) { - page = list_entry(pages->prev, struct page, lru); + page = list_last_entry(pages, struct page, lru); list_del(&page->lru); if (add_to_page_cache_lru(page, mapping, page->index, @@ -1262,7 +1262,7 @@ static int f2fs_read_data_pages(struct file *file, struct list_head *pages, unsigned nr_pages) { struct inode *inode = file->f_mapping->host; - struct page *page = list_entry(pages->prev, struct page, lru); + struct page *page = list_last_entry(pages, struct page, lru); trace_f2fs_readpages(inode, page, nr_pages); diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index e7997e240366..9278b21ee073 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -174,7 +174,7 @@ static inline void next_free_nid(struct f2fs_sb_info *sbi, nid_t *nid) spin_unlock(&nm_i->nid_list_lock); return; } - fnid = list_entry(nm_i->nid_list[FREE_NID_LIST].next, + fnid = list_first_entry(&nm_i->nid_list[FREE_NID_LIST], struct free_nid, list); *nid = fnid->nid; spin_unlock(&nm_i->nid_list_lock); -- cgit v1.2.3-70-g09d2 From 5fe457430e554a2f5188f13c1a2e36ad845640c5 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sat, 7 Jan 2017 18:50:26 +0800 Subject: f2fs: introduce FI_ATOMIC_COMMIT This patch introduces a new flag to indicate inode status of doing atomic write committing, so that, we can keep atomic write status for inode during atomic committing, then we can skip GCing pages of atomic write inode, that avoids random GCed datas being mixed with current transaction, so isolation of transaction can be kept. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 2 +- fs/f2fs/f2fs.h | 6 ++++++ fs/f2fs/file.c | 11 ++++++----- fs/f2fs/gc.c | 6 ++++++ fs/f2fs/segment.c | 10 +++++++--- 5 files changed, 26 insertions(+), 9 deletions(-) (limited to 'fs/f2fs/data.c') diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index ab2008dea921..848d110dc1ca 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1977,7 +1977,7 @@ static int f2fs_set_data_page_dirty(struct page *page) if (!PageUptodate(page)) SetPageUptodate(page); - if (f2fs_is_atomic_file(inode)) { + if (f2fs_is_atomic_file(inode) && !f2fs_is_commit_atomic_write(inode)) { if (!IS_ATOMIC_WRITTEN_PAGE(page)) { register_inmem_page(inode, page); return 1; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index a4e8e6278a17..94250a69762a 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1639,6 +1639,7 @@ enum { FI_UPDATE_WRITE, /* inode has in-place-update data */ FI_NEED_IPU, /* used for ipu per file */ FI_ATOMIC_FILE, /* indicate atomic file */ + FI_ATOMIC_COMMIT, /* indicate the state of atomical committing */ FI_VOLATILE_FILE, /* indicate volatile file */ FI_FIRST_BLOCK_WRITTEN, /* indicate #0 data block was written */ FI_DROP_CACHE, /* drop dirty page cache */ @@ -1828,6 +1829,11 @@ static inline bool f2fs_is_atomic_file(struct inode *inode) return is_inode_flag_set(inode, FI_ATOMIC_FILE); } +static inline bool f2fs_is_commit_atomic_write(struct inode *inode) +{ + return is_inode_flag_set(inode, FI_ATOMIC_COMMIT); +} + static inline bool f2fs_is_volatile_file(struct inode *inode) { return is_inode_flag_set(inode, FI_VOLATILE_FILE); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 6c335180b9d8..e45522115b1c 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1569,14 +1569,15 @@ static int f2fs_ioc_commit_atomic_write(struct file *filp) goto err_out; if (f2fs_is_atomic_file(inode)) { - clear_inode_flag(inode, FI_ATOMIC_FILE); ret = commit_inmem_pages(inode); - if (ret) { - set_inode_flag(inode, FI_ATOMIC_FILE); + if (ret) goto err_out; - } + ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 0, true); - stat_dec_atomic_write(inode); + if (!ret) { + clear_inode_flag(inode, FI_ATOMIC_FILE); + stat_dec_atomic_write(inode); + } } else { ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 0, true); } diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 88bfc3dff496..88e5e7b10ab6 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -569,6 +569,9 @@ static void move_encrypted_block(struct inode *inode, block_t bidx, if (!check_valid_map(F2FS_I_SB(inode), segno, off)) goto out; + if (f2fs_is_atomic_file(inode)) + goto out; + set_new_dnode(&dn, inode, NULL, NULL, 0); err = get_dnode_of_data(&dn, bidx, LOOKUP_NODE); if (err) @@ -661,6 +664,9 @@ static void move_data_page(struct inode *inode, block_t bidx, int gc_type, if (!check_valid_map(F2FS_I_SB(inode), segno, off)) goto out; + if (f2fs_is_atomic_file(inode)) + goto out; + if (gc_type == BG_GC) { if (PageWriteback(page)) goto out; diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 8b54b1fafa70..02a8d4ee65eb 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -242,12 +242,12 @@ void drop_inmem_pages(struct inode *inode) { struct f2fs_inode_info *fi = F2FS_I(inode); - clear_inode_flag(inode, FI_ATOMIC_FILE); - stat_dec_atomic_write(inode); - mutex_lock(&fi->inmem_lock); __revoke_inmem_pages(inode, &fi->inmem_pages, true, false); mutex_unlock(&fi->inmem_lock); + + clear_inode_flag(inode, FI_ATOMIC_FILE); + stat_dec_atomic_write(inode); } static int __commit_inmem_pages(struct inode *inode, @@ -316,6 +316,8 @@ int commit_inmem_pages(struct inode *inode) f2fs_balance_fs(sbi, true); f2fs_lock_op(sbi); + set_inode_flag(inode, FI_ATOMIC_COMMIT); + mutex_lock(&fi->inmem_lock); err = __commit_inmem_pages(inode, &revoke_list); if (err) { @@ -337,6 +339,8 @@ int commit_inmem_pages(struct inode *inode) } mutex_unlock(&fi->inmem_lock); + clear_inode_flag(inode, FI_ATOMIC_COMMIT); + f2fs_unlock_op(sbi); return err; } -- cgit v1.2.3-70-g09d2 From dc91de78e5e1d44238b5dd2b57d2e8e67cbc00a1 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 13 Jan 2017 13:12:29 -0800 Subject: f2fs: do not preallocate blocks which has wrong buffer Sheng Yong reports needless preallocation if write(small_buffer, large_size) is called. In that case, f2fs preallocates large_size, but vfs returns early due to small_buffer size. Let's detect it before preallocation phase in f2fs. Reported-by: Sheng Yong Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 6 +++++- fs/f2fs/f2fs.h | 1 + fs/f2fs/file.c | 8 +++++++- 3 files changed, 13 insertions(+), 2 deletions(-) (limited to 'fs/f2fs/data.c') diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 848d110dc1ca..2ea80215f26c 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -749,6 +749,9 @@ int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from) struct f2fs_map_blocks map; int err = 0; + if (is_inode_flag_set(inode, FI_NO_PREALLOC)) + return 0; + map.m_lblk = F2FS_BLK_ALIGN(iocb->ki_pos); map.m_len = F2FS_BYTES_TO_BLK(iocb->ki_pos + iov_iter_count(from)); if (map.m_len > map.m_lblk) @@ -1653,7 +1656,8 @@ static int prepare_write_begin(struct f2fs_sb_info *sbi, * we already allocated all the blocks, so we don't need to get * the block addresses when there is no need to fill the page. */ - if (!f2fs_has_inline_data(inode) && len == PAGE_SIZE) + if (!f2fs_has_inline_data(inode) && len == PAGE_SIZE && + !is_inode_flag_set(inode, FI_NO_PREALLOC)) return 0; if (f2fs_has_inline_data(inode) || diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 5d9731036306..0045ef9a9ad2 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1665,6 +1665,7 @@ enum { FI_INLINE_DOTS, /* indicate inline dot dentries */ FI_DO_DEFRAG, /* indicate defragment is running */ FI_DIRTY_FILE, /* indicate regular/symlink has dirty pages */ + FI_NO_PREALLOC, /* indicate skipped preallocated blocks */ }; static inline void __mark_inode_dirty_flag(struct inode *inode, diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index e45522115b1c..9c0f469cde13 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -2258,8 +2259,12 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) inode_lock(inode); ret = generic_write_checks(iocb, from); if (ret > 0) { - int err = f2fs_preallocate_blocks(iocb, from); + int err; + if (iov_iter_fault_in_readable(from, iov_iter_count(from))) + set_inode_flag(inode, FI_NO_PREALLOC); + + err = f2fs_preallocate_blocks(iocb, from); if (err) { inode_unlock(inode); return err; @@ -2267,6 +2272,7 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) blk_start_plug(&plug); ret = __generic_file_write_iter(iocb, from); blk_finish_plug(&plug); + clear_inode_flag(inode, FI_NO_PREALLOC); } inode_unlock(inode); -- cgit v1.2.3-70-g09d2 From b86e33075ed1909d8002745b56ecf73b833db143 Mon Sep 17 00:00:00 2001 From: Wei Fang Date: Sun, 22 Jan 2017 12:21:02 +0800 Subject: f2fs: fix a dead loop in f2fs_fiemap() A dead loop can be triggered in f2fs_fiemap() using the test case as below: ... fd = open(); fallocate(fd, 0, 0, 4294967296); ioctl(fd, FS_IOC_FIEMAP, fiemap_buf); ... It's caused by an overflow in __get_data_block(): ... bh->b_size = map.m_len << inode->i_blkbits; ... map.m_len is an unsigned int, and bh->b_size is a size_t which is 64 bits on 64 bits archtecture, type conversion from an unsigned int to a size_t will result in an overflow. In the above-mentioned case, bh->b_size will be zero, and f2fs_fiemap() will call get_data_block() at block 0 again an again. Fix this by adding a force conversion before left shift. Signed-off-by: Wei Fang Acked-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/f2fs/data.c') diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 2ea80215f26c..3b5f1d14cab3 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -964,7 +964,7 @@ static int __get_data_block(struct inode *inode, sector_t iblock, if (!err) { map_bh(bh, inode->i_sb, map.m_pblk); bh->b_state = (bh->b_state & ~F2FS_MAP_FLAGS) | map.m_flags; - bh->b_size = map.m_len << inode->i_blkbits; + bh->b_size = (u64)map.m_len << inode->i_blkbits; } return err; } -- cgit v1.2.3-70-g09d2 From f566bae8462c4a48b55bbf4e58d5b1e0a8563819 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 3 Feb 2017 17:18:00 -0800 Subject: f2fs: call internal __write_data_page directly This patch introduces __write_data_page to call it by f2fs_write_cache_pages directly.. Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'fs/f2fs/data.c') diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 3b5f1d14cab3..e78286ee3cc7 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1346,7 +1346,7 @@ out_writepage: return err; } -static int f2fs_write_data_page(struct page *page, +static int __write_data_page(struct page *page, struct writeback_control *wbc) { struct inode *inode = page->mapping->host; @@ -1448,6 +1448,12 @@ redirty_out: return err; } +static int f2fs_write_data_page(struct page *page, + struct writeback_control *wbc) +{ + return __write_data_page(page, wbc); +} + /* * This function was copied from write_cche_pages from mm/page-writeback.c. * The major change is making write step of cold data page separately from @@ -1537,7 +1543,7 @@ continue_unlock: if (!clear_page_dirty_for_io(page)) goto continue_unlock; - ret = mapping->a_ops->writepage(page, wbc); + ret = __write_data_page(page, wbc); if (unlikely(ret)) { /* * keep nr_to_write, since vfs uses this to -- cgit v1.2.3-70-g09d2 From d68f735b3bc934a7523a047aa952a577cf6ca171 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 3 Feb 2017 17:44:04 -0800 Subject: f2fs: check io submission more precisely This patch check IO submission more precisely than previous rough check. Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 23 +++++++++++++++++------ fs/f2fs/f2fs.h | 1 + fs/f2fs/node.c | 27 +++++++++++++++++++-------- 3 files changed, 37 insertions(+), 14 deletions(-) (limited to 'fs/f2fs/data.c') diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index e78286ee3cc7..e0b74b9ea388 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -379,6 +379,9 @@ int f2fs_submit_page_mbio(struct f2fs_io_info *fio) bio_page = fio->encrypted_page ? fio->encrypted_page : fio->page; + /* set submitted = 1 as a return value */ + fio->submitted = 1; + if (!is_read) inc_page_count(sbi, WB_DATA_TYPE(bio_page)); @@ -1346,8 +1349,8 @@ out_writepage: return err; } -static int __write_data_page(struct page *page, - struct writeback_control *wbc) +static int __write_data_page(struct page *page, bool *submitted, + struct writeback_control *wbc) { struct inode *inode = page->mapping->host; struct f2fs_sb_info *sbi = F2FS_I_SB(inode); @@ -1365,6 +1368,7 @@ static int __write_data_page(struct page *page, .op_flags = wbc_to_write_flags(wbc), .page = page, .encrypted_page = NULL, + .submitted = false, }; trace_f2fs_writepage(page, DATA); @@ -1430,13 +1434,19 @@ out: if (wbc->for_reclaim) { f2fs_submit_merged_bio_cond(sbi, NULL, page, 0, DATA, WRITE); remove_dirty_inode(inode); + submitted = NULL; } unlock_page(page); f2fs_balance_fs(sbi, need_balance_fs); - if (unlikely(f2fs_cp_error(sbi))) + if (unlikely(f2fs_cp_error(sbi))) { f2fs_submit_merged_bio(sbi, DATA, WRITE); + submitted = NULL; + } + + if (submitted) + *submitted = fio.submitted; return 0; @@ -1451,7 +1461,7 @@ redirty_out: static int f2fs_write_data_page(struct page *page, struct writeback_control *wbc) { - return __write_data_page(page, wbc); + return __write_data_page(page, NULL, wbc); } /* @@ -1510,6 +1520,7 @@ retry: for (i = 0; i < nr_pages; i++) { struct page *page = pvec.pages[i]; + bool submitted = false; if (page->index > end) { done = 1; @@ -1543,7 +1554,7 @@ continue_unlock: if (!clear_page_dirty_for_io(page)) goto continue_unlock; - ret = __write_data_page(page, wbc); + ret = __write_data_page(page, &submitted, wbc); if (unlikely(ret)) { /* * keep nr_to_write, since vfs uses this to @@ -1557,7 +1568,7 @@ continue_unlock: done_index = page->index + 1; done = 1; break; - } else { + } else if (submitted) { nwritten++; } diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 59a32e4b02f4..77ae2a8f15ce 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -728,6 +728,7 @@ struct f2fs_io_info { block_t old_blkaddr; /* old block address before Cow */ struct page *page; /* page to be written */ struct page *encrypted_page; /* encrypted page */ + bool submitted; /* indicate IO submission */ }; #define is_read_io(rw) (rw == READ) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 6a86f398aeac..5bd05e552d19 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1318,7 +1318,7 @@ continue_unlock: return last_page; } -static int __write_node_page(struct page *page, bool atomic, +static int __write_node_page(struct page *page, bool atomic, bool *submitted, struct writeback_control *wbc) { struct f2fs_sb_info *sbi = F2FS_P_SB(page); @@ -1331,6 +1331,7 @@ static int __write_node_page(struct page *page, bool atomic, .op_flags = wbc_to_write_flags(wbc), .page = page, .encrypted_page = NULL, + .submitted = false, }; trace_f2fs_writepage(page, NODE); @@ -1372,13 +1373,19 @@ static int __write_node_page(struct page *page, bool atomic, dec_page_count(sbi, F2FS_DIRTY_NODES); up_read(&sbi->node_write); - if (wbc->for_reclaim) + if (wbc->for_reclaim) { f2fs_submit_merged_bio_cond(sbi, NULL, page, 0, NODE, WRITE); + submitted = NULL; + } unlock_page(page); - if (unlikely(f2fs_cp_error(sbi))) + if (unlikely(f2fs_cp_error(sbi))) { f2fs_submit_merged_bio(sbi, NODE, WRITE); + submitted = NULL; + } + if (submitted) + *submitted = fio.submitted; return 0; @@ -1390,7 +1397,7 @@ redirty_out: static int f2fs_write_node_page(struct page *page, struct writeback_control *wbc) { - return __write_node_page(page, false, wbc); + return __write_node_page(page, false, NULL, wbc); } int fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode, @@ -1424,6 +1431,7 @@ retry: for (i = 0; i < nr_pages; i++) { struct page *page = pvec.pages[i]; + bool submitted = false; if (unlikely(f2fs_cp_error(sbi))) { f2fs_put_page(last_page, 0); @@ -1473,12 +1481,13 @@ continue_unlock: goto continue_unlock; ret = __write_node_page(page, atomic && - page == last_page, wbc); + page == last_page, + &submitted, wbc); if (ret) { unlock_page(page); f2fs_put_page(last_page, 0); break; - } else { + } else if (submitted) { nwritten++; } @@ -1534,6 +1543,7 @@ next_step: for (i = 0; i < nr_pages; i++) { struct page *page = pvec.pages[i]; + bool submitted = false; if (unlikely(f2fs_cp_error(sbi))) { pagevec_release(&pvec); @@ -1587,9 +1597,10 @@ continue_unlock: set_fsync_mark(page, 0); set_dentry_mark(page, 0); - if (NODE_MAPPING(sbi)->a_ops->writepage(page, wbc)) + ret = __write_node_page(page, false, &submitted, wbc); + if (ret) unlock_page(page); - else + else if (submitted) nwritten++; if (--wbc->nr_to_write == 0) -- cgit v1.2.3-70-g09d2 From 942fd3192f83cef54bc0d485937fd5382ac5acd0 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 1 Feb 2017 16:51:22 -0800 Subject: f2fs: check last page index in cached bio to decide submission If the cached bio has the last page's index, then we need to submit it. Otherwise, we don't need to submit it and can wait for further IO merges. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 3 ++- fs/f2fs/data.c | 43 ++++++++++++++++++++++--------------------- fs/f2fs/f2fs.h | 4 ++-- fs/f2fs/node.c | 12 +++++++----- fs/f2fs/segment.c | 13 +++++++------ 5 files changed, 40 insertions(+), 35 deletions(-) (limited to 'fs/f2fs/data.c') diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index fb8cdfcaece6..e6e42a4b1344 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -249,7 +249,8 @@ static int f2fs_write_meta_page(struct page *page, dec_page_count(sbi, F2FS_DIRTY_META); if (wbc->for_reclaim) - f2fs_submit_merged_bio_cond(sbi, NULL, page, 0, META, WRITE); + f2fs_submit_merged_bio_cond(sbi, page->mapping->host, + 0, page->index, META, WRITE); unlock_page(page); diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index e0b74b9ea388..d14cc8be14f1 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -243,8 +243,8 @@ static void __submit_merged_bio(struct f2fs_bio_info *io) io->bio = NULL; } -static bool __has_merged_page(struct f2fs_bio_info *io, struct inode *inode, - struct page *page, nid_t ino) +static bool __has_merged_page(struct f2fs_bio_info *io, + struct inode *inode, nid_t ino, pgoff_t idx) { struct bio_vec *bvec; struct page *target; @@ -253,7 +253,7 @@ static bool __has_merged_page(struct f2fs_bio_info *io, struct inode *inode, if (!io->bio) return false; - if (!inode && !page && !ino) + if (!inode && !ino) return true; bio_for_each_segment_all(bvec, io->bio, i) { @@ -263,10 +263,11 @@ static bool __has_merged_page(struct f2fs_bio_info *io, struct inode *inode, else target = fscrypt_control_page(bvec->bv_page); + if (idx != target->index) + continue; + if (inode && inode == target->mapping->host) return true; - if (page && page == target) - return true; if (ino && ino == ino_of_node(target)) return true; } @@ -275,22 +276,21 @@ static bool __has_merged_page(struct f2fs_bio_info *io, struct inode *inode, } static bool has_merged_page(struct f2fs_sb_info *sbi, struct inode *inode, - struct page *page, nid_t ino, - enum page_type type) + nid_t ino, pgoff_t idx, enum page_type type) { enum page_type btype = PAGE_TYPE_OF_BIO(type); struct f2fs_bio_info *io = &sbi->write_io[btype]; bool ret; down_read(&io->io_rwsem); - ret = __has_merged_page(io, inode, page, ino); + ret = __has_merged_page(io, inode, ino, idx); up_read(&io->io_rwsem); return ret; } static void __f2fs_submit_merged_bio(struct f2fs_sb_info *sbi, - struct inode *inode, struct page *page, - nid_t ino, enum page_type type, int rw) + struct inode *inode, nid_t ino, pgoff_t idx, + enum page_type type, int rw) { enum page_type btype = PAGE_TYPE_OF_BIO(type); struct f2fs_bio_info *io; @@ -299,7 +299,7 @@ static void __f2fs_submit_merged_bio(struct f2fs_sb_info *sbi, down_write(&io->io_rwsem); - if (!__has_merged_page(io, inode, page, ino)) + if (!__has_merged_page(io, inode, ino, idx)) goto out; /* change META to META_FLUSH in the checkpoint procedure */ @@ -318,15 +318,15 @@ out: void f2fs_submit_merged_bio(struct f2fs_sb_info *sbi, enum page_type type, int rw) { - __f2fs_submit_merged_bio(sbi, NULL, NULL, 0, type, rw); + __f2fs_submit_merged_bio(sbi, NULL, 0, 0, type, rw); } void f2fs_submit_merged_bio_cond(struct f2fs_sb_info *sbi, - struct inode *inode, struct page *page, - nid_t ino, enum page_type type, int rw) + struct inode *inode, nid_t ino, pgoff_t idx, + enum page_type type, int rw) { - if (has_merged_page(sbi, inode, page, ino, type)) - __f2fs_submit_merged_bio(sbi, inode, page, ino, type, rw); + if (has_merged_page(sbi, inode, ino, idx, type)) + __f2fs_submit_merged_bio(sbi, inode, ino, idx, type, rw); } void f2fs_flush_merged_bios(struct f2fs_sb_info *sbi) @@ -1432,7 +1432,8 @@ out: ClearPageUptodate(page); if (wbc->for_reclaim) { - f2fs_submit_merged_bio_cond(sbi, NULL, page, 0, DATA, WRITE); + f2fs_submit_merged_bio_cond(sbi, inode, 0, page->index, + DATA, WRITE); remove_dirty_inode(inode); submitted = NULL; } @@ -1480,10 +1481,10 @@ static int f2fs_write_cache_pages(struct address_space *mapping, pgoff_t index; pgoff_t end; /* Inclusive */ pgoff_t done_index; + pgoff_t last_idx = ULONG_MAX; int cycled; int range_whole = 0; int tag; - int nwritten = 0; pagevec_init(&pvec, 0); @@ -1569,7 +1570,7 @@ continue_unlock: done = 1; break; } else if (submitted) { - nwritten++; + last_idx = page->index; } if (--wbc->nr_to_write <= 0 && @@ -1591,9 +1592,9 @@ continue_unlock: if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) mapping->writeback_index = done_index; - if (nwritten) + if (last_idx != ULONG_MAX) f2fs_submit_merged_bio_cond(F2FS_M_SB(mapping), mapping->host, - NULL, 0, DATA, WRITE); + 0, last_idx, DATA, WRITE); return ret; } diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 77ae2a8f15ce..b8070f9e8285 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -2233,8 +2233,8 @@ void destroy_checkpoint_caches(void); void f2fs_submit_merged_bio(struct f2fs_sb_info *sbi, enum page_type type, int rw); void f2fs_submit_merged_bio_cond(struct f2fs_sb_info *sbi, - struct inode *inode, struct page *page, - nid_t ino, enum page_type type, int rw); + struct inode *inode, nid_t ino, pgoff_t idx, + enum page_type type, int rw); void f2fs_flush_merged_bios(struct f2fs_sb_info *sbi); int f2fs_submit_page_bio(struct f2fs_io_info *fio); int f2fs_submit_page_mbio(struct f2fs_io_info *fio); diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 5bd05e552d19..8203a2f8b350 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1374,7 +1374,8 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted, up_read(&sbi->node_write); if (wbc->for_reclaim) { - f2fs_submit_merged_bio_cond(sbi, NULL, page, 0, NODE, WRITE); + f2fs_submit_merged_bio_cond(sbi, page->mapping->host, 0, + page->index, NODE, WRITE); submitted = NULL; } @@ -1404,12 +1405,12 @@ int fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode, struct writeback_control *wbc, bool atomic) { pgoff_t index, end; + pgoff_t last_idx = ULONG_MAX; struct pagevec pvec; int ret = 0; struct page *last_page = NULL; bool marked = false; nid_t ino = inode->i_ino; - int nwritten = 0; if (atomic) { last_page = last_fsync_dnode(sbi, ino); @@ -1488,7 +1489,7 @@ continue_unlock: f2fs_put_page(last_page, 0); break; } else if (submitted) { - nwritten++; + last_idx = page->index; } if (page == last_page) { @@ -1514,8 +1515,9 @@ continue_unlock: goto retry; } out: - if (nwritten) - f2fs_submit_merged_bio_cond(sbi, NULL, NULL, ino, NODE, WRITE); + if (last_idx != ULONG_MAX) + f2fs_submit_merged_bio_cond(sbi, NULL, ino, last_idx, + NODE, WRITE); return ret ? -EIO: 0; } diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 2f6d7370904c..b2e0769a09d0 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -263,7 +263,7 @@ static int __commit_inmem_pages(struct inode *inode, .op_flags = REQ_SYNC | REQ_PRIO, .encrypted_page = NULL, }; - bool submit_bio = false; + pgoff_t last_idx = ULONG_MAX; int err = 0; list_for_each_entry_safe(cur, tmp, &fi->inmem_pages, list) { @@ -289,15 +289,15 @@ static int __commit_inmem_pages(struct inode *inode, /* record old blkaddr for revoking */ cur->old_addr = fio.old_blkaddr; - - submit_bio = true; + last_idx = page->index; } unlock_page(page); list_move_tail(&cur->list, revoke_list); } - if (submit_bio) - f2fs_submit_merged_bio_cond(sbi, inode, NULL, 0, DATA, WRITE); + if (last_idx != ULONG_MAX) + f2fs_submit_merged_bio_cond(sbi, inode, 0, last_idx, + DATA, WRITE); if (!err) __revoke_inmem_pages(inode, revoke_list, false, false); @@ -1932,7 +1932,8 @@ void f2fs_wait_on_page_writeback(struct page *page, if (PageWriteback(page)) { struct f2fs_sb_info *sbi = F2FS_P_SB(page); - f2fs_submit_merged_bio_cond(sbi, NULL, page, 0, type, WRITE); + f2fs_submit_merged_bio_cond(sbi, page->mapping->host, + 0, page->index, type, WRITE); if (ordered) wait_on_page_writeback(page); else -- cgit v1.2.3-70-g09d2 From 7f54f51f46f69f3ccde2b65e682a51b0aa6199c3 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 6 Feb 2017 13:57:58 -0800 Subject: f2fs: remove preflush for nobarrier case This patch removes REQ_PREFLUSH in the nobarrier case. Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/f2fs/data.c') diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index d14cc8be14f1..8c61fa7fd27d 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -306,9 +306,9 @@ static void __f2fs_submit_merged_bio(struct f2fs_sb_info *sbi, if (type >= META_FLUSH) { io->fio.type = META_FLUSH; io->fio.op = REQ_OP_WRITE; - io->fio.op_flags = REQ_PREFLUSH | REQ_META | REQ_PRIO; + io->fio.op_flags = REQ_META | REQ_PRIO; if (!test_opt(sbi, NOBARRIER)) - io->fio.op_flags |= REQ_FUA; + io->fio.op_flags |= REQ_PREFLUSH | REQ_FUA; } __submit_merged_bio(io); out: -- cgit v1.2.3-70-g09d2 From 86d54795c94532075d862aa0a79f0c981dab4bdd Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 17 Feb 2017 09:55:55 -0800 Subject: f2fs: do not wait for writeback in write_begin Otherwise we can get livelock like below. [79880.428136] dbench D 0 18405 18404 0x00000000 [79880.428139] Call Trace: [79880.428142] __schedule+0x219/0x6b0 [79880.428144] schedule+0x36/0x80 [79880.428147] schedule_timeout+0x243/0x2e0 [79880.428152] ? update_sd_lb_stats+0x16b/0x5f0 [79880.428155] ? ktime_get+0x3c/0xb0 [79880.428157] io_schedule_timeout+0xa6/0x110 [79880.428161] __lock_page+0xf7/0x130 [79880.428164] ? unlock_page+0x30/0x30 [79880.428167] pagecache_get_page+0x16b/0x250 [79880.428171] grab_cache_page_write_begin+0x20/0x40 [79880.428182] f2fs_write_begin+0xa2/0xdb0 [f2fs] [79880.428192] ? f2fs_mark_inode_dirty_sync+0x16/0x30 [f2fs] [79880.428197] ? kmem_cache_free+0x79/0x200 [79880.428203] ? __mark_inode_dirty+0x17f/0x360 [79880.428206] generic_perform_write+0xbb/0x190 [79880.428213] ? file_update_time+0xa4/0xf0 [79880.428217] __generic_file_write_iter+0x19b/0x1e0 [79880.428226] f2fs_file_write_iter+0x9c/0x180 [f2fs] [79880.428231] __vfs_write+0xc5/0x140 [79880.428235] vfs_write+0xb2/0x1b0 [79880.428238] SyS_write+0x46/0xa0 [79880.428242] entry_SYSCALL_64_fastpath+0x1e/0xad Fixes: cae96a5c8ab6 ("f2fs: check io submission more precisely") Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'fs/f2fs/data.c') diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 8c61fa7fd27d..5f3bc98a387d 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1759,7 +1759,12 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping, goto fail; } repeat: - page = grab_cache_page_write_begin(mapping, index, flags); + /* + * Do not use grab_cache_page_write_begin() to avoid deadlock due to + * wait_for_stable_page. Will wait that below with our IO control. + */ + page = pagecache_get_page(mapping, index, + FGP_LOCK | FGP_WRITE | FGP_CREAT, GFP_NOFS); if (!page) { err = -ENOMEM; goto fail; -- cgit v1.2.3-70-g09d2 From e15882b6c6caff427fe387e878e2f23de58c053b Mon Sep 17 00:00:00 2001 From: Hou Pengyang Date: Thu, 23 Feb 2017 09:18:05 +0000 Subject: f2fs: init local extent_info to avoid stale stack info in tp To avoid such stale(fops, blk, len) info in f2fs_lookup_extent_tree_end tp dio-23095 [005] ...1 17878.856859: f2fs_lookup_extent_tree_end: dev = (259,30), ino = 856, pgofs = 0, ext_info(fofs: 3441207040, blk: 4294967232, len: 3481143808) Signed-off-by: Hou Pengyang Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 8 ++++---- fs/f2fs/file.c | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'fs/f2fs/data.c') diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 5f3bc98a387d..f72493d8c8e4 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -511,7 +511,7 @@ int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index) int f2fs_get_block(struct dnode_of_data *dn, pgoff_t index) { - struct extent_info ei; + struct extent_info ei = {0,0,0}; struct inode *inode = dn->inode; if (f2fs_lookup_extent_cache(inode, index, &ei)) { @@ -528,7 +528,7 @@ struct page *get_read_data_page(struct inode *inode, pgoff_t index, struct address_space *mapping = inode->i_mapping; struct dnode_of_data dn; struct page *page; - struct extent_info ei; + struct extent_info ei = {0,0,0}; int err; struct f2fs_io_info fio = { .sbi = F2FS_I_SB(inode), @@ -803,7 +803,7 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, int err = 0, ofs = 1; unsigned int ofs_in_node, last_ofs_in_node; blkcnt_t prealloc; - struct extent_info ei; + struct extent_info ei = {0,0,0}; block_t blkaddr; if (!maxblocks) @@ -1667,7 +1667,7 @@ static int prepare_write_begin(struct f2fs_sb_info *sbi, struct dnode_of_data dn; struct page *ipage; bool locked = false; - struct extent_info ei; + struct extent_info ei = {0,0,0}; int err = 0; /* diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 120164815030..36c156557bb1 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1877,7 +1877,7 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi, { struct inode *inode = file_inode(filp); struct f2fs_map_blocks map = { .m_next_pgofs = NULL }; - struct extent_info ei; + struct extent_info ei = {0,0,0}; pgoff_t pg_start, pg_end; unsigned int blk_per_seg = sbi->blocks_per_seg; unsigned int total = 0, sec_num; -- cgit v1.2.3-70-g09d2 From 3f2be04304cf10f2ef074399f8dd565bd00ddcae Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Thu, 23 Feb 2017 19:55:05 +0800 Subject: f2fs: avoid m_flags overlay when allocating more data blocks When more than one data blocks are allocated, the F2FS_MAP_UNWRITTEN/MAPPED flags will be overlapped by F2FS_MAP_NEW at the later times. Signed-off-by: Kinglong Mee Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/f2fs/data.c') diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index f72493d8c8e4..80f9863dc4b0 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -867,7 +867,7 @@ next_block: } if (err) goto sync_out; - map->m_flags = F2FS_MAP_NEW; + map->m_flags |= F2FS_MAP_NEW; blkaddr = dn.data_blkaddr; } else { if (flag == F2FS_GET_BLOCK_BMAP) { -- cgit v1.2.3-70-g09d2 From dd7b2333e6cd31584682382fcf0a1c1e5140b936 Mon Sep 17 00:00:00 2001 From: Yunlei He Date: Thu, 23 Feb 2017 20:31:20 +0800 Subject: f2fs: no need lock_op in f2fs_write_inline_data Similar as f2fs_write_inode, f2fs_write_inline_data just mark inode page dirty, so it's no need to write inline data under read lock of cp_rwsem. Signed-off-by: Yunlei He Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'fs/f2fs/data.c') diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 80f9863dc4b0..9e51c5e40ce1 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1414,9 +1414,12 @@ write: goto redirty_out; err = -EAGAIN; - f2fs_lock_op(sbi); - if (f2fs_has_inline_data(inode)) + if (f2fs_has_inline_data(inode)) { err = f2fs_write_inline_data(inode, page); + if (!err) + goto out; + } + f2fs_lock_op(sbi); if (err == -EAGAIN) err = do_write_data_page(&fio); if (F2FS_I(inode)->last_disk_size < psize) -- cgit v1.2.3-70-g09d2 From 55523519bc7227e651fd4febeb3aafdd22b8af1c Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sat, 25 Feb 2017 11:08:28 +0800 Subject: f2fs: show simple call stack in fault injection message Previously kernel message can show that in which function we do the injection, but unfortunately, most of the caller are the same, for tracking more information of injection path, it needs to show upper caller's name. This patch supports that ability. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 1 + fs/f2fs/data.c | 4 +++- fs/f2fs/dir.c | 4 +++- fs/f2fs/f2fs.h | 20 +++++++++++++------- fs/f2fs/gc.c | 4 +++- fs/f2fs/inode.c | 4 +++- fs/f2fs/node.c | 4 +++- fs/f2fs/segment.c | 4 +++- 8 files changed, 32 insertions(+), 13 deletions(-) (limited to 'fs/f2fs/data.c') diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index cd7132121573..04d7c244c0f0 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -494,6 +494,7 @@ int acquire_orphan_inode(struct f2fs_sb_info *sbi) #ifdef CONFIG_F2FS_FAULT_INJECTION if (time_to_inject(sbi, FAULT_ORPHAN)) { spin_unlock(&im->ino_lock); + f2fs_show_injection_info(FAULT_ORPHAN); return -ENOSPC; } #endif diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 9e51c5e40ce1..b0a2e3faabb2 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -55,8 +55,10 @@ static void f2fs_read_end_io(struct bio *bio) int i; #ifdef CONFIG_F2FS_FAULT_INJECTION - if (time_to_inject(F2FS_P_SB(bio->bi_io_vec->bv_page), FAULT_IO)) + if (time_to_inject(F2FS_P_SB(bio->bi_io_vec->bv_page), FAULT_IO)) { + f2fs_show_injection_info(FAULT_IO); bio->bi_error = -EIO; + } #endif if (f2fs_bio_encrypted(bio)) { diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 54aa30ee028f..295a223ae11e 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -549,8 +549,10 @@ int f2fs_add_regular_entry(struct inode *dir, const struct qstr *new_name, start: #ifdef CONFIG_F2FS_FAULT_INJECTION - if (time_to_inject(F2FS_I_SB(dir), FAULT_DIR_DEPTH)) + if (time_to_inject(F2FS_I_SB(dir), FAULT_DIR_DEPTH)) { + f2fs_show_injection_info(FAULT_DIR_DEPTH); return -ENOSPC; + } #endif if (unlikely(current_depth == MAX_DIR_HASH_DEPTH)) return -ENOSPC; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index d1156cdd211e..8c0916ae0bea 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -952,6 +952,10 @@ struct f2fs_sb_info { }; #ifdef CONFIG_F2FS_FAULT_INJECTION +#define f2fs_show_injection_info(type) \ + printk("%sF2FS-fs : inject %s in %s of %pF\n", \ + KERN_INFO, fault_name[type], \ + __func__, __builtin_return_address(0)) static inline bool time_to_inject(struct f2fs_sb_info *sbi, int type) { struct f2fs_fault_info *ffi = &sbi->fault_info; @@ -965,10 +969,6 @@ static inline bool time_to_inject(struct f2fs_sb_info *sbi, int type) atomic_inc(&ffi->inject_ops); if (atomic_read(&ffi->inject_ops) >= ffi->inject_rate) { atomic_set(&ffi->inject_ops, 0); - printk("%sF2FS-fs : inject %s in %pF\n", - KERN_INFO, - fault_name[type], - __builtin_return_address(0)); return true; } return false; @@ -1277,8 +1277,10 @@ static inline bool inc_valid_block_count(struct f2fs_sb_info *sbi, blkcnt_t diff; #ifdef CONFIG_F2FS_FAULT_INJECTION - if (time_to_inject(sbi, FAULT_BLOCK)) + if (time_to_inject(sbi, FAULT_BLOCK)) { + f2fs_show_injection_info(FAULT_BLOCK); return false; + } #endif /* * let's increase this in prior to actual block count change in order @@ -1518,8 +1520,10 @@ static inline struct page *f2fs_grab_cache_page(struct address_space *mapping, if (page) return page; - if (time_to_inject(F2FS_M_SB(mapping), FAULT_PAGE_ALLOC)) + if (time_to_inject(F2FS_M_SB(mapping), FAULT_PAGE_ALLOC)) { + f2fs_show_injection_info(FAULT_PAGE_ALLOC); return NULL; + } #endif if (!for_write) return grab_cache_page(mapping, index); @@ -1995,8 +1999,10 @@ static inline void *f2fs_kmalloc(struct f2fs_sb_info *sbi, size_t size, gfp_t flags) { #ifdef CONFIG_F2FS_FAULT_INJECTION - if (time_to_inject(sbi, FAULT_KMALLOC)) + if (time_to_inject(sbi, FAULT_KMALLOC)) { + f2fs_show_injection_info(FAULT_KMALLOC); return NULL; + } #endif return kmalloc(size, flags); } diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 6c996e39b59a..8be5144da8e6 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -48,8 +48,10 @@ static int gc_thread_func(void *data) } #ifdef CONFIG_F2FS_FAULT_INJECTION - if (time_to_inject(sbi, FAULT_CHECKPOINT)) + if (time_to_inject(sbi, FAULT_CHECKPOINT)) { + f2fs_show_injection_info(FAULT_CHECKPOINT); f2fs_stop_checkpoint(sbi, false); + } #endif /* diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index af06bda51a54..24bb8213d974 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -373,8 +373,10 @@ void f2fs_evict_inode(struct inode *inode) goto no_delete; #ifdef CONFIG_F2FS_FAULT_INJECTION - if (time_to_inject(sbi, FAULT_EVICT_INODE)) + if (time_to_inject(sbi, FAULT_EVICT_INODE)) { + f2fs_show_injection_info(FAULT_EVICT_INODE); goto no_delete; + } #endif remove_ino_entry(sbi, inode->i_ino, APPEND_INO); diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 43d35ec11851..24ea49f98891 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1987,8 +1987,10 @@ bool alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid) struct free_nid *i = NULL; retry: #ifdef CONFIG_F2FS_FAULT_INJECTION - if (time_to_inject(sbi, FAULT_ALLOC_NID)) + if (time_to_inject(sbi, FAULT_ALLOC_NID)) { + f2fs_show_injection_info(FAULT_ALLOC_NID); return false; + } #endif spin_lock(&nm_i->nid_list_lock); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 9eb6d89bf9e2..1bab09097590 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -352,8 +352,10 @@ int commit_inmem_pages(struct inode *inode) void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need) { #ifdef CONFIG_F2FS_FAULT_INJECTION - if (time_to_inject(sbi, FAULT_CHECKPOINT)) + if (time_to_inject(sbi, FAULT_CHECKPOINT)) { + f2fs_show_injection_info(FAULT_CHECKPOINT); f2fs_stop_checkpoint(sbi, false); + } #endif if (!need) -- cgit v1.2.3-70-g09d2 From 540faedb0036aca14ff90a7f679b008442bc9dd7 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 27 Feb 2017 17:10:45 +0800 Subject: f2fs: fix to update F2FS_{CP_}WB_DATA count correctly We should only account F2FS_{CP_}WB_DATA IOs for write path, fix it. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs/f2fs/data.c') diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index b0a2e3faabb2..1375fef11146 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -398,7 +398,8 @@ alloc_new: if ((fio->type == DATA || fio->type == NODE) && fio->new_blkaddr & F2FS_IO_SIZE_MASK(sbi)) { err = -EAGAIN; - dec_page_count(sbi, WB_DATA_TYPE(bio_page)); + if (!is_read) + dec_page_count(sbi, WB_DATA_TYPE(bio_page)); goto out_fail; } io->bio = __bio_alloc(sbi, fio->new_blkaddr, -- cgit v1.2.3-70-g09d2