diff options
author | Andreas Gruenbacher <agruenba@redhat.com> | 2018-07-24 20:02:40 +0200 |
---|---|---|
committer | Andreas Gruenbacher <agruenba@redhat.com> | 2018-07-24 20:02:40 +0200 |
commit | a3479c7fc096a1a7a2dccbfbdc6fcf86b805711a (patch) | |
tree | 85b0044b207d05b0cf5f118f2160a8de996073ce /fs/gfs2/bmap.c | |
parent | 109dbb1e6f27fb8f80ee61953485c7c3b1717951 (diff) | |
parent | 025d0e7f73c6a9cc3ca2fe7de821792a8f3269bf (diff) |
Merge branch 'iomap-write' into linux-gfs2/for-next
Pull in the gfs2 iomap-write changes: Tweak the existing code to
properly support iomap write and eliminate an unnecessary special case
in gfs2_block_map. Implement iomap write support for buffered and
direct I/O. Simplify some of the existing code and eliminate code that
is no longer used:
gfs2: Remove gfs2_write_{begin,end}
gfs2: iomap direct I/O support
gfs2: gfs2_extent_length cleanup
gfs2: iomap buffered write support
gfs2: Further iomap cleanups
This is based on the following changes on the xfs 'iomap-4.19-merge'
branch:
iomap: add private pointer to struct iomap
iomap: add a page_done callback
iomap: generic inline data handling
iomap: complete partial direct I/O writes synchronously
iomap: mark newly allocated buffer heads as new
fs: factor out a __generic_write_end helper
Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
Diffstat (limited to 'fs/gfs2/bmap.c')
-rw-r--r-- | fs/gfs2/bmap.c | 398 |
1 files changed, 326 insertions, 72 deletions
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index c7287afeeef5..49a6ab919bd7 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -28,6 +28,7 @@ #include "trans.h" #include "dir.h" #include "util.h" +#include "aops.h" #include "trace_gfs2.h" /* This doesn't need to be that large as max 64 bit pointers in a 4k @@ -41,6 +42,8 @@ struct metapath { int mp_aheight; /* actual height (lookup height) */ }; +static int punch_hole(struct gfs2_inode *ip, u64 offset, u64 length); + /** * gfs2_unstuffer_page - unstuff a stuffed inode into a block cached by a page * @ip: the inode @@ -389,7 +392,7 @@ static int fillup_metapath(struct gfs2_inode *ip, struct metapath *mp, int h) return mp->mp_aheight - x - 1; } -static inline void release_metapath(struct metapath *mp) +static void release_metapath(struct metapath *mp) { int i; @@ -397,27 +400,23 @@ static inline void release_metapath(struct metapath *mp) if (mp->mp_bh[i] == NULL) break; brelse(mp->mp_bh[i]); + mp->mp_bh[i] = NULL; } } /** * gfs2_extent_length - Returns length of an extent of blocks - * @start: Start of the buffer - * @len: Length of the buffer in bytes - * @ptr: Current position in the buffer - * @limit: Max extent length to return (0 = unlimited) + * @bh: The metadata block + * @ptr: Current position in @bh + * @limit: Max extent length to return * @eob: Set to 1 if we hit "end of block" * - * If the first block is zero (unallocated) it will return the number of - * unallocated blocks in the extent, otherwise it will return the number - * of contiguous blocks in the extent. - * * Returns: The length of the extent (minimum of one block) */ -static inline unsigned int gfs2_extent_length(void *start, unsigned int len, __be64 *ptr, size_t limit, int *eob) +static inline unsigned int gfs2_extent_length(struct buffer_head *bh, __be64 *ptr, size_t limit, int *eob) { - const __be64 *end = (start + len); + const __be64 *end = (__be64 *)(bh->b_data + bh->b_size); const __be64 *first = ptr; u64 d = be64_to_cpu(*ptr); @@ -426,14 +425,11 @@ static inline unsigned int gfs2_extent_length(void *start, unsigned int len, __b ptr++; if (ptr >= end) break; - if (limit && --limit == 0) - break; - if (d) - d++; + d++; } while(be64_to_cpu(*ptr) == d); if (ptr >= end) *eob = 1; - return (ptr - first); + return ptr - first; } typedef const __be64 *(*gfs2_metadata_walker)( @@ -609,11 +605,13 @@ enum alloc_state { * ii) Indirect blocks to fill in lower part of the metadata tree * iii) Data blocks * - * The function is in two parts. The first part works out the total - * number of blocks which we need. The second part does the actual - * allocation asking for an extent at a time (if enough contiguous free - * blocks are available, there will only be one request per bmap call) - * and uses the state machine to initialise the blocks in order. + * This function is called after gfs2_iomap_get, which works out the + * total number of blocks which we need via gfs2_alloc_size. + * + * We then do the actual allocation asking for an extent at a time (if + * enough contiguous free blocks are available, there will only be one + * allocation request per call) and uses the state machine to initialise + * the blocks in order. * * Right now, this function will allocate at most one indirect block * worth of data -- with a default block size of 4K, that's slightly @@ -633,39 +631,26 @@ static int gfs2_iomap_alloc(struct inode *inode, struct iomap *iomap, struct buffer_head *dibh = mp->mp_bh[0]; u64 bn; unsigned n, i, blks, alloced = 0, iblks = 0, branch_start = 0; - unsigned dblks = 0; - unsigned ptrs_per_blk; + size_t dblks = iomap->length >> inode->i_blkbits; const unsigned end_of_metadata = mp->mp_fheight - 1; int ret; enum alloc_state state; __be64 *ptr; __be64 zero_bn = 0; - size_t maxlen = iomap->length >> inode->i_blkbits; BUG_ON(mp->mp_aheight < 1); BUG_ON(dibh == NULL); + BUG_ON(dblks < 1); gfs2_trans_add_meta(ip->i_gl, dibh); down_write(&ip->i_rw_mutex); if (mp->mp_fheight == mp->mp_aheight) { - struct buffer_head *bh; - int eob; - - /* Bottom indirect block exists, find unalloced extent size */ - ptr = metapointer(end_of_metadata, mp); - bh = mp->mp_bh[end_of_metadata]; - dblks = gfs2_extent_length(bh->b_data, bh->b_size, ptr, - maxlen, &eob); - BUG_ON(dblks < 1); + /* Bottom indirect block exists */ state = ALLOC_DATA; } else { /* Need to allocate indirect blocks */ - ptrs_per_blk = mp->mp_fheight > 1 ? sdp->sd_inptrs : - sdp->sd_diptrs; - dblks = min(maxlen, (size_t)(ptrs_per_blk - - mp->mp_list[end_of_metadata])); if (mp->mp_fheight == ip->i_height) { /* Writing into existing tree, extend tree down */ iblks = mp->mp_fheight - mp->mp_aheight; @@ -750,6 +735,7 @@ static int gfs2_iomap_alloc(struct inode *inode, struct iomap *iomap, } } while (iomap->addr == IOMAP_NULL_ADDR); + iomap->type = IOMAP_MAPPED; iomap->length = (u64)dblks << inode->i_blkbits; ip->i_height = mp->mp_fheight; gfs2_add_inode_blocks(&ip->i_inode, alloced); @@ -759,18 +745,51 @@ out: return ret; } -static void gfs2_stuffed_iomap(struct inode *inode, struct iomap *iomap) +#define IOMAP_F_GFS2_BOUNDARY IOMAP_F_PRIVATE + +/** + * gfs2_alloc_size - Compute the maximum allocation size + * @inode: The inode + * @mp: The metapath + * @size: Requested size in blocks + * + * Compute the maximum size of the next allocation at @mp. + * + * Returns: size in blocks + */ +static u64 gfs2_alloc_size(struct inode *inode, struct metapath *mp, u64 size) { struct gfs2_inode *ip = GFS2_I(inode); + struct gfs2_sbd *sdp = GFS2_SB(inode); + const __be64 *first, *ptr, *end; - iomap->addr = (ip->i_no_addr << inode->i_blkbits) + - sizeof(struct gfs2_dinode); - iomap->offset = 0; - iomap->length = i_size_read(inode); - iomap->type = IOMAP_INLINE; -} + /* + * For writes to stuffed files, this function is called twice via + * gfs2_iomap_get, before and after unstuffing. The size we return the + * first time needs to be large enough to get the reservation and + * allocation sizes right. The size we return the second time must + * be exact or else gfs2_iomap_alloc won't do the right thing. + */ -#define IOMAP_F_GFS2_BOUNDARY IOMAP_F_PRIVATE + if (gfs2_is_stuffed(ip) || mp->mp_fheight != mp->mp_aheight) { + unsigned int maxsize = mp->mp_fheight > 1 ? + sdp->sd_inptrs : sdp->sd_diptrs; + maxsize -= mp->mp_list[mp->mp_fheight - 1]; + if (size > maxsize) + size = maxsize; + return size; + } + + first = metapointer(ip->i_height - 1, mp); + end = metaend(ip->i_height - 1, mp); + if (end - first > size) + end = first + size; + for (ptr = first; ptr < end; ptr++) { + if (*ptr) + break; + } + return ptr - first; +} /** * gfs2_iomap_get - Map blocks from an inode to disk blocks @@ -789,37 +808,63 @@ static int gfs2_iomap_get(struct inode *inode, loff_t pos, loff_t length, { struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_sbd *sdp = GFS2_SB(inode); + loff_t size = i_size_read(inode); __be64 *ptr; sector_t lblock; sector_t lblock_stop; int ret; int eob; u64 len; - struct buffer_head *bh; + struct buffer_head *dibh = NULL, *bh; u8 height; if (!length) return -EINVAL; + down_read(&ip->i_rw_mutex); + + ret = gfs2_meta_inode_buffer(ip, &dibh); + if (ret) + goto unlock; + iomap->private = dibh; + if (gfs2_is_stuffed(ip)) { - if (flags & IOMAP_REPORT) { - if (pos >= i_size_read(inode)) - return -ENOENT; - gfs2_stuffed_iomap(inode, iomap); - return 0; + if (flags & IOMAP_WRITE) { + loff_t max_size = gfs2_max_stuffed_size(ip); + + if (pos + length > max_size) + goto unstuff; + iomap->length = max_size; + } else { + if (pos >= size) { + if (flags & IOMAP_REPORT) { + ret = -ENOENT; + goto unlock; + } else { + /* report a hole */ + iomap->offset = pos; + iomap->length = length; + goto do_alloc; + } + } + iomap->length = size; } - BUG_ON(!(flags & IOMAP_WRITE)); + iomap->addr = (ip->i_no_addr << inode->i_blkbits) + + sizeof(struct gfs2_dinode); + iomap->type = IOMAP_INLINE; + iomap->inline_data = dibh->b_data + sizeof(struct gfs2_dinode); + goto out; } + +unstuff: lblock = pos >> inode->i_blkbits; iomap->offset = lblock << inode->i_blkbits; lblock_stop = (pos + length - 1) >> inode->i_blkbits; len = lblock_stop - lblock + 1; + iomap->length = len << inode->i_blkbits; - down_read(&ip->i_rw_mutex); - - ret = gfs2_meta_inode_buffer(ip, &mp->mp_bh[0]); - if (ret) - goto unlock; + get_bh(dibh); + mp->mp_bh[0] = dibh; height = ip->i_height; while ((lblock + 1) * sdp->sd_sb.sb_bsize > sdp->sd_heightsize[height]) @@ -840,7 +885,7 @@ static int gfs2_iomap_get(struct inode *inode, loff_t pos, loff_t length, goto do_alloc; bh = mp->mp_bh[ip->i_height - 1]; - len = gfs2_extent_length(bh->b_data, bh->b_size, ptr, len, &eob); + len = gfs2_extent_length(bh, ptr, len, &eob); iomap->addr = be64_to_cpu(*ptr) << inode->i_blkbits; iomap->length = len << inode->i_blkbits; @@ -853,25 +898,185 @@ out: iomap->bdev = inode->i_sb->s_bdev; unlock: up_read(&ip->i_rw_mutex); + if (ret && dibh) + brelse(dibh); return ret; do_alloc: iomap->addr = IOMAP_NULL_ADDR; - iomap->length = len << inode->i_blkbits; iomap->type = IOMAP_HOLE; - iomap->flags = 0; if (flags & IOMAP_REPORT) { - loff_t size = i_size_read(inode); if (pos >= size) ret = -ENOENT; else if (height == ip->i_height) ret = gfs2_hole_size(inode, lblock, len, mp, iomap); else iomap->length = size - pos; + } else if (flags & IOMAP_WRITE) { + u64 alloc_size; + + if (flags & IOMAP_DIRECT) + goto out; /* (see gfs2_file_direct_write) */ + + len = gfs2_alloc_size(inode, mp, len); + alloc_size = len << inode->i_blkbits; + if (alloc_size < iomap->length) + iomap->length = alloc_size; + } else { + if (pos < size && height == ip->i_height) + ret = gfs2_hole_size(inode, lblock, len, mp, iomap); } goto out; } +static int gfs2_write_lock(struct inode *inode) +{ + struct gfs2_inode *ip = GFS2_I(inode); + struct gfs2_sbd *sdp = GFS2_SB(inode); + int error; + + gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ip->i_gh); + error = gfs2_glock_nq(&ip->i_gh); + if (error) + goto out_uninit; + if (&ip->i_inode == sdp->sd_rindex) { + struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode); + + error = gfs2_glock_nq_init(m_ip->i_gl, LM_ST_EXCLUSIVE, + GL_NOCACHE, &m_ip->i_gh); + if (error) + goto out_unlock; + } + return 0; + +out_unlock: + gfs2_glock_dq(&ip->i_gh); +out_uninit: + gfs2_holder_uninit(&ip->i_gh); + return error; +} + +static void gfs2_write_unlock(struct inode *inode) +{ + struct gfs2_inode *ip = GFS2_I(inode); + struct gfs2_sbd *sdp = GFS2_SB(inode); + + if (&ip->i_inode == sdp->sd_rindex) { + struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode); + + gfs2_glock_dq_uninit(&m_ip->i_gh); + } + gfs2_glock_dq_uninit(&ip->i_gh); +} + +static void gfs2_iomap_journaled_page_done(struct inode *inode, loff_t pos, + unsigned copied, struct page *page, + struct iomap *iomap) +{ + struct gfs2_inode *ip = GFS2_I(inode); + + gfs2_page_add_databufs(ip, page, offset_in_page(pos), copied); +} + +static int gfs2_iomap_begin_write(struct inode *inode, loff_t pos, + loff_t length, unsigned flags, + struct iomap *iomap) +{ + struct metapath mp = { .mp_aheight = 1, }; + struct gfs2_inode *ip = GFS2_I(inode); + struct gfs2_sbd *sdp = GFS2_SB(inode); + unsigned int data_blocks = 0, ind_blocks = 0, rblocks; + bool unstuff, alloc_required; + int ret; + + ret = gfs2_write_lock(inode); + if (ret) + return ret; + + unstuff = gfs2_is_stuffed(ip) && + pos + length > gfs2_max_stuffed_size(ip); + + ret = gfs2_iomap_get(inode, pos, length, flags, iomap, &mp); + if (ret) + goto out_release; + + alloc_required = unstuff || iomap->type == IOMAP_HOLE; + + if (alloc_required || gfs2_is_jdata(ip)) + gfs2_write_calc_reserv(ip, iomap->length, &data_blocks, + &ind_blocks); + + if (alloc_required) { + struct gfs2_alloc_parms ap = { + .target = data_blocks + ind_blocks + }; + + ret = gfs2_quota_lock_check(ip, &ap); + if (ret) + goto out_release; + + ret = gfs2_inplace_reserve(ip, &ap); + if (ret) + goto out_qunlock; + } + + rblocks = RES_DINODE + ind_blocks; + if (gfs2_is_jdata(ip)) + rblocks += data_blocks; + if (ind_blocks || data_blocks) + rblocks += RES_STATFS + RES_QUOTA; + if (inode == sdp->sd_rindex) + rblocks += 2 * RES_STATFS; + if (alloc_required) + rblocks += gfs2_rg_blocks(ip, data_blocks + ind_blocks); + + ret = gfs2_trans_begin(sdp, rblocks, iomap->length >> inode->i_blkbits); + if (ret) + goto out_trans_fail; + + if (unstuff) { + ret = gfs2_unstuff_dinode(ip, NULL); + if (ret) + goto out_trans_end; + release_metapath(&mp); + brelse(iomap->private); + iomap->private = NULL; + ret = gfs2_iomap_get(inode, iomap->offset, iomap->length, + flags, iomap, &mp); + if (ret) + goto out_trans_end; + } + + if (iomap->type == IOMAP_HOLE) { + ret = gfs2_iomap_alloc(inode, iomap, flags, &mp); + if (ret) { + gfs2_trans_end(sdp); + gfs2_inplace_release(ip); + punch_hole(ip, iomap->offset, iomap->length); + goto out_qunlock; + } + } + release_metapath(&mp); + if (gfs2_is_jdata(ip)) + iomap->page_done = gfs2_iomap_journaled_page_done; + return 0; + +out_trans_end: + gfs2_trans_end(sdp); +out_trans_fail: + if (alloc_required) + gfs2_inplace_release(ip); +out_qunlock: + if (alloc_required) + gfs2_quota_unlock(ip); +out_release: + if (iomap->private) + brelse(iomap->private); + release_metapath(&mp); + gfs2_write_unlock(inode); + return ret; +} + static int gfs2_iomap_begin(struct inode *inode, loff_t pos, loff_t length, unsigned flags, struct iomap *iomap) { @@ -880,21 +1085,76 @@ static int gfs2_iomap_begin(struct inode *inode, loff_t pos, loff_t length, int ret; trace_gfs2_iomap_start(ip, pos, length, flags); - if (flags & IOMAP_WRITE) { - ret = gfs2_iomap_get(inode, pos, length, flags, iomap, &mp); - if (!ret && iomap->type == IOMAP_HOLE) - ret = gfs2_iomap_alloc(inode, iomap, flags, &mp); - release_metapath(&mp); + if ((flags & IOMAP_WRITE) && !(flags & IOMAP_DIRECT)) { + ret = gfs2_iomap_begin_write(inode, pos, length, flags, iomap); } else { ret = gfs2_iomap_get(inode, pos, length, flags, iomap, &mp); release_metapath(&mp); + /* + * Silently fall back to buffered I/O for stuffed files or if + * we've hot a hole (see gfs2_file_direct_write). + */ + if ((flags & IOMAP_WRITE) && (flags & IOMAP_DIRECT) && + iomap->type != IOMAP_MAPPED) + ret = -ENOTBLK; } trace_gfs2_iomap_end(ip, iomap, ret); return ret; } +static int gfs2_iomap_end(struct inode *inode, loff_t pos, loff_t length, + ssize_t written, unsigned flags, struct iomap *iomap) +{ + struct gfs2_inode *ip = GFS2_I(inode); + struct gfs2_sbd *sdp = GFS2_SB(inode); + struct gfs2_trans *tr = current->journal_info; + struct buffer_head *dibh = iomap->private; + + if ((flags & (IOMAP_WRITE | IOMAP_DIRECT)) != IOMAP_WRITE) + goto out; + + if (iomap->type != IOMAP_INLINE) { + gfs2_ordered_add_inode(ip); + + if (tr->tr_num_buf_new) + __mark_inode_dirty(inode, I_DIRTY_DATASYNC); + else + gfs2_trans_add_meta(ip->i_gl, dibh); + } + + if (inode == sdp->sd_rindex) { + adjust_fs_space(inode); + sdp->sd_rindex_uptodate = 0; + } + + gfs2_trans_end(sdp); + gfs2_inplace_release(ip); + + if (length != written && (iomap->flags & IOMAP_F_NEW)) { + /* Deallocate blocks that were just allocated. */ + loff_t blockmask = i_blocksize(inode) - 1; + loff_t end = (pos + length) & ~blockmask; + + pos = (pos + written + blockmask) & ~blockmask; + if (pos < end) { + truncate_pagecache_range(inode, pos, end - 1); + punch_hole(ip, pos, end - pos); + } + } + + if (ip->i_qadata && ip->i_qadata->qa_qd_num) + gfs2_quota_unlock(ip); + gfs2_write_unlock(inode); + +out: + if (dibh) + brelse(dibh); + return 0; +} + const struct iomap_ops gfs2_iomap_ops = { .iomap_begin = gfs2_iomap_begin, + .iomap_end = gfs2_iomap_end, }; /** @@ -941,12 +1201,6 @@ int gfs2_block_map(struct inode *inode, sector_t lblock, } else { ret = gfs2_iomap_get(inode, pos, length, 0, &iomap, &mp); release_metapath(&mp); - - /* Return unmapped buffer beyond the end of file. */ - if (ret == -ENOENT) { - ret = 0; - goto out; - } } if (ret) goto out; |