summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/afs/xattr.c7
-rw-r--r--fs/afs/yfsclient.c1
-rw-r--r--fs/ceph/caps.c2
-rw-r--r--fs/ceph/mds_client.c50
-rw-r--r--fs/ceph/mds_client.h1
-rw-r--r--fs/ceph/quota.c2
-rw-r--r--fs/ceph/snap.c2
-rw-r--r--fs/gfs2/glock.c3
-rw-r--r--fs/gfs2/glops.c56
-rw-r--r--fs/gfs2/glops.h1
-rw-r--r--fs/gfs2/inode.c3
-rw-r--r--fs/gfs2/lops.c31
-rw-r--r--fs/gfs2/lops.h2
-rw-r--r--fs/gfs2/ops_fstype.c14
-rw-r--r--fs/gfs2/recovery.c2
-rw-r--r--fs/gfs2/rgrp.c5
-rw-r--r--fs/gfs2/super.c1
-rw-r--r--fs/io-wq.c4
-rw-r--r--fs/io_uring.c183
-rw-r--r--fs/iomap/buffered-io.c30
-rw-r--r--fs/proc/base.c2
-rw-r--r--fs/proc/cpuinfo.c2
-rw-r--r--fs/proc/generic.c4
-rw-r--r--fs/proc/inode.c2
-rw-r--r--fs/proc/stat.c2
-rw-r--r--fs/seq_file.c45
-rw-r--r--fs/xfs/libxfs/xfs_alloc.c1
-rw-r--r--fs/xfs/libxfs/xfs_bmap.h2
-rw-r--r--fs/xfs/scrub/inode.c3
-rw-r--r--fs/xfs/xfs_aops.c20
-rw-r--r--fs/xfs/xfs_iops.c10
-rw-r--r--fs/xfs/xfs_reflink.c3
32 files changed, 328 insertions, 168 deletions
diff --git a/fs/afs/xattr.c b/fs/afs/xattr.c
index 38884d6c57cd..95c573dcda11 100644
--- a/fs/afs/xattr.c
+++ b/fs/afs/xattr.c
@@ -148,11 +148,6 @@ static const struct xattr_handler afs_xattr_afs_acl_handler = {
.set = afs_xattr_set_acl,
};
-static void yfs_acl_put(struct afs_operation *op)
-{
- yfs_free_opaque_acl(op->yacl);
-}
-
static const struct afs_operation_ops yfs_fetch_opaque_acl_operation = {
.issue_yfs_rpc = yfs_fs_fetch_opaque_acl,
.success = afs_acl_success,
@@ -246,7 +241,7 @@ error:
static const struct afs_operation_ops yfs_store_opaque_acl2_operation = {
.issue_yfs_rpc = yfs_fs_store_opaque_acl2,
.success = afs_acl_success,
- .put = yfs_acl_put,
+ .put = afs_acl_put,
};
/*
diff --git a/fs/afs/yfsclient.c b/fs/afs/yfsclient.c
index 3b1239b7e90d..bd787e71a657 100644
--- a/fs/afs/yfsclient.c
+++ b/fs/afs/yfsclient.c
@@ -1990,6 +1990,7 @@ void yfs_fs_store_opaque_acl2(struct afs_operation *op)
memcpy(bp, acl->data, acl->size);
if (acl->size != size)
memset((void *)bp + acl->size, 0, size - acl->size);
+ bp += size / sizeof(__be32);
yfs_check_req(call, bp);
trace_afs_make_fs_call(call, &vp->fid);
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 5027bbdca419..ded4229c314a 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -4074,7 +4074,7 @@ void ceph_handle_caps(struct ceph_mds_session *session,
vino.snap, inode);
mutex_lock(&session->s_mutex);
- session->s_seq++;
+ inc_session_sequence(session);
dout(" mds%d seq %lld cap seq %u\n", session->s_mds, session->s_seq,
(unsigned)seq);
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 08f1c0c31dc2..8f1d7500a7ec 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -4231,7 +4231,7 @@ static void handle_lease(struct ceph_mds_client *mdsc,
dname.len, dname.name);
mutex_lock(&session->s_mutex);
- session->s_seq++;
+ inc_session_sequence(session);
if (!inode) {
dout("handle_lease no inode %llx\n", vino.ino);
@@ -4385,29 +4385,49 @@ static void maybe_recover_session(struct ceph_mds_client *mdsc)
bool check_session_state(struct ceph_mds_session *s)
{
- if (s->s_state == CEPH_MDS_SESSION_CLOSING) {
- dout("resending session close request for mds%d\n",
- s->s_mds);
- request_close_session(s);
- return false;
- }
- if (s->s_ttl && time_after(jiffies, s->s_ttl)) {
- if (s->s_state == CEPH_MDS_SESSION_OPEN) {
+ switch (s->s_state) {
+ case CEPH_MDS_SESSION_OPEN:
+ if (s->s_ttl && time_after(jiffies, s->s_ttl)) {
s->s_state = CEPH_MDS_SESSION_HUNG;
pr_info("mds%d hung\n", s->s_mds);
}
- }
- if (s->s_state == CEPH_MDS_SESSION_NEW ||
- s->s_state == CEPH_MDS_SESSION_RESTARTING ||
- s->s_state == CEPH_MDS_SESSION_CLOSED ||
- s->s_state == CEPH_MDS_SESSION_REJECTED)
- /* this mds is failed or recovering, just wait */
+ break;
+ case CEPH_MDS_SESSION_CLOSING:
+ /* Should never reach this when we're unmounting */
+ WARN_ON_ONCE(true);
+ fallthrough;
+ case CEPH_MDS_SESSION_NEW:
+ case CEPH_MDS_SESSION_RESTARTING:
+ case CEPH_MDS_SESSION_CLOSED:
+ case CEPH_MDS_SESSION_REJECTED:
return false;
+ }
return true;
}
/*
+ * If the sequence is incremented while we're waiting on a REQUEST_CLOSE reply,
+ * then we need to retransmit that request.
+ */
+void inc_session_sequence(struct ceph_mds_session *s)
+{
+ lockdep_assert_held(&s->s_mutex);
+
+ s->s_seq++;
+
+ if (s->s_state == CEPH_MDS_SESSION_CLOSING) {
+ int ret;
+
+ dout("resending session close request for mds%d\n", s->s_mds);
+ ret = request_close_session(s);
+ if (ret < 0)
+ pr_err("unable to close session to mds%d: %d\n",
+ s->s_mds, ret);
+ }
+}
+
+/*
* delayed work -- periodically trim expired leases, renew caps with mds
*/
static void schedule_delayed(struct ceph_mds_client *mdsc)
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index cbf8af437140..f5adbebcb38e 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -480,6 +480,7 @@ struct ceph_mds_client {
extern const char *ceph_mds_op_name(int op);
extern bool check_session_state(struct ceph_mds_session *s);
+void inc_session_sequence(struct ceph_mds_session *s);
extern struct ceph_mds_session *
__ceph_lookup_mds_session(struct ceph_mds_client *, int mds);
diff --git a/fs/ceph/quota.c b/fs/ceph/quota.c
index 83cb4f26b689..9b785f11e95a 100644
--- a/fs/ceph/quota.c
+++ b/fs/ceph/quota.c
@@ -53,7 +53,7 @@ void ceph_handle_quota(struct ceph_mds_client *mdsc,
/* increment msg sequence number */
mutex_lock(&session->s_mutex);
- session->s_seq++;
+ inc_session_sequence(session);
mutex_unlock(&session->s_mutex);
/* lookup inode */
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
index 0da39c16dab4..b611f829cb61 100644
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -873,7 +873,7 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc,
ceph_snap_op_name(op), split, trace_len);
mutex_lock(&session->s_mutex);
- session->s_seq++;
+ inc_session_sequence(session);
mutex_unlock(&session->s_mutex);
down_write(&mdsc->snap_rwsem);
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 5441c17562c5..d98a2e5dab9f 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -1078,7 +1078,8 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
out_free:
kfree(gl->gl_lksb.sb_lvbptr);
kmem_cache_free(cachep, gl);
- atomic_dec(&sdp->sd_glock_disposal);
+ if (atomic_dec_and_test(&sdp->sd_glock_disposal))
+ wake_up(&sdp->sd_glock_wait);
out:
return ret;
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index aa3f5236befb..6c1432d78dce 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -165,6 +165,31 @@ void gfs2_ail_flush(struct gfs2_glock *gl, bool fsync)
}
/**
+ * gfs2_rgrp_metasync - sync out the metadata of a resource group
+ * @gl: the glock protecting the resource group
+ *
+ */
+
+static int gfs2_rgrp_metasync(struct gfs2_glock *gl)
+{
+ struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
+ struct address_space *metamapping = &sdp->sd_aspace;
+ struct gfs2_rgrpd *rgd = gfs2_glock2rgrp(gl);
+ const unsigned bsize = sdp->sd_sb.sb_bsize;
+ loff_t start = (rgd->rd_addr * bsize) & PAGE_MASK;
+ loff_t end = PAGE_ALIGN((rgd->rd_addr + rgd->rd_length) * bsize) - 1;
+ int error;
+
+ filemap_fdatawrite_range(metamapping, start, end);
+ error = filemap_fdatawait_range(metamapping, start, end);
+ WARN_ON_ONCE(error && !gfs2_withdrawn(sdp));
+ mapping_set_error(metamapping, error);
+ if (error)
+ gfs2_io_error(sdp);
+ return error;
+}
+
+/**
* rgrp_go_sync - sync out the metadata for this glock
* @gl: the glock
*
@@ -176,11 +201,7 @@ void gfs2_ail_flush(struct gfs2_glock *gl, bool fsync)
static int rgrp_go_sync(struct gfs2_glock *gl)
{
struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
- struct address_space *mapping = &sdp->sd_aspace;
struct gfs2_rgrpd *rgd = gfs2_glock2rgrp(gl);
- const unsigned bsize = sdp->sd_sb.sb_bsize;
- loff_t start = (rgd->rd_addr * bsize) & PAGE_MASK;
- loff_t end = PAGE_ALIGN((rgd->rd_addr + rgd->rd_length) * bsize) - 1;
int error;
if (!test_and_clear_bit(GLF_DIRTY, &gl->gl_flags))
@@ -189,10 +210,7 @@ static int rgrp_go_sync(struct gfs2_glock *gl)
gfs2_log_flush(sdp, gl, GFS2_LOG_HEAD_FLUSH_NORMAL |
GFS2_LFC_RGRP_GO_SYNC);
- filemap_fdatawrite_range(mapping, start, end);
- error = filemap_fdatawait_range(mapping, start, end);
- WARN_ON_ONCE(error && !gfs2_withdrawn(sdp));
- mapping_set_error(mapping, error);
+ error = gfs2_rgrp_metasync(gl);
if (!error)
error = gfs2_ail_empty_gl(gl);
gfs2_free_clones(rgd);
@@ -266,7 +284,24 @@ static void gfs2_clear_glop_pending(struct gfs2_inode *ip)
}
/**
- * inode_go_sync - Sync the dirty data and/or metadata for an inode glock
+ * gfs2_inode_metasync - sync out the metadata of an inode
+ * @gl: the glock protecting the inode
+ *
+ */
+int gfs2_inode_metasync(struct gfs2_glock *gl)
+{
+ struct address_space *metamapping = gfs2_glock2aspace(gl);
+ int error;
+
+ filemap_fdatawrite(metamapping);
+ error = filemap_fdatawait(metamapping);
+ if (error)
+ gfs2_io_error(gl->gl_name.ln_sbd);
+ return error;
+}
+
+/**
+ * inode_go_sync - Sync the dirty metadata of an inode
* @gl: the glock protecting the inode
*
*/
@@ -297,8 +332,7 @@ static int inode_go_sync(struct gfs2_glock *gl)
error = filemap_fdatawait(mapping);
mapping_set_error(mapping, error);
}
- ret = filemap_fdatawait(metamapping);
- mapping_set_error(metamapping, ret);
+ ret = gfs2_inode_metasync(gl);
if (!error)
error = ret;
gfs2_ail_empty_gl(gl);
diff --git a/fs/gfs2/glops.h b/fs/gfs2/glops.h
index 2dd192e85618..695898afcaf1 100644
--- a/fs/gfs2/glops.h
+++ b/fs/gfs2/glops.h
@@ -22,6 +22,7 @@ extern const struct gfs2_glock_operations gfs2_quota_glops;
extern const struct gfs2_glock_operations gfs2_journal_glops;
extern const struct gfs2_glock_operations *gfs2_glops_list[];
+extern int gfs2_inode_metasync(struct gfs2_glock *gl);
extern void gfs2_ail_flush(struct gfs2_glock *gl, bool fsync);
#endif /* __GLOPS_DOT_H__ */
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 6774865f5b5b..077ccb1b3ccc 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -180,7 +180,8 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type,
error = gfs2_glock_nq_init(io_gl, LM_ST_SHARED, GL_EXACT, &ip->i_iopen_gh);
if (unlikely(error))
goto fail;
- gfs2_cancel_delete_work(ip->i_iopen_gh.gh_gl);
+ if (blktype != GFS2_BLKST_UNLINKED)
+ gfs2_cancel_delete_work(ip->i_iopen_gh.gh_gl);
glock_set_object(ip->i_iopen_gh.gh_gl, ip);
gfs2_glock_put(io_gl);
io_gl = NULL;
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index ed69298dd824..3922b26264f5 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -22,6 +22,7 @@
#include "incore.h"
#include "inode.h"
#include "glock.h"
+#include "glops.h"
#include "log.h"
#include "lops.h"
#include "meta_io.h"
@@ -817,41 +818,19 @@ static int buf_lo_scan_elements(struct gfs2_jdesc *jd, u32 start,
return error;
}
-/**
- * gfs2_meta_sync - Sync all buffers associated with a glock
- * @gl: The glock
- *
- */
-
-void gfs2_meta_sync(struct gfs2_glock *gl)
-{
- struct address_space *mapping = gfs2_glock2aspace(gl);
- struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
- int error;
-
- if (mapping == NULL)
- mapping = &sdp->sd_aspace;
-
- filemap_fdatawrite(mapping);
- error = filemap_fdatawait(mapping);
-
- if (error)
- gfs2_io_error(gl->gl_name.ln_sbd);
-}
-
static void buf_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass)
{
struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
if (error) {
- gfs2_meta_sync(ip->i_gl);
+ gfs2_inode_metasync(ip->i_gl);
return;
}
if (pass != 1)
return;
- gfs2_meta_sync(ip->i_gl);
+ gfs2_inode_metasync(ip->i_gl);
fs_info(sdp, "jid=%u: Replayed %u of %u blocks\n",
jd->jd_jid, jd->jd_replayed_blocks, jd->jd_found_blocks);
@@ -1060,14 +1039,14 @@ static void databuf_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass)
struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
if (error) {
- gfs2_meta_sync(ip->i_gl);
+ gfs2_inode_metasync(ip->i_gl);
return;
}
if (pass != 1)
return;
/* data sync? */
- gfs2_meta_sync(ip->i_gl);
+ gfs2_inode_metasync(ip->i_gl);
fs_info(sdp, "jid=%u: Replayed %u of %u data blocks\n",
jd->jd_jid, jd->jd_replayed_blocks, jd->jd_found_blocks);
diff --git a/fs/gfs2/lops.h b/fs/gfs2/lops.h
index 4a3d8aecdf82..fbdbb08dcec6 100644
--- a/fs/gfs2/lops.h
+++ b/fs/gfs2/lops.h
@@ -27,8 +27,6 @@ extern void gfs2_log_submit_bio(struct bio **biop, int opf);
extern void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh);
extern int gfs2_find_jhead(struct gfs2_jdesc *jd,
struct gfs2_log_header_host *head, bool keep_cache);
-extern void gfs2_meta_sync(struct gfs2_glock *gl);
-
static inline unsigned int buf_limit(struct gfs2_sbd *sdp)
{
unsigned int limit;
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 7a7e3c10a9a9..61fce59cb4d3 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -633,8 +633,10 @@ static int init_statfs(struct gfs2_sbd *sdp)
if (IS_ERR(sdp->sd_statfs_inode)) {
error = PTR_ERR(sdp->sd_statfs_inode);
fs_err(sdp, "can't read in statfs inode: %d\n", error);
- goto fail;
+ goto out;
}
+ if (sdp->sd_args.ar_spectator)
+ goto out;
pn = gfs2_lookup_simple(master, "per_node");
if (IS_ERR(pn)) {
@@ -682,15 +684,17 @@ free_local:
iput(pn);
put_statfs:
iput(sdp->sd_statfs_inode);
-fail:
+out:
return error;
}
/* Uninitialize and free up memory used by the list of statfs inodes */
static void uninit_statfs(struct gfs2_sbd *sdp)
{
- gfs2_glock_dq_uninit(&sdp->sd_sc_gh);
- free_local_statfs_inodes(sdp);
+ if (!sdp->sd_args.ar_spectator) {
+ gfs2_glock_dq_uninit(&sdp->sd_sc_gh);
+ free_local_statfs_inodes(sdp);
+ }
iput(sdp->sd_statfs_inode);
}
@@ -704,7 +708,7 @@ static int init_journal(struct gfs2_sbd *sdp, int undo)
if (undo) {
jindex = 0;
- goto fail_jinode_gh;
+ goto fail_statfs;
}
sdp->sd_jindex = gfs2_lookup_simple(master, "jindex");
diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c
index b5cbe21efdfb..c26c68ebd29d 100644
--- a/fs/gfs2/recovery.c
+++ b/fs/gfs2/recovery.c
@@ -349,7 +349,7 @@ static int update_statfs_inode(struct gfs2_jdesc *jd,
mark_buffer_dirty(bh);
brelse(bh);
- gfs2_meta_sync(ip->i_gl);
+ gfs2_inode_metasync(ip->i_gl);
out:
return error;
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index ee491bb9c1cc..92d799a193b8 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -719,9 +719,9 @@ void gfs2_clear_rgrpd(struct gfs2_sbd *sdp)
}
gfs2_free_clones(rgd);
+ return_all_reservations(rgd);
kfree(rgd->rd_bits);
rgd->rd_bits = NULL;
- return_all_reservations(rgd);
kmem_cache_free(gfs2_rgrpd_cachep, rgd);
}
}
@@ -1370,6 +1370,9 @@ int gfs2_fitrim(struct file *filp, void __user *argp)
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
+ if (!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags))
+ return -EROFS;
+
if (!blk_queue_discard(q))
return -EOPNOTSUPP;
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index b285192bd6b3..b3d951ab8068 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -738,6 +738,7 @@ restart:
gfs2_jindex_free(sdp);
/* Take apart glock structures and buffer lists */
gfs2_gl_hash_clear(sdp);
+ truncate_inode_pages_final(&sdp->sd_aspace);
gfs2_delete_debugfs_file(sdp);
/* Unmount the locking protocol */
gfs2_lm_unmount(sdp);
diff --git a/fs/io-wq.c b/fs/io-wq.c
index 02894df7656d..b53c055bea6a 100644
--- a/fs/io-wq.c
+++ b/fs/io-wq.c
@@ -482,6 +482,10 @@ static void io_impersonate_work(struct io_worker *worker,
current->files = work->identity->files;
current->nsproxy = work->identity->nsproxy;
task_unlock(current);
+ if (!work->identity->files) {
+ /* failed grabbing files, ensure work gets cancelled */
+ work->flags |= IO_WQ_WORK_CANCEL;
+ }
}
if ((work->flags & IO_WQ_WORK_FS) && current->fs != work->identity->fs)
current->fs = work->identity->fs;
diff --git a/fs/io_uring.c b/fs/io_uring.c
index a7429c977eb3..8018c7076b25 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -995,20 +995,33 @@ static void io_sq_thread_drop_mm(void)
if (mm) {
kthread_unuse_mm(mm);
mmput(mm);
+ current->mm = NULL;
}
}
static int __io_sq_thread_acquire_mm(struct io_ring_ctx *ctx)
{
- if (!current->mm) {
- if (unlikely(!(ctx->flags & IORING_SETUP_SQPOLL) ||
- !ctx->sqo_task->mm ||
- !mmget_not_zero(ctx->sqo_task->mm)))
- return -EFAULT;
- kthread_use_mm(ctx->sqo_task->mm);
+ struct mm_struct *mm;
+
+ if (current->mm)
+ return 0;
+
+ /* Should never happen */
+ if (unlikely(!(ctx->flags & IORING_SETUP_SQPOLL)))
+ return -EFAULT;
+
+ task_lock(ctx->sqo_task);
+ mm = ctx->sqo_task->mm;
+ if (unlikely(!mm || !mmget_not_zero(mm)))
+ mm = NULL;
+ task_unlock(ctx->sqo_task);
+
+ if (mm) {
+ kthread_use_mm(mm);
+ return 0;
}
- return 0;
+ return -EFAULT;
}
static int io_sq_thread_acquire_mm(struct io_ring_ctx *ctx,
@@ -1274,9 +1287,12 @@ static bool io_identity_cow(struct io_kiocb *req)
/* add one for this request */
refcount_inc(&id->count);
- /* drop old identity, assign new one. one ref for req, one for tctx */
- if (req->work.identity != tctx->identity &&
- refcount_sub_and_test(2, &req->work.identity->count))
+ /* drop tctx and req identity references, if needed */
+ if (tctx->identity != &tctx->__identity &&
+ refcount_dec_and_test(&tctx->identity->count))
+ kfree(tctx->identity);
+ if (req->work.identity != &tctx->__identity &&
+ refcount_dec_and_test(&req->work.identity->count))
kfree(req->work.identity);
req->work.identity = id;
@@ -1577,14 +1593,29 @@ static void io_cqring_mark_overflow(struct io_ring_ctx *ctx)
}
}
-static inline bool io_match_files(struct io_kiocb *req,
- struct files_struct *files)
+static inline bool __io_match_files(struct io_kiocb *req,
+ struct files_struct *files)
{
+ return ((req->flags & REQ_F_WORK_INITIALIZED) &&
+ (req->work.flags & IO_WQ_WORK_FILES)) &&
+ req->work.identity->files == files;
+}
+
+static bool io_match_files(struct io_kiocb *req,
+ struct files_struct *files)
+{
+ struct io_kiocb *link;
+
if (!files)
return true;
- if ((req->flags & REQ_F_WORK_INITIALIZED) &&
- (req->work.flags & IO_WQ_WORK_FILES))
- return req->work.identity->files == files;
+ if (__io_match_files(req, files))
+ return true;
+ if (req->flags & REQ_F_LINK_HEAD) {
+ list_for_each_entry(link, &req->link_list, link_list) {
+ if (__io_match_files(link, files))
+ return true;
+ }
+ }
return false;
}
@@ -1668,7 +1699,8 @@ static void __io_cqring_fill_event(struct io_kiocb *req, long res, long cflags)
WRITE_ONCE(cqe->user_data, req->user_data);
WRITE_ONCE(cqe->res, res);
WRITE_ONCE(cqe->flags, cflags);
- } else if (ctx->cq_overflow_flushed || req->task->io_uring->in_idle) {
+ } else if (ctx->cq_overflow_flushed ||
+ atomic_read(&req->task->io_uring->in_idle)) {
/*
* If we're in ring overflow flush mode, or in task cancel mode,
* then we cannot store the request for later flushing, we need
@@ -1838,7 +1870,7 @@ static void __io_free_req(struct io_kiocb *req)
io_dismantle_req(req);
percpu_counter_dec(&tctx->inflight);
- if (tctx->in_idle)
+ if (atomic_read(&tctx->in_idle))
wake_up(&tctx->wait);
put_task_struct(req->task);
@@ -7695,7 +7727,8 @@ static int io_uring_alloc_task_context(struct task_struct *task)
xa_init(&tctx->xa);
init_waitqueue_head(&tctx->wait);
tctx->last = NULL;
- tctx->in_idle = 0;
+ atomic_set(&tctx->in_idle, 0);
+ tctx->sqpoll = false;
io_init_identity(&tctx->__identity);
tctx->identity = &tctx->__identity;
task->io_uring = tctx;
@@ -8388,22 +8421,6 @@ static bool io_match_link(struct io_kiocb *preq, struct io_kiocb *req)
return false;
}
-static bool io_match_link_files(struct io_kiocb *req,
- struct files_struct *files)
-{
- struct io_kiocb *link;
-
- if (io_match_files(req, files))
- return true;
- if (req->flags & REQ_F_LINK_HEAD) {
- list_for_each_entry(link, &req->link_list, link_list) {
- if (io_match_files(link, files))
- return true;
- }
- }
- return false;
-}
-
/*
* We're looking to cancel 'req' because it's holding on to our files, but
* 'req' could be a link to another request. See if it is, and cancel that
@@ -8453,7 +8470,21 @@ static bool io_timeout_remove_link(struct io_ring_ctx *ctx,
static bool io_cancel_link_cb(struct io_wq_work *work, void *data)
{
- return io_match_link(container_of(work, struct io_kiocb, work), data);
+ struct io_kiocb *req = container_of(work, struct io_kiocb, work);
+ bool ret;
+
+ if (req->flags & REQ_F_LINK_TIMEOUT) {
+ unsigned long flags;
+ struct io_ring_ctx *ctx = req->ctx;
+
+ /* protect against races with linked timeouts */
+ spin_lock_irqsave(&ctx->completion_lock, flags);
+ ret = io_match_link(req, data);
+ spin_unlock_irqrestore(&ctx->completion_lock, flags);
+ } else {
+ ret = io_match_link(req, data);
+ }
+ return ret;
}
static void io_attempt_cancel(struct io_ring_ctx *ctx, struct io_kiocb *req)
@@ -8479,6 +8510,7 @@ static void io_attempt_cancel(struct io_ring_ctx *ctx, struct io_kiocb *req)
}
static void io_cancel_defer_files(struct io_ring_ctx *ctx,
+ struct task_struct *task,
struct files_struct *files)
{
struct io_defer_entry *de = NULL;
@@ -8486,7 +8518,8 @@ static void io_cancel_defer_files(struct io_ring_ctx *ctx,
spin_lock_irq(&ctx->completion_lock);
list_for_each_entry_reverse(de, &ctx->defer_list, list) {
- if (io_match_link_files(de->req, files)) {
+ if (io_task_match(de->req, task) &&
+ io_match_files(de->req, files)) {
list_cut_position(&list, &ctx->defer_list, &de->list);
break;
}
@@ -8512,7 +8545,6 @@ static bool io_uring_cancel_files(struct io_ring_ctx *ctx,
if (list_empty_careful(&ctx->inflight_list))
return false;
- io_cancel_defer_files(ctx, files);
/* cancel all at once, should be faster than doing it one by one*/
io_wq_cancel_cb(ctx->io_wq, io_wq_files_match, files, true);
@@ -8598,8 +8630,16 @@ static void io_uring_cancel_task_requests(struct io_ring_ctx *ctx,
{
struct task_struct *task = current;
- if ((ctx->flags & IORING_SETUP_SQPOLL) && ctx->sq_data)
+ if ((ctx->flags & IORING_SETUP_SQPOLL) && ctx->sq_data) {
task = ctx->sq_data->thread;
+ atomic_inc(&task->io_uring->in_idle);
+ io_sq_thread_park(ctx->sq_data);
+ }
+
+ if (files)
+ io_cancel_defer_files(ctx, NULL, files);
+ else
+ io_cancel_defer_files(ctx, task, NULL);
io_cqring_overflow_flush(ctx, true, task, files);
@@ -8607,12 +8647,23 @@ static void io_uring_cancel_task_requests(struct io_ring_ctx *ctx,
io_run_task_work();
cond_resched();
}
+
+ if ((ctx->flags & IORING_SETUP_SQPOLL) && ctx->sq_data) {
+ atomic_dec(&task->io_uring->in_idle);
+ /*
+ * If the files that are going away are the ones in the thread
+ * identity, clear them out.
+ */
+ if (task->io_uring->identity->files == files)
+ task->io_uring->identity->files = NULL;
+ io_sq_thread_unpark(ctx->sq_data);
+ }
}
/*
* Note that this task has used io_uring. We use it for cancelation purposes.
*/
-static int io_uring_add_task_file(struct file *file)
+static int io_uring_add_task_file(struct io_ring_ctx *ctx, struct file *file)
{
struct io_uring_task *tctx = current->io_uring;
@@ -8634,6 +8685,14 @@ static int io_uring_add_task_file(struct file *file)
tctx->last = file;
}
+ /*
+ * This is race safe in that the task itself is doing this, hence it
+ * cannot be going through the exit/cancel paths at the same time.
+ * This cannot be modified while exit/cancel is running.
+ */
+ if (!tctx->sqpoll && (ctx->flags & IORING_SETUP_SQPOLL))
+ tctx->sqpoll = true;
+
return 0;
}
@@ -8675,7 +8734,7 @@ void __io_uring_files_cancel(struct files_struct *files)
unsigned long index;
/* make sure overflow events are dropped */
- tctx->in_idle = true;
+ atomic_inc(&tctx->in_idle);
xa_for_each(&tctx->xa, index, file) {
struct io_ring_ctx *ctx = file->private_data;
@@ -8684,6 +8743,35 @@ void __io_uring_files_cancel(struct files_struct *files)
if (files)
io_uring_del_task_file(file);
}
+
+ atomic_dec(&tctx->in_idle);
+}
+
+static s64 tctx_inflight(struct io_uring_task *tctx)
+{
+ unsigned long index;
+ struct file *file;
+ s64 inflight;
+
+ inflight = percpu_counter_sum(&tctx->inflight);
+ if (!tctx->sqpoll)
+ return inflight;
+
+ /*
+ * If we have SQPOLL rings, then we need to iterate and find them, and
+ * add the pending count for those.
+ */
+ xa_for_each(&tctx->xa, index, file) {
+ struct io_ring_ctx *ctx = file->private_data;
+
+ if (ctx->flags & IORING_SETUP_SQPOLL) {
+ struct io_uring_task *__tctx = ctx->sqo_task->io_uring;
+
+ inflight += percpu_counter_sum(&__tctx->inflight);
+ }
+ }
+
+ return inflight;
}
/*
@@ -8697,11 +8785,11 @@ void __io_uring_task_cancel(void)
s64 inflight;
/* make sure overflow events are dropped */
- tctx->in_idle = true;
+ atomic_inc(&tctx->in_idle);
do {
/* read completions before cancelations */
- inflight = percpu_counter_sum(&tctx->inflight);
+ inflight = tctx_inflight(tctx);
if (!inflight)
break;
__io_uring_files_cancel(NULL);
@@ -8712,13 +8800,13 @@ void __io_uring_task_cancel(void)
* If we've seen completions, retry. This avoids a race where
* a completion comes in before we did prepare_to_wait().
*/
- if (inflight != percpu_counter_sum(&tctx->inflight))
+ if (inflight != tctx_inflight(tctx))
continue;
schedule();
} while (1);
finish_wait(&tctx->wait, &wait);
- tctx->in_idle = false;
+ atomic_dec(&tctx->in_idle);
}
static int io_uring_flush(struct file *file, void *data)
@@ -8863,7 +8951,7 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
io_sqpoll_wait_sq(ctx);
submitted = to_submit;
} else if (to_submit) {
- ret = io_uring_add_task_file(f.file);
+ ret = io_uring_add_task_file(ctx, f.file);
if (unlikely(ret))
goto out;
mutex_lock(&ctx->uring_lock);
@@ -8900,7 +8988,8 @@ out_fput:
#ifdef CONFIG_PROC_FS
static int io_uring_show_cred(int id, void *p, void *data)
{
- const struct cred *cred = p;
+ struct io_identity *iod = p;
+ const struct cred *cred = iod->creds;
struct seq_file *m = data;
struct user_namespace *uns = seq_user_ns(m);
struct group_info *gi;
@@ -9092,7 +9181,7 @@ err_fd:
#if defined(CONFIG_UNIX)
ctx->ring_sock->file = file;
#endif
- if (unlikely(io_uring_add_task_file(file))) {
+ if (unlikely(io_uring_add_task_file(ctx, file))) {
file = ERR_PTR(-ENOMEM);
goto err_fd;
}
diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index 8180061b9e16..10cc7979ce38 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -1374,6 +1374,7 @@ iomap_writepage_map(struct iomap_writepage_ctx *wpc,
WARN_ON_ONCE(!wpc->ioend && !list_empty(&submit_list));
WARN_ON_ONCE(!PageLocked(page));
WARN_ON_ONCE(PageWriteback(page));
+ WARN_ON_ONCE(PageDirty(page));
/*
* We cannot cancel the ioend directly here on error. We may have
@@ -1382,33 +1383,22 @@ iomap_writepage_map(struct iomap_writepage_ctx *wpc,
* appropriately.
*/
if (unlikely(error)) {
+ /*
+ * Let the filesystem know what portion of the current page
+ * failed to map. If the page wasn't been added to ioend, it
+ * won't be affected by I/O completion and we must unlock it
+ * now.
+ */
+ if (wpc->ops->discard_page)
+ wpc->ops->discard_page(page, file_offset);
if (!count) {
- /*
- * If the current page hasn't been added to ioend, it
- * won't be affected by I/O completions and we must
- * discard and unlock it right here.
- */
- if (wpc->ops->discard_page)
- wpc->ops->discard_page(page);
ClearPageUptodate(page);
unlock_page(page);
goto done;
}
-
- /*
- * If the page was not fully cleaned, we need to ensure that the
- * higher layers come back to it correctly. That means we need
- * to keep the page dirty, and for WB_SYNC_ALL writeback we need
- * to ensure the PAGECACHE_TAG_TOWRITE index mark is not removed
- * so another attempt to write this page in this writeback sweep
- * will be made.
- */
- set_page_writeback_keepwrite(page);
- } else {
- clear_page_dirty_for_io(page);
- set_page_writeback(page);
}
+ set_page_writeback(page);
unlock_page(page);
/*
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 0f707003dda5..b362523a9829 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1049,6 +1049,8 @@ static ssize_t oom_adj_read(struct file *file, char __user *buf, size_t count,
oom_adj = (task->signal->oom_score_adj * -OOM_DISABLE) /
OOM_SCORE_ADJ_MAX;
put_task_struct(task);
+ if (oom_adj > OOM_ADJUST_MAX)
+ oom_adj = OOM_ADJUST_MAX;
len = snprintf(buffer, sizeof(buffer), "%d\n", oom_adj);
return simple_read_from_buffer(buf, count, ppos, buffer, len);
}
diff --git a/fs/proc/cpuinfo.c b/fs/proc/cpuinfo.c
index d0989a443c77..419760fd77bd 100644
--- a/fs/proc/cpuinfo.c
+++ b/fs/proc/cpuinfo.c
@@ -19,7 +19,7 @@ static int cpuinfo_open(struct inode *inode, struct file *file)
static const struct proc_ops cpuinfo_proc_ops = {
.proc_flags = PROC_ENTRY_PERMANENT,
.proc_open = cpuinfo_open,
- .proc_read = seq_read,
+ .proc_read_iter = seq_read_iter,
.proc_lseek = seq_lseek,
.proc_release = seq_release,
};
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index 2f9fa179194d..b84663252add 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -590,7 +590,7 @@ static int proc_seq_release(struct inode *inode, struct file *file)
static const struct proc_ops proc_seq_ops = {
/* not permanent -- can call into arbitrary seq_operations */
.proc_open = proc_seq_open,
- .proc_read = seq_read,
+ .proc_read_iter = seq_read_iter,
.proc_lseek = seq_lseek,
.proc_release = proc_seq_release,
};
@@ -621,7 +621,7 @@ static int proc_single_open(struct inode *inode, struct file *file)
static const struct proc_ops proc_single_ops = {
/* not permanent -- can call into arbitrary ->single_show */
.proc_open = proc_single_open,
- .proc_read = seq_read,
+ .proc_read_iter = seq_read_iter,
.proc_lseek = seq_lseek,
.proc_release = single_release,
};
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 58c075e2a452..bde6b6f69852 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -597,6 +597,7 @@ static const struct file_operations proc_iter_file_ops = {
.llseek = proc_reg_llseek,
.read_iter = proc_reg_read_iter,
.write = proc_reg_write,
+ .splice_read = generic_file_splice_read,
.poll = proc_reg_poll,
.unlocked_ioctl = proc_reg_unlocked_ioctl,
.mmap = proc_reg_mmap,
@@ -622,6 +623,7 @@ static const struct file_operations proc_reg_file_ops_compat = {
static const struct file_operations proc_iter_file_ops_compat = {
.llseek = proc_reg_llseek,
.read_iter = proc_reg_read_iter,
+ .splice_read = generic_file_splice_read,
.write = proc_reg_write,
.poll = proc_reg_poll,
.unlocked_ioctl = proc_reg_unlocked_ioctl,
diff --git a/fs/proc/stat.c b/fs/proc/stat.c
index 46b3293015fe..4695b6de3151 100644
--- a/fs/proc/stat.c
+++ b/fs/proc/stat.c
@@ -226,7 +226,7 @@ static int stat_open(struct inode *inode, struct file *file)
static const struct proc_ops stat_proc_ops = {
.proc_flags = PROC_ENTRY_PERMANENT,
.proc_open = stat_open,
- .proc_read = seq_read,
+ .proc_read_iter = seq_read_iter,
.proc_lseek = seq_lseek,
.proc_release = single_release,
};
diff --git a/fs/seq_file.c b/fs/seq_file.c
index 31219c1db17d..3b20e21604e7 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -18,6 +18,7 @@
#include <linux/mm.h>
#include <linux/printk.h>
#include <linux/string_helpers.h>
+#include <linux/uio.h>
#include <linux/uaccess.h>
#include <asm/page.h>
@@ -146,7 +147,28 @@ Eoverflow:
*/
ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos)
{
- struct seq_file *m = file->private_data;
+ struct iovec iov = { .iov_base = buf, .iov_len = size};
+ struct kiocb kiocb;
+ struct iov_iter iter;
+ ssize_t ret;
+
+ init_sync_kiocb(&kiocb, file);
+ iov_iter_init(&iter, READ, &iov, 1, size);
+
+ kiocb.ki_pos = *ppos;
+ ret = seq_read_iter(&kiocb, &iter);
+ *ppos = kiocb.ki_pos;
+ return ret;
+}
+EXPORT_SYMBOL(seq_read);
+
+/*
+ * Ready-made ->f_op->read_iter()
+ */
+ssize_t seq_read_iter(struct kiocb *iocb, struct iov_iter *iter)
+{
+ struct seq_file *m = iocb->ki_filp->private_data;
+ size_t size = iov_iter_count(iter);
size_t copied = 0;
size_t n;
void *p;
@@ -158,14 +180,14 @@ ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos)
* if request is to read from zero offset, reset iterator to first
* record as it might have been already advanced by previous requests
*/
- if (*ppos == 0) {
+ if (iocb->ki_pos == 0) {
m->index = 0;
m->count = 0;
}
- /* Don't assume *ppos is where we left it */
- if (unlikely(*ppos != m->read_pos)) {
- while ((err = traverse(m, *ppos)) == -EAGAIN)
+ /* Don't assume ki_pos is where we left it */
+ if (unlikely(iocb->ki_pos != m->read_pos)) {
+ while ((err = traverse(m, iocb->ki_pos)) == -EAGAIN)
;
if (err) {
/* With prejudice... */
@@ -174,7 +196,7 @@ ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos)
m->count = 0;
goto Done;
} else {
- m->read_pos = *ppos;
+ m->read_pos = iocb->ki_pos;
}
}
@@ -187,13 +209,11 @@ ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos)
/* if not empty - flush it first */
if (m->count) {
n = min(m->count, size);
- err = copy_to_user(buf, m->buf + m->from, n);
- if (err)
+ if (copy_to_iter(m->buf + m->from, n, iter) != n)
goto Efault;
m->count -= n;
m->from += n;
size -= n;
- buf += n;
copied += n;
if (!size)
goto Done;
@@ -254,8 +274,7 @@ Fill:
}
m->op->stop(m, p);
n = min(m->count, size);
- err = copy_to_user(buf, m->buf, n);
- if (err)
+ if (copy_to_iter(m->buf, n, iter) != n)
goto Efault;
copied += n;
m->count -= n;
@@ -264,7 +283,7 @@ Done:
if (!copied)
copied = err;
else {
- *ppos += copied;
+ iocb->ki_pos += copied;
m->read_pos += copied;
}
mutex_unlock(&m->lock);
@@ -276,7 +295,7 @@ Efault:
err = -EFAULT;
goto Done;
}
-EXPORT_SYMBOL(seq_read);
+EXPORT_SYMBOL(seq_read_iter);
/**
* seq_lseek - ->llseek() method for sequential files.
diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c
index 852b536551b5..15640015be9d 100644
--- a/fs/xfs/libxfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -2467,6 +2467,7 @@ xfs_defer_agfl_block(
new->xefi_startblock = XFS_AGB_TO_FSB(mp, agno, agbno);
new->xefi_blockcount = 1;
new->xefi_oinfo = *oinfo;
+ new->xefi_skip_discard = false;
trace_xfs_agfl_free_defer(mp, agno, 0, agbno, 1);
diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h
index e1bd484e5548..6747e97a7949 100644
--- a/fs/xfs/libxfs/xfs_bmap.h
+++ b/fs/xfs/libxfs/xfs_bmap.h
@@ -52,9 +52,9 @@ struct xfs_extent_free_item
{
xfs_fsblock_t xefi_startblock;/* starting fs block number */
xfs_extlen_t xefi_blockcount;/* number of blocks in extent */
+ bool xefi_skip_discard;
struct list_head xefi_list;
struct xfs_owner_info xefi_oinfo; /* extent owner */
- bool xefi_skip_discard;
};
#define XFS_BMAP_MAX_NMAP 4
diff --git a/fs/xfs/scrub/inode.c b/fs/xfs/scrub/inode.c
index 3aa85b64de36..bb25ff1b770d 100644
--- a/fs/xfs/scrub/inode.c
+++ b/fs/xfs/scrub/inode.c
@@ -121,8 +121,7 @@ xchk_inode_flags(
goto bad;
/* rt flags require rt device */
- if ((flags & (XFS_DIFLAG_REALTIME | XFS_DIFLAG_RTINHERIT)) &&
- !mp->m_rtdev_targp)
+ if ((flags & XFS_DIFLAG_REALTIME) && !mp->m_rtdev_targp)
goto bad;
/* new rt bitmap flag only valid for rbmino */
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 55d126d4e096..4304c6416fbb 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -346,8 +346,8 @@ xfs_map_blocks(
ssize_t count = i_blocksize(inode);
xfs_fileoff_t offset_fsb = XFS_B_TO_FSBT(mp, offset);
xfs_fileoff_t end_fsb = XFS_B_TO_FSB(mp, offset + count);
- xfs_fileoff_t cow_fsb = NULLFILEOFF;
- int whichfork = XFS_DATA_FORK;
+ xfs_fileoff_t cow_fsb;
+ int whichfork;
struct xfs_bmbt_irec imap;
struct xfs_iext_cursor icur;
int retries = 0;
@@ -381,6 +381,8 @@ xfs_map_blocks(
* landed in a hole and we skip the block.
*/
retry:
+ cow_fsb = NULLFILEOFF;
+ whichfork = XFS_DATA_FORK;
xfs_ilock(ip, XFS_ILOCK_SHARED);
ASSERT(ip->i_df.if_format != XFS_DINODE_FMT_BTREE ||
(ip->i_df.if_flags & XFS_IFEXTENTS));
@@ -527,13 +529,15 @@ xfs_prepare_ioend(
*/
static void
xfs_discard_page(
- struct page *page)
+ struct page *page,
+ loff_t fileoff)
{
struct inode *inode = page->mapping->host;
struct xfs_inode *ip = XFS_I(inode);
struct xfs_mount *mp = ip->i_mount;
- loff_t offset = page_offset(page);
- xfs_fileoff_t start_fsb = XFS_B_TO_FSBT(mp, offset);
+ unsigned int pageoff = offset_in_page(fileoff);
+ xfs_fileoff_t start_fsb = XFS_B_TO_FSBT(mp, fileoff);
+ xfs_fileoff_t pageoff_fsb = XFS_B_TO_FSBT(mp, pageoff);
int error;
if (XFS_FORCED_SHUTDOWN(mp))
@@ -541,14 +545,14 @@ xfs_discard_page(
xfs_alert_ratelimited(mp,
"page discard on page "PTR_FMT", inode 0x%llx, offset %llu.",
- page, ip->i_ino, offset);
+ page, ip->i_ino, fileoff);
error = xfs_bmap_punch_delalloc_range(ip, start_fsb,
- i_blocks_per_page(inode, page));
+ i_blocks_per_page(inode, page) - pageoff_fsb);
if (error && !XFS_FORCED_SHUTDOWN(mp))
xfs_alert(mp, "page discard unable to remove delalloc mapping.");
out_invalidate:
- iomap_invalidatepage(page, 0, PAGE_SIZE);
+ iomap_invalidatepage(page, pageoff, PAGE_SIZE - pageoff);
}
static const struct iomap_writeback_ops xfs_writeback_ops = {
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index 5e165456da68..1414ab79eacf 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -911,6 +911,16 @@ xfs_setattr_size(
error = iomap_zero_range(inode, oldsize, newsize - oldsize,
&did_zeroing, &xfs_buffered_write_iomap_ops);
} else {
+ /*
+ * iomap won't detect a dirty page over an unwritten block (or a
+ * cow block over a hole) and subsequently skips zeroing the
+ * newly post-EOF portion of the page. Flush the new EOF to
+ * convert the block before the pagecache truncate.
+ */
+ error = filemap_write_and_wait_range(inode->i_mapping, newsize,
+ newsize);
+ if (error)
+ return error;
error = iomap_truncate_page(inode, newsize, &did_zeroing,
&xfs_buffered_write_iomap_ops);
}
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index 16098dc42add..6fa05fb78189 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -1502,7 +1502,8 @@ xfs_reflink_unshare(
&xfs_buffered_write_iomap_ops);
if (error)
goto out;
- error = filemap_write_and_wait(inode->i_mapping);
+
+ error = filemap_write_and_wait_range(inode->i_mapping, offset, len);
if (error)
goto out;