summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/Kconfig22
-rw-r--r--fs/cifs/cifs_debug.c11
-rw-r--r--fs/cifs/cifsfs.c2
-rw-r--r--fs/cifs/cifsglob.h124
-rw-r--r--fs/cifs/cifsproto.h9
-rw-r--r--fs/cifs/cifssmb.c5
-rw-r--r--fs/cifs/connect.c107
-rw-r--r--fs/cifs/dfs_cache.c6
-rw-r--r--fs/cifs/file.c20
-rw-r--r--fs/cifs/fs_context.c33
-rw-r--r--fs/cifs/fs_context.h4
-rw-r--r--fs/cifs/misc.c11
-rw-r--r--fs/cifs/readdir.c179
-rw-r--r--fs/cifs/sess.c33
-rw-r--r--fs/cifs/smb2inode.c7
-rw-r--r--fs/cifs/smb2misc.c12
-rw-r--r--fs/cifs/smb2ops.c32
-rw-r--r--fs/cifs/smb2pdu.c5
-rw-r--r--fs/cifs/smb2pdu.h22
-rw-r--r--fs/cifs/smb2transport.c7
-rw-r--r--fs/cifs/smbdirect.c4
-rw-r--r--fs/cifs/trace.h2
-rw-r--r--fs/cifs/transport.c8
-rw-r--r--fs/dax.c120
-rw-r--r--fs/exec.c6
-rw-r--r--fs/fat/fat.h14
-rw-r--r--fs/fat/fatent.c7
-rw-r--r--fs/fat/file.c14
-rw-r--r--fs/fat/inode.c19
-rw-r--r--fs/fat/misc.c78
-rw-r--r--fs/fat/namei_vfat.c4
-rw-r--r--fs/fcntl.c9
-rw-r--r--fs/fuse/dax.c4
-rw-r--r--fs/fuse/virtio_fs.c6
-rw-r--r--fs/hugetlbfs/inode.c42
-rw-r--r--fs/jfs/Makefile2
-rw-r--r--fs/jfs/inode.c18
-rw-r--r--fs/jfs/jfs_dmap.c71
-rw-r--r--fs/jfs/jfs_dtree.c298
-rw-r--r--fs/jfs/jfs_extent.c255
-rw-r--r--fs/jfs/jfs_logmgr.c8
-rw-r--r--fs/jfs/jfs_mount.c4
-rw-r--r--fs/jfs/jfs_txnmgr.c34
-rw-r--r--fs/jfs/jfs_xtree.c961
-rw-r--r--fs/jfs/jfs_xtree.h4
-rw-r--r--fs/locks.c61
-rw-r--r--fs/namei.c8
-rw-r--r--fs/nfs/direct.c23
-rw-r--r--fs/nfs/file.c18
-rw-r--r--fs/nfsd/filecache.c54
-rw-r--r--fs/nfsd/filecache.h2
-rw-r--r--fs/nfsd/nfs3proc.c141
-rw-r--r--fs/nfsd/nfs4proc.c264
-rw-r--r--fs/nfsd/nfs4state.c353
-rw-r--r--fs/nfsd/nfs4xdr.c2
-rw-r--r--fs/nfsd/nfscache.c2
-rw-r--r--fs/nfsd/nfsctl.c20
-rw-r--r--fs/nfsd/nfsd.h5
-rw-r--r--fs/nfsd/state.h31
-rw-r--r--fs/nfsd/trace.h34
-rw-r--r--fs/nfsd/vfs.c255
-rw-r--r--fs/nfsd/vfs.h14
-rw-r--r--fs/nfsd/xdr4.h1
-rw-r--r--fs/ntfs/file.c4
-rw-r--r--fs/ntfs3/super.c10
-rw-r--r--fs/ocfs2/dlm/dlmdebug.c12
-rw-r--r--fs/ocfs2/dlm/dlmunlock.c21
-rw-r--r--fs/ocfs2/dlmfs/userdlm.c17
-rw-r--r--fs/ocfs2/inode.c4
-rw-r--r--fs/ocfs2/journal.c33
-rw-r--r--fs/ocfs2/journal.h2
-rw-r--r--fs/ocfs2/quota_local.c10
-rw-r--r--fs/ocfs2/reservations.c4
-rw-r--r--fs/ocfs2/reservations.h9
-rw-r--r--fs/ocfs2/super.c180
-rw-r--r--fs/open.c51
-rw-r--r--fs/overlayfs/file.c13
-rw-r--r--fs/pipe.c33
-rw-r--r--fs/proc/base.c22
-rw-r--r--fs/proc/generic.c3
-rw-r--r--fs/proc/kcore.c14
-rw-r--r--fs/proc/meminfo.c7
-rw-r--r--fs/proc/proc_net.c3
-rw-r--r--fs/proc/proc_sysctl.c87
-rw-r--r--fs/proc/task_mmu.c9
-rw-r--r--fs/proc/vmcore.c130
-rw-r--r--fs/smbfs_common/smb2pdu.h108
-rw-r--r--fs/smbfs_common/smbfsctl.h6
-rw-r--r--fs/sysv/super.c4
-rw-r--r--fs/userfaultfd.c32
90 files changed, 2057 insertions, 2702 deletions
diff --git a/fs/Kconfig b/fs/Kconfig
index 30b751c7f11a..5976eb33535f 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -245,19 +245,27 @@ config HUGETLBFS
config HUGETLB_PAGE
def_bool HUGETLBFS
-config HUGETLB_PAGE_FREE_VMEMMAP
+#
+# Select this config option from the architecture Kconfig, if it is preferred
+# to enable the feature of minimizing overhead of struct page associated with
+# each HugeTLB page.
+#
+config ARCH_WANT_HUGETLB_PAGE_OPTIMIZE_VMEMMAP
+ bool
+
+config HUGETLB_PAGE_OPTIMIZE_VMEMMAP
def_bool HUGETLB_PAGE
- depends on X86_64
+ depends on ARCH_WANT_HUGETLB_PAGE_OPTIMIZE_VMEMMAP
depends on SPARSEMEM_VMEMMAP
-config HUGETLB_PAGE_FREE_VMEMMAP_DEFAULT_ON
- bool "Default freeing vmemmap pages of HugeTLB to on"
+config HUGETLB_PAGE_OPTIMIZE_VMEMMAP_DEFAULT_ON
+ bool "Default optimizing vmemmap pages of HugeTLB to on"
default n
- depends on HUGETLB_PAGE_FREE_VMEMMAP
+ depends on HUGETLB_PAGE_OPTIMIZE_VMEMMAP
help
- When using HUGETLB_PAGE_FREE_VMEMMAP, the freeing unused vmemmap
+ When using HUGETLB_PAGE_OPTIMIZE_VMEMMAP, the optimizing unused vmemmap
pages associated with each HugeTLB page is default off. Say Y here
- to enable freeing vmemmap pages of HugeTLB by default. It can then
+ to enable optimizing vmemmap pages of HugeTLB by default. It can then
be disabled on the command line via hugetlb_free_vmemmap=off.
config MEMFD_CREATE
diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c
index 9d334816eac0..1dd995efd5b8 100644
--- a/fs/cifs/cifs_debug.c
+++ b/fs/cifs/cifs_debug.c
@@ -116,7 +116,8 @@ static void cifs_debug_tcon(struct seq_file *m, struct cifs_tcon *tcon)
tcon->ses->server->ops->dump_share_caps(m, tcon);
if (tcon->use_witness)
seq_puts(m, " Witness");
-
+ if (tcon->broken_sparse_sup)
+ seq_puts(m, " nosparse");
if (tcon->need_reconnect)
seq_puts(m, "\tDISCONNECTED ");
seq_putc(m, '\n');
@@ -386,7 +387,7 @@ skip_rdma:
(ses->serverNOS == NULL)) {
seq_printf(m, "\n\t%d) Address: %s Uses: %d Capability: 0x%x\tSession Status: %d ",
i, ses->ip_addr, ses->ses_count,
- ses->capabilities, ses->status);
+ ses->capabilities, ses->ses_status);
if (ses->session_flags & SMB2_SESSION_FLAG_IS_GUEST)
seq_printf(m, "Guest ");
else if (ses->session_flags & SMB2_SESSION_FLAG_IS_NULL)
@@ -398,7 +399,7 @@ skip_rdma:
"\n\tSMB session status: %d ",
i, ses->ip_addr, ses->serverDomain,
ses->ses_count, ses->serverOS, ses->serverNOS,
- ses->capabilities, ses->status);
+ ses->capabilities, ses->ses_status);
}
seq_printf(m, "\n\tSecurity type: %s ",
@@ -418,6 +419,8 @@ skip_rdma:
spin_lock(&ses->chan_lock);
if (CIFS_CHAN_NEEDS_RECONNECT(ses, 0))
seq_puts(m, "\tPrimary channel: DISCONNECTED ");
+ if (CIFS_CHAN_IN_RECONNECT(ses, 0))
+ seq_puts(m, "\t[RECONNECTING] ");
if (ses->chan_count > 1) {
seq_printf(m, "\n\n\tExtra Channels: %zu ",
@@ -426,6 +429,8 @@ skip_rdma:
cifs_dump_channel(m, j, &ses->chans[j]);
if (CIFS_CHAN_NEEDS_RECONNECT(ses, j))
seq_puts(m, "\tDISCONNECTED ");
+ if (CIFS_CHAN_IN_RECONNECT(ses, j))
+ seq_puts(m, "\t[RECONNECTING] ");
}
}
spin_unlock(&ses->chan_lock);
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 2b1a1c029c75..f539a39d47f5 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -582,6 +582,8 @@ cifs_show_options(struct seq_file *s, struct dentry *root)
seq_puts(s, ",nocase");
if (tcon->nodelete)
seq_puts(s, ",nodelete");
+ if (cifs_sb->ctx->no_sparse)
+ seq_puts(s, ",nosparse");
if (tcon->local_lease)
seq_puts(s, ",locallease");
if (tcon->retry)
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 8de977c359b1..68da230c7f11 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -106,7 +106,7 @@
* CIFS vfs client Status information (based on what we know.)
*/
-/* associated with each tcp and smb session */
+/* associated with each connection */
enum statusEnum {
CifsNew = 0,
CifsGood,
@@ -114,8 +114,15 @@ enum statusEnum {
CifsNeedReconnect,
CifsNeedNegotiate,
CifsInNegotiate,
- CifsNeedSessSetup,
- CifsInSessSetup,
+};
+
+/* associated with each smb session */
+enum ses_status_enum {
+ SES_NEW = 0,
+ SES_GOOD,
+ SES_EXITING,
+ SES_NEED_RECON,
+ SES_IN_SETUP
};
/* associated with each tree connection to the server */
@@ -915,6 +922,7 @@ struct cifs_server_iface {
};
struct cifs_chan {
+ unsigned int in_reconnect : 1; /* if session setup in progress for this channel */
struct TCP_Server_Info *server;
__u8 signkey[SMB3_SIGN_KEY_SIZE];
};
@@ -930,7 +938,7 @@ struct cifs_ses {
struct mutex session_mutex;
struct TCP_Server_Info *server; /* pointer to server info */
int ses_count; /* reference counter */
- enum statusEnum status; /* updates protected by cifs_tcp_ses_lock */
+ enum ses_status_enum ses_status; /* updates protected by cifs_tcp_ses_lock */
unsigned overrideSecFlg; /* if non-zero override global sec flags */
char *serverOS; /* name of operating system underlying server */
char *serverNOS; /* name of network operating system of server */
@@ -944,7 +952,7 @@ struct cifs_ses {
and after mount option parsing we fill it */
char *domainName;
char *password;
- char *workstation_name;
+ char workstation_name[CIFS_MAX_WORKSTATION_LEN];
struct session_key auth_key;
struct ntlmssp_auth *ntlmssp; /* ciphertext, flags, server challenge */
enum securityEnum sectype; /* what security flavor was specified? */
@@ -977,12 +985,16 @@ struct cifs_ses {
#define CIFS_MAX_CHANNELS 16
#define CIFS_ALL_CHANNELS_SET(ses) \
((1UL << (ses)->chan_count) - 1)
+#define CIFS_ALL_CHANS_GOOD(ses) \
+ (!(ses)->chans_need_reconnect)
#define CIFS_ALL_CHANS_NEED_RECONNECT(ses) \
((ses)->chans_need_reconnect == CIFS_ALL_CHANNELS_SET(ses))
#define CIFS_SET_ALL_CHANS_NEED_RECONNECT(ses) \
((ses)->chans_need_reconnect = CIFS_ALL_CHANNELS_SET(ses))
#define CIFS_CHAN_NEEDS_RECONNECT(ses, index) \
test_bit((index), &(ses)->chans_need_reconnect)
+#define CIFS_CHAN_IN_RECONNECT(ses, index) \
+ ((ses)->chans[(index)].in_reconnect)
struct cifs_chan chans[CIFS_MAX_CHANNELS];
size_t chan_count;
@@ -1009,6 +1021,58 @@ cap_unix(struct cifs_ses *ses)
return ses->server->vals->cap_unix & ses->capabilities;
}
+/*
+ * common struct for holding inode info when searching for or updating an
+ * inode with new info
+ */
+
+#define CIFS_FATTR_DFS_REFERRAL 0x1
+#define CIFS_FATTR_DELETE_PENDING 0x2
+#define CIFS_FATTR_NEED_REVAL 0x4
+#define CIFS_FATTR_INO_COLLISION 0x8
+#define CIFS_FATTR_UNKNOWN_NLINK 0x10
+#define CIFS_FATTR_FAKE_ROOT_INO 0x20
+
+struct cifs_fattr {
+ u32 cf_flags;
+ u32 cf_cifsattrs;
+ u64 cf_uniqueid;
+ u64 cf_eof;
+ u64 cf_bytes;
+ u64 cf_createtime;
+ kuid_t cf_uid;
+ kgid_t cf_gid;
+ umode_t cf_mode;
+ dev_t cf_rdev;
+ unsigned int cf_nlink;
+ unsigned int cf_dtype;
+ struct timespec64 cf_atime;
+ struct timespec64 cf_mtime;
+ struct timespec64 cf_ctime;
+ u32 cf_cifstag;
+};
+
+struct cached_dirent {
+ struct list_head entry;
+ char *name;
+ int namelen;
+ loff_t pos;
+
+ struct cifs_fattr fattr;
+};
+
+struct cached_dirents {
+ bool is_valid:1;
+ bool is_failed:1;
+ struct dir_context *ctx; /*
+ * Only used to make sure we only take entries
+ * from a single context. Never dereferenced.
+ */
+ struct mutex de_mutex;
+ int pos; /* Expected ctx->pos */
+ struct list_head entries;
+};
+
struct cached_fid {
bool is_valid:1; /* Do we have a useable root fid */
bool file_all_info_is_valid:1;
@@ -1021,6 +1085,7 @@ struct cached_fid {
struct dentry *dentry;
struct work_struct lease_break;
struct smb2_file_all_info file_all_info;
+ struct cached_dirents dirents;
};
/*
@@ -1641,37 +1706,6 @@ struct file_list {
struct cifsFileInfo *cfile;
};
-/*
- * common struct for holding inode info when searching for or updating an
- * inode with new info
- */
-
-#define CIFS_FATTR_DFS_REFERRAL 0x1
-#define CIFS_FATTR_DELETE_PENDING 0x2
-#define CIFS_FATTR_NEED_REVAL 0x4
-#define CIFS_FATTR_INO_COLLISION 0x8
-#define CIFS_FATTR_UNKNOWN_NLINK 0x10
-#define CIFS_FATTR_FAKE_ROOT_INO 0x20
-
-struct cifs_fattr {
- u32 cf_flags;
- u32 cf_cifsattrs;
- u64 cf_uniqueid;
- u64 cf_eof;
- u64 cf_bytes;
- u64 cf_createtime;
- kuid_t cf_uid;
- kgid_t cf_gid;
- umode_t cf_mode;
- dev_t cf_rdev;
- unsigned int cf_nlink;
- unsigned int cf_dtype;
- struct timespec64 cf_atime;
- struct timespec64 cf_mtime;
- struct timespec64 cf_ctime;
- u32 cf_cifstag;
-};
-
static inline void free_dfs_info_param(struct dfs_info3_param *param)
{
if (param) {
@@ -1979,4 +2013,22 @@ static inline bool cifs_is_referral_server(struct cifs_tcon *tcon,
return is_tcon_dfs(tcon) || (ref && (ref->flags & DFSREF_REFERRAL_SERVER));
}
+static inline u64 cifs_flock_len(struct file_lock *fl)
+{
+ return fl->fl_end == OFFSET_MAX ? 0 : fl->fl_end - fl->fl_start + 1;
+}
+
+static inline size_t ntlmssp_workstation_name_size(const struct cifs_ses *ses)
+{
+ if (WARN_ON_ONCE(!ses || !ses->server))
+ return 0;
+ /*
+ * Make workstation name no more than 15 chars when using insecure dialects as some legacy
+ * servers do require it during NTLMSSP.
+ */
+ if (ses->server->dialect <= SMB20_PROT_ID)
+ return min_t(size_t, sizeof(ses->workstation_name), RFC1001_NAME_LEN_WITH_NULL);
+ return sizeof(ses->workstation_name);
+}
+
#endif /* _CIFS_GLOB_H */
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index 0df3b24a0bf4..3b7366ec03c7 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -619,6 +619,15 @@ unsigned int
cifs_ses_get_chan_index(struct cifs_ses *ses,
struct TCP_Server_Info *server);
void
+cifs_chan_set_in_reconnect(struct cifs_ses *ses,
+ struct TCP_Server_Info *server);
+void
+cifs_chan_clear_in_reconnect(struct cifs_ses *ses,
+ struct TCP_Server_Info *server);
+bool
+cifs_chan_in_reconnect(struct cifs_ses *ses,
+ struct TCP_Server_Info *server);
+void
cifs_chan_set_need_reconnect(struct cifs_ses *ses,
struct TCP_Server_Info *server);
void
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 47e927c4ff8d..6371b9eebdad 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -75,7 +75,7 @@ cifs_mark_open_files_invalid(struct cifs_tcon *tcon)
/* only send once per connect */
spin_lock(&cifs_tcp_ses_lock);
- if ((tcon->ses->status != CifsGood) || (tcon->status != TID_NEED_RECON)) {
+ if ((tcon->ses->ses_status != SES_GOOD) || (tcon->status != TID_NEED_RECON)) {
spin_unlock(&cifs_tcp_ses_lock);
return;
}
@@ -2558,7 +2558,8 @@ CIFSSMBPosixLock(const unsigned int xid, struct cifs_tcon *tcon,
pLockData->fl_start = le64_to_cpu(parm_data->start);
pLockData->fl_end = pLockData->fl_start +
- le64_to_cpu(parm_data->length) - 1;
+ (le64_to_cpu(parm_data->length) ?
+ le64_to_cpu(parm_data->length) - 1 : 0);
pLockData->fl_pid = -le32_to_cpu(parm_data->pid);
}
}
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 42e14f408856..53373a3649e1 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -241,7 +241,7 @@ cifs_mark_tcp_ses_conns_for_reconnect(struct TCP_Server_Info *server,
if (!mark_smb_session && !CIFS_ALL_CHANS_NEED_RECONNECT(ses))
goto next_session;
- ses->status = CifsNeedReconnect;
+ ses->ses_status = SES_NEED_RECON;
list_for_each_entry(tcon, &ses->tcon_list, tcon_list) {
tcon->need_reconnect = true;
@@ -1789,7 +1789,7 @@ cifs_setup_ipc(struct cifs_ses *ses, struct smb3_fs_context *ctx)
goto out;
}
- cifs_dbg(FYI, "IPC tcon rc = %d ipc tid = %d\n", rc, tcon->tid);
+ cifs_dbg(FYI, "IPC tcon rc=%d ipc tid=0x%x\n", rc, tcon->tid);
ses->tcon_ipc = tcon;
out:
@@ -1828,7 +1828,7 @@ cifs_find_smb_ses(struct TCP_Server_Info *server, struct smb3_fs_context *ctx)
spin_lock(&cifs_tcp_ses_lock);
list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) {
- if (ses->status == CifsExiting)
+ if (ses->ses_status == SES_EXITING)
continue;
if (!match_session(ses, ctx))
continue;
@@ -1848,7 +1848,7 @@ void cifs_put_smb_ses(struct cifs_ses *ses)
cifs_dbg(FYI, "%s: ses_count=%d\n", __func__, ses->ses_count);
spin_lock(&cifs_tcp_ses_lock);
- if (ses->status == CifsExiting) {
+ if (ses->ses_status == SES_EXITING) {
spin_unlock(&cifs_tcp_ses_lock);
return;
}
@@ -1864,13 +1864,13 @@ void cifs_put_smb_ses(struct cifs_ses *ses)
/* ses_count can never go negative */
WARN_ON(ses->ses_count < 0);
- if (ses->status == CifsGood)
- ses->status = CifsExiting;
+ if (ses->ses_status == SES_GOOD)
+ ses->ses_status = SES_EXITING;
spin_unlock(&cifs_tcp_ses_lock);
cifs_free_ipc(ses);
- if (ses->status == CifsExiting && server->ops->logoff) {
+ if (ses->ses_status == SES_EXITING && server->ops->logoff) {
xid = get_xid();
rc = server->ops->logoff(xid, ses);
if (rc)
@@ -2037,18 +2037,7 @@ cifs_set_cifscreds(struct smb3_fs_context *ctx, struct cifs_ses *ses)
}
}
- ctx->workstation_name = kstrdup(ses->workstation_name, GFP_KERNEL);
- if (!ctx->workstation_name) {
- cifs_dbg(FYI, "Unable to allocate memory for workstation_name\n");
- rc = -ENOMEM;
- kfree(ctx->username);
- ctx->username = NULL;
- kfree_sensitive(ctx->password);
- ctx->password = NULL;
- kfree(ctx->domainname);
- ctx->domainname = NULL;
- goto out_key_put;
- }
+ strscpy(ctx->workstation_name, ses->workstation_name, sizeof(ctx->workstation_name));
out_key_put:
up_read(&key->sem);
@@ -2090,7 +2079,7 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb3_fs_context *ctx)
ses = cifs_find_smb_ses(server, ctx);
if (ses) {
cifs_dbg(FYI, "Existing smb sess found (status=%d)\n",
- ses->status);
+ ses->ses_status);
spin_lock(&ses->chan_lock);
if (cifs_chan_needs_reconnect(ses, server)) {
@@ -2157,12 +2146,9 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb3_fs_context *ctx)
if (!ses->domainName)
goto get_ses_fail;
}
- if (ctx->workstation_name) {
- ses->workstation_name = kstrdup(ctx->workstation_name,
- GFP_KERNEL);
- if (!ses->workstation_name)
- goto get_ses_fail;
- }
+
+ strscpy(ses->workstation_name, ctx->workstation_name, sizeof(ses->workstation_name));
+
if (ctx->domainauto)
ses->domainAuto = ctx->domainauto;
ses->cred_uid = ctx->cred_uid;
@@ -2509,6 +2495,7 @@ cifs_get_tcon(struct cifs_ses *ses, struct smb3_fs_context *ctx)
*/
tcon->retry = ctx->retry;
tcon->nocase = ctx->nocase;
+ tcon->broken_sparse_sup = ctx->no_sparse;
if (ses->server->capabilities & SMB2_GLOBAL_CAP_DIRECTORY_LEASING)
tcon->nohandlecache = ctx->nohandlecache;
else
@@ -3420,8 +3407,9 @@ cifs_are_all_path_components_accessible(struct TCP_Server_Info *server,
}
/*
- * Check if path is remote (e.g. a DFS share). Return -EREMOTE if it is,
- * otherwise 0.
+ * Check if path is remote (i.e. a DFS share).
+ *
+ * Return -EREMOTE if it is, otherwise 0 or -errno.
*/
static int is_path_remote(struct mount_ctx *mnt_ctx)
{
@@ -3432,6 +3420,9 @@ static int is_path_remote(struct mount_ctx *mnt_ctx)
struct cifs_tcon *tcon = mnt_ctx->tcon;
struct smb3_fs_context *ctx = mnt_ctx->fs_ctx;
char *full_path;
+#ifdef CONFIG_CIFS_DFS_UPCALL
+ bool nodfs = cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_DFS;
+#endif
if (!server->ops->is_path_accessible)
return -EOPNOTSUPP;
@@ -3449,14 +3440,20 @@ static int is_path_remote(struct mount_ctx *mnt_ctx)
rc = server->ops->is_path_accessible(xid, tcon, cifs_sb,
full_path);
#ifdef CONFIG_CIFS_DFS_UPCALL
+ if (nodfs) {
+ if (rc == -EREMOTE)
+ rc = -EOPNOTSUPP;
+ goto out;
+ }
+
+ /* path *might* exist with non-ASCII characters in DFS root
+ * try again with full path (only if nodfs is not set) */
if (rc == -ENOENT && is_tcon_dfs(tcon))
rc = cifs_dfs_query_info_nonascii_quirk(xid, tcon, cifs_sb,
full_path);
#endif
- if (rc != 0 && rc != -EREMOTE) {
- kfree(full_path);
- return rc;
- }
+ if (rc != 0 && rc != -EREMOTE)
+ goto out;
if (rc != -EREMOTE) {
rc = cifs_are_all_path_components_accessible(server, xid, tcon,
@@ -3468,6 +3465,7 @@ static int is_path_remote(struct mount_ctx *mnt_ctx)
}
}
+out:
kfree(full_path);
return rc;
}
@@ -3703,6 +3701,7 @@ int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb3_fs_context *ctx)
if (!isdfs)
goto out;
+ /* proceed as DFS mount */
uuid_gen(&mnt_ctx.mount_id);
rc = connect_dfs_root(&mnt_ctx, &tl);
dfs_cache_free_tgts(&tl);
@@ -3960,7 +3959,7 @@ cifs_negotiate_protocol(const unsigned int xid, struct cifs_ses *ses,
if (rc == 0) {
spin_lock(&cifs_tcp_ses_lock);
if (server->tcpStatus == CifsInNegotiate)
- server->tcpStatus = CifsNeedSessSetup;
+ server->tcpStatus = CifsGood;
else
rc = -EHOSTDOWN;
spin_unlock(&cifs_tcp_ses_lock);
@@ -3982,20 +3981,31 @@ cifs_setup_session(const unsigned int xid, struct cifs_ses *ses,
int rc = -ENOSYS;
bool is_binding = false;
- /* only send once per connect */
+
spin_lock(&cifs_tcp_ses_lock);
- if ((server->tcpStatus != CifsNeedSessSetup) &&
- (ses->status == CifsGood)) {
+ if (ses->ses_status != SES_GOOD &&
+ ses->ses_status != SES_NEW &&
+ ses->ses_status != SES_NEED_RECON) {
spin_unlock(&cifs_tcp_ses_lock);
return 0;
}
- server->tcpStatus = CifsInSessSetup;
- spin_unlock(&cifs_tcp_ses_lock);
+ /* only send once per connect */
spin_lock(&ses->chan_lock);
+ if (CIFS_ALL_CHANS_GOOD(ses) ||
+ cifs_chan_in_reconnect(ses, server)) {
+ spin_unlock(&ses->chan_lock);
+ spin_unlock(&cifs_tcp_ses_lock);
+ return 0;
+ }
is_binding = !CIFS_ALL_CHANS_NEED_RECONNECT(ses);
+ cifs_chan_set_in_reconnect(ses, server);
spin_unlock(&ses->chan_lock);
+ if (!is_binding)
+ ses->ses_status = SES_IN_SETUP;
+ spin_unlock(&cifs_tcp_ses_lock);
+
if (!is_binding) {
ses->capabilities = server->capabilities;
if (!linuxExtEnabled)
@@ -4019,20 +4029,21 @@ cifs_setup_session(const unsigned int xid, struct cifs_ses *ses,
if (rc) {
cifs_server_dbg(VFS, "Send error in SessSetup = %d\n", rc);
spin_lock(&cifs_tcp_ses_lock);
- if (server->tcpStatus == CifsInSessSetup)
- server->tcpStatus = CifsNeedSessSetup;
+ if (ses->ses_status == SES_IN_SETUP)
+ ses->ses_status = SES_NEED_RECON;
+ spin_lock(&ses->chan_lock);
+ cifs_chan_clear_in_reconnect(ses, server);
+ spin_unlock(&ses->chan_lock);
spin_unlock(&cifs_tcp_ses_lock);
} else {
spin_lock(&cifs_tcp_ses_lock);
- if (server->tcpStatus == CifsInSessSetup)
- server->tcpStatus = CifsGood;
- /* Even if one channel is active, session is in good state */
- ses->status = CifsGood;
- spin_unlock(&cifs_tcp_ses_lock);
-
+ if (ses->ses_status == SES_IN_SETUP)
+ ses->ses_status = SES_GOOD;
spin_lock(&ses->chan_lock);
+ cifs_chan_clear_in_reconnect(ses, server);
cifs_chan_clear_need_reconnect(ses, server);
spin_unlock(&ses->chan_lock);
+ spin_unlock(&cifs_tcp_ses_lock);
}
return rc;
@@ -4497,7 +4508,7 @@ int cifs_tree_connect(const unsigned int xid, struct cifs_tcon *tcon, const stru
/* only send once per connect */
spin_lock(&cifs_tcp_ses_lock);
- if (tcon->ses->status != CifsGood ||
+ if (tcon->ses->ses_status != SES_GOOD ||
(tcon->status != TID_NEW &&
tcon->status != TID_NEED_TCON)) {
spin_unlock(&cifs_tcp_ses_lock);
@@ -4565,7 +4576,7 @@ int cifs_tree_connect(const unsigned int xid, struct cifs_tcon *tcon, const stru
/* only send once per connect */
spin_lock(&cifs_tcp_ses_lock);
- if (tcon->ses->status != CifsGood ||
+ if (tcon->ses->ses_status != SES_GOOD ||
(tcon->status != TID_NEW &&
tcon->status != TID_NEED_TCON)) {
spin_unlock(&cifs_tcp_ses_lock);
diff --git a/fs/cifs/dfs_cache.c b/fs/cifs/dfs_cache.c
index 956f8e5cf3e7..c5dd6f7305bd 100644
--- a/fs/cifs/dfs_cache.c
+++ b/fs/cifs/dfs_cache.c
@@ -654,7 +654,7 @@ static struct cache_entry *__lookup_cache_entry(const char *path, unsigned int h
return ce;
}
}
- return ERR_PTR(-EEXIST);
+ return ERR_PTR(-ENOENT);
}
/*
@@ -662,7 +662,7 @@ static struct cache_entry *__lookup_cache_entry(const char *path, unsigned int h
*
* Use whole path components in the match. Must be called with htable_rw_lock held.
*
- * Return ERR_PTR(-EEXIST) if the entry is not found.
+ * Return ERR_PTR(-ENOENT) if the entry is not found.
*/
static struct cache_entry *lookup_cache_entry(const char *path)
{
@@ -710,7 +710,7 @@ static struct cache_entry *lookup_cache_entry(const char *path)
while (e > s && *e != sep)
e--;
}
- return ERR_PTR(-EEXIST);
+ return ERR_PTR(-ENOENT);
}
/**
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 06003bb9cbe9..1618e0537d58 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -1395,7 +1395,7 @@ cifs_push_posix_locks(struct cifsFileInfo *cfile)
cifs_dbg(VFS, "Can't push all brlocks!\n");
break;
}
- length = 1 + flock->fl_end - flock->fl_start;
+ length = cifs_flock_len(flock);
if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
type = CIFS_RDLCK;
else
@@ -1511,7 +1511,7 @@ cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
bool wait_flag, bool posix_lck, unsigned int xid)
{
int rc = 0;
- __u64 length = 1 + flock->fl_end - flock->fl_start;
+ __u64 length = cifs_flock_len(flock);
struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
struct TCP_Server_Info *server = tcon->ses->server;
@@ -1609,7 +1609,7 @@ cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
struct cifsLockInfo *li, *tmp;
- __u64 length = 1 + flock->fl_end - flock->fl_start;
+ __u64 length = cifs_flock_len(flock);
struct list_head tmp_llist;
INIT_LIST_HEAD(&tmp_llist);
@@ -1713,7 +1713,7 @@ cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
unsigned int xid)
{
int rc = 0;
- __u64 length = 1 + flock->fl_end - flock->fl_start;
+ __u64 length = cifs_flock_len(flock);
struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
struct TCP_Server_Info *server = tcon->ses->server;
@@ -2777,8 +2777,11 @@ int cifs_flush(struct file *file, fl_owner_t id)
rc = filemap_write_and_wait(inode->i_mapping);
cifs_dbg(FYI, "Flush inode %p file %p rc %d\n", inode, file, rc);
- if (rc)
+ if (rc) {
+ /* get more nuanced writeback errors */
+ rc = filemap_check_wb_err(file->f_mapping, 0);
trace_cifs_flush_err(inode->i_ino, rc);
+ }
return rc;
}
@@ -4906,6 +4909,10 @@ static int cifs_swap_activate(struct swap_info_struct *sis,
cifs_dbg(FYI, "swap activate\n");
+ if (!swap_file->f_mapping->a_ops->swap_rw)
+ /* Cannot support swap */
+ return -EINVAL;
+
spin_lock(&inode->i_lock);
blocks = inode->i_blocks;
isize = inode->i_size;
@@ -4934,7 +4941,8 @@ static int cifs_swap_activate(struct swap_info_struct *sis,
* from reading or writing the file
*/
- return 0;
+ sis->flags |= SWP_FS_OPS;
+ return add_swap_extent(sis, 0, sis->max, 0);
}
static void cifs_swap_deactivate(struct file *file)
diff --git a/fs/cifs/fs_context.c b/fs/cifs/fs_context.c
index a92e9eec521f..8dc0d923ef6a 100644
--- a/fs/cifs/fs_context.c
+++ b/fs/cifs/fs_context.c
@@ -119,6 +119,7 @@ const struct fs_parameter_spec smb3_fs_parameters[] = {
fsparam_flag_no("persistenthandles", Opt_persistent),
fsparam_flag_no("resilienthandles", Opt_resilient),
fsparam_flag_no("tcpnodelay", Opt_tcp_nodelay),
+ fsparam_flag("nosparse", Opt_nosparse),
fsparam_flag("domainauto", Opt_domainauto),
fsparam_flag("rdma", Opt_rdma),
fsparam_flag("modesid", Opt_modesid),
@@ -312,7 +313,6 @@ smb3_fs_context_dup(struct smb3_fs_context *new_ctx, struct smb3_fs_context *ctx
new_ctx->password = NULL;
new_ctx->server_hostname = NULL;
new_ctx->domainname = NULL;
- new_ctx->workstation_name = NULL;
new_ctx->UNC = NULL;
new_ctx->source = NULL;
new_ctx->iocharset = NULL;
@@ -327,7 +327,6 @@ smb3_fs_context_dup(struct smb3_fs_context *new_ctx, struct smb3_fs_context *ctx
DUP_CTX_STR(UNC);
DUP_CTX_STR(source);
DUP_CTX_STR(domainname);
- DUP_CTX_STR(workstation_name);
DUP_CTX_STR(nodename);
DUP_CTX_STR(iocharset);
@@ -766,8 +765,7 @@ static int smb3_verify_reconfigure_ctx(struct fs_context *fc,
cifs_errorf(fc, "can not change domainname during remount\n");
return -EINVAL;
}
- if (new_ctx->workstation_name &&
- (!old_ctx->workstation_name || strcmp(new_ctx->workstation_name, old_ctx->workstation_name))) {
+ if (strcmp(new_ctx->workstation_name, old_ctx->workstation_name)) {
cifs_errorf(fc, "can not change workstation_name during remount\n");
return -EINVAL;
}
@@ -814,7 +812,6 @@ static int smb3_reconfigure(struct fs_context *fc)
STEAL_STRING(cifs_sb, ctx, username);
STEAL_STRING(cifs_sb, ctx, password);
STEAL_STRING(cifs_sb, ctx, domainname);
- STEAL_STRING(cifs_sb, ctx, workstation_name);
STEAL_STRING(cifs_sb, ctx, nodename);
STEAL_STRING(cifs_sb, ctx, iocharset);
@@ -943,6 +940,9 @@ static int smb3_fs_context_parse_param(struct fs_context *fc,
case Opt_nolease:
ctx->no_lease = 1;
break;
+ case Opt_nosparse:
+ ctx->no_sparse = 1;
+ break;
case Opt_nodelete:
ctx->nodelete = 1;
break;
@@ -1467,22 +1467,15 @@ static int smb3_fs_context_parse_param(struct fs_context *fc,
int smb3_init_fs_context(struct fs_context *fc)
{
- int rc;
struct smb3_fs_context *ctx;
char *nodename = utsname()->nodename;
int i;
ctx = kzalloc(sizeof(struct smb3_fs_context), GFP_KERNEL);
- if (unlikely(!ctx)) {
- rc = -ENOMEM;
- goto err_exit;
- }
+ if (unlikely(!ctx))
+ return -ENOMEM;
- ctx->workstation_name = kstrdup(nodename, GFP_KERNEL);
- if (unlikely(!ctx->workstation_name)) {
- rc = -ENOMEM;
- goto err_exit;
- }
+ strscpy(ctx->workstation_name, nodename, sizeof(ctx->workstation_name));
/*
* does not have to be perfect mapping since field is
@@ -1555,14 +1548,6 @@ int smb3_init_fs_context(struct fs_context *fc)
fc->fs_private = ctx;
fc->ops = &smb3_fs_context_ops;
return 0;
-
-err_exit:
- if (ctx) {
- kfree(ctx->workstation_name);
- kfree(ctx);
- }
-
- return rc;
}
void
@@ -1588,8 +1573,6 @@ smb3_cleanup_fs_context_contents(struct smb3_fs_context *ctx)
ctx->source = NULL;
kfree(ctx->domainname);
ctx->domainname = NULL;
- kfree(ctx->workstation_name);
- ctx->workstation_name = NULL;
kfree(ctx->nodename);
ctx->nodename = NULL;
kfree(ctx->iocharset);
diff --git a/fs/cifs/fs_context.h b/fs/cifs/fs_context.h
index e54090d9ef36..5f093cb7e9b9 100644
--- a/fs/cifs/fs_context.h
+++ b/fs/cifs/fs_context.h
@@ -62,6 +62,7 @@ enum cifs_param {
Opt_noblocksend,
Opt_noautotune,
Opt_nolease,
+ Opt_nosparse,
Opt_hard,
Opt_soft,
Opt_perm,
@@ -170,7 +171,7 @@ struct smb3_fs_context {
char *server_hostname;
char *UNC;
char *nodename;
- char *workstation_name;
+ char workstation_name[CIFS_MAX_WORKSTATION_LEN];
char *iocharset; /* local code page for mapping to and from Unicode */
char source_rfc1001_name[RFC1001_NAME_LEN_WITH_NULL]; /* clnt nb name */
char target_rfc1001_name[RFC1001_NAME_LEN_WITH_NULL]; /* srvr nb name */
@@ -222,6 +223,7 @@ struct smb3_fs_context {
bool noautotune:1;
bool nostrictsync:1; /* do not force expensive SMBflush on every sync */
bool no_lease:1; /* disable requesting leases */
+ bool no_sparse:1; /* do not attempt to set files sparse */
bool fsc:1; /* enable fscache */
bool mfsymlinks:1; /* use Minshall+French Symlinks */
bool multiuser:1;
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index afaf59c22193..35962a1a23b9 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -69,7 +69,7 @@ sesInfoAlloc(void)
ret_buf = kzalloc(sizeof(struct cifs_ses), GFP_KERNEL);
if (ret_buf) {
atomic_inc(&sesInfoAllocCount);
- ret_buf->status = CifsNew;
+ ret_buf->ses_status = SES_NEW;
++ret_buf->ses_count;
INIT_LIST_HEAD(&ret_buf->smb_ses_list);
INIT_LIST_HEAD(&ret_buf->tcon_list);
@@ -95,7 +95,6 @@ sesInfoFree(struct cifs_ses *buf_to_free)
kfree_sensitive(buf_to_free->password);
kfree(buf_to_free->user_name);
kfree(buf_to_free->domainName);
- kfree(buf_to_free->workstation_name);
kfree_sensitive(buf_to_free->auth_key.response);
kfree(buf_to_free->iface_list);
kfree_sensitive(buf_to_free);
@@ -114,6 +113,8 @@ tconInfoAlloc(void)
kfree(ret_buf);
return NULL;
}
+ INIT_LIST_HEAD(&ret_buf->crfid.dirents.entries);
+ mutex_init(&ret_buf->crfid.dirents.de_mutex);
atomic_inc(&tconInfoAllocCount);
ret_buf->status = TID_NEW;
@@ -1309,7 +1310,7 @@ int cifs_update_super_prepath(struct cifs_sb_info *cifs_sb, char *prefix)
* for "\<server>\<dfsname>\<linkpath>" DFS reference,
* where <dfsname> contains non-ASCII unicode symbols.
*
- * Check such DFS reference and emulate -ENOENT if it is actual.
+ * Check such DFS reference.
*/
int cifs_dfs_query_info_nonascii_quirk(const unsigned int xid,
struct cifs_tcon *tcon,
@@ -1341,10 +1342,6 @@ int cifs_dfs_query_info_nonascii_quirk(const unsigned int xid,
cifs_dbg(FYI, "DFS ref '%s' is found, emulate -EREMOTE\n",
dfspath);
rc = -EREMOTE;
- } else if (rc == -EEXIST) {
- cifs_dbg(FYI, "DFS ref '%s' is not found, emulate -ENOENT\n",
- dfspath);
- rc = -ENOENT;
} else {
cifs_dbg(FYI, "%s: dfs_cache_find returned %d\n", __func__, rc);
}
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index 1929e80c09ee..384cabdf47ca 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -840,9 +840,109 @@ find_cifs_entry(const unsigned int xid, struct cifs_tcon *tcon, loff_t pos,
return rc;
}
+static bool emit_cached_dirents(struct cached_dirents *cde,
+ struct dir_context *ctx)
+{
+ struct cached_dirent *dirent;
+ int rc;
+
+ list_for_each_entry(dirent, &cde->entries, entry) {
+ if (ctx->pos >= dirent->pos)
+ continue;
+ ctx->pos = dirent->pos;
+ rc = dir_emit(ctx, dirent->name, dirent->namelen,
+ dirent->fattr.cf_uniqueid,
+ dirent->fattr.cf_dtype);
+ if (!rc)
+ return rc;
+ }
+ return true;
+}
+
+static void update_cached_dirents_count(struct cached_dirents *cde,
+ struct dir_context *ctx)
+{
+ if (cde->ctx != ctx)
+ return;
+ if (cde->is_valid || cde->is_failed)
+ return;
+
+ cde->pos++;
+}
+
+static void finished_cached_dirents_count(struct cached_dirents *cde,
+ struct dir_context *ctx)
+{
+ if (cde->ctx != ctx)
+ return;
+ if (cde->is_valid || cde->is_failed)
+ return;
+ if (ctx->pos != cde->pos)
+ return;
+
+ cde->is_valid = 1;
+}
+
+static void add_cached_dirent(struct cached_dirents *cde,
+ struct dir_context *ctx,
+ const char *name, int namelen,
+ struct cifs_fattr *fattr)
+{
+ struct cached_dirent *de;
+
+ if (cde->ctx != ctx)
+ return;
+ if (cde->is_valid || cde->is_failed)
+ return;
+ if (ctx->pos != cde->pos) {
+ cde->is_failed = 1;
+ return;
+ }
+ de = kzalloc(sizeof(*de), GFP_ATOMIC);
+ if (de == NULL) {
+ cde->is_failed = 1;
+ return;
+ }
+ de->namelen = namelen;
+ de->name = kstrndup(name, namelen, GFP_ATOMIC);
+ if (de->name == NULL) {
+ kfree(de);
+ cde->is_failed = 1;
+ return;
+ }
+ de->pos = ctx->pos;
+
+ memcpy(&de->fattr, fattr, sizeof(struct cifs_fattr));
+
+ list_add_tail(&de->entry, &cde->entries);
+}
+
+static bool cifs_dir_emit(struct dir_context *ctx,
+ const char *name, int namelen,
+ struct cifs_fattr *fattr,
+ struct cached_fid *cfid)
+{
+ bool rc;
+ ino_t ino = cifs_uniqueid_to_ino_t(fattr->cf_uniqueid);
+
+ rc = dir_emit(ctx, name, namelen, ino, fattr->cf_dtype);
+ if (!rc)
+ return rc;
+
+ if (cfid) {
+ mutex_lock(&cfid->dirents.de_mutex);
+ add_cached_dirent(&cfid->dirents, ctx, name, namelen,
+ fattr);
+ mutex_unlock(&cfid->dirents.de_mutex);
+ }
+
+ return rc;
+}
+
static int cifs_filldir(char *find_entry, struct file *file,
- struct dir_context *ctx,
- char *scratch_buf, unsigned int max_len)
+ struct dir_context *ctx,
+ char *scratch_buf, unsigned int max_len,
+ struct cached_fid *cfid)
{
struct cifsFileInfo *file_info = file->private_data;
struct super_block *sb = file_inode(file)->i_sb;
@@ -851,7 +951,6 @@ static int cifs_filldir(char *find_entry, struct file *file,
struct cifs_fattr fattr;
struct qstr name;
int rc = 0;
- ino_t ino;
rc = cifs_fill_dirent(&de, find_entry, file_info->srch_inf.info_level,
file_info->srch_inf.unicode);
@@ -931,8 +1030,8 @@ static int cifs_filldir(char *find_entry, struct file *file,
cifs_prime_dcache(file_dentry(file), &name, &fattr);
- ino = cifs_uniqueid_to_ino_t(fattr.cf_uniqueid);
- return !dir_emit(ctx, name.name, name.len, ino, fattr.cf_dtype);
+ return !cifs_dir_emit(ctx, name.name, name.len,
+ &fattr, cfid);
}
@@ -941,8 +1040,9 @@ int cifs_readdir(struct file *file, struct dir_context *ctx)
int rc = 0;
unsigned int xid;
int i;
+ struct tcon_link *tlink = NULL;
struct cifs_tcon *tcon;
- struct cifsFileInfo *cifsFile = NULL;
+ struct cifsFileInfo *cifsFile;
char *current_entry;
int num_to_fill = 0;
char *tmp_buf = NULL;
@@ -950,6 +1050,8 @@ int cifs_readdir(struct file *file, struct dir_context *ctx)
unsigned int max_len;
const char *full_path;
void *page = alloc_dentry_path();
+ struct cached_fid *cfid = NULL;
+ struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
xid = get_xid();
@@ -959,6 +1061,54 @@ int cifs_readdir(struct file *file, struct dir_context *ctx)
goto rddir2_exit;
}
+ if (file->private_data == NULL) {
+ tlink = cifs_sb_tlink(cifs_sb);
+ if (IS_ERR(tlink))
+ goto cache_not_found;
+ tcon = tlink_tcon(tlink);
+ } else {
+ cifsFile = file->private_data;
+ tcon = tlink_tcon(cifsFile->tlink);
+ }
+
+ rc = open_cached_dir(xid, tcon, full_path, cifs_sb, &cfid);
+ cifs_put_tlink(tlink);
+ if (rc)
+ goto cache_not_found;
+
+ mutex_lock(&cfid->dirents.de_mutex);
+ /*
+ * If this was reading from the start of the directory
+ * we need to initialize scanning and storing the
+ * directory content.
+ */
+ if (ctx->pos == 0 && cfid->dirents.ctx == NULL) {
+ cfid->dirents.ctx = ctx;
+ cfid->dirents.pos = 2;
+ }
+ /*
+ * If we already have the entire directory cached then
+ * we can just serve the cache.
+ */
+ if (cfid->dirents.is_valid) {
+ if (!dir_emit_dots(file, ctx)) {
+ mutex_unlock(&cfid->dirents.de_mutex);
+ goto rddir2_exit;
+ }
+ emit_cached_dirents(&cfid->dirents, ctx);
+ mutex_unlock(&cfid->dirents.de_mutex);
+ goto rddir2_exit;
+ }
+ mutex_unlock(&cfid->dirents.de_mutex);
+
+ /* Drop the cache while calling initiate_cifs_search and
+ * find_cifs_entry in case there will be reconnects during
+ * query_directory.
+ */
+ close_cached_dir(cfid);
+ cfid = NULL;
+
+ cache_not_found:
/*
* Ensure FindFirst doesn't fail before doing filldir() for '.' and
* '..'. Otherwise we won't be able to notify VFS in case of failure.
@@ -977,7 +1127,6 @@ int cifs_readdir(struct file *file, struct dir_context *ctx)
is in current search buffer?
if it before then restart search
if after then keep searching till find it */
-
cifsFile = file->private_data;
if (cifsFile->srch_inf.endOfSearch) {
if (cifsFile->srch_inf.emptyDir) {
@@ -993,12 +1142,18 @@ int cifs_readdir(struct file *file, struct dir_context *ctx)
tcon = tlink_tcon(cifsFile->tlink);
rc = find_cifs_entry(xid, tcon, ctx->pos, file, full_path,
&current_entry, &num_to_fill);
+ open_cached_dir(xid, tcon, full_path, cifs_sb, &cfid);
if (rc) {
cifs_dbg(FYI, "fce error %d\n", rc);
goto rddir2_exit;
} else if (current_entry != NULL) {
cifs_dbg(FYI, "entry %lld found\n", ctx->pos);
} else {
+ if (cfid) {
+ mutex_lock(&cfid->dirents.de_mutex);
+ finished_cached_dirents_count(&cfid->dirents, ctx);
+ mutex_unlock(&cfid->dirents.de_mutex);
+ }
cifs_dbg(FYI, "Could not find entry\n");
goto rddir2_exit;
}
@@ -1028,7 +1183,7 @@ int cifs_readdir(struct file *file, struct dir_context *ctx)
*/
*tmp_buf = 0;
rc = cifs_filldir(current_entry, file, ctx,
- tmp_buf, max_len);
+ tmp_buf, max_len, cfid);
if (rc) {
if (rc > 0)
rc = 0;
@@ -1036,6 +1191,12 @@ int cifs_readdir(struct file *file, struct dir_context *ctx)
}
ctx->pos++;
+ if (cfid) {
+ mutex_lock(&cfid->dirents.de_mutex);
+ update_cached_dirents_count(&cfid->dirents, ctx);
+ mutex_unlock(&cfid->dirents.de_mutex);
+ }
+
if (ctx->pos ==
cifsFile->srch_inf.index_of_last_entry) {
cifs_dbg(FYI, "last entry in buf at pos %lld %s\n",
@@ -1050,6 +1211,8 @@ int cifs_readdir(struct file *file, struct dir_context *ctx)
kfree(tmp_buf);
rddir2_exit:
+ if (cfid)
+ close_cached_dir(cfid);
free_dentry_path(page);
free_xid(xid);
return rc;
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index 32f478c7a66d..c6214cfc575f 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -86,6 +86,33 @@ cifs_ses_get_chan_index(struct cifs_ses *ses,
}
void
+cifs_chan_set_in_reconnect(struct cifs_ses *ses,
+ struct TCP_Server_Info *server)
+{
+ unsigned int chan_index = cifs_ses_get_chan_index(ses, server);
+
+ ses->chans[chan_index].in_reconnect = true;
+}
+
+void
+cifs_chan_clear_in_reconnect(struct cifs_ses *ses,
+ struct TCP_Server_Info *server)
+{
+ unsigned int chan_index = cifs_ses_get_chan_index(ses, server);
+
+ ses->chans[chan_index].in_reconnect = false;
+}
+
+bool
+cifs_chan_in_reconnect(struct cifs_ses *ses,
+ struct TCP_Server_Info *server)
+{
+ unsigned int chan_index = cifs_ses_get_chan_index(ses, server);
+
+ return CIFS_CHAN_IN_RECONNECT(ses, chan_index);
+}
+
+void
cifs_chan_set_need_reconnect(struct cifs_ses *ses,
struct TCP_Server_Info *server)
{
@@ -714,9 +741,9 @@ static int size_of_ntlmssp_blob(struct cifs_ses *ses, int base_size)
else
sz += sizeof(__le16);
- if (ses->workstation_name)
+ if (ses->workstation_name[0])
sz += sizeof(__le16) * strnlen(ses->workstation_name,
- CIFS_MAX_WORKSTATION_LEN);
+ ntlmssp_workstation_name_size(ses));
else
sz += sizeof(__le16);
@@ -960,7 +987,7 @@ int build_ntlmssp_auth_blob(unsigned char **pbuffer,
cifs_security_buffer_from_str(&sec_blob->WorkstationName,
ses->workstation_name,
- CIFS_MAX_WORKSTATION_LEN,
+ ntlmssp_workstation_name_size(ses),
*pbuffer, &tmp,
nls_cp);
diff --git a/fs/cifs/smb2inode.c b/fs/cifs/smb2inode.c
index fe5bfa245fa7..8571a459c710 100644
--- a/fs/cifs/smb2inode.c
+++ b/fs/cifs/smb2inode.c
@@ -362,8 +362,6 @@ smb2_compound_op(const unsigned int xid, struct cifs_tcon *tcon,
num_rqst++;
if (cfile) {
- cifsFileInfo_put(cfile);
- cfile = NULL;
rc = compound_send_recv(xid, ses, server,
flags, num_rqst - 2,
&rqst[1], &resp_buftype[1],
@@ -514,8 +512,11 @@ smb2_query_path_info(const unsigned int xid, struct cifs_tcon *tcon,
if (smb2_data == NULL)
return -ENOMEM;
+ if (strcmp(full_path, ""))
+ rc = -ENOENT;
+ else
+ rc = open_cached_dir(xid, tcon, full_path, cifs_sb, &cfid);
/* If it is a root and its handle is cached then use it */
- rc = open_cached_dir(xid, tcon, full_path, cifs_sb, &cfid);
if (!rc) {
if (tcon->crfid.file_all_info_is_valid) {
move_smb2_info_to_cifs(data,
diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c
index 3fe47a88f47d..17813c3d0c6e 100644
--- a/fs/cifs/smb2misc.c
+++ b/fs/cifs/smb2misc.c
@@ -656,6 +656,12 @@ smb2_is_valid_lease_break(char *buffer)
}
spin_unlock(&cifs_tcp_ses_lock);
cifs_dbg(FYI, "Can not process lease break - no lease matched\n");
+ trace_smb3_lease_not_found(le32_to_cpu(rsp->CurrentLeaseState),
+ le32_to_cpu(rsp->hdr.Id.SyncId.TreeId),
+ le64_to_cpu(rsp->hdr.SessionId),
+ *((u64 *)rsp->LeaseKey),
+ *((u64 *)&rsp->LeaseKey[8]));
+
return false;
}
@@ -726,6 +732,10 @@ smb2_is_valid_oplock_break(char *buffer, struct TCP_Server_Info *server)
}
spin_unlock(&cifs_tcp_ses_lock);
cifs_dbg(FYI, "No file id matched, oplock break ignored\n");
+ trace_smb3_oplock_not_found(0 /* no xid */, rsp->PersistentFid,
+ le32_to_cpu(rsp->hdr.Id.SyncId.TreeId),
+ le64_to_cpu(rsp->hdr.SessionId));
+
return true;
}
@@ -798,7 +808,7 @@ smb2_handle_cancelled_close(struct cifs_tcon *tcon, __u64 persistent_fid,
if (tcon->ses)
server = tcon->ses->server;
- cifs_server_dbg(FYI, "tid=%u: tcon is closing, skipping async close retry of fid %llu %llu\n",
+ cifs_server_dbg(FYI, "tid=0x%x: tcon is closing, skipping async close retry of fid %llu %llu\n",
tcon->tid, persistent_fid, volatile_fid);
return 0;
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index d6aaeff4a30a..d7ade739cde1 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -699,6 +699,7 @@ smb2_close_cached_fid(struct kref *ref)
{
struct cached_fid *cfid = container_of(ref, struct cached_fid,
refcount);
+ struct cached_dirent *dirent, *q;
if (cfid->is_valid) {
cifs_dbg(FYI, "clear cached root file handle\n");
@@ -718,6 +719,21 @@ smb2_close_cached_fid(struct kref *ref)
dput(cfid->dentry);
cfid->dentry = NULL;
}
+ /*
+ * Delete all cached dirent names
+ */
+ mutex_lock(&cfid->dirents.de_mutex);
+ list_for_each_entry_safe(dirent, q, &cfid->dirents.entries, entry) {
+ list_del(&dirent->entry);
+ kfree(dirent->name);
+ kfree(dirent);
+ }
+ cfid->dirents.is_valid = 0;
+ cfid->dirents.is_failed = 0;
+ cfid->dirents.ctx = NULL;
+ cfid->dirents.pos = 0;
+ mutex_unlock(&cfid->dirents.de_mutex);
+
}
void close_cached_dir(struct cached_fid *cfid)
@@ -754,14 +770,15 @@ smb2_cached_lease_break(struct work_struct *work)
/*
* Open the and cache a directory handle.
* Only supported for the root handle.
+ * If error then *cfid is not initialized.
*/
int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon,
const char *path,
struct cifs_sb_info *cifs_sb,
struct cached_fid **cfid)
{
- struct cifs_ses *ses = tcon->ses;
- struct TCP_Server_Info *server = ses->server;
+ struct cifs_ses *ses;
+ struct TCP_Server_Info *server;
struct cifs_open_parms oparms;
struct smb2_create_rsp *o_rsp = NULL;
struct smb2_query_info_rsp *qi_rsp = NULL;
@@ -776,9 +793,13 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon,
struct cifs_fid *pfid;
struct dentry *dentry;
- if (tcon->nohandlecache)
+ if (tcon == NULL || tcon->nohandlecache ||
+ is_smb1_server(tcon->ses->server))
return -ENOTSUPP;
+ ses = tcon->ses;
+ server = ses->server;
+
if (cifs_sb->root == NULL)
return -ENOENT;
@@ -824,7 +845,7 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon,
rqst[0].rq_nvec = SMB2_CREATE_IOV_SIZE;
oparms.tcon = tcon;
- oparms.create_options = cifs_create_options(cifs_sb, 0);
+ oparms.create_options = cifs_create_options(cifs_sb, CREATE_NOT_FILE);
oparms.desired_access = FILE_READ_ATTRIBUTES;
oparms.disposition = FILE_OPEN;
oparms.fid = pfid;
@@ -2695,7 +2716,8 @@ smb2_query_info_compound(const unsigned int xid, struct cifs_tcon *tcon,
resp_buftype[0] = resp_buftype[1] = resp_buftype[2] = CIFS_NO_BUFFER;
memset(rsp_iov, 0, sizeof(rsp_iov));
- rc = open_cached_dir(xid, tcon, path, cifs_sb, &cfid);
+ if (!strcmp(path, ""))
+ open_cached_dir(xid, tcon, path, cifs_sb, &cfid); /* cfid null if open dir failed */
memset(&open_iov, 0, sizeof(open_iov));
rqst[0].rq_iov = open_iov;
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 1b7ad0c09566..084be3a90198 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -179,7 +179,7 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon,
}
}
spin_unlock(&cifs_tcp_ses_lock);
- if ((!tcon->ses) || (tcon->ses->status == CifsExiting) ||
+ if ((!tcon->ses) || (tcon->ses->ses_status == SES_EXITING) ||
(!tcon->ses->server) || !server)
return -EIO;
@@ -3899,7 +3899,8 @@ SMB2_echo(struct TCP_Server_Info *server)
cifs_dbg(FYI, "In echo request for conn_id %lld\n", server->conn_id);
spin_lock(&cifs_tcp_ses_lock);
- if (server->tcpStatus == CifsNeedNegotiate) {
+ if (server->ops->need_neg &&
+ server->ops->need_neg(server)) {
spin_unlock(&cifs_tcp_ses_lock);
/* No need to send echo on newly established connections */
mod_delayed_work(cifsiod_wq, &server->reconnect, 0);
diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h
index d8c4388b190d..f57881b8464f 100644
--- a/fs/cifs/smb2pdu.h
+++ b/fs/cifs/smb2pdu.h
@@ -260,28 +260,6 @@ struct get_retrieval_pointers_refcount_rsp {
struct smb3_extents extents[];
} __packed;
-struct fsctl_set_integrity_information_req {
- __le16 ChecksumAlgorithm;
- __le16 Reserved;
- __le32 Flags;
-} __packed;
-
-struct fsctl_get_integrity_information_rsp {
- __le16 ChecksumAlgorithm;
- __le16 Reserved;
- __le32 Flags;
- __le32 ChecksumChunkSizeInBytes;
- __le32 ClusterSizeInBytes;
-} __packed;
-
-/* Integrity ChecksumAlgorithm choices for above */
-#define CHECKSUM_TYPE_NONE 0x0000
-#define CHECKSUM_TYPE_CRC64 0x0002
-#define CHECKSUM_TYPE_UNCHANGED 0xFFFF /* set only */
-
-/* Integrity flags for above */
-#define FSCTL_INTEGRITY_FLAG_CHECKSUM_ENFORCEMENT_OFF 0x00000001
-
/* See MS-DFSC 2.2.2 */
struct fsctl_get_dfs_referral_req {
__le16 MaxReferralLevel;
diff --git a/fs/cifs/smb2transport.c b/fs/cifs/smb2transport.c
index 2af79093b78b..55e79f6ee78d 100644
--- a/fs/cifs/smb2transport.c
+++ b/fs/cifs/smb2transport.c
@@ -641,7 +641,8 @@ smb2_sign_rqst(struct smb_rqst *rqst, struct TCP_Server_Info *server)
if (!is_signed)
return 0;
spin_lock(&cifs_tcp_ses_lock);
- if (server->tcpStatus == CifsNeedNegotiate) {
+ if (server->ops->need_neg &&
+ server->ops->need_neg(server)) {
spin_unlock(&cifs_tcp_ses_lock);
return 0;
}
@@ -779,7 +780,7 @@ smb2_get_mid_entry(struct cifs_ses *ses, struct TCP_Server_Info *server,
return -EAGAIN;
}
- if (ses->status == CifsNew) {
+ if (ses->ses_status == SES_NEW) {
if ((shdr->Command != SMB2_SESSION_SETUP) &&
(shdr->Command != SMB2_NEGOTIATE)) {
spin_unlock(&cifs_tcp_ses_lock);
@@ -788,7 +789,7 @@ smb2_get_mid_entry(struct cifs_ses *ses, struct TCP_Server_Info *server,
/* else ok - we are setting up session */
}
- if (ses->status == CifsExiting) {
+ if (ses->ses_status == SES_EXITING) {
if (shdr->Command != SMB2_LOGOFF) {
spin_unlock(&cifs_tcp_ses_lock);
return -EAGAIN;
diff --git a/fs/cifs/smbdirect.c b/fs/cifs/smbdirect.c
index 31ef64eb7fbb..c3278db1cade 100644
--- a/fs/cifs/smbdirect.c
+++ b/fs/cifs/smbdirect.c
@@ -649,7 +649,7 @@ static int smbd_ia_open(
smbd_max_frmr_depth,
info->id->device->attrs.max_fast_reg_page_list_len);
info->mr_type = IB_MR_TYPE_MEM_REG;
- if (info->id->device->attrs.device_cap_flags & IB_DEVICE_SG_GAPS_REG)
+ if (info->id->device->attrs.kernel_cap_flags & IBK_SG_GAPS_REG)
info->mr_type = IB_MR_TYPE_SG_GAPS;
info->pd = ib_alloc_pd(info->id->device, 0);
@@ -1350,7 +1350,7 @@ void smbd_destroy(struct TCP_Server_Info *server)
wait_event(info->wait_send_pending,
atomic_read(&info->send_pending) == 0);
- /* It's not posssible for upper layer to get to reassembly */
+ /* It's not possible for upper layer to get to reassembly */
log_rdma_event(INFO, "drain the reassembly queue\n");
do {
spin_lock_irqsave(&info->reassembly_queue_lock, flags);
diff --git a/fs/cifs/trace.h b/fs/cifs/trace.h
index bc279616c513..2be5e0c8564d 100644
--- a/fs/cifs/trace.h
+++ b/fs/cifs/trace.h
@@ -158,6 +158,7 @@ DEFINE_SMB3_FD_EVENT(flush_enter);
DEFINE_SMB3_FD_EVENT(flush_done);
DEFINE_SMB3_FD_EVENT(close_enter);
DEFINE_SMB3_FD_EVENT(close_done);
+DEFINE_SMB3_FD_EVENT(oplock_not_found);
DECLARE_EVENT_CLASS(smb3_fd_err_class,
TP_PROTO(unsigned int xid,
@@ -814,6 +815,7 @@ DEFINE_EVENT(smb3_lease_done_class, smb3_##name, \
TP_ARGS(lease_state, tid, sesid, lease_key_low, lease_key_high))
DEFINE_SMB3_LEASE_DONE_EVENT(lease_done);
+DEFINE_SMB3_LEASE_DONE_EVENT(lease_not_found);
DECLARE_EVENT_CLASS(smb3_lease_err_class,
TP_PROTO(__u32 lease_state,
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index c667e6ddfe2f..05eca41e3b1e 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -726,7 +726,7 @@ static int allocate_mid(struct cifs_ses *ses, struct smb_hdr *in_buf,
struct mid_q_entry **ppmidQ)
{
spin_lock(&cifs_tcp_ses_lock);
- if (ses->status == CifsNew) {
+ if (ses->ses_status == SES_NEW) {
if ((in_buf->Command != SMB_COM_SESSION_SETUP_ANDX) &&
(in_buf->Command != SMB_COM_NEGOTIATE)) {
spin_unlock(&cifs_tcp_ses_lock);
@@ -735,7 +735,7 @@ static int allocate_mid(struct cifs_ses *ses, struct smb_hdr *in_buf,
/* else ok - we are setting up session */
}
- if (ses->status == CifsExiting) {
+ if (ses->ses_status == SES_EXITING) {
/* check if SMB session is bad because we are setting it up */
if (in_buf->Command != SMB_COM_LOGOFF_ANDX) {
spin_unlock(&cifs_tcp_ses_lock);
@@ -1187,7 +1187,7 @@ compound_send_recv(const unsigned int xid, struct cifs_ses *ses,
* Compounding is never used during session establish.
*/
spin_lock(&cifs_tcp_ses_lock);
- if ((ses->status == CifsNew) || (optype & CIFS_NEG_OP) || (optype & CIFS_SESS_OP)) {
+ if ((ses->ses_status == SES_NEW) || (optype & CIFS_NEG_OP) || (optype & CIFS_SESS_OP)) {
spin_unlock(&cifs_tcp_ses_lock);
mutex_lock(&server->srv_mutex);
@@ -1260,7 +1260,7 @@ compound_send_recv(const unsigned int xid, struct cifs_ses *ses,
* Compounding is never used during session establish.
*/
spin_lock(&cifs_tcp_ses_lock);
- if ((ses->status == CifsNew) || (optype & CIFS_NEG_OP) || (optype & CIFS_SESS_OP)) {
+ if ((ses->ses_status == SES_NEW) || (optype & CIFS_NEG_OP) || (optype & CIFS_SESS_OP)) {
struct kvec iov = {
.iov_base = resp_iov[0].iov_base,
.iov_len = resp_iov[0].iov_len
diff --git a/fs/dax.c b/fs/dax.c
index 67a08a32fccb..4155a6107fa1 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -24,6 +24,7 @@
#include <linux/sizes.h>
#include <linux/mmu_notifier.h>
#include <linux/iomap.h>
+#include <linux/rmap.h>
#include <asm/pgalloc.h>
#define CREATE_TRACE_POINTS
@@ -721,7 +722,8 @@ static int copy_cow_page_dax(struct vm_fault *vmf, const struct iomap_iter *iter
int id;
id = dax_read_lock();
- rc = dax_direct_access(iter->iomap.dax_dev, pgoff, 1, &kaddr, NULL);
+ rc = dax_direct_access(iter->iomap.dax_dev, pgoff, 1, DAX_ACCESS,
+ &kaddr, NULL);
if (rc < 0) {
dax_read_unlock(id);
return rc;
@@ -789,95 +791,12 @@ static void *dax_insert_entry(struct xa_state *xas,
return entry;
}
-static inline
-unsigned long pgoff_address(pgoff_t pgoff, struct vm_area_struct *vma)
-{
- unsigned long address;
-
- address = vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
- VM_BUG_ON_VMA(address < vma->vm_start || address >= vma->vm_end, vma);
- return address;
-}
-
-/* Walk all mappings of a given index of a file and writeprotect them */
-static void dax_entry_mkclean(struct address_space *mapping, pgoff_t index,
- unsigned long pfn)
-{
- struct vm_area_struct *vma;
- pte_t pte, *ptep = NULL;
- pmd_t *pmdp = NULL;
- spinlock_t *ptl;
-
- i_mmap_lock_read(mapping);
- vma_interval_tree_foreach(vma, &mapping->i_mmap, index, index) {
- struct mmu_notifier_range range;
- unsigned long address;
-
- cond_resched();
-
- if (!(vma->vm_flags & VM_SHARED))
- continue;
-
- address = pgoff_address(index, vma);
-
- /*
- * follow_invalidate_pte() will use the range to call
- * mmu_notifier_invalidate_range_start() on our behalf before
- * taking any lock.
- */
- if (follow_invalidate_pte(vma->vm_mm, address, &range, &ptep,
- &pmdp, &ptl))
- continue;
-
- /*
- * No need to call mmu_notifier_invalidate_range() as we are
- * downgrading page table protection not changing it to point
- * to a new page.
- *
- * See Documentation/vm/mmu_notifier.rst
- */
- if (pmdp) {
-#ifdef CONFIG_FS_DAX_PMD
- pmd_t pmd;
-
- if (pfn != pmd_pfn(*pmdp))
- goto unlock_pmd;
- if (!pmd_dirty(*pmdp) && !pmd_write(*pmdp))
- goto unlock_pmd;
-
- flush_cache_page(vma, address, pfn);
- pmd = pmdp_invalidate(vma, address, pmdp);
- pmd = pmd_wrprotect(pmd);
- pmd = pmd_mkclean(pmd);
- set_pmd_at(vma->vm_mm, address, pmdp, pmd);
-unlock_pmd:
-#endif
- spin_unlock(ptl);
- } else {
- if (pfn != pte_pfn(*ptep))
- goto unlock_pte;
- if (!pte_dirty(*ptep) && !pte_write(*ptep))
- goto unlock_pte;
-
- flush_cache_page(vma, address, pfn);
- pte = ptep_clear_flush(vma, address, ptep);
- pte = pte_wrprotect(pte);
- pte = pte_mkclean(pte);
- set_pte_at(vma->vm_mm, address, ptep, pte);
-unlock_pte:
- pte_unmap_unlock(ptep, ptl);
- }
-
- mmu_notifier_invalidate_range_end(&range);
- }
- i_mmap_unlock_read(mapping);
-}
-
static int dax_writeback_one(struct xa_state *xas, struct dax_device *dax_dev,
struct address_space *mapping, void *entry)
{
- unsigned long pfn, index, count;
+ unsigned long pfn, index, count, end;
long ret = 0;
+ struct vm_area_struct *vma;
/*
* A page got tagged dirty in DAX mapping? Something is seriously
@@ -935,8 +854,16 @@ static int dax_writeback_one(struct xa_state *xas, struct dax_device *dax_dev,
pfn = dax_to_pfn(entry);
count = 1UL << dax_entry_order(entry);
index = xas->xa_index & ~(count - 1);
+ end = index + count - 1;
+
+ /* Walk all mappings of a given index of a file and writeprotect them */
+ i_mmap_lock_read(mapping);
+ vma_interval_tree_foreach(vma, &mapping->i_mmap, index, end) {
+ pfn_mkclean_range(pfn, count, index, vma);
+ cond_resched();
+ }
+ i_mmap_unlock_read(mapping);
- dax_entry_mkclean(mapping, index, pfn);
dax_flush(dax_dev, page_address(pfn_to_page(pfn)), count * PAGE_SIZE);
/*
* After we have flushed the cache, we can clear the dirty tag. There
@@ -1013,7 +940,7 @@ static int dax_iomap_pfn(const struct iomap *iomap, loff_t pos, size_t size,
id = dax_read_lock();
length = dax_direct_access(iomap->dax_dev, pgoff, PHYS_PFN(size),
- NULL, pfnp);
+ DAX_ACCESS, NULL, pfnp);
if (length < 0) {
rc = length;
goto out;
@@ -1122,7 +1049,7 @@ static int dax_memzero(struct dax_device *dax_dev, pgoff_t pgoff,
void *kaddr;
long ret;
- ret = dax_direct_access(dax_dev, pgoff, 1, &kaddr, NULL);
+ ret = dax_direct_access(dax_dev, pgoff, 1, DAX_ACCESS, &kaddr, NULL);
if (ret > 0) {
memset(kaddr + offset, 0, size);
dax_flush(dax_dev, kaddr + offset, size);
@@ -1239,6 +1166,7 @@ static loff_t dax_iomap_iter(const struct iomap_iter *iomi,
const size_t size = ALIGN(length + offset, PAGE_SIZE);
pgoff_t pgoff = dax_iomap_pgoff(iomap, pos);
ssize_t map_len;
+ bool recovery = false;
void *kaddr;
if (fatal_signal_pending(current)) {
@@ -1247,7 +1175,14 @@ static loff_t dax_iomap_iter(const struct iomap_iter *iomi,
}
map_len = dax_direct_access(dax_dev, pgoff, PHYS_PFN(size),
- &kaddr, NULL);
+ DAX_ACCESS, &kaddr, NULL);
+ if (map_len == -EIO && iov_iter_rw(iter) == WRITE) {
+ map_len = dax_direct_access(dax_dev, pgoff,
+ PHYS_PFN(size), DAX_RECOVERY_WRITE,
+ &kaddr, NULL);
+ if (map_len > 0)
+ recovery = true;
+ }
if (map_len < 0) {
ret = map_len;
break;
@@ -1259,7 +1194,10 @@ static loff_t dax_iomap_iter(const struct iomap_iter *iomi,
if (map_len > end - pos)
map_len = end - pos;
- if (iov_iter_rw(iter) == WRITE)
+ if (recovery)
+ xfer = dax_recovery_write(dax_dev, pgoff, kaddr,
+ map_len, iter);
+ else if (iov_iter_rw(iter) == WRITE)
xfer = dax_copy_from_iter(dax_dev, pgoff, kaddr,
map_len, iter);
else
diff --git a/fs/exec.c b/fs/exec.c
index e3e55d5e0be1..14b4b3755580 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -758,6 +758,7 @@ int setup_arg_pages(struct linux_binprm *bprm,
unsigned long stack_size;
unsigned long stack_expand;
unsigned long rlim_stack;
+ struct mmu_gather tlb;
#ifdef CONFIG_STACK_GROWSUP
/* Limit stack size */
@@ -812,8 +813,11 @@ int setup_arg_pages(struct linux_binprm *bprm,
vm_flags |= mm->def_flags;
vm_flags |= VM_STACK_INCOMPLETE_SETUP;
- ret = mprotect_fixup(vma, &prev, vma->vm_start, vma->vm_end,
+ tlb_gather_mmu(&tlb, mm);
+ ret = mprotect_fixup(&tlb, vma, &prev, vma->vm_start, vma->vm_end,
vm_flags);
+ tlb_finish_mmu(&tlb);
+
if (ret)
goto out_unlock;
BUG_ON(prev != vma);
diff --git a/fs/fat/fat.h b/fs/fat/fat.h
index 02d4d4234956..a415c02ede39 100644
--- a/fs/fat/fat.h
+++ b/fs/fat/fat.h
@@ -126,6 +126,7 @@ struct msdos_inode_info {
struct hlist_node i_fat_hash; /* hash by i_location */
struct hlist_node i_dir_hash; /* hash by i_logstart */
struct rw_semaphore truncate_lock; /* protect bmap against truncate */
+ struct timespec64 i_crtime; /* File creation (birth) time */
struct inode vfs_inode;
};
@@ -433,8 +434,15 @@ void __fat_fs_error(struct super_block *sb, int report, const char *fmt, ...);
__fat_fs_error(sb, 1, fmt , ## args)
#define fat_fs_error_ratelimit(sb, fmt, args...) \
__fat_fs_error(sb, __ratelimit(&MSDOS_SB(sb)->ratelimit), fmt , ## args)
+
+#define FAT_PRINTK_PREFIX "%sFAT-fs (%s): "
+#define fat_msg(sb, level, fmt, args...) \
+do { \
+ printk_index_subsys_emit(FAT_PRINTK_PREFIX, level, fmt, ##args);\
+ _fat_msg(sb, level, fmt, ##args); \
+} while (0)
__printf(3, 4) __cold
-void fat_msg(struct super_block *sb, const char *level, const char *fmt, ...);
+void _fat_msg(struct super_block *sb, const char *level, const char *fmt, ...);
#define fat_msg_ratelimit(sb, level, fmt, args...) \
do { \
if (__ratelimit(&MSDOS_SB(sb)->ratelimit)) \
@@ -446,6 +454,10 @@ extern void fat_time_fat2unix(struct msdos_sb_info *sbi, struct timespec64 *ts,
__le16 __time, __le16 __date, u8 time_cs);
extern void fat_time_unix2fat(struct msdos_sb_info *sbi, struct timespec64 *ts,
__le16 *time, __le16 *date, u8 *time_cs);
+extern struct timespec64 fat_truncate_atime(const struct msdos_sb_info *sbi,
+ const struct timespec64 *ts);
+extern struct timespec64 fat_truncate_mtime(const struct msdos_sb_info *sbi,
+ const struct timespec64 *ts);
extern int fat_truncate_time(struct inode *inode, struct timespec64 *now,
int flags);
extern int fat_update_time(struct inode *inode, struct timespec64 *now,
diff --git a/fs/fat/fatent.c b/fs/fat/fatent.c
index 978ac6751aeb..1db348f8f887 100644
--- a/fs/fat/fatent.c
+++ b/fs/fat/fatent.c
@@ -94,7 +94,8 @@ static int fat12_ent_bread(struct super_block *sb, struct fat_entry *fatent,
err_brelse:
brelse(bhs[0]);
err:
- fat_msg(sb, KERN_ERR, "FAT read failed (blocknr %llu)", (llu)blocknr);
+ fat_msg_ratelimit(sb, KERN_ERR, "FAT read failed (blocknr %llu)",
+ (llu)blocknr);
return -EIO;
}
@@ -107,8 +108,8 @@ static int fat_ent_bread(struct super_block *sb, struct fat_entry *fatent,
fatent->fat_inode = MSDOS_SB(sb)->fat_inode;
fatent->bhs[0] = sb_bread(sb, blocknr);
if (!fatent->bhs[0]) {
- fat_msg(sb, KERN_ERR, "FAT read failed (blocknr %llu)",
- (llu)blocknr);
+ fat_msg_ratelimit(sb, KERN_ERR, "FAT read failed (blocknr %llu)",
+ (llu)blocknr);
return -EIO;
}
fatent->nr_bhs = 1;
diff --git a/fs/fat/file.c b/fs/fat/file.c
index bf91f977debe..3dae3ed60f3a 100644
--- a/fs/fat/file.c
+++ b/fs/fat/file.c
@@ -398,13 +398,21 @@ int fat_getattr(struct user_namespace *mnt_userns, const struct path *path,
struct kstat *stat, u32 request_mask, unsigned int flags)
{
struct inode *inode = d_inode(path->dentry);
+ struct msdos_sb_info *sbi = MSDOS_SB(inode->i_sb);
+
generic_fillattr(mnt_userns, inode, stat);
- stat->blksize = MSDOS_SB(inode->i_sb)->cluster_size;
+ stat->blksize = sbi->cluster_size;
- if (MSDOS_SB(inode->i_sb)->options.nfs == FAT_NFS_NOSTALE_RO) {
+ if (sbi->options.nfs == FAT_NFS_NOSTALE_RO) {
/* Use i_pos for ino. This is used as fileid of nfs. */
- stat->ino = fat_i_pos_read(MSDOS_SB(inode->i_sb), inode);
+ stat->ino = fat_i_pos_read(sbi, inode);
}
+
+ if (sbi->options.isvfat && request_mask & STATX_BTIME) {
+ stat->result_mask |= STATX_BTIME;
+ stat->btime = MSDOS_I(inode)->i_crtime;
+ }
+
return 0;
}
EXPORT_SYMBOL_GPL(fat_getattr);
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 69b4d4ae64d7..a38238d75c08 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -567,12 +567,13 @@ int fat_fill_inode(struct inode *inode, struct msdos_dir_entry *de)
& ~((loff_t)sbi->cluster_size - 1)) >> 9;
fat_time_fat2unix(sbi, &inode->i_mtime, de->time, de->date, 0);
+ inode->i_ctime = inode->i_mtime;
if (sbi->options.isvfat) {
- fat_time_fat2unix(sbi, &inode->i_ctime, de->ctime,
- de->cdate, de->ctime_cs);
fat_time_fat2unix(sbi, &inode->i_atime, 0, de->adate, 0);
+ fat_time_fat2unix(sbi, &MSDOS_I(inode)->i_crtime, de->ctime,
+ de->cdate, de->ctime_cs);
} else
- fat_truncate_time(inode, &inode->i_mtime, S_ATIME|S_CTIME);
+ inode->i_atime = fat_truncate_atime(sbi, &inode->i_mtime);
return 0;
}
@@ -757,6 +758,8 @@ static struct inode *fat_alloc_inode(struct super_block *sb)
ei->i_logstart = 0;
ei->i_attrs = 0;
ei->i_pos = 0;
+ ei->i_crtime.tv_sec = 0;
+ ei->i_crtime.tv_nsec = 0;
return &ei->vfs_inode;
}
@@ -888,10 +891,10 @@ retry:
&raw_entry->date, NULL);
if (sbi->options.isvfat) {
__le16 atime;
- fat_time_unix2fat(sbi, &inode->i_ctime, &raw_entry->ctime,
- &raw_entry->cdate, &raw_entry->ctime_cs);
fat_time_unix2fat(sbi, &inode->i_atime, &atime,
&raw_entry->adate, NULL);
+ fat_time_unix2fat(sbi, &MSDOS_I(inode)->i_crtime, &raw_entry->ctime,
+ &raw_entry->cdate, &raw_entry->ctime_cs);
}
spin_unlock(&sbi->inode_hash_lock);
mark_buffer_dirty(bh);
@@ -1885,10 +1888,8 @@ out_invalid:
fat_msg(sb, KERN_INFO, "Can't find a valid FAT filesystem");
out_fail:
- if (fsinfo_inode)
- iput(fsinfo_inode);
- if (fat_inode)
- iput(fat_inode);
+ iput(fsinfo_inode);
+ iput(fat_inode);
unload_nls(sbi->nls_io);
unload_nls(sbi->nls_disk);
fat_reset_iocharset(&sbi->options);
diff --git a/fs/fat/misc.c b/fs/fat/misc.c
index 91ca3c304211..7e5d6ae305f2 100644
--- a/fs/fat/misc.c
+++ b/fs/fat/misc.c
@@ -42,10 +42,16 @@ void __fat_fs_error(struct super_block *sb, int report, const char *fmt, ...)
EXPORT_SYMBOL_GPL(__fat_fs_error);
/**
- * fat_msg() - print preformated FAT specific messages. Every thing what is
- * not fat_fs_error() should be fat_msg().
+ * _fat_msg() - Print a preformatted FAT message based on a superblock.
+ * @sb: A pointer to a &struct super_block
+ * @level: A Kernel printk level constant
+ * @fmt: The printf-style format string to print.
+ *
+ * Everything that is not fat_fs_error() should be fat_msg().
+ *
+ * fat_msg() wraps _fat_msg() for printk indexing.
*/
-void fat_msg(struct super_block *sb, const char *level, const char *fmt, ...)
+void _fat_msg(struct super_block *sb, const char *level, const char *fmt, ...)
{
struct va_format vaf;
va_list args;
@@ -53,7 +59,7 @@ void fat_msg(struct super_block *sb, const char *level, const char *fmt, ...)
va_start(args, fmt);
vaf.fmt = fmt;
vaf.va = &args;
- printk("%sFAT-fs (%s): %pV\n", level, sb->s_id, &vaf);
+ _printk(FAT_PRINTK_PREFIX "%pV\n", level, sb->s_id, &vaf);
va_end(args);
}
@@ -187,7 +193,7 @@ static long days_in_year[] = {
0, 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 0, 0, 0,
};
-static inline int fat_tz_offset(struct msdos_sb_info *sbi)
+static inline int fat_tz_offset(const struct msdos_sb_info *sbi)
{
return (sbi->options.tz_set ?
-sbi->options.time_offset :
@@ -275,23 +281,35 @@ static inline struct timespec64 fat_timespec64_trunc_2secs(struct timespec64 ts)
return (struct timespec64){ ts.tv_sec & ~1ULL, 0 };
}
-static inline struct timespec64 fat_timespec64_trunc_10ms(struct timespec64 ts)
+/*
+ * truncate atime to 24 hour granularity (00:00:00 in local timezone)
+ */
+struct timespec64 fat_truncate_atime(const struct msdos_sb_info *sbi,
+ const struct timespec64 *ts)
+{
+ /* to localtime */
+ time64_t seconds = ts->tv_sec - fat_tz_offset(sbi);
+ s32 remainder;
+
+ div_s64_rem(seconds, SECS_PER_DAY, &remainder);
+ /* to day boundary, and back to unix time */
+ seconds = seconds + fat_tz_offset(sbi) - remainder;
+
+ return (struct timespec64){ seconds, 0 };
+}
+
+/*
+ * truncate mtime to 2 second granularity
+ */
+struct timespec64 fat_truncate_mtime(const struct msdos_sb_info *sbi,
+ const struct timespec64 *ts)
{
- if (ts.tv_nsec)
- ts.tv_nsec -= ts.tv_nsec % 10000000UL;
- return ts;
+ return fat_timespec64_trunc_2secs(*ts);
}
/*
* truncate the various times with appropriate granularity:
- * root inode:
- * all times always 0
- * all other inodes:
- * mtime - 2 seconds
- * ctime
- * msdos - 2 seconds
- * vfat - 10 milliseconds
- * atime - 24 hours (00:00:00 in local timezone)
+ * all times in root node are always 0
*/
int fat_truncate_time(struct inode *inode, struct timespec64 *now, int flags)
{
@@ -306,25 +324,15 @@ int fat_truncate_time(struct inode *inode, struct timespec64 *now, int flags)
ts = current_time(inode);
}
- if (flags & S_ATIME) {
- /* to localtime */
- time64_t seconds = now->tv_sec - fat_tz_offset(sbi);
- s32 remainder;
-
- div_s64_rem(seconds, SECS_PER_DAY, &remainder);
- /* to day boundary, and back to unix time */
- seconds = seconds + fat_tz_offset(sbi) - remainder;
-
- inode->i_atime = (struct timespec64){ seconds, 0 };
- }
- if (flags & S_CTIME) {
- if (sbi->options.isvfat)
- inode->i_ctime = fat_timespec64_trunc_10ms(*now);
- else
- inode->i_ctime = fat_timespec64_trunc_2secs(*now);
- }
+ if (flags & S_ATIME)
+ inode->i_atime = fat_truncate_atime(sbi, now);
+ /*
+ * ctime and mtime share the same on-disk field, and should be
+ * identical in memory. all mtime updates will be applied to ctime,
+ * but ctime updates are ignored.
+ */
if (flags & S_MTIME)
- inode->i_mtime = fat_timespec64_trunc_2secs(*now);
+ inode->i_mtime = inode->i_ctime = fat_truncate_mtime(sbi, now);
return 0;
}
diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c
index 5369d82e0bfb..c573314806cf 100644
--- a/fs/fat/namei_vfat.c
+++ b/fs/fat/namei_vfat.c
@@ -780,8 +780,6 @@ static int vfat_create(struct user_namespace *mnt_userns, struct inode *dir,
goto out;
}
inode_inc_iversion(inode);
- fat_truncate_time(inode, &ts, S_ATIME|S_CTIME|S_MTIME);
- /* timestamp is already written, so mark_inode_dirty() is unneeded. */
d_instantiate(dentry, inode);
out:
@@ -878,8 +876,6 @@ static int vfat_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
}
inode_inc_iversion(inode);
set_nlink(inode, 2);
- fat_truncate_time(inode, &ts, S_ATIME|S_CTIME|S_MTIME);
- /* timestamp is already written, so mark_inode_dirty() is unneeded. */
d_instantiate(dentry, inode);
diff --git a/fs/fcntl.c b/fs/fcntl.c
index f15d885b9796..34a3faa4886d 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -56,11 +56,10 @@ static int setfl(int fd, struct file * filp, unsigned long arg)
arg |= O_NONBLOCK;
/* Pipe packetized mode is controlled by O_DIRECT flag */
- if (!S_ISFIFO(inode->i_mode) && (arg & O_DIRECT)) {
- if (!filp->f_mapping || !filp->f_mapping->a_ops ||
- !filp->f_mapping->a_ops->direct_IO)
- return -EINVAL;
- }
+ if (!S_ISFIFO(inode->i_mode) &&
+ (arg & O_DIRECT) &&
+ !(filp->f_mode & FMODE_CAN_ODIRECT))
+ return -EINVAL;
if (filp->f_op->check_flags)
error = filp->f_op->check_flags(arg);
diff --git a/fs/fuse/dax.c b/fs/fuse/dax.c
index d7d3a7f06862..10eb50cbf398 100644
--- a/fs/fuse/dax.c
+++ b/fs/fuse/dax.c
@@ -1241,8 +1241,8 @@ static int fuse_dax_mem_range_init(struct fuse_conn_dax *fcd)
INIT_DELAYED_WORK(&fcd->free_work, fuse_dax_free_mem_worker);
id = dax_read_lock();
- nr_pages = dax_direct_access(fcd->dev, 0, PHYS_PFN(dax_size), NULL,
- NULL);
+ nr_pages = dax_direct_access(fcd->dev, 0, PHYS_PFN(dax_size),
+ DAX_ACCESS, NULL, NULL);
dax_read_unlock(id);
if (nr_pages < 0) {
pr_debug("dax_direct_access() returned %ld\n", nr_pages);
diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c
index 86b7dbb6a0d4..8db53fa67359 100644
--- a/fs/fuse/virtio_fs.c
+++ b/fs/fuse/virtio_fs.c
@@ -752,7 +752,8 @@ static void virtio_fs_cleanup_vqs(struct virtio_device *vdev,
* offset.
*/
static long virtio_fs_direct_access(struct dax_device *dax_dev, pgoff_t pgoff,
- long nr_pages, void **kaddr, pfn_t *pfn)
+ long nr_pages, enum dax_access_mode mode,
+ void **kaddr, pfn_t *pfn)
{
struct virtio_fs *fs = dax_get_private(dax_dev);
phys_addr_t offset = PFN_PHYS(pgoff);
@@ -772,7 +773,8 @@ static int virtio_fs_zero_page_range(struct dax_device *dax_dev,
long rc;
void *kaddr;
- rc = dax_direct_access(dax_dev, pgoff, nr_pages, &kaddr, NULL);
+ rc = dax_direct_access(dax_dev, pgoff, nr_pages, DAX_ACCESS, &kaddr,
+ NULL);
if (rc < 0)
return rc;
memset(kaddr, 0, nr_pages << PAGE_SHIFT);
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 2de9ca5d260d..62408047e8d7 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -195,7 +195,6 @@ out:
* Called under mmap_write_lock(mm).
*/
-#ifndef HAVE_ARCH_HUGETLB_UNMAPPED_AREA
static unsigned long
hugetlb_get_unmapped_area_bottomup(struct file *file, unsigned long addr,
unsigned long len, unsigned long pgoff, unsigned long flags)
@@ -206,7 +205,7 @@ hugetlb_get_unmapped_area_bottomup(struct file *file, unsigned long addr,
info.flags = 0;
info.length = len;
info.low_limit = current->mm->mmap_base;
- info.high_limit = arch_get_mmap_end(addr);
+ info.high_limit = arch_get_mmap_end(addr, len, flags);
info.align_mask = PAGE_MASK & ~huge_page_mask(h);
info.align_offset = 0;
return vm_unmapped_area(&info);
@@ -237,21 +236,22 @@ hugetlb_get_unmapped_area_topdown(struct file *file, unsigned long addr,
VM_BUG_ON(addr != -ENOMEM);
info.flags = 0;
info.low_limit = current->mm->mmap_base;
- info.high_limit = arch_get_mmap_end(addr);
+ info.high_limit = arch_get_mmap_end(addr, len, flags);
addr = vm_unmapped_area(&info);
}
return addr;
}
-static unsigned long
-hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
- unsigned long len, unsigned long pgoff, unsigned long flags)
+unsigned long
+generic_hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
+ unsigned long len, unsigned long pgoff,
+ unsigned long flags)
{
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
struct hstate *h = hstate_file(file);
- const unsigned long mmap_end = arch_get_mmap_end(addr);
+ const unsigned long mmap_end = arch_get_mmap_end(addr, len, flags);
if (len & ~huge_page_mask(h))
return -EINVAL;
@@ -283,6 +283,15 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
return hugetlb_get_unmapped_area_bottomup(file, addr, len,
pgoff, flags);
}
+
+#ifndef HAVE_ARCH_HUGETLB_UNMAPPED_AREA
+static unsigned long
+hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
+ unsigned long len, unsigned long pgoff,
+ unsigned long flags)
+{
+ return generic_hugetlb_get_unmapped_area(file, addr, len, pgoff, flags);
+}
#endif
static size_t
@@ -405,7 +414,8 @@ static void remove_huge_page(struct page *page)
}
static void
-hugetlb_vmdelete_list(struct rb_root_cached *root, pgoff_t start, pgoff_t end)
+hugetlb_vmdelete_list(struct rb_root_cached *root, pgoff_t start, pgoff_t end,
+ zap_flags_t zap_flags)
{
struct vm_area_struct *vma;
@@ -439,7 +449,7 @@ hugetlb_vmdelete_list(struct rb_root_cached *root, pgoff_t start, pgoff_t end)
}
unmap_hugepage_range(vma, vma->vm_start + v_offset, v_end,
- NULL);
+ NULL, zap_flags);
}
}
@@ -517,7 +527,8 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart,
mutex_lock(&hugetlb_fault_mutex_table[hash]);
hugetlb_vmdelete_list(&mapping->i_mmap,
index * pages_per_huge_page(h),
- (index + 1) * pages_per_huge_page(h));
+ (index + 1) * pages_per_huge_page(h),
+ ZAP_FLAG_DROP_MARKER);
i_mmap_unlock_write(mapping);
}
@@ -583,7 +594,8 @@ static void hugetlb_vmtruncate(struct inode *inode, loff_t offset)
i_mmap_lock_write(mapping);
i_size_write(inode, offset);
if (!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root))
- hugetlb_vmdelete_list(&mapping->i_mmap, pgoff, 0);
+ hugetlb_vmdelete_list(&mapping->i_mmap, pgoff, 0,
+ ZAP_FLAG_DROP_MARKER);
i_mmap_unlock_write(mapping);
remove_inode_hugepages(inode, offset, LLONG_MAX);
}
@@ -616,8 +628,8 @@ static long hugetlbfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
i_mmap_lock_write(mapping);
if (!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root))
hugetlb_vmdelete_list(&mapping->i_mmap,
- hole_start >> PAGE_SHIFT,
- hole_end >> PAGE_SHIFT);
+ hole_start >> PAGE_SHIFT,
+ hole_end >> PAGE_SHIFT, 0);
i_mmap_unlock_write(mapping);
remove_inode_hugepages(inode, hole_start, hole_end);
inode_unlock(inode);
@@ -1048,12 +1060,12 @@ static int hugetlbfs_statfs(struct dentry *dentry, struct kstatfs *buf)
if (sbinfo->spool) {
long free_pages;
- spin_lock(&sbinfo->spool->lock);
+ spin_lock_irq(&sbinfo->spool->lock);
buf->f_blocks = sbinfo->spool->max_hpages;
free_pages = sbinfo->spool->max_hpages
- sbinfo->spool->used_hpages;
buf->f_bavail = buf->f_bfree = free_pages;
- spin_unlock(&sbinfo->spool->lock);
+ spin_unlock_irq(&sbinfo->spool->lock);
buf->f_files = sbinfo->max_inodes;
buf->f_ffree = sbinfo->free_inodes;
}
diff --git a/fs/jfs/Makefile b/fs/jfs/Makefile
index 285ec189ed5c..7156d2c218c7 100644
--- a/fs/jfs/Makefile
+++ b/fs/jfs/Makefile
@@ -13,5 +13,3 @@ jfs-y := super.o file.o inode.o namei.o jfs_mount.o jfs_umount.o \
resize.o xattr.o ioctl.o
jfs-$(CONFIG_JFS_POSIX_ACL) += acl.o
-
-ccflags-y := -D_JFS_4K
diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c
index a5dd7e53754a..259326556ada 100644
--- a/fs/jfs/inode.c
+++ b/fs/jfs/inode.c
@@ -224,18 +224,9 @@ int jfs_get_block(struct inode *ip, sector_t lblock,
* this as a hole
*/
goto unlock;
-#ifdef _JFS_4K
XADoffset(&xad, lblock64);
XADlength(&xad, xlen);
XADaddress(&xad, xaddr);
-#else /* _JFS_4K */
- /*
- * As long as block size = 4K, this isn't a problem.
- * We should mark the whole page not ABNR, but how
- * will we know to mark the other blocks BH_New?
- */
- BUG();
-#endif /* _JFS_4K */
rc = extRecord(ip, &xad);
if (rc)
goto unlock;
@@ -252,7 +243,6 @@ int jfs_get_block(struct inode *ip, sector_t lblock,
/*
* Allocate a new block
*/
-#ifdef _JFS_4K
if ((rc = extHint(ip, lblock64 << ip->i_sb->s_blocksize_bits, &xad)))
goto unlock;
rc = extAlloc(ip, xlen, lblock64, &xad, false);
@@ -263,14 +253,6 @@ int jfs_get_block(struct inode *ip, sector_t lblock,
map_bh(bh_result, ip->i_sb, addressXAD(&xad));
bh_result->b_size = lengthXAD(&xad) << ip->i_blkbits;
-#else /* _JFS_4K */
- /*
- * We need to do whatever it takes to keep all but the last buffers
- * in 4K pages - see jfs_write.c
- */
- BUG();
-#endif /* _JFS_4K */
-
unlock:
/*
* Release lock on inode
diff --git a/fs/jfs/jfs_dmap.c b/fs/jfs/jfs_dmap.c
index d8502f4989d9..6b838d3ae7c2 100644
--- a/fs/jfs/jfs_dmap.c
+++ b/fs/jfs/jfs_dmap.c
@@ -385,7 +385,8 @@ int dbFree(struct inode *ip, s64 blkno, s64 nblocks)
}
/* write the last buffer. */
- write_metapage(mp);
+ if (mp)
+ write_metapage(mp);
IREAD_UNLOCK(ipbmap);
@@ -868,74 +869,6 @@ int dbAlloc(struct inode *ip, s64 hint, s64 nblocks, s64 * results)
return (rc);
}
-#ifdef _NOTYET
-/*
- * NAME: dbAllocExact()
- *
- * FUNCTION: try to allocate the requested extent;
- *
- * PARAMETERS:
- * ip - pointer to in-core inode;
- * blkno - extent address;
- * nblocks - extent length;
- *
- * RETURN VALUES:
- * 0 - success
- * -ENOSPC - insufficient disk resources
- * -EIO - i/o error
- */
-int dbAllocExact(struct inode *ip, s64 blkno, int nblocks)
-{
- int rc;
- struct inode *ipbmap = JFS_SBI(ip->i_sb)->ipbmap;
- struct bmap *bmp = JFS_SBI(ip->i_sb)->bmap;
- struct dmap *dp;
- s64 lblkno;
- struct metapage *mp;
-
- IREAD_LOCK(ipbmap, RDWRLOCK_DMAP);
-
- /*
- * validate extent request:
- *
- * note: defragfs policy:
- * max 64 blocks will be moved.
- * allocation request size must be satisfied from a single dmap.
- */
- if (nblocks <= 0 || nblocks > BPERDMAP || blkno >= bmp->db_mapsize) {
- IREAD_UNLOCK(ipbmap);
- return -EINVAL;
- }
-
- if (nblocks > ((s64) 1 << bmp->db_maxfreebud)) {
- /* the free space is no longer available */
- IREAD_UNLOCK(ipbmap);
- return -ENOSPC;
- }
-
- /* read in the dmap covering the extent */
- lblkno = BLKTODMAP(blkno, bmp->db_l2nbperpage);
- mp = read_metapage(ipbmap, lblkno, PSIZE, 0);
- if (mp == NULL) {
- IREAD_UNLOCK(ipbmap);
- return -EIO;
- }
- dp = (struct dmap *) mp->data;
-
- /* try to allocate the requested extent */
- rc = dbAllocNext(bmp, dp, blkno, nblocks);
-
- IREAD_UNLOCK(ipbmap);
-
- if (rc == 0)
- mark_metapage_dirty(mp);
-
- release_metapage(mp);
-
- return (rc);
-}
-#endif /* _NOTYET */
-
/*
* NAME: dbReAlloc()
*
diff --git a/fs/jfs/jfs_dtree.c b/fs/jfs/jfs_dtree.c
index 837d42f61464..92b7c533407c 100644
--- a/fs/jfs/jfs_dtree.c
+++ b/fs/jfs/jfs_dtree.c
@@ -2423,304 +2423,6 @@ static int dtDeleteUp(tid_t tid, struct inode *ip,
return 0;
}
-#ifdef _NOTYET
-/*
- * NAME: dtRelocate()
- *
- * FUNCTION: relocate dtpage (internal or leaf) of directory;
- * This function is mainly used by defragfs utility.
- */
-int dtRelocate(tid_t tid, struct inode *ip, s64 lmxaddr, pxd_t * opxd,
- s64 nxaddr)
-{
- int rc = 0;
- struct metapage *mp, *pmp, *lmp, *rmp;
- dtpage_t *p, *pp, *rp = 0, *lp= 0;
- s64 bn;
- int index;
- struct btstack btstack;
- pxd_t *pxd;
- s64 oxaddr, nextbn, prevbn;
- int xlen, xsize;
- struct tlock *tlck;
- struct dt_lock *dtlck;
- struct pxd_lock *pxdlock;
- s8 *stbl;
- struct lv *lv;
-
- oxaddr = addressPXD(opxd);
- xlen = lengthPXD(opxd);
-
- jfs_info("dtRelocate: lmxaddr:%Ld xaddr:%Ld:%Ld xlen:%d",
- (long long)lmxaddr, (long long)oxaddr, (long long)nxaddr,
- xlen);
-
- /*
- * 1. get the internal parent dtpage covering
- * router entry for the tartget page to be relocated;
- */
- rc = dtSearchNode(ip, lmxaddr, opxd, &btstack);
- if (rc)
- return rc;
-
- /* retrieve search result */
- DT_GETSEARCH(ip, btstack.top, bn, pmp, pp, index);
- jfs_info("dtRelocate: parent router entry validated.");
-
- /*
- * 2. relocate the target dtpage
- */
- /* read in the target page from src extent */
- DT_GETPAGE(ip, oxaddr, mp, PSIZE, p, rc);
- if (rc) {
- /* release the pinned parent page */
- DT_PUTPAGE(pmp);
- return rc;
- }
-
- /*
- * read in sibling pages if any to update sibling pointers;
- */
- rmp = NULL;
- if (p->header.next) {
- nextbn = le64_to_cpu(p->header.next);
- DT_GETPAGE(ip, nextbn, rmp, PSIZE, rp, rc);
- if (rc) {
- DT_PUTPAGE(mp);
- DT_PUTPAGE(pmp);
- return (rc);
- }
- }
-
- lmp = NULL;
- if (p->header.prev) {
- prevbn = le64_to_cpu(p->header.prev);
- DT_GETPAGE(ip, prevbn, lmp, PSIZE, lp, rc);
- if (rc) {
- DT_PUTPAGE(mp);
- DT_PUTPAGE(pmp);
- if (rmp)
- DT_PUTPAGE(rmp);
- return (rc);
- }
- }
-
- /* at this point, all xtpages to be updated are in memory */
-
- /*
- * update sibling pointers of sibling dtpages if any;
- */
- if (lmp) {
- tlck = txLock(tid, ip, lmp, tlckDTREE | tlckRELINK);
- dtlck = (struct dt_lock *) & tlck->lock;
- /* linelock header */
- ASSERT(dtlck->index == 0);
- lv = & dtlck->lv[0];
- lv->offset = 0;
- lv->length = 1;
- dtlck->index++;
-
- lp->header.next = cpu_to_le64(nxaddr);
- DT_PUTPAGE(lmp);
- }
-
- if (rmp) {
- tlck = txLock(tid, ip, rmp, tlckDTREE | tlckRELINK);
- dtlck = (struct dt_lock *) & tlck->lock;
- /* linelock header */
- ASSERT(dtlck->index == 0);
- lv = & dtlck->lv[0];
- lv->offset = 0;
- lv->length = 1;
- dtlck->index++;
-
- rp->header.prev = cpu_to_le64(nxaddr);
- DT_PUTPAGE(rmp);
- }
-
- /*
- * update the target dtpage to be relocated
- *
- * write LOG_REDOPAGE of LOG_NEW type for dst page
- * for the whole target page (logredo() will apply
- * after image and update bmap for allocation of the
- * dst extent), and update bmap for allocation of
- * the dst extent;
- */
- tlck = txLock(tid, ip, mp, tlckDTREE | tlckNEW);
- dtlck = (struct dt_lock *) & tlck->lock;
- /* linelock header */
- ASSERT(dtlck->index == 0);
- lv = & dtlck->lv[0];
-
- /* update the self address in the dtpage header */
- pxd = &p->header.self;
- PXDaddress(pxd, nxaddr);
-
- /* the dst page is the same as the src page, i.e.,
- * linelock for afterimage of the whole page;
- */
- lv->offset = 0;
- lv->length = p->header.maxslot;
- dtlck->index++;
-
- /* update the buffer extent descriptor of the dtpage */
- xsize = xlen << JFS_SBI(ip->i_sb)->l2bsize;
-
- /* unpin the relocated page */
- DT_PUTPAGE(mp);
- jfs_info("dtRelocate: target dtpage relocated.");
-
- /* the moved extent is dtpage, then a LOG_NOREDOPAGE log rec
- * needs to be written (in logredo(), the LOG_NOREDOPAGE log rec
- * will also force a bmap update ).
- */
-
- /*
- * 3. acquire maplock for the source extent to be freed;
- */
- /* for dtpage relocation, write a LOG_NOREDOPAGE record
- * for the source dtpage (logredo() will init NoRedoPage
- * filter and will also update bmap for free of the source
- * dtpage), and upadte bmap for free of the source dtpage;
- */
- tlck = txMaplock(tid, ip, tlckDTREE | tlckFREE);
- pxdlock = (struct pxd_lock *) & tlck->lock;
- pxdlock->flag = mlckFREEPXD;
- PXDaddress(&pxdlock->pxd, oxaddr);
- PXDlength(&pxdlock->pxd, xlen);
- pxdlock->index = 1;
-
- /*
- * 4. update the parent router entry for relocation;
- *
- * acquire tlck for the parent entry covering the target dtpage;
- * write LOG_REDOPAGE to apply after image only;
- */
- jfs_info("dtRelocate: update parent router entry.");
- tlck = txLock(tid, ip, pmp, tlckDTREE | tlckENTRY);
- dtlck = (struct dt_lock *) & tlck->lock;
- lv = & dtlck->lv[dtlck->index];
-
- /* update the PXD with the new address */
- stbl = DT_GETSTBL(pp);
- pxd = (pxd_t *) & pp->slot[stbl[index]];
- PXDaddress(pxd, nxaddr);
- lv->offset = stbl[index];
- lv->length = 1;
- dtlck->index++;
-
- /* unpin the parent dtpage */
- DT_PUTPAGE(pmp);
-
- return rc;
-}
-
-/*
- * NAME: dtSearchNode()
- *
- * FUNCTION: Search for an dtpage containing a specified address
- * This function is mainly used by defragfs utility.
- *
- * NOTE: Search result on stack, the found page is pinned at exit.
- * The result page must be an internal dtpage.
- * lmxaddr give the address of the left most page of the
- * dtree level, in which the required dtpage resides.
- */
-static int dtSearchNode(struct inode *ip, s64 lmxaddr, pxd_t * kpxd,
- struct btstack * btstack)
-{
- int rc = 0;
- s64 bn;
- struct metapage *mp;
- dtpage_t *p;
- int psize = 288; /* initial in-line directory */
- s8 *stbl;
- int i;
- pxd_t *pxd;
- struct btframe *btsp;
-
- BT_CLR(btstack); /* reset stack */
-
- /*
- * descend tree to the level with specified leftmost page
- *
- * by convention, root bn = 0.
- */
- for (bn = 0;;) {
- /* get/pin the page to search */
- DT_GETPAGE(ip, bn, mp, psize, p, rc);
- if (rc)
- return rc;
-
- /* does the xaddr of leftmost page of the levevl
- * matches levevl search key ?
- */
- if (p->header.flag & BT_ROOT) {
- if (lmxaddr == 0)
- break;
- } else if (addressPXD(&p->header.self) == lmxaddr)
- break;
-
- /*
- * descend down to leftmost child page
- */
- if (p->header.flag & BT_LEAF) {
- DT_PUTPAGE(mp);
- return -ESTALE;
- }
-
- /* get the leftmost entry */
- stbl = DT_GETSTBL(p);
- pxd = (pxd_t *) & p->slot[stbl[0]];
-
- /* get the child page block address */
- bn = addressPXD(pxd);
- psize = lengthPXD(pxd) << JFS_SBI(ip->i_sb)->l2bsize;
- /* unpin the parent page */
- DT_PUTPAGE(mp);
- }
-
- /*
- * search each page at the current levevl
- */
- loop:
- stbl = DT_GETSTBL(p);
- for (i = 0; i < p->header.nextindex; i++) {
- pxd = (pxd_t *) & p->slot[stbl[i]];
-
- /* found the specified router entry */
- if (addressPXD(pxd) == addressPXD(kpxd) &&
- lengthPXD(pxd) == lengthPXD(kpxd)) {
- btsp = btstack->top;
- btsp->bn = bn;
- btsp->index = i;
- btsp->mp = mp;
-
- return 0;
- }
- }
-
- /* get the right sibling page if any */
- if (p->header.next)
- bn = le64_to_cpu(p->header.next);
- else {
- DT_PUTPAGE(mp);
- return -ESTALE;
- }
-
- /* unpin current page */
- DT_PUTPAGE(mp);
-
- /* get the right sibling page */
- DT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
- if (rc)
- return rc;
-
- goto loop;
-}
-#endif /* _NOTYET */
-
/*
* dtRelink()
*
diff --git a/fs/jfs/jfs_extent.c b/fs/jfs/jfs_extent.c
index bb4a342a193d..ae99a7e232ee 100644
--- a/fs/jfs/jfs_extent.c
+++ b/fs/jfs/jfs_extent.c
@@ -16,9 +16,6 @@
* forward references
*/
static int extBalloc(struct inode *, s64, s64 *, s64 *);
-#ifdef _NOTYET
-static int extBrealloc(struct inode *, s64, s64, s64 *, s64 *);
-#endif
static s64 extRoundDown(s64 nb);
#define DPD(a) (printk("(a): %d\n",(a)))
@@ -177,162 +174,6 @@ extAlloc(struct inode *ip, s64 xlen, s64 pno, xad_t * xp, bool abnr)
return (0);
}
-
-#ifdef _NOTYET
-/*
- * NAME: extRealloc()
- *
- * FUNCTION: extend the allocation of a file extent containing a
- * partial back last page.
- *
- * PARAMETERS:
- * ip - the inode of the file.
- * cp - cbuf for the partial backed last page.
- * xlen - request size of the resulting extent.
- * xp - pointer to an xad. on successful exit, the xad
- * describes the newly allocated extent.
- * abnr - bool indicating whether the newly allocated extent
- * should be marked as allocated but not recorded.
- *
- * RETURN VALUES:
- * 0 - success
- * -EIO - i/o error.
- * -ENOSPC - insufficient disk resources.
- */
-int extRealloc(struct inode *ip, s64 nxlen, xad_t * xp, bool abnr)
-{
- struct super_block *sb = ip->i_sb;
- s64 xaddr, xlen, nxaddr, delta, xoff;
- s64 ntail, nextend, ninsert;
- int rc, nbperpage = JFS_SBI(sb)->nbperpage;
- int xflag;
-
- /* This blocks if we are low on resources */
- txBeginAnon(ip->i_sb);
-
- mutex_lock(&JFS_IP(ip)->commit_mutex);
- /* validate extent length */
- if (nxlen > MAXXLEN)
- nxlen = MAXXLEN;
-
- /* get the extend (partial) page's disk block address and
- * number of blocks.
- */
- xaddr = addressXAD(xp);
- xlen = lengthXAD(xp);
- xoff = offsetXAD(xp);
-
- /* if the extend page is abnr and if the request is for
- * the extent to be allocated and recorded,
- * make the page allocated and recorded.
- */
- if ((xp->flag & XAD_NOTRECORDED) && !abnr) {
- xp->flag = 0;
- if ((rc = xtUpdate(0, ip, xp)))
- goto exit;
- }
-
- /* try to allocated the request number of blocks for the
- * extent. dbRealloc() first tries to satisfy the request
- * by extending the allocation in place. otherwise, it will
- * try to allocate a new set of blocks large enough for the
- * request. in satisfying a request, dbReAlloc() may allocate
- * less than what was request but will always allocate enough
- * space as to satisfy the extend page.
- */
- if ((rc = extBrealloc(ip, xaddr, xlen, &nxlen, &nxaddr)))
- goto exit;
-
- /* Allocat blocks to quota. */
- rc = dquot_alloc_block(ip, nxlen);
- if (rc) {
- dbFree(ip, nxaddr, (s64) nxlen);
- mutex_unlock(&JFS_IP(ip)->commit_mutex);
- return rc;
- }
-
- delta = nxlen - xlen;
-
- /* check if the extend page is not abnr but the request is abnr
- * and the allocated disk space is for more than one page. if this
- * is the case, there is a miss match of abnr between the extend page
- * and the one or more pages following the extend page. as a result,
- * two extents will have to be manipulated. the first will be that
- * of the extent of the extend page and will be manipulated thru
- * an xtExtend() or an xtTailgate(), depending upon whether the
- * disk allocation occurred as an inplace extension. the second
- * extent will be manipulated (created) through an xtInsert() and
- * will be for the pages following the extend page.
- */
- if (abnr && (!(xp->flag & XAD_NOTRECORDED)) && (nxlen > nbperpage)) {
- ntail = nbperpage;
- nextend = ntail - xlen;
- ninsert = nxlen - nbperpage;
-
- xflag = XAD_NOTRECORDED;
- } else {
- ntail = nxlen;
- nextend = delta;
- ninsert = 0;
-
- xflag = xp->flag;
- }
-
- /* if we were able to extend the disk allocation in place,
- * extend the extent. otherwise, move the extent to a
- * new disk location.
- */
- if (xaddr == nxaddr) {
- /* extend the extent */
- if ((rc = xtExtend(0, ip, xoff + xlen, (int) nextend, 0))) {
- dbFree(ip, xaddr + xlen, delta);
- dquot_free_block(ip, nxlen);
- goto exit;
- }
- } else {
- /*
- * move the extent to a new location:
- *
- * xtTailgate() accounts for relocated tail extent;
- */
- if ((rc = xtTailgate(0, ip, xoff, (int) ntail, nxaddr, 0))) {
- dbFree(ip, nxaddr, nxlen);
- dquot_free_block(ip, nxlen);
- goto exit;
- }
- }
-
-
- /* check if we need to also insert a new extent */
- if (ninsert) {
- /* perform the insert. if it fails, free the blocks
- * to be inserted and make it appear that we only did
- * the xtExtend() or xtTailgate() above.
- */
- xaddr = nxaddr + ntail;
- if (xtInsert (0, ip, xflag, xoff + ntail, (int) ninsert,
- &xaddr, 0)) {
- dbFree(ip, xaddr, (s64) ninsert);
- delta = nextend;
- nxlen = ntail;
- xflag = 0;
- }
- }
-
- /* set the return results */
- XADaddress(xp, nxaddr);
- XADlength(xp, nxlen);
- XADoffset(xp, xoff);
- xp->flag = xflag;
-
- mark_inode_dirty(ip);
-exit:
- mutex_unlock(&JFS_IP(ip)->commit_mutex);
- return (rc);
-}
-#endif /* _NOTYET */
-
-
/*
* NAME: extHint()
*
@@ -423,44 +264,6 @@ int extRecord(struct inode *ip, xad_t * xp)
return rc;
}
-
-#ifdef _NOTYET
-/*
- * NAME: extFill()
- *
- * FUNCTION: allocate disk space for a file page that represents
- * a file hole.
- *
- * PARAMETERS:
- * ip - the inode of the file.
- * cp - cbuf of the file page represent the hole.
- *
- * RETURN VALUES:
- * 0 - success
- * -EIO - i/o error.
- * -ENOSPC - insufficient disk resources.
- */
-int extFill(struct inode *ip, xad_t * xp)
-{
- int rc, nbperpage = JFS_SBI(ip->i_sb)->nbperpage;
- s64 blkno = offsetXAD(xp) >> ip->i_blkbits;
-
-// assert(ISSPARSE(ip));
-
- /* initialize the extent allocation hint */
- XADaddress(xp, 0);
-
- /* allocate an extent to fill the hole */
- if ((rc = extAlloc(ip, nbperpage, blkno, xp, false)))
- return (rc);
-
- assert(lengthPXD(xp) == nbperpage);
-
- return (0);
-}
-#endif /* _NOTYET */
-
-
/*
* NAME: extBalloc()
*
@@ -550,64 +353,6 @@ extBalloc(struct inode *ip, s64 hint, s64 * nblocks, s64 * blkno)
return (0);
}
-
-#ifdef _NOTYET
-/*
- * NAME: extBrealloc()
- *
- * FUNCTION: attempt to extend an extent's allocation.
- *
- * Initially, we will try to extend the extent's allocation
- * in place. If this fails, we'll try to move the extent
- * to a new set of blocks. If moving the extent, we initially
- * will try to allocate disk blocks for the requested size
- * (newnblks). if this fails (new contiguous free blocks not
- * available), we'll try to allocate a smaller number of
- * blocks (producing a smaller extent), with this smaller
- * number of blocks consisting of the requested number of
- * blocks rounded down to the next smaller power of 2
- * number (i.e. 16 -> 8). We'll continue to round down and
- * retry the allocation until the number of blocks to allocate
- * is smaller than the number of blocks per page.
- *
- * PARAMETERS:
- * ip - the inode of the file.
- * blkno - starting block number of the extents current allocation.
- * nblks - number of blocks within the extents current allocation.
- * newnblks - pointer to a s64 value. on entry, this value is the
- * new desired extent size (number of blocks). on
- * successful exit, this value is set to the extent's actual
- * new size (new number of blocks).
- * newblkno - the starting block number of the extents new allocation.
- *
- * RETURN VALUES:
- * 0 - success
- * -EIO - i/o error.
- * -ENOSPC - insufficient disk resources.
- */
-static int
-extBrealloc(struct inode *ip,
- s64 blkno, s64 nblks, s64 * newnblks, s64 * newblkno)
-{
- int rc;
-
- /* try to extend in place */
- if ((rc = dbExtend(ip, blkno, nblks, *newnblks - nblks)) == 0) {
- *newblkno = blkno;
- return (0);
- } else {
- if (rc != -ENOSPC)
- return (rc);
- }
-
- /* in place extension not possible.
- * try to move the extent to a new set of blocks.
- */
- return (extBalloc(ip, blkno, newnblks, newblkno));
-}
-#endif /* _NOTYET */
-
-
/*
* NAME: extRoundDown()
*
diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c
index 997c81fcea34..695415cbfe98 100644
--- a/fs/jfs/jfs_logmgr.c
+++ b/fs/jfs/jfs_logmgr.c
@@ -388,14 +388,6 @@ lmWriteRecord(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
p = (caddr_t) &JFS_IP(tlck->ip)->i_xtroot;
linelock = (struct linelock *) & tlck->lock;
}
-#ifdef _JFS_WIP
- else if (tlck->flag & tlckINLINELOCK) {
-
- inlinelock = (struct inlinelock *) & tlck;
- p = (caddr_t) & inlinelock->pxd;
- linelock = (struct linelock *) & tlck;
- }
-#endif /* _JFS_WIP */
else {
jfs_err("lmWriteRecord: UFO tlck:0x%p", tlck);
return 0; /* Probably should trap */
diff --git a/fs/jfs/jfs_mount.c b/fs/jfs/jfs_mount.c
index aa4ff7bcaff2..48d1f70f786c 100644
--- a/fs/jfs/jfs_mount.c
+++ b/fs/jfs/jfs_mount.c
@@ -307,13 +307,11 @@ static int chkSuper(struct super_block *sb)
}
bsize = le32_to_cpu(j_sb->s_bsize);
-#ifdef _JFS_4K
if (bsize != PSIZE) {
- jfs_err("Currently only 4K block size supported!");
+ jfs_err("Only 4K block size supported!");
rc = -EINVAL;
goto out;
}
-#endif /* _JFS_4K */
jfs_info("superblock: flag:0x%08x state:0x%08x size:0x%Lx",
le32_to_cpu(j_sb->s_flag), le32_to_cpu(j_sb->s_state),
diff --git a/fs/jfs/jfs_txnmgr.c b/fs/jfs/jfs_txnmgr.c
index 042bbe6d8ac2..ffd4feece078 100644
--- a/fs/jfs/jfs_txnmgr.c
+++ b/fs/jfs/jfs_txnmgr.c
@@ -1490,40 +1490,6 @@ static void diLog(struct jfs_log *log, struct tblock *tblk, struct lrd *lrd,
tlck->flag |= tlckWRITEPAGE;
} else
jfs_err("diLog: UFO type tlck:0x%p", tlck);
-#ifdef _JFS_WIP
- /*
- * alloc/free external EA extent
- *
- * a maplock for txUpdateMap() to update bPWMAP for alloc/free
- * of the extent has been formatted at txLock() time;
- */
- else {
- assert(tlck->type & tlckEA);
-
- /* log LOG_UPDATEMAP for logredo() to update bmap for
- * alloc of new (and free of old) external EA extent;
- */
- lrd->type = cpu_to_le16(LOG_UPDATEMAP);
- pxdlock = (struct pxd_lock *) & tlck->lock;
- nlock = pxdlock->index;
- for (i = 0; i < nlock; i++, pxdlock++) {
- if (pxdlock->flag & mlckALLOCPXD)
- lrd->log.updatemap.type =
- cpu_to_le16(LOG_ALLOCPXD);
- else
- lrd->log.updatemap.type =
- cpu_to_le16(LOG_FREEPXD);
- lrd->log.updatemap.nxd = cpu_to_le16(1);
- lrd->log.updatemap.pxd = pxdlock->pxd;
- lrd->backchain =
- cpu_to_le32(lmLog(log, tblk, lrd, NULL));
- }
-
- /* update bmap */
- tlck->flag |= tlckUPDATEMAP;
- }
-#endif /* _JFS_WIP */
-
return;
}
diff --git a/fs/jfs/jfs_xtree.c b/fs/jfs/jfs_xtree.c
index 3148e9b35f3b..2d304cee884c 100644
--- a/fs/jfs/jfs_xtree.c
+++ b/fs/jfs/jfs_xtree.c
@@ -114,17 +114,6 @@ static int xtSplitPage(tid_t tid, struct inode *ip, struct xtsplit * split,
static int xtSplitRoot(tid_t tid, struct inode *ip,
struct xtsplit * split, struct metapage ** rmpp);
-#ifdef _STILL_TO_PORT
-static int xtDeleteUp(tid_t tid, struct inode *ip, struct metapage * fmp,
- xtpage_t * fp, struct btstack * btstack);
-
-static int xtSearchNode(struct inode *ip,
- xad_t * xad,
- int *cmpp, struct btstack * btstack, int flag);
-
-static int xtRelink(tid_t tid, struct inode *ip, xtpage_t * fp);
-#endif /* _STILL_TO_PORT */
-
/*
* xtLookup()
*
@@ -1493,189 +1482,6 @@ int xtExtend(tid_t tid, /* transaction id */
return rc;
}
-#ifdef _NOTYET
-/*
- * xtTailgate()
- *
- * function: split existing 'tail' extent
- * (split offset >= start offset of tail extent), and
- * relocate and extend the split tail half;
- *
- * note: existing extent may or may not have been committed.
- * caller is responsible for pager buffer cache update, and
- * working block allocation map update;
- * update pmap: free old split tail extent, alloc new extent;
- */
-int xtTailgate(tid_t tid, /* transaction id */
- struct inode *ip, s64 xoff, /* split/new extent offset */
- s32 xlen, /* new extent length */
- s64 xaddr, /* new extent address */
- int flag)
-{
- int rc = 0;
- int cmp;
- struct metapage *mp; /* meta-page buffer */
- xtpage_t *p; /* base B+-tree index page */
- s64 bn;
- int index, nextindex, llen, rlen;
- struct btstack btstack; /* traverse stack */
- struct xtsplit split; /* split information */
- xad_t *xad;
- struct tlock *tlck;
- struct xtlock *xtlck = 0;
- struct tlock *mtlck;
- struct maplock *pxdlock;
-
-/*
-printf("xtTailgate: nxoff:0x%lx nxlen:0x%x nxaddr:0x%lx\n",
- (ulong)xoff, xlen, (ulong)xaddr);
-*/
-
- /* there must exist extent to be tailgated */
- if ((rc = xtSearch(ip, xoff, NULL, &cmp, &btstack, XT_INSERT)))
- return rc;
-
- /* retrieve search result */
- XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);
-
- if (cmp != 0) {
- XT_PUTPAGE(mp);
- jfs_error(ip->i_sb, "couldn't find extent\n");
- return -EIO;
- }
-
- /* entry found must be last entry */
- nextindex = le16_to_cpu(p->header.nextindex);
- if (index != nextindex - 1) {
- XT_PUTPAGE(mp);
- jfs_error(ip->i_sb, "the entry found is not the last entry\n");
- return -EIO;
- }
-
- BT_MARK_DIRTY(mp, ip);
- /*
- * acquire tlock of the leaf page containing original entry
- */
- if (!test_cflag(COMMIT_Nolink, ip)) {
- tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW);
- xtlck = (struct xtlock *) & tlck->lock;
- }
-
- /* completely replace extent ? */
- xad = &p->xad[index];
-/*
-printf("xtTailgate: xoff:0x%lx xlen:0x%x xaddr:0x%lx\n",
- (ulong)offsetXAD(xad), lengthXAD(xad), (ulong)addressXAD(xad));
-*/
- if ((llen = xoff - offsetXAD(xad)) == 0)
- goto updateOld;
-
- /*
- * partially replace extent: insert entry for new extent
- */
-//insertNew:
- /*
- * if the leaf page is full, insert the new entry and
- * propagate up the router entry for the new page from split
- *
- * The xtSplitUp() will insert the entry and unpin the leaf page.
- */
- if (nextindex == le16_to_cpu(p->header.maxentry)) {
- /* xtSpliUp() unpins leaf pages */
- split.mp = mp;
- split.index = index + 1;
- split.flag = XAD_NEW;
- split.off = xoff; /* split offset */
- split.len = xlen;
- split.addr = xaddr;
- split.pxdlist = NULL;
- if ((rc = xtSplitUp(tid, ip, &split, &btstack)))
- return rc;
-
- /* get back old page */
- XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
- if (rc)
- return rc;
- /*
- * if leaf root has been split, original root has been
- * copied to new child page, i.e., original entry now
- * resides on the new child page;
- */
- if (p->header.flag & BT_INTERNAL) {
- ASSERT(p->header.nextindex ==
- cpu_to_le16(XTENTRYSTART + 1));
- xad = &p->xad[XTENTRYSTART];
- bn = addressXAD(xad);
- XT_PUTPAGE(mp);
-
- /* get new child page */
- XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
- if (rc)
- return rc;
-
- BT_MARK_DIRTY(mp, ip);
- if (!test_cflag(COMMIT_Nolink, ip)) {
- tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW);
- xtlck = (struct xtlock *) & tlck->lock;
- }
- }
- }
- /*
- * insert the new entry into the leaf page
- */
- else {
- /* insert the new entry: mark the entry NEW */
- xad = &p->xad[index + 1];
- XT_PUTENTRY(xad, XAD_NEW, xoff, xlen, xaddr);
-
- /* advance next available entry index */
- le16_add_cpu(&p->header.nextindex, 1);
- }
-
- /* get back old XAD */
- xad = &p->xad[index];
-
- /*
- * truncate/relocate old extent at split offset
- */
- updateOld:
- /* update dmap for old/committed/truncated extent */
- rlen = lengthXAD(xad) - llen;
- if (!(xad->flag & XAD_NEW)) {
- /* free from PWMAP at commit */
- if (!test_cflag(COMMIT_Nolink, ip)) {
- mtlck = txMaplock(tid, ip, tlckMAP);
- pxdlock = (struct maplock *) & mtlck->lock;
- pxdlock->flag = mlckFREEPXD;
- PXDaddress(&pxdlock->pxd, addressXAD(xad) + llen);
- PXDlength(&pxdlock->pxd, rlen);
- pxdlock->index = 1;
- }
- } else
- /* free from WMAP */
- dbFree(ip, addressXAD(xad) + llen, (s64) rlen);
-
- if (llen)
- /* truncate */
- XADlength(xad, llen);
- else
- /* replace */
- XT_PUTENTRY(xad, XAD_NEW, xoff, xlen, xaddr);
-
- if (!test_cflag(COMMIT_Nolink, ip)) {
- xtlck->lwm.offset = (xtlck->lwm.offset) ?
- min(index, (int)xtlck->lwm.offset) : index;
- xtlck->lwm.length = le16_to_cpu(p->header.nextindex) -
- xtlck->lwm.offset;
- }
-
- /* unpin the leaf page */
- XT_PUTPAGE(mp);
-
- return rc;
-}
-#endif /* _NOTYET */
-
/*
* xtUpdate()
*
@@ -1753,32 +1559,12 @@ int xtUpdate(tid_t tid, struct inode *ip, xad_t * nxad)
newindex = index + 1;
nextindex = le16_to_cpu(p->header.nextindex);
-#ifdef _JFS_WIP_NOCOALESCE
- if (xoff < nxoff)
- goto updateRight;
-
- /*
- * replace XAD with nXAD
- */
- replace: /* (nxoff == xoff) */
- if (nxlen == xlen) {
- /* replace XAD with nXAD:recorded */
- *xad = *nxad;
- xad->flag = xflag & ~XAD_NOTRECORDED;
-
- goto out;
- } else /* (nxlen < xlen) */
- goto updateLeft;
-#endif /* _JFS_WIP_NOCOALESCE */
-
-/* #ifdef _JFS_WIP_COALESCE */
if (xoff < nxoff)
goto coalesceRight;
/*
* coalesce with left XAD
*/
-//coalesceLeft: /* (xoff == nxoff) */
/* is XAD first entry of page ? */
if (index == XTENTRYSTART)
goto replace;
@@ -1897,7 +1683,6 @@ int xtUpdate(tid_t tid, struct inode *ip, xad_t * nxad)
jfs_error(ip->i_sb, "xoff >= nxoff\n");
return -EIO;
}
-/* #endif _JFS_WIP_COALESCE */
/*
* split XAD into (lXAD, nXAD):
@@ -2305,752 +2090,6 @@ int xtAppend(tid_t tid, /* transaction id */
return rc;
}
-#ifdef _STILL_TO_PORT
-
-/* - TBD for defragmentaion/reorganization -
- *
- * xtDelete()
- *
- * function:
- * delete the entry with the specified key.
- *
- * N.B.: whole extent of the entry is assumed to be deleted.
- *
- * parameter:
- *
- * return:
- * ENOENT: if the entry is not found.
- *
- * exception:
- */
-int xtDelete(tid_t tid, struct inode *ip, s64 xoff, s32 xlen, int flag)
-{
- int rc = 0;
- struct btstack btstack;
- int cmp;
- s64 bn;
- struct metapage *mp;
- xtpage_t *p;
- int index, nextindex;
- struct tlock *tlck;
- struct xtlock *xtlck;
-
- /*
- * find the matching entry; xtSearch() pins the page
- */
- if ((rc = xtSearch(ip, xoff, NULL, &cmp, &btstack, 0)))
- return rc;
-
- XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);
- if (cmp) {
- /* unpin the leaf page */
- XT_PUTPAGE(mp);
- return -ENOENT;
- }
-
- /*
- * delete the entry from the leaf page
- */
- nextindex = le16_to_cpu(p->header.nextindex);
- le16_add_cpu(&p->header.nextindex, -1);
-
- /*
- * if the leaf page bocome empty, free the page
- */
- if (p->header.nextindex == cpu_to_le16(XTENTRYSTART))
- return (xtDeleteUp(tid, ip, mp, p, &btstack));
-
- BT_MARK_DIRTY(mp, ip);
- /*
- * acquire a transaction lock on the leaf page;
- *
- * action:xad deletion;
- */
- tlck = txLock(tid, ip, mp, tlckXTREE);
- xtlck = (struct xtlock *) & tlck->lock;
- xtlck->lwm.offset =
- (xtlck->lwm.offset) ? min(index, xtlck->lwm.offset) : index;
-
- /* if delete from middle, shift left/compact the remaining entries */
- if (index < nextindex - 1)
- memmove(&p->xad[index], &p->xad[index + 1],
- (nextindex - index - 1) * sizeof(xad_t));
-
- XT_PUTPAGE(mp);
-
- return 0;
-}
-
-
-/* - TBD for defragmentaion/reorganization -
- *
- * xtDeleteUp()
- *
- * function:
- * free empty pages as propagating deletion up the tree
- *
- * parameter:
- *
- * return:
- */
-static int
-xtDeleteUp(tid_t tid, struct inode *ip,
- struct metapage * fmp, xtpage_t * fp, struct btstack * btstack)
-{
- int rc = 0;
- struct metapage *mp;
- xtpage_t *p;
- int index, nextindex;
- s64 xaddr;
- int xlen;
- struct btframe *parent;
- struct tlock *tlck;
- struct xtlock *xtlck;
-
- /*
- * keep root leaf page which has become empty
- */
- if (fp->header.flag & BT_ROOT) {
- /* keep the root page */
- fp->header.flag &= ~BT_INTERNAL;
- fp->header.flag |= BT_LEAF;
- fp->header.nextindex = cpu_to_le16(XTENTRYSTART);
-
- /* XT_PUTPAGE(fmp); */
-
- return 0;
- }
-
- /*
- * free non-root leaf page
- */
- if ((rc = xtRelink(tid, ip, fp))) {
- XT_PUTPAGE(fmp);
- return rc;
- }
-
- xaddr = addressPXD(&fp->header.self);
- xlen = lengthPXD(&fp->header.self);
- /* free the page extent */
- dbFree(ip, xaddr, (s64) xlen);
-
- /* free the buffer page */
- discard_metapage(fmp);
-
- /*
- * propagate page deletion up the index tree
- *
- * If the delete from the parent page makes it empty,
- * continue all the way up the tree.
- * stop if the root page is reached (which is never deleted) or
- * if the entry deletion does not empty the page.
- */
- while ((parent = BT_POP(btstack)) != NULL) {
- /* get/pin the parent page <sp> */
- XT_GETPAGE(ip, parent->bn, mp, PSIZE, p, rc);
- if (rc)
- return rc;
-
- index = parent->index;
-
- /* delete the entry for the freed child page from parent.
- */
- nextindex = le16_to_cpu(p->header.nextindex);
-
- /*
- * the parent has the single entry being deleted:
- * free the parent page which has become empty.
- */
- if (nextindex == 1) {
- if (p->header.flag & BT_ROOT) {
- /* keep the root page */
- p->header.flag &= ~BT_INTERNAL;
- p->header.flag |= BT_LEAF;
- p->header.nextindex =
- cpu_to_le16(XTENTRYSTART);
-
- /* XT_PUTPAGE(mp); */
-
- break;
- } else {
- /* free the parent page */
- if ((rc = xtRelink(tid, ip, p)))
- return rc;
-
- xaddr = addressPXD(&p->header.self);
- /* free the page extent */
- dbFree(ip, xaddr,
- (s64) JFS_SBI(ip->i_sb)->nbperpage);
-
- /* unpin/free the buffer page */
- discard_metapage(mp);
-
- /* propagate up */
- continue;
- }
- }
- /*
- * the parent has other entries remaining:
- * delete the router entry from the parent page.
- */
- else {
- BT_MARK_DIRTY(mp, ip);
- /*
- * acquire a transaction lock on the leaf page;
- *
- * action:xad deletion;
- */
- tlck = txLock(tid, ip, mp, tlckXTREE);
- xtlck = (struct xtlock *) & tlck->lock;
- xtlck->lwm.offset =
- (xtlck->lwm.offset) ? min(index,
- xtlck->lwm.
- offset) : index;
-
- /* if delete from middle,
- * shift left/compact the remaining entries in the page
- */
- if (index < nextindex - 1)
- memmove(&p->xad[index], &p->xad[index + 1],
- (nextindex - index -
- 1) << L2XTSLOTSIZE);
-
- le16_add_cpu(&p->header.nextindex, -1);
- jfs_info("xtDeleteUp(entry): 0x%lx[%d]",
- (ulong) parent->bn, index);
- }
-
- /* unpin the parent page */
- XT_PUTPAGE(mp);
-
- /* exit propagation up */
- break;
- }
-
- return 0;
-}
-
-
-/*
- * NAME: xtRelocate()
- *
- * FUNCTION: relocate xtpage or data extent of regular file;
- * This function is mainly used by defragfs utility.
- *
- * NOTE: This routine does not have the logic to handle
- * uncommitted allocated extent. The caller should call
- * txCommit() to commit all the allocation before call
- * this routine.
- */
-int
-xtRelocate(tid_t tid, struct inode * ip, xad_t * oxad, /* old XAD */
- s64 nxaddr, /* new xaddr */
- int xtype)
-{ /* extent type: XTPAGE or DATAEXT */
- int rc = 0;
- struct tblock *tblk;
- struct tlock *tlck;
- struct xtlock *xtlck;
- struct metapage *mp, *pmp, *lmp, *rmp; /* meta-page buffer */
- xtpage_t *p, *pp, *rp, *lp; /* base B+-tree index page */
- xad_t *xad;
- pxd_t *pxd;
- s64 xoff, xsize;
- int xlen;
- s64 oxaddr, sxaddr, dxaddr, nextbn, prevbn;
- cbuf_t *cp;
- s64 offset, nbytes, nbrd, pno;
- int nb, npages, nblks;
- s64 bn;
- int cmp;
- int index;
- struct pxd_lock *pxdlock;
- struct btstack btstack; /* traverse stack */
-
- xtype = xtype & EXTENT_TYPE;
-
- xoff = offsetXAD(oxad);
- oxaddr = addressXAD(oxad);
- xlen = lengthXAD(oxad);
-
- /* validate extent offset */
- offset = xoff << JFS_SBI(ip->i_sb)->l2bsize;
- if (offset >= ip->i_size)
- return -ESTALE; /* stale extent */
-
- jfs_info("xtRelocate: xtype:%d xoff:0x%lx xlen:0x%x xaddr:0x%lx:0x%lx",
- xtype, (ulong) xoff, xlen, (ulong) oxaddr, (ulong) nxaddr);
-
- /*
- * 1. get and validate the parent xtpage/xad entry
- * covering the source extent to be relocated;
- */
- if (xtype == DATAEXT) {
- /* search in leaf entry */
- rc = xtSearch(ip, xoff, NULL, &cmp, &btstack, 0);
- if (rc)
- return rc;
-
- /* retrieve search result */
- XT_GETSEARCH(ip, btstack.top, bn, pmp, pp, index);
-
- if (cmp) {
- XT_PUTPAGE(pmp);
- return -ESTALE;
- }
-
- /* validate for exact match with a single entry */
- xad = &pp->xad[index];
- if (addressXAD(xad) != oxaddr || lengthXAD(xad) != xlen) {
- XT_PUTPAGE(pmp);
- return -ESTALE;
- }
- } else { /* (xtype == XTPAGE) */
-
- /* search in internal entry */
- rc = xtSearchNode(ip, oxad, &cmp, &btstack, 0);
- if (rc)
- return rc;
-
- /* retrieve search result */
- XT_GETSEARCH(ip, btstack.top, bn, pmp, pp, index);
-
- if (cmp) {
- XT_PUTPAGE(pmp);
- return -ESTALE;
- }
-
- /* xtSearchNode() validated for exact match with a single entry
- */
- xad = &pp->xad[index];
- }
- jfs_info("xtRelocate: parent xad entry validated.");
-
- /*
- * 2. relocate the extent
- */
- if (xtype == DATAEXT) {
- /* if the extent is allocated-but-not-recorded
- * there is no real data to be moved in this extent,
- */
- if (xad->flag & XAD_NOTRECORDED)
- goto out;
- else
- /* release xtpage for cmRead()/xtLookup() */
- XT_PUTPAGE(pmp);
-
- /*
- * cmRelocate()
- *
- * copy target data pages to be relocated;
- *
- * data extent must start at page boundary and
- * multiple of page size (except the last data extent);
- * read in each page of the source data extent into cbuf,
- * update the cbuf extent descriptor of the page to be
- * homeward bound to new dst data extent
- * copy the data from the old extent to new extent.
- * copy is essential for compressed files to avoid problems
- * that can arise if there was a change in compression
- * algorithms.
- * it is a good strategy because it may disrupt cache
- * policy to keep the pages in memory afterwards.
- */
- offset = xoff << JFS_SBI(ip->i_sb)->l2bsize;
- assert((offset & CM_OFFSET) == 0);
- nbytes = xlen << JFS_SBI(ip->i_sb)->l2bsize;
- pno = offset >> CM_L2BSIZE;
- npages = (nbytes + (CM_BSIZE - 1)) >> CM_L2BSIZE;
-/*
- npages = ((offset + nbytes - 1) >> CM_L2BSIZE) -
- (offset >> CM_L2BSIZE) + 1;
-*/
- sxaddr = oxaddr;
- dxaddr = nxaddr;
-
- /* process the request one cache buffer at a time */
- for (nbrd = 0; nbrd < nbytes; nbrd += nb,
- offset += nb, pno++, npages--) {
- /* compute page size */
- nb = min(nbytes - nbrd, CM_BSIZE);
-
- /* get the cache buffer of the page */
- if (rc = cmRead(ip, offset, npages, &cp))
- break;
-
- assert(addressPXD(&cp->cm_pxd) == sxaddr);
- assert(!cp->cm_modified);
-
- /* bind buffer with the new extent address */
- nblks = nb >> JFS_IP(ip->i_sb)->l2bsize;
- cmSetXD(ip, cp, pno, dxaddr, nblks);
-
- /* release the cbuf, mark it as modified */
- cmPut(cp, true);
-
- dxaddr += nblks;
- sxaddr += nblks;
- }
-
- /* get back parent page */
- if ((rc = xtSearch(ip, xoff, NULL, &cmp, &btstack, 0)))
- return rc;
-
- XT_GETSEARCH(ip, btstack.top, bn, pmp, pp, index);
- jfs_info("xtRelocate: target data extent relocated.");
- } else { /* (xtype == XTPAGE) */
-
- /*
- * read in the target xtpage from the source extent;
- */
- XT_GETPAGE(ip, oxaddr, mp, PSIZE, p, rc);
- if (rc) {
- XT_PUTPAGE(pmp);
- return rc;
- }
-
- /*
- * read in sibling pages if any to update sibling pointers;
- */
- rmp = NULL;
- if (p->header.next) {
- nextbn = le64_to_cpu(p->header.next);
- XT_GETPAGE(ip, nextbn, rmp, PSIZE, rp, rc);
- if (rc) {
- XT_PUTPAGE(pmp);
- XT_PUTPAGE(mp);
- return (rc);
- }
- }
-
- lmp = NULL;
- if (p->header.prev) {
- prevbn = le64_to_cpu(p->header.prev);
- XT_GETPAGE(ip, prevbn, lmp, PSIZE, lp, rc);
- if (rc) {
- XT_PUTPAGE(pmp);
- XT_PUTPAGE(mp);
- if (rmp)
- XT_PUTPAGE(rmp);
- return (rc);
- }
- }
-
- /* at this point, all xtpages to be updated are in memory */
-
- /*
- * update sibling pointers of sibling xtpages if any;
- */
- if (lmp) {
- BT_MARK_DIRTY(lmp, ip);
- tlck = txLock(tid, ip, lmp, tlckXTREE | tlckRELINK);
- lp->header.next = cpu_to_le64(nxaddr);
- XT_PUTPAGE(lmp);
- }
-
- if (rmp) {
- BT_MARK_DIRTY(rmp, ip);
- tlck = txLock(tid, ip, rmp, tlckXTREE | tlckRELINK);
- rp->header.prev = cpu_to_le64(nxaddr);
- XT_PUTPAGE(rmp);
- }
-
- /*
- * update the target xtpage to be relocated
- *
- * update the self address of the target page
- * and write to destination extent;
- * redo image covers the whole xtpage since it is new page
- * to the destination extent;
- * update of bmap for the free of source extent
- * of the target xtpage itself:
- * update of bmap for the allocation of destination extent
- * of the target xtpage itself:
- * update of bmap for the extents covered by xad entries in
- * the target xtpage is not necessary since they are not
- * updated;
- * if not committed before this relocation,
- * target page may contain XAD_NEW entries which must
- * be scanned for bmap update (logredo() always
- * scan xtpage REDOPAGE image for bmap update);
- * if committed before this relocation (tlckRELOCATE),
- * scan may be skipped by commit() and logredo();
- */
- BT_MARK_DIRTY(mp, ip);
- /* tlckNEW init xtlck->lwm.offset = XTENTRYSTART; */
- tlck = txLock(tid, ip, mp, tlckXTREE | tlckNEW);
- xtlck = (struct xtlock *) & tlck->lock;
-
- /* update the self address in the xtpage header */
- pxd = &p->header.self;
- PXDaddress(pxd, nxaddr);
-
- /* linelock for the after image of the whole page */
- xtlck->lwm.length =
- le16_to_cpu(p->header.nextindex) - xtlck->lwm.offset;
-
- /* update the buffer extent descriptor of target xtpage */
- xsize = xlen << JFS_SBI(ip->i_sb)->l2bsize;
- bmSetXD(mp, nxaddr, xsize);
-
- /* unpin the target page to new homeward bound */
- XT_PUTPAGE(mp);
- jfs_info("xtRelocate: target xtpage relocated.");
- }
-
- /*
- * 3. acquire maplock for the source extent to be freed;
- *
- * acquire a maplock saving the src relocated extent address;
- * to free of the extent at commit time;
- */
- out:
- /* if DATAEXT relocation, write a LOG_UPDATEMAP record for
- * free PXD of the source data extent (logredo() will update
- * bmap for free of source data extent), and update bmap for
- * free of the source data extent;
- */
- if (xtype == DATAEXT)
- tlck = txMaplock(tid, ip, tlckMAP);
- /* if XTPAGE relocation, write a LOG_NOREDOPAGE record
- * for the source xtpage (logredo() will init NoRedoPage
- * filter and will also update bmap for free of the source
- * xtpage), and update bmap for free of the source xtpage;
- * N.B. We use tlckMAP instead of tlkcXTREE because there
- * is no buffer associated with this lock since the buffer
- * has been redirected to the target location.
- */
- else /* (xtype == XTPAGE) */
- tlck = txMaplock(tid, ip, tlckMAP | tlckRELOCATE);
-
- pxdlock = (struct pxd_lock *) & tlck->lock;
- pxdlock->flag = mlckFREEPXD;
- PXDaddress(&pxdlock->pxd, oxaddr);
- PXDlength(&pxdlock->pxd, xlen);
- pxdlock->index = 1;
-
- /*
- * 4. update the parent xad entry for relocation;
- *
- * acquire tlck for the parent entry with XAD_NEW as entry
- * update which will write LOG_REDOPAGE and update bmap for
- * allocation of XAD_NEW destination extent;
- */
- jfs_info("xtRelocate: update parent xad entry.");
- BT_MARK_DIRTY(pmp, ip);
- tlck = txLock(tid, ip, pmp, tlckXTREE | tlckGROW);
- xtlck = (struct xtlock *) & tlck->lock;
-
- /* update the XAD with the new destination extent; */
- xad = &pp->xad[index];
- xad->flag |= XAD_NEW;
- XADaddress(xad, nxaddr);
-
- xtlck->lwm.offset = min(index, xtlck->lwm.offset);
- xtlck->lwm.length = le16_to_cpu(pp->header.nextindex) -
- xtlck->lwm.offset;
-
- /* unpin the parent xtpage */
- XT_PUTPAGE(pmp);
-
- return rc;
-}
-
-
-/*
- * xtSearchNode()
- *
- * function: search for the internal xad entry covering specified extent.
- * This function is mainly used by defragfs utility.
- *
- * parameters:
- * ip - file object;
- * xad - extent to find;
- * cmpp - comparison result:
- * btstack - traverse stack;
- * flag - search process flag;
- *
- * returns:
- * btstack contains (bn, index) of search path traversed to the entry.
- * *cmpp is set to result of comparison with the entry returned.
- * the page containing the entry is pinned at exit.
- */
-static int xtSearchNode(struct inode *ip, xad_t * xad, /* required XAD entry */
- int *cmpp, struct btstack * btstack, int flag)
-{
- int rc = 0;
- s64 xoff, xaddr;
- int xlen;
- int cmp = 1; /* init for empty page */
- s64 bn; /* block number */
- struct metapage *mp; /* meta-page buffer */
- xtpage_t *p; /* page */
- int base, index, lim;
- struct btframe *btsp;
- s64 t64;
-
- BT_CLR(btstack);
-
- xoff = offsetXAD(xad);
- xlen = lengthXAD(xad);
- xaddr = addressXAD(xad);
-
- /*
- * search down tree from root:
- *
- * between two consecutive entries of <Ki, Pi> and <Kj, Pj> of
- * internal page, child page Pi contains entry with k, Ki <= K < Kj.
- *
- * if entry with search key K is not found
- * internal page search find the entry with largest key Ki
- * less than K which point to the child page to search;
- * leaf page search find the entry with smallest key Kj
- * greater than K so that the returned index is the position of
- * the entry to be shifted right for insertion of new entry.
- * for empty tree, search key is greater than any key of the tree.
- *
- * by convention, root bn = 0.
- */
- for (bn = 0;;) {
- /* get/pin the page to search */
- XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
- if (rc)
- return rc;
- if (p->header.flag & BT_LEAF) {
- XT_PUTPAGE(mp);
- return -ESTALE;
- }
-
- lim = le16_to_cpu(p->header.nextindex) - XTENTRYSTART;
-
- /*
- * binary search with search key K on the current page
- */
- for (base = XTENTRYSTART; lim; lim >>= 1) {
- index = base + (lim >> 1);
-
- XT_CMP(cmp, xoff, &p->xad[index], t64);
- if (cmp == 0) {
- /*
- * search hit
- *
- * verify for exact match;
- */
- if (xaddr == addressXAD(&p->xad[index]) &&
- xoff == offsetXAD(&p->xad[index])) {
- *cmpp = cmp;
-
- /* save search result */
- btsp = btstack->top;
- btsp->bn = bn;
- btsp->index = index;
- btsp->mp = mp;
-
- return 0;
- }
-
- /* descend/search its child page */
- goto next;
- }
-
- if (cmp > 0) {
- base = index + 1;
- --lim;
- }
- }
-
- /*
- * search miss - non-leaf page:
- *
- * base is the smallest index with key (Kj) greater than
- * search key (K) and may be zero or maxentry index.
- * if base is non-zero, decrement base by one to get the parent
- * entry of the child page to search.
- */
- index = base ? base - 1 : base;
-
- /*
- * go down to child page
- */
- next:
- /* get the child page block number */
- bn = addressXAD(&p->xad[index]);
-
- /* unpin the parent page */
- XT_PUTPAGE(mp);
- }
-}
-
-
-/*
- * xtRelink()
- *
- * function:
- * link around a freed page.
- *
- * Parameter:
- * int tid,
- * struct inode *ip,
- * xtpage_t *p)
- *
- * returns:
- */
-static int xtRelink(tid_t tid, struct inode *ip, xtpage_t * p)
-{
- int rc = 0;
- struct metapage *mp;
- s64 nextbn, prevbn;
- struct tlock *tlck;
-
- nextbn = le64_to_cpu(p->header.next);
- prevbn = le64_to_cpu(p->header.prev);
-
- /* update prev pointer of the next page */
- if (nextbn != 0) {
- XT_GETPAGE(ip, nextbn, mp, PSIZE, p, rc);
- if (rc)
- return rc;
-
- /*
- * acquire a transaction lock on the page;
- *
- * action: update prev pointer;
- */
- BT_MARK_DIRTY(mp, ip);
- tlck = txLock(tid, ip, mp, tlckXTREE | tlckRELINK);
-
- /* the page may already have been tlock'd */
-
- p->header.prev = cpu_to_le64(prevbn);
-
- XT_PUTPAGE(mp);
- }
-
- /* update next pointer of the previous page */
- if (prevbn != 0) {
- XT_GETPAGE(ip, prevbn, mp, PSIZE, p, rc);
- if (rc)
- return rc;
-
- /*
- * acquire a transaction lock on the page;
- *
- * action: update next pointer;
- */
- BT_MARK_DIRTY(mp, ip);
- tlck = txLock(tid, ip, mp, tlckXTREE | tlckRELINK);
-
- /* the page may already have been tlock'd */
-
- p->header.next = le64_to_cpu(nextbn);
-
- XT_PUTPAGE(mp);
- }
-
- return 0;
-}
-#endif /* _STILL_TO_PORT */
-
/*
* xtInitRoot()
diff --git a/fs/jfs/jfs_xtree.h b/fs/jfs/jfs_xtree.h
index 5f51be8596b3..142caafc73b1 100644
--- a/fs/jfs/jfs_xtree.h
+++ b/fs/jfs/jfs_xtree.h
@@ -95,10 +95,6 @@ extern int xtInsert(tid_t tid, struct inode *ip,
int xflag, s64 xoff, int xlen, s64 * xaddrp, int flag);
extern int xtExtend(tid_t tid, struct inode *ip, s64 xoff, int xlen,
int flag);
-#ifdef _NOTYET
-extern int xtTailgate(tid_t tid, struct inode *ip,
- s64 xoff, int xlen, s64 xaddr, int flag);
-#endif
extern int xtUpdate(tid_t tid, struct inode *ip, struct xad *nxad);
extern int xtDelete(tid_t tid, struct inode *ip, s64 xoff, int xlen,
int flag);
diff --git a/fs/locks.c b/fs/locks.c
index 8c6df10cd9ed..ca28e0e50e56 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -300,6 +300,34 @@ void locks_release_private(struct file_lock *fl)
}
EXPORT_SYMBOL_GPL(locks_release_private);
+/**
+ * locks_owner_has_blockers - Check for blocking lock requests
+ * @flctx: file lock context
+ * @owner: lock owner
+ *
+ * Return values:
+ * %true: @owner has at least one blocker
+ * %false: @owner has no blockers
+ */
+bool locks_owner_has_blockers(struct file_lock_context *flctx,
+ fl_owner_t owner)
+{
+ struct file_lock *fl;
+
+ spin_lock(&flctx->flc_lock);
+ list_for_each_entry(fl, &flctx->flc_posix, fl_list) {
+ if (fl->fl_owner != owner)
+ continue;
+ if (!list_empty(&fl->fl_blocked_requests)) {
+ spin_unlock(&flctx->flc_lock);
+ return true;
+ }
+ }
+ spin_unlock(&flctx->flc_lock);
+ return false;
+}
+EXPORT_SYMBOL_GPL(locks_owner_has_blockers);
+
/* Free a lock which is not in use. */
void locks_free_lock(struct file_lock *fl)
{
@@ -874,6 +902,8 @@ posix_test_lock(struct file *filp, struct file_lock *fl)
struct file_lock *cfl;
struct file_lock_context *ctx;
struct inode *inode = locks_inode(filp);
+ void *owner;
+ void (*func)(void);
ctx = smp_load_acquire(&inode->i_flctx);
if (!ctx || list_empty_careful(&ctx->flc_posix)) {
@@ -881,12 +911,23 @@ posix_test_lock(struct file *filp, struct file_lock *fl)
return;
}
+retry:
spin_lock(&ctx->flc_lock);
list_for_each_entry(cfl, &ctx->flc_posix, fl_list) {
- if (posix_locks_conflict(fl, cfl)) {
- locks_copy_conflock(fl, cfl);
- goto out;
+ if (!posix_locks_conflict(fl, cfl))
+ continue;
+ if (cfl->fl_lmops && cfl->fl_lmops->lm_lock_expirable
+ && (*cfl->fl_lmops->lm_lock_expirable)(cfl)) {
+ owner = cfl->fl_lmops->lm_mod_owner;
+ func = cfl->fl_lmops->lm_expire_lock;
+ __module_get(owner);
+ spin_unlock(&ctx->flc_lock);
+ (*func)();
+ module_put(owner);
+ goto retry;
}
+ locks_copy_conflock(fl, cfl);
+ goto out;
}
fl->fl_type = F_UNLCK;
out:
@@ -1060,6 +1101,8 @@ static int posix_lock_inode(struct inode *inode, struct file_lock *request,
int error;
bool added = false;
LIST_HEAD(dispose);
+ void *owner;
+ void (*func)(void);
ctx = locks_get_lock_context(inode, request->fl_type);
if (!ctx)
@@ -1078,6 +1121,7 @@ static int posix_lock_inode(struct inode *inode, struct file_lock *request,
new_fl2 = locks_alloc_lock();
}
+retry:
percpu_down_read(&file_rwsem);
spin_lock(&ctx->flc_lock);
/*
@@ -1089,6 +1133,17 @@ static int posix_lock_inode(struct inode *inode, struct file_lock *request,
list_for_each_entry(fl, &ctx->flc_posix, fl_list) {
if (!posix_locks_conflict(request, fl))
continue;
+ if (fl->fl_lmops && fl->fl_lmops->lm_lock_expirable
+ && (*fl->fl_lmops->lm_lock_expirable)(fl)) {
+ owner = fl->fl_lmops->lm_mod_owner;
+ func = fl->fl_lmops->lm_expire_lock;
+ __module_get(owner);
+ spin_unlock(&ctx->flc_lock);
+ percpu_up_read(&file_rwsem);
+ (*func)();
+ module_put(owner);
+ goto retry;
+ }
if (conflock)
locks_copy_conflock(conflock, fl);
error = -EAGAIN;
diff --git a/fs/namei.c b/fs/namei.c
index 896ade8b7400..3dc0db2df561 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1032,7 +1032,7 @@ static struct ctl_table namei_sysctls[] = {
.procname = "protected_symlinks",
.data = &sysctl_protected_symlinks,
.maxlen = sizeof(int),
- .mode = 0600,
+ .mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_ONE,
@@ -1041,7 +1041,7 @@ static struct ctl_table namei_sysctls[] = {
.procname = "protected_hardlinks",
.data = &sysctl_protected_hardlinks,
.maxlen = sizeof(int),
- .mode = 0600,
+ .mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_ONE,
@@ -1050,7 +1050,7 @@ static struct ctl_table namei_sysctls[] = {
.procname = "protected_fifos",
.data = &sysctl_protected_fifos,
.maxlen = sizeof(int),
- .mode = 0600,
+ .mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_TWO,
@@ -1059,7 +1059,7 @@ static struct ctl_table namei_sysctls[] = {
.procname = "protected_regular",
.data = &sysctl_protected_regular,
.maxlen = sizeof(int),
- .mode = 0600,
+ .mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_TWO,
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 11c566d8769f..4eb2a8380a28 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -153,28 +153,25 @@ nfs_direct_count_bytes(struct nfs_direct_req *dreq,
}
/**
- * nfs_direct_IO - NFS address space operation for direct I/O
+ * nfs_swap_rw - NFS address space operation for swap I/O
* @iocb: target I/O control block
* @iter: I/O buffer
*
- * The presence of this routine in the address space ops vector means
- * the NFS client supports direct I/O. However, for most direct IO, we
- * shunt off direct read and write requests before the VFS gets them,
- * so this method is only ever called for swap.
+ * Perform IO to the swap-file. This is much like direct IO.
*/
-ssize_t nfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
+int nfs_swap_rw(struct kiocb *iocb, struct iov_iter *iter)
{
- struct inode *inode = iocb->ki_filp->f_mapping->host;
-
- /* we only support swap file calling nfs_direct_IO */
- if (!IS_SWAPFILE(inode))
- return 0;
+ ssize_t ret;
VM_BUG_ON(iov_iter_count(iter) != PAGE_SIZE);
if (iov_iter_rw(iter) == READ)
- return nfs_file_direct_read(iocb, iter, true);
- return nfs_file_direct_write(iocb, iter, true);
+ ret = nfs_file_direct_read(iocb, iter, true);
+ else
+ ret = nfs_file_direct_write(iocb, iter, true);
+ if (ret < 0)
+ return ret;
+ return 0;
}
static void nfs_direct_release_pages(struct page **pages, unsigned int npages)
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index d764b3ce7905..6f5425e89ca6 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -69,6 +69,8 @@ nfs_file_open(struct inode *inode, struct file *filp)
return res;
res = nfs_open(inode, filp);
+ if (res == 0)
+ filp->f_mode |= FMODE_CAN_ODIRECT;
return res;
}
@@ -480,6 +482,7 @@ static int nfs_swap_activate(struct swap_info_struct *sis, struct file *file,
{
unsigned long blocks;
long long isize;
+ int ret;
struct inode *inode = file_inode(file);
struct rpc_clnt *clnt = NFS_CLIENT(inode);
struct nfs_client *cl = NFS_SERVER(inode)->nfs_client;
@@ -493,13 +496,22 @@ static int nfs_swap_activate(struct swap_info_struct *sis, struct file *file,
return -EINVAL;
}
- *span = sis->pages;
+ ret = rpc_clnt_swap_activate(clnt);
+ if (ret)
+ return ret;
+ ret = add_swap_extent(sis, 0, sis->max, 0);
+ if (ret < 0) {
+ rpc_clnt_swap_deactivate(clnt);
+ return ret;
+ }
+ *span = sis->pages;
if (cl->rpc_ops->enable_swap)
cl->rpc_ops->enable_swap(inode);
- return rpc_clnt_swap_activate(clnt);
+ sis->flags |= SWP_FS_OPS;
+ return ret;
}
static void nfs_swap_deactivate(struct file *file)
@@ -523,7 +535,6 @@ const struct address_space_operations nfs_file_aops = {
.write_end = nfs_write_end,
.invalidate_folio = nfs_invalidate_folio,
.release_folio = nfs_release_folio,
- .direct_IO = nfs_direct_IO,
#ifdef CONFIG_MIGRATION
.migratepage = nfs_migrate_page,
#endif
@@ -532,6 +543,7 @@ const struct address_space_operations nfs_file_aops = {
.error_remove_page = generic_error_remove_page,
.swap_activate = nfs_swap_activate,
.swap_deactivate = nfs_swap_deactivate,
+ .swap_rw = nfs_swap_rw,
};
/*
diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
index 489c9c1d8f31..f172412447f5 100644
--- a/fs/nfsd/filecache.c
+++ b/fs/nfsd/filecache.c
@@ -303,6 +303,8 @@ nfsd_file_put_noref(struct nfsd_file *nf)
void
nfsd_file_put(struct nfsd_file *nf)
{
+ might_sleep();
+
set_bit(NFSD_FILE_REFERENCED, &nf->nf_flags);
if (test_bit(NFSD_FILE_HASHED, &nf->nf_flags) == 0) {
nfsd_file_flush(nf);
@@ -899,9 +901,9 @@ nfsd_file_is_cached(struct inode *inode)
return ret;
}
-__be32
-nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
- unsigned int may_flags, struct nfsd_file **pnf)
+static __be32
+nfsd_do_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ unsigned int may_flags, struct nfsd_file **pnf, bool open)
{
__be32 status;
struct net *net = SVC_NET(rqstp);
@@ -996,10 +998,14 @@ open_file:
nfsd_file_gc();
nf->nf_mark = nfsd_file_mark_find_or_create(nf);
- if (nf->nf_mark)
- status = nfsd_open_verified(rqstp, fhp, S_IFREG,
- may_flags, &nf->nf_file);
- else
+ if (nf->nf_mark) {
+ if (open) {
+ status = nfsd_open_verified(rqstp, fhp, may_flags,
+ &nf->nf_file);
+ trace_nfsd_file_open(nf, status);
+ } else
+ status = nfs_ok;
+ } else
status = nfserr_jukebox;
/*
* If construction failed, or we raced with a call to unlink()
@@ -1019,6 +1025,40 @@ open_file:
goto out;
}
+/**
+ * nfsd_file_acquire - Get a struct nfsd_file with an open file
+ * @rqstp: the RPC transaction being executed
+ * @fhp: the NFS filehandle of the file to be opened
+ * @may_flags: NFSD_MAY_ settings for the file
+ * @pnf: OUT: new or found "struct nfsd_file" object
+ *
+ * Returns nfs_ok and sets @pnf on success; otherwise an nfsstat in
+ * network byte order is returned.
+ */
+__be32
+nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ unsigned int may_flags, struct nfsd_file **pnf)
+{
+ return nfsd_do_file_acquire(rqstp, fhp, may_flags, pnf, true);
+}
+
+/**
+ * nfsd_file_create - Get a struct nfsd_file, do not open
+ * @rqstp: the RPC transaction being executed
+ * @fhp: the NFS filehandle of the file just created
+ * @may_flags: NFSD_MAY_ settings for the file
+ * @pnf: OUT: new or found "struct nfsd_file" object
+ *
+ * Returns nfs_ok and sets @pnf on success; otherwise an nfsstat in
+ * network byte order is returned.
+ */
+__be32
+nfsd_file_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ unsigned int may_flags, struct nfsd_file **pnf)
+{
+ return nfsd_do_file_acquire(rqstp, fhp, may_flags, pnf, false);
+}
+
/*
* Note that fields may be added, removed or reordered in the future. Programs
* scraping this file for info should test the labels to ensure they're
diff --git a/fs/nfsd/filecache.h b/fs/nfsd/filecache.h
index 435ceab27897..1da0c79a5580 100644
--- a/fs/nfsd/filecache.h
+++ b/fs/nfsd/filecache.h
@@ -59,5 +59,7 @@ void nfsd_file_close_inode_sync(struct inode *inode);
bool nfsd_file_is_cached(struct inode *inode);
__be32 nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
unsigned int may_flags, struct nfsd_file **nfp);
+__be32 nfsd_file_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ unsigned int may_flags, struct nfsd_file **nfp);
int nfsd_file_cache_stats_open(struct inode *, struct file *);
#endif /* _FS_NFSD_FILECACHE_H */
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index 936eebd4c56d..981a3a7a6e16 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -8,6 +8,7 @@
#include <linux/fs.h>
#include <linux/ext2_fs.h>
#include <linux/magic.h>
+#include <linux/namei.h>
#include "cache.h"
#include "xdr3.h"
@@ -220,17 +221,132 @@ nfsd3_proc_write(struct svc_rqst *rqstp)
}
/*
- * With NFSv3, CREATE processing is a lot easier than with NFSv2.
- * At least in theory; we'll see how it fares in practice when the
- * first reports about SunOS compatibility problems start to pour in...
+ * Implement NFSv3's unchecked, guarded, and exclusive CREATE
+ * semantics for regular files. Except for the created file,
+ * this operation is stateless on the server.
+ *
+ * Upon return, caller must release @fhp and @resfhp.
*/
static __be32
+nfsd3_create_file(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ struct svc_fh *resfhp, struct nfsd3_createargs *argp)
+{
+ struct iattr *iap = &argp->attrs;
+ struct dentry *parent, *child;
+ __u32 v_mtime, v_atime;
+ struct inode *inode;
+ __be32 status;
+ int host_err;
+
+ if (isdotent(argp->name, argp->len))
+ return nfserr_exist;
+ if (!(iap->ia_valid & ATTR_MODE))
+ iap->ia_mode = 0;
+
+ status = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_EXEC);
+ if (status != nfs_ok)
+ return status;
+
+ parent = fhp->fh_dentry;
+ inode = d_inode(parent);
+
+ host_err = fh_want_write(fhp);
+ if (host_err)
+ return nfserrno(host_err);
+
+ fh_lock_nested(fhp, I_MUTEX_PARENT);
+
+ child = lookup_one_len(argp->name, parent, argp->len);
+ if (IS_ERR(child)) {
+ status = nfserrno(PTR_ERR(child));
+ goto out;
+ }
+
+ if (d_really_is_negative(child)) {
+ status = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_CREATE);
+ if (status != nfs_ok)
+ goto out;
+ }
+
+ status = fh_compose(resfhp, fhp->fh_export, child, fhp);
+ if (status != nfs_ok)
+ goto out;
+
+ v_mtime = 0;
+ v_atime = 0;
+ if (argp->createmode == NFS3_CREATE_EXCLUSIVE) {
+ u32 *verifier = (u32 *)argp->verf;
+
+ /*
+ * Solaris 7 gets confused (bugid 4218508) if these have
+ * the high bit set, as do xfs filesystems without the
+ * "bigtime" feature. So just clear the high bits.
+ */
+ v_mtime = verifier[0] & 0x7fffffff;
+ v_atime = verifier[1] & 0x7fffffff;
+ }
+
+ if (d_really_is_positive(child)) {
+ status = nfs_ok;
+
+ switch (argp->createmode) {
+ case NFS3_CREATE_UNCHECKED:
+ if (!d_is_reg(child))
+ break;
+ iap->ia_valid &= ATTR_SIZE;
+ goto set_attr;
+ case NFS3_CREATE_GUARDED:
+ status = nfserr_exist;
+ break;
+ case NFS3_CREATE_EXCLUSIVE:
+ if (d_inode(child)->i_mtime.tv_sec == v_mtime &&
+ d_inode(child)->i_atime.tv_sec == v_atime &&
+ d_inode(child)->i_size == 0) {
+ break;
+ }
+ status = nfserr_exist;
+ }
+ goto out;
+ }
+
+ if (!IS_POSIXACL(inode))
+ iap->ia_mode &= ~current_umask();
+
+ host_err = vfs_create(&init_user_ns, inode, child, iap->ia_mode, true);
+ if (host_err < 0) {
+ status = nfserrno(host_err);
+ goto out;
+ }
+
+ /* A newly created file already has a file size of zero. */
+ if ((iap->ia_valid & ATTR_SIZE) && (iap->ia_size == 0))
+ iap->ia_valid &= ~ATTR_SIZE;
+ if (argp->createmode == NFS3_CREATE_EXCLUSIVE) {
+ iap->ia_valid = ATTR_MTIME | ATTR_ATIME |
+ ATTR_MTIME_SET | ATTR_ATIME_SET;
+ iap->ia_mtime.tv_sec = v_mtime;
+ iap->ia_atime.tv_sec = v_atime;
+ iap->ia_mtime.tv_nsec = 0;
+ iap->ia_atime.tv_nsec = 0;
+ }
+
+set_attr:
+ status = nfsd_create_setattr(rqstp, fhp, resfhp, iap);
+
+out:
+ fh_unlock(fhp);
+ if (child && !IS_ERR(child))
+ dput(child);
+ fh_drop_write(fhp);
+ return status;
+}
+
+static __be32
nfsd3_proc_create(struct svc_rqst *rqstp)
{
struct nfsd3_createargs *argp = rqstp->rq_argp;
struct nfsd3_diropres *resp = rqstp->rq_resp;
- svc_fh *dirfhp, *newfhp = NULL;
- struct iattr *attr;
+ svc_fh *dirfhp, *newfhp;
dprintk("nfsd: CREATE(3) %s %.*s\n",
SVCFH_fmt(&argp->fh),
@@ -239,21 +355,8 @@ nfsd3_proc_create(struct svc_rqst *rqstp)
dirfhp = fh_copy(&resp->dirfh, &argp->fh);
newfhp = fh_init(&resp->fh, NFS3_FHSIZE);
- attr = &argp->attrs;
-
- /* Unfudge the mode bits */
- attr->ia_mode &= ~S_IFMT;
- if (!(attr->ia_valid & ATTR_MODE)) {
- attr->ia_valid |= ATTR_MODE;
- attr->ia_mode = S_IFREG;
- } else {
- attr->ia_mode = (attr->ia_mode & ~S_IFMT) | S_IFREG;
- }
- /* Now create the file and set attributes */
- resp->status = do_nfsd_create(rqstp, dirfhp, argp->name, argp->len,
- attr, newfhp, argp->createmode,
- (u32 *)argp->verf, NULL, NULL);
+ resp->status = nfsd3_create_file(rqstp, dirfhp, newfhp, argp);
return rpc_success;
}
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index b207c76a873f..3895eb52d2b1 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -37,6 +37,8 @@
#include <linux/falloc.h>
#include <linux/slab.h>
#include <linux/kthread.h>
+#include <linux/namei.h>
+
#include <linux/sunrpc/addr.h>
#include <linux/nfs_ssc.h>
@@ -235,6 +237,183 @@ static void nfsd4_set_open_owner_reply_cache(struct nfsd4_compound_state *cstate
&resfh->fh_handle);
}
+static inline bool nfsd4_create_is_exclusive(int createmode)
+{
+ return createmode == NFS4_CREATE_EXCLUSIVE ||
+ createmode == NFS4_CREATE_EXCLUSIVE4_1;
+}
+
+static __be32
+nfsd4_vfs_create(struct svc_fh *fhp, struct dentry *child,
+ struct nfsd4_open *open)
+{
+ struct file *filp;
+ struct path path;
+ int oflags;
+
+ oflags = O_CREAT | O_LARGEFILE;
+ switch (open->op_share_access & NFS4_SHARE_ACCESS_BOTH) {
+ case NFS4_SHARE_ACCESS_WRITE:
+ oflags |= O_WRONLY;
+ break;
+ case NFS4_SHARE_ACCESS_BOTH:
+ oflags |= O_RDWR;
+ break;
+ default:
+ oflags |= O_RDONLY;
+ }
+
+ path.mnt = fhp->fh_export->ex_path.mnt;
+ path.dentry = child;
+ filp = dentry_create(&path, oflags, open->op_iattr.ia_mode,
+ current_cred());
+ if (IS_ERR(filp))
+ return nfserrno(PTR_ERR(filp));
+
+ open->op_filp = filp;
+ return nfs_ok;
+}
+
+/*
+ * Implement NFSv4's unchecked, guarded, and exclusive create
+ * semantics for regular files. Open state for this new file is
+ * subsequently fabricated in nfsd4_process_open2().
+ *
+ * Upon return, caller must release @fhp and @resfhp.
+ */
+static __be32
+nfsd4_create_file(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ struct svc_fh *resfhp, struct nfsd4_open *open)
+{
+ struct iattr *iap = &open->op_iattr;
+ struct dentry *parent, *child;
+ __u32 v_mtime, v_atime;
+ struct inode *inode;
+ __be32 status;
+ int host_err;
+
+ if (isdotent(open->op_fname, open->op_fnamelen))
+ return nfserr_exist;
+ if (!(iap->ia_valid & ATTR_MODE))
+ iap->ia_mode = 0;
+
+ status = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_EXEC);
+ if (status != nfs_ok)
+ return status;
+ parent = fhp->fh_dentry;
+ inode = d_inode(parent);
+
+ host_err = fh_want_write(fhp);
+ if (host_err)
+ return nfserrno(host_err);
+
+ fh_lock_nested(fhp, I_MUTEX_PARENT);
+
+ child = lookup_one_len(open->op_fname, parent, open->op_fnamelen);
+ if (IS_ERR(child)) {
+ status = nfserrno(PTR_ERR(child));
+ goto out;
+ }
+
+ if (d_really_is_negative(child)) {
+ status = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_CREATE);
+ if (status != nfs_ok)
+ goto out;
+ }
+
+ status = fh_compose(resfhp, fhp->fh_export, child, fhp);
+ if (status != nfs_ok)
+ goto out;
+
+ v_mtime = 0;
+ v_atime = 0;
+ if (nfsd4_create_is_exclusive(open->op_createmode)) {
+ u32 *verifier = (u32 *)open->op_verf.data;
+
+ /*
+ * Solaris 7 gets confused (bugid 4218508) if these have
+ * the high bit set, as do xfs filesystems without the
+ * "bigtime" feature. So just clear the high bits. If this
+ * is ever changed to use different attrs for storing the
+ * verifier, then do_open_lookup() will also need to be
+ * fixed accordingly.
+ */
+ v_mtime = verifier[0] & 0x7fffffff;
+ v_atime = verifier[1] & 0x7fffffff;
+ }
+
+ if (d_really_is_positive(child)) {
+ status = nfs_ok;
+
+ switch (open->op_createmode) {
+ case NFS4_CREATE_UNCHECKED:
+ if (!d_is_reg(child))
+ break;
+
+ /*
+ * In NFSv4, we don't want to truncate the file
+ * now. This would be wrong if the OPEN fails for
+ * some other reason. Furthermore, if the size is
+ * nonzero, we should ignore it according to spec!
+ */
+ open->op_truncate = (iap->ia_valid & ATTR_SIZE) &&
+ !iap->ia_size;
+ break;
+ case NFS4_CREATE_GUARDED:
+ status = nfserr_exist;
+ break;
+ case NFS4_CREATE_EXCLUSIVE:
+ if (d_inode(child)->i_mtime.tv_sec == v_mtime &&
+ d_inode(child)->i_atime.tv_sec == v_atime &&
+ d_inode(child)->i_size == 0) {
+ open->op_created = true;
+ break; /* subtle */
+ }
+ status = nfserr_exist;
+ break;
+ case NFS4_CREATE_EXCLUSIVE4_1:
+ if (d_inode(child)->i_mtime.tv_sec == v_mtime &&
+ d_inode(child)->i_atime.tv_sec == v_atime &&
+ d_inode(child)->i_size == 0) {
+ open->op_created = true;
+ goto set_attr; /* subtle */
+ }
+ status = nfserr_exist;
+ }
+ goto out;
+ }
+
+ if (!IS_POSIXACL(inode))
+ iap->ia_mode &= ~current_umask();
+
+ status = nfsd4_vfs_create(fhp, child, open);
+ if (status != nfs_ok)
+ goto out;
+ open->op_created = true;
+
+ /* A newly created file already has a file size of zero. */
+ if ((iap->ia_valid & ATTR_SIZE) && (iap->ia_size == 0))
+ iap->ia_valid &= ~ATTR_SIZE;
+ if (nfsd4_create_is_exclusive(open->op_createmode)) {
+ iap->ia_valid = ATTR_MTIME | ATTR_ATIME |
+ ATTR_MTIME_SET|ATTR_ATIME_SET;
+ iap->ia_mtime.tv_sec = v_mtime;
+ iap->ia_atime.tv_sec = v_atime;
+ iap->ia_mtime.tv_nsec = 0;
+ iap->ia_atime.tv_nsec = 0;
+ }
+
+set_attr:
+ status = nfsd_create_setattr(rqstp, fhp, resfhp, iap);
+
+out:
+ fh_unlock(fhp);
+ if (child && !IS_ERR(child))
+ dput(child);
+ fh_drop_write(fhp);
+ return status;
+}
+
static __be32
do_open_lookup(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_open *open, struct svc_fh **resfh)
{
@@ -264,16 +443,8 @@ do_open_lookup(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, stru
* yes | yes | GUARDED4 | GUARDED4
*/
- /*
- * Note: create modes (UNCHECKED,GUARDED...) are the same
- * in NFSv4 as in v3 except EXCLUSIVE4_1.
- */
current->fs->umask = open->op_umask;
- status = do_nfsd_create(rqstp, current_fh, open->op_fname,
- open->op_fnamelen, &open->op_iattr,
- *resfh, open->op_createmode,
- (u32 *)open->op_verf.data,
- &open->op_truncate, &open->op_created);
+ status = nfsd4_create_file(rqstp, current_fh, *resfh, open);
current->fs->umask = 0;
if (!status && open->op_label.len)
@@ -284,7 +455,7 @@ do_open_lookup(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, stru
* use the returned bitmask to indicate which attributes
* we used to store the verifier:
*/
- if (nfsd_create_is_exclusive(open->op_createmode) && status == 0)
+ if (nfsd4_create_is_exclusive(open->op_createmode) && status == 0)
open->op_bmval[1] |= (FATTR4_WORD1_TIME_ACCESS |
FATTR4_WORD1_TIME_MODIFY);
} else
@@ -375,6 +546,8 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
(int)open->op_fnamelen, open->op_fname,
open->op_openowner);
+ open->op_filp = NULL;
+
/* This check required by spec. */
if (open->op_create && open->op_claim_type != NFS4_OPEN_CLAIM_NULL)
return nfserr_inval;
@@ -427,43 +600,35 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
goto out;
switch (open->op_claim_type) {
- case NFS4_OPEN_CLAIM_DELEGATE_CUR:
- case NFS4_OPEN_CLAIM_NULL:
- status = do_open_lookup(rqstp, cstate, open, &resfh);
- if (status)
- goto out;
- break;
- case NFS4_OPEN_CLAIM_PREVIOUS:
- status = nfs4_check_open_reclaim(cstate->clp);
- if (status)
- goto out;
- open->op_openowner->oo_flags |= NFS4_OO_CONFIRMED;
- reclaim = true;
- fallthrough;
- case NFS4_OPEN_CLAIM_FH:
- case NFS4_OPEN_CLAIM_DELEG_CUR_FH:
- status = do_open_fhandle(rqstp, cstate, open);
- if (status)
- goto out;
- resfh = &cstate->current_fh;
- break;
- case NFS4_OPEN_CLAIM_DELEG_PREV_FH:
- case NFS4_OPEN_CLAIM_DELEGATE_PREV:
- dprintk("NFSD: unsupported OPEN claim type %d\n",
- open->op_claim_type);
- status = nfserr_notsupp;
+ case NFS4_OPEN_CLAIM_DELEGATE_CUR:
+ case NFS4_OPEN_CLAIM_NULL:
+ status = do_open_lookup(rqstp, cstate, open, &resfh);
+ if (status)
goto out;
- default:
- dprintk("NFSD: Invalid OPEN claim type %d\n",
- open->op_claim_type);
- status = nfserr_inval;
+ break;
+ case NFS4_OPEN_CLAIM_PREVIOUS:
+ status = nfs4_check_open_reclaim(cstate->clp);
+ if (status)
+ goto out;
+ open->op_openowner->oo_flags |= NFS4_OO_CONFIRMED;
+ reclaim = true;
+ fallthrough;
+ case NFS4_OPEN_CLAIM_FH:
+ case NFS4_OPEN_CLAIM_DELEG_CUR_FH:
+ status = do_open_fhandle(rqstp, cstate, open);
+ if (status)
goto out;
+ resfh = &cstate->current_fh;
+ break;
+ case NFS4_OPEN_CLAIM_DELEG_PREV_FH:
+ case NFS4_OPEN_CLAIM_DELEGATE_PREV:
+ status = nfserr_notsupp;
+ goto out;
+ default:
+ status = nfserr_inval;
+ goto out;
}
- /*
- * nfsd4_process_open2() does the actual opening of the file. If
- * successful, it (1) truncates the file if open->op_truncate was
- * set, (2) sets open->op_stateid, (3) sets open->op_delegation.
- */
+
status = nfsd4_process_open2(rqstp, resfh, open);
WARN(status && open->op_created,
"nfsd4_process_open2 failed to open newly-created file! status=%u\n",
@@ -471,6 +636,10 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
if (reclaim && !status)
nn->somebody_reclaimed = true;
out:
+ if (open->op_filp) {
+ fput(open->op_filp);
+ open->op_filp = NULL;
+ }
if (resfh && resfh != &cstate->current_fh) {
fh_dup2(&cstate->current_fh, resfh);
fh_put(resfh);
@@ -801,7 +970,7 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
* the client wants us to do more in this compound:
*/
if (!nfsd4_last_compound_op(rqstp))
- clear_bit(RQ_SPLICE_OK, &rqstp->rq_flags);
+ __clear_bit(RQ_SPLICE_OK, &rqstp->rq_flags);
/* check stateid */
status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
@@ -2481,11 +2650,12 @@ nfsd4_proc_compound(struct svc_rqst *rqstp)
cstate->minorversion = args->minorversion;
fh_init(current_fh, NFS4_FHSIZE);
fh_init(save_fh, NFS4_FHSIZE);
+
/*
* Don't use the deferral mechanism for NFSv4; compounds make it
* too hard to avoid non-idempotency problems.
*/
- clear_bit(RQ_USEDEFERRAL, &rqstp->rq_flags);
+ __clear_bit(RQ_USEDEFERRAL, &rqstp->rq_flags);
/*
* According to RFC3010, this takes precedence over all other errors.
@@ -2600,7 +2770,7 @@ encode_op:
out:
cstate->status = status;
/* Reset deferral mechanism for RPC deferrals */
- set_bit(RQ_USEDEFERRAL, &rqstp->rq_flags);
+ __set_bit(RQ_USEDEFERRAL, &rqstp->rq_flags);
return rpc_success;
}
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 234e852fcdfa..9409a0dc1b76 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -125,6 +125,23 @@ static void free_session(struct nfsd4_session *);
static const struct nfsd4_callback_ops nfsd4_cb_recall_ops;
static const struct nfsd4_callback_ops nfsd4_cb_notify_lock_ops;
+static struct workqueue_struct *laundry_wq;
+
+int nfsd4_create_laundry_wq(void)
+{
+ int rc = 0;
+
+ laundry_wq = alloc_workqueue("%s", WQ_UNBOUND, 0, "nfsd4");
+ if (laundry_wq == NULL)
+ rc = -ENOMEM;
+ return rc;
+}
+
+void nfsd4_destroy_laundry_wq(void)
+{
+ destroy_workqueue(laundry_wq);
+}
+
static bool is_session_dead(struct nfsd4_session *ses)
{
return ses->se_flags & NFS4_SESSION_DEAD;
@@ -152,6 +169,7 @@ static __be32 get_client_locked(struct nfs4_client *clp)
if (is_client_expired(clp))
return nfserr_expired;
atomic_inc(&clp->cl_rpc_users);
+ clp->cl_state = NFSD4_ACTIVE;
return nfs_ok;
}
@@ -172,6 +190,7 @@ renew_client_locked(struct nfs4_client *clp)
list_move_tail(&clp->cl_lru, &nn->client_lru);
clp->cl_time = ktime_get_boottime_seconds();
+ clp->cl_state = NFSD4_ACTIVE;
}
static void put_client_renew_locked(struct nfs4_client *clp)
@@ -690,6 +709,57 @@ static unsigned int file_hashval(struct svc_fh *fh)
static struct hlist_head file_hashtbl[FILE_HASH_SIZE];
+/*
+ * Check if courtesy clients have conflicting access and resolve it if possible
+ *
+ * access: is op_share_access if share_access is true.
+ * Check if access mode, op_share_access, would conflict with
+ * the current deny mode of the file 'fp'.
+ * access: is op_share_deny if share_access is false.
+ * Check if the deny mode, op_share_deny, would conflict with
+ * current access of the file 'fp'.
+ * stp: skip checking this entry.
+ * new_stp: normal open, not open upgrade.
+ *
+ * Function returns:
+ * false - access/deny mode conflict with normal client.
+ * true - no conflict or conflict with courtesy client(s) is resolved.
+ */
+static bool
+nfs4_resolve_deny_conflicts_locked(struct nfs4_file *fp, bool new_stp,
+ struct nfs4_ol_stateid *stp, u32 access, bool share_access)
+{
+ struct nfs4_ol_stateid *st;
+ bool resolvable = true;
+ unsigned char bmap;
+ struct nfsd_net *nn;
+ struct nfs4_client *clp;
+
+ lockdep_assert_held(&fp->fi_lock);
+ list_for_each_entry(st, &fp->fi_stateids, st_perfile) {
+ /* ignore lock stateid */
+ if (st->st_openstp)
+ continue;
+ if (st == stp && new_stp)
+ continue;
+ /* check file access against deny mode or vice versa */
+ bmap = share_access ? st->st_deny_bmap : st->st_access_bmap;
+ if (!(access & bmap_to_share_mode(bmap)))
+ continue;
+ clp = st->st_stid.sc_client;
+ if (try_to_expire_client(clp))
+ continue;
+ resolvable = false;
+ break;
+ }
+ if (resolvable) {
+ clp = stp->st_stid.sc_client;
+ nn = net_generic(clp->net, nfsd_net_id);
+ mod_delayed_work(laundry_wq, &nn->laundromat_work, 0);
+ }
+ return resolvable;
+}
+
static void
__nfs4_file_get_access(struct nfs4_file *fp, u32 access)
{
@@ -1090,6 +1160,7 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_file *fp,
get_clnt_odstate(odstate);
dp->dl_type = NFS4_OPEN_DELEGATE_READ;
dp->dl_retries = 1;
+ dp->dl_recalled = false;
nfsd4_init_cb(&dp->dl_recall, dp->dl_stid.sc_client,
&nfsd4_cb_recall_ops, NFSPROC4_CLNT_CB_RECALL);
get_nfs4_file(fp);
@@ -2004,6 +2075,8 @@ static struct nfs4_client *alloc_client(struct xdr_netobj name)
idr_init(&clp->cl_stateids);
atomic_set(&clp->cl_rpc_users, 0);
clp->cl_cb_state = NFSD4_CB_UNKNOWN;
+ clp->cl_state = NFSD4_ACTIVE;
+ atomic_set(&clp->cl_delegs_in_recall, 0);
INIT_LIST_HEAD(&clp->cl_idhash);
INIT_LIST_HEAD(&clp->cl_openowners);
INIT_LIST_HEAD(&clp->cl_delegations);
@@ -2408,10 +2481,17 @@ static int client_info_show(struct seq_file *m, void *v)
memcpy(&clid, &clp->cl_clientid, sizeof(clid));
seq_printf(m, "clientid: 0x%llx\n", clid);
seq_printf(m, "address: \"%pISpc\"\n", (struct sockaddr *)&clp->cl_addr);
- if (test_bit(NFSD4_CLIENT_CONFIRMED, &clp->cl_flags))
+
+ if (clp->cl_state == NFSD4_COURTESY)
+ seq_puts(m, "status: courtesy\n");
+ else if (clp->cl_state == NFSD4_EXPIRABLE)
+ seq_puts(m, "status: expirable\n");
+ else if (test_bit(NFSD4_CLIENT_CONFIRMED, &clp->cl_flags))
seq_puts(m, "status: confirmed\n");
else
seq_puts(m, "status: unconfirmed\n");
+ seq_printf(m, "seconds from last renew: %lld\n",
+ ktime_get_boottime_seconds() - clp->cl_time);
seq_printf(m, "name: ");
seq_quote_mem(m, clp->cl_name.data, clp->cl_name.len);
seq_printf(m, "\nminor version: %d\n", clp->cl_minorversion);
@@ -4694,9 +4774,18 @@ nfsd_break_deleg_cb(struct file_lock *fl)
bool ret = false;
struct nfs4_delegation *dp = (struct nfs4_delegation *)fl->fl_owner;
struct nfs4_file *fp = dp->dl_stid.sc_file;
+ struct nfs4_client *clp = dp->dl_stid.sc_client;
+ struct nfsd_net *nn;
trace_nfsd_cb_recall(&dp->dl_stid);
+ dp->dl_recalled = true;
+ atomic_inc(&clp->cl_delegs_in_recall);
+ if (try_to_expire_client(clp)) {
+ nn = net_generic(clp->net, nfsd_net_id);
+ mod_delayed_work(laundry_wq, &nn->laundromat_work, 0);
+ }
+
/*
* We don't want the locks code to timeout the lease for us;
* we'll remove it ourself if a delegation isn't returned
@@ -4739,9 +4828,14 @@ static int
nfsd_change_deleg_cb(struct file_lock *onlist, int arg,
struct list_head *dispose)
{
- if (arg & F_UNLCK)
+ struct nfs4_delegation *dp = (struct nfs4_delegation *)onlist->fl_owner;
+ struct nfs4_client *clp = dp->dl_stid.sc_client;
+
+ if (arg & F_UNLCK) {
+ if (dp->dl_recalled)
+ atomic_dec(&clp->cl_delegs_in_recall);
return lease_modify(onlist, arg, dispose);
- else
+ } else
return -EAGAIN;
}
@@ -4947,7 +5041,7 @@ nfsd4_truncate(struct svc_rqst *rqstp, struct svc_fh *fh,
static __be32 nfs4_get_vfs_file(struct svc_rqst *rqstp, struct nfs4_file *fp,
struct svc_fh *cur_fh, struct nfs4_ol_stateid *stp,
- struct nfsd4_open *open)
+ struct nfsd4_open *open, bool new_stp)
{
struct nfsd_file *nf = NULL;
__be32 status;
@@ -4963,6 +5057,13 @@ static __be32 nfs4_get_vfs_file(struct svc_rqst *rqstp, struct nfs4_file *fp,
*/
status = nfs4_file_check_deny(fp, open->op_share_deny);
if (status != nfs_ok) {
+ if (status != nfserr_share_denied) {
+ spin_unlock(&fp->fi_lock);
+ goto out;
+ }
+ if (nfs4_resolve_deny_conflicts_locked(fp, new_stp,
+ stp, open->op_share_deny, false))
+ status = nfserr_jukebox;
spin_unlock(&fp->fi_lock);
goto out;
}
@@ -4970,6 +5071,13 @@ static __be32 nfs4_get_vfs_file(struct svc_rqst *rqstp, struct nfs4_file *fp,
/* set access to the file */
status = nfs4_file_get_access(fp, open->op_share_access);
if (status != nfs_ok) {
+ if (status != nfserr_share_denied) {
+ spin_unlock(&fp->fi_lock);
+ goto out;
+ }
+ if (nfs4_resolve_deny_conflicts_locked(fp, new_stp,
+ stp, open->op_share_access, true))
+ status = nfserr_jukebox;
spin_unlock(&fp->fi_lock);
goto out;
}
@@ -4985,9 +5093,19 @@ static __be32 nfs4_get_vfs_file(struct svc_rqst *rqstp, struct nfs4_file *fp,
if (!fp->fi_fds[oflag]) {
spin_unlock(&fp->fi_lock);
- status = nfsd_file_acquire(rqstp, cur_fh, access, &nf);
- if (status)
- goto out_put_access;
+
+ if (!open->op_filp) {
+ status = nfsd_file_acquire(rqstp, cur_fh, access, &nf);
+ if (status != nfs_ok)
+ goto out_put_access;
+ } else {
+ status = nfsd_file_create(rqstp, cur_fh, access, &nf);
+ if (status != nfs_ok)
+ goto out_put_access;
+ nf->nf_file = open->op_filp;
+ open->op_filp = NULL;
+ }
+
spin_lock(&fp->fi_lock);
if (!fp->fi_fds[oflag]) {
fp->fi_fds[oflag] = nf;
@@ -5016,21 +5134,29 @@ out_put_access:
}
static __be32
-nfs4_upgrade_open(struct svc_rqst *rqstp, struct nfs4_file *fp, struct svc_fh *cur_fh, struct nfs4_ol_stateid *stp, struct nfsd4_open *open)
+nfs4_upgrade_open(struct svc_rqst *rqstp, struct nfs4_file *fp,
+ struct svc_fh *cur_fh, struct nfs4_ol_stateid *stp,
+ struct nfsd4_open *open)
{
__be32 status;
unsigned char old_deny_bmap = stp->st_deny_bmap;
if (!test_access(open->op_share_access, stp))
- return nfs4_get_vfs_file(rqstp, fp, cur_fh, stp, open);
+ return nfs4_get_vfs_file(rqstp, fp, cur_fh, stp, open, false);
/* test and set deny mode */
spin_lock(&fp->fi_lock);
status = nfs4_file_check_deny(fp, open->op_share_deny);
if (status == nfs_ok) {
- set_deny(open->op_share_deny, stp);
- fp->fi_share_deny |=
+ if (status != nfserr_share_denied) {
+ set_deny(open->op_share_deny, stp);
+ fp->fi_share_deny |=
(open->op_share_deny & NFS4_SHARE_DENY_BOTH);
+ } else {
+ if (nfs4_resolve_deny_conflicts_locked(fp, false,
+ stp, open->op_share_deny, false))
+ status = nfserr_jukebox;
+ }
}
spin_unlock(&fp->fi_lock);
@@ -5322,6 +5448,18 @@ static void nfsd4_deleg_xgrade_none_ext(struct nfsd4_open *open,
*/
}
+/**
+ * nfsd4_process_open2 - finish open processing
+ * @rqstp: the RPC transaction being executed
+ * @current_fh: NFSv4 COMPOUND's current filehandle
+ * @open: OPEN arguments
+ *
+ * If successful, (1) truncate the file if open->op_truncate was
+ * set, (2) set open->op_stateid, (3) set open->op_delegation.
+ *
+ * Returns %nfs_ok on success; otherwise an nfs4stat value in
+ * network byte order is returned.
+ */
__be32
nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open *open)
{
@@ -5371,7 +5509,7 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
goto out;
}
} else {
- status = nfs4_get_vfs_file(rqstp, fp, current_fh, stp, open);
+ status = nfs4_get_vfs_file(rqstp, fp, current_fh, stp, open, true);
if (status) {
stp->st_stid.sc_type = NFS4_CLOSED_STID;
release_open_stateid(stp);
@@ -5605,6 +5743,81 @@ static void nfsd4_ssc_expire_umount(struct nfsd_net *nn)
}
#endif
+/* Check if any lock belonging to this lockowner has any blockers */
+static bool
+nfs4_lockowner_has_blockers(struct nfs4_lockowner *lo)
+{
+ struct file_lock_context *ctx;
+ struct nfs4_ol_stateid *stp;
+ struct nfs4_file *nf;
+
+ list_for_each_entry(stp, &lo->lo_owner.so_stateids, st_perstateowner) {
+ nf = stp->st_stid.sc_file;
+ ctx = nf->fi_inode->i_flctx;
+ if (!ctx)
+ continue;
+ if (locks_owner_has_blockers(ctx, lo))
+ return true;
+ }
+ return false;
+}
+
+static bool
+nfs4_anylock_blockers(struct nfs4_client *clp)
+{
+ int i;
+ struct nfs4_stateowner *so;
+ struct nfs4_lockowner *lo;
+
+ if (atomic_read(&clp->cl_delegs_in_recall))
+ return true;
+ spin_lock(&clp->cl_lock);
+ for (i = 0; i < OWNER_HASH_SIZE; i++) {
+ list_for_each_entry(so, &clp->cl_ownerstr_hashtbl[i],
+ so_strhash) {
+ if (so->so_is_open_owner)
+ continue;
+ lo = lockowner(so);
+ if (nfs4_lockowner_has_blockers(lo)) {
+ spin_unlock(&clp->cl_lock);
+ return true;
+ }
+ }
+ }
+ spin_unlock(&clp->cl_lock);
+ return false;
+}
+
+static void
+nfs4_get_client_reaplist(struct nfsd_net *nn, struct list_head *reaplist,
+ struct laundry_time *lt)
+{
+ struct list_head *pos, *next;
+ struct nfs4_client *clp;
+
+ INIT_LIST_HEAD(reaplist);
+ spin_lock(&nn->client_lock);
+ list_for_each_safe(pos, next, &nn->client_lru) {
+ clp = list_entry(pos, struct nfs4_client, cl_lru);
+ if (clp->cl_state == NFSD4_EXPIRABLE)
+ goto exp_client;
+ if (!state_expired(lt, clp->cl_time))
+ break;
+ if (!atomic_read(&clp->cl_rpc_users))
+ clp->cl_state = NFSD4_COURTESY;
+ if (!client_has_state(clp) ||
+ ktime_get_boottime_seconds() >=
+ (clp->cl_time + NFSD_COURTESY_CLIENT_TIMEOUT))
+ goto exp_client;
+ if (nfs4_anylock_blockers(clp)) {
+exp_client:
+ if (!mark_client_expired_locked(clp))
+ list_add(&clp->cl_lru, reaplist);
+ }
+ }
+ spin_unlock(&nn->client_lock);
+}
+
static time64_t
nfs4_laundromat(struct nfsd_net *nn)
{
@@ -5627,7 +5840,6 @@ nfs4_laundromat(struct nfsd_net *nn)
goto out;
}
nfsd4_end_grace(nn);
- INIT_LIST_HEAD(&reaplist);
spin_lock(&nn->s2s_cp_lock);
idr_for_each_entry(&nn->s2s_cp_stateids, cps_t, i) {
@@ -5637,17 +5849,7 @@ nfs4_laundromat(struct nfsd_net *nn)
_free_cpntf_state_locked(nn, cps);
}
spin_unlock(&nn->s2s_cp_lock);
-
- spin_lock(&nn->client_lock);
- list_for_each_safe(pos, next, &nn->client_lru) {
- clp = list_entry(pos, struct nfs4_client, cl_lru);
- if (!state_expired(&lt, clp->cl_time))
- break;
- if (mark_client_expired_locked(clp))
- continue;
- list_add(&clp->cl_lru, &reaplist);
- }
- spin_unlock(&nn->client_lock);
+ nfs4_get_client_reaplist(nn, &reaplist, &lt);
list_for_each_safe(pos, next, &reaplist) {
clp = list_entry(pos, struct nfs4_client, cl_lru);
trace_nfsd_clid_purged(&clp->cl_clientid);
@@ -5722,7 +5924,6 @@ out:
return max_t(time64_t, lt.new_timeo, NFSD_LAUNDROMAT_MINTIMEOUT);
}
-static struct workqueue_struct *laundry_wq;
static void laundromat_main(struct work_struct *);
static void
@@ -6551,6 +6752,29 @@ nfsd4_lm_put_owner(fl_owner_t owner)
nfs4_put_stateowner(&lo->lo_owner);
}
+/* return pointer to struct nfs4_client if client is expirable */
+static bool
+nfsd4_lm_lock_expirable(struct file_lock *cfl)
+{
+ struct nfs4_lockowner *lo = (struct nfs4_lockowner *)cfl->fl_owner;
+ struct nfs4_client *clp = lo->lo_owner.so_client;
+ struct nfsd_net *nn;
+
+ if (try_to_expire_client(clp)) {
+ nn = net_generic(clp->net, nfsd_net_id);
+ mod_delayed_work(laundry_wq, &nn->laundromat_work, 0);
+ return true;
+ }
+ return false;
+}
+
+/* schedule laundromat to run immediately and wait for it to complete */
+static void
+nfsd4_lm_expire_lock(void)
+{
+ flush_workqueue(laundry_wq);
+}
+
static void
nfsd4_lm_notify(struct file_lock *fl)
{
@@ -6577,9 +6801,12 @@ nfsd4_lm_notify(struct file_lock *fl)
}
static const struct lock_manager_operations nfsd_posix_mng_ops = {
+ .lm_mod_owner = THIS_MODULE,
.lm_notify = nfsd4_lm_notify,
.lm_get_owner = nfsd4_lm_get_owner,
.lm_put_owner = nfsd4_lm_put_owner,
+ .lm_lock_expirable = nfsd4_lm_lock_expirable,
+ .lm_expire_lock = nfsd4_lm_expire_lock,
};
static inline void
@@ -7297,22 +7524,36 @@ check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner)
return status;
}
+/**
+ * nfsd4_release_lockowner - process NFSv4.0 RELEASE_LOCKOWNER operations
+ * @rqstp: RPC transaction
+ * @cstate: NFSv4 COMPOUND state
+ * @u: RELEASE_LOCKOWNER arguments
+ *
+ * The lockowner's so_count is bumped when a lock record is added
+ * or when copying a conflicting lock. The latter case is brief,
+ * but can lead to fleeting false positives when looking for
+ * locks-in-use.
+ *
+ * Return values:
+ * %nfs_ok: lockowner released or not found
+ * %nfserr_locks_held: lockowner still in use
+ * %nfserr_stale_clientid: clientid no longer active
+ * %nfserr_expired: clientid not recognized
+ */
__be32
nfsd4_release_lockowner(struct svc_rqst *rqstp,
struct nfsd4_compound_state *cstate,
union nfsd4_op_u *u)
{
struct nfsd4_release_lockowner *rlockowner = &u->release_lockowner;
+ struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
clientid_t *clid = &rlockowner->rl_clientid;
- struct nfs4_stateowner *sop;
- struct nfs4_lockowner *lo = NULL;
struct nfs4_ol_stateid *stp;
- struct xdr_netobj *owner = &rlockowner->rl_owner;
- unsigned int hashval = ownerstr_hashval(owner);
- __be32 status;
- struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
+ struct nfs4_lockowner *lo;
struct nfs4_client *clp;
- LIST_HEAD (reaplist);
+ LIST_HEAD(reaplist);
+ __be32 status;
dprintk("nfsd4_release_lockowner clientid: (%08x/%08x):\n",
clid->cl_boot, clid->cl_id);
@@ -7320,34 +7561,19 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp,
status = set_client(clid, cstate, nn);
if (status)
return status;
-
clp = cstate->clp;
- /* Find the matching lock stateowner */
- spin_lock(&clp->cl_lock);
- list_for_each_entry(sop, &clp->cl_ownerstr_hashtbl[hashval],
- so_strhash) {
-
- if (sop->so_is_open_owner || !same_owner_str(sop, owner))
- continue;
- /* see if there are still any locks associated with it */
- lo = lockowner(sop);
- list_for_each_entry(stp, &sop->so_stateids, st_perstateowner) {
- if (check_for_locks(stp->st_stid.sc_file, lo)) {
- status = nfserr_locks_held;
- spin_unlock(&clp->cl_lock);
- return status;
- }
- }
-
- nfs4_get_stateowner(sop);
- break;
- }
+ spin_lock(&clp->cl_lock);
+ lo = find_lockowner_str_locked(clp, &rlockowner->rl_owner);
if (!lo) {
spin_unlock(&clp->cl_lock);
- return status;
+ return nfs_ok;
+ }
+ if (atomic_read(&lo->lo_owner.so_count) != 2) {
+ spin_unlock(&clp->cl_lock);
+ nfs4_put_stateowner(&lo->lo_owner);
+ return nfserr_locks_held;
}
-
unhash_lockowner_locked(lo);
while (!list_empty(&lo->lo_owner.so_stateids)) {
stp = list_first_entry(&lo->lo_owner.so_stateids,
@@ -7357,11 +7583,11 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp,
put_ol_stateid_locked(stp, &reaplist);
}
spin_unlock(&clp->cl_lock);
+
free_ol_stateid_reaplist(&reaplist);
remove_blocked_locks(lo);
nfs4_put_stateowner(&lo->lo_owner);
-
- return status;
+ return nfs_ok;
}
static inline struct nfs4_client_reclaim *
@@ -7602,22 +7828,12 @@ nfs4_state_start(void)
{
int ret;
- laundry_wq = alloc_workqueue("%s", WQ_UNBOUND, 0, "nfsd4");
- if (laundry_wq == NULL) {
- ret = -ENOMEM;
- goto out;
- }
ret = nfsd4_create_callback_queue();
if (ret)
- goto out_free_laundry;
+ return ret;
set_max_delegations();
return 0;
-
-out_free_laundry:
- destroy_workqueue(laundry_wq);
-out:
- return ret;
}
void
@@ -7654,7 +7870,6 @@ nfs4_state_shutdown_net(struct net *net)
void
nfs4_state_shutdown(void)
{
- destroy_workqueue(laundry_wq);
nfsd4_destroy_callback_queue();
}
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index da92e7d2ab6a..61b2aae81abb 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -2411,7 +2411,7 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
argp->rqstp->rq_cachetype = cachethis ? RC_REPLBUFF : RC_NOCACHE;
if (readcount > 1 || max_reply > PAGE_SIZE - auth_slack)
- clear_bit(RQ_SPLICE_OK, &argp->rqstp->rq_flags);
+ __clear_bit(RQ_SPLICE_OK, &argp->rqstp->rq_flags);
return true;
}
diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c
index 0b3f12aa37ff..7da88bdc0d6c 100644
--- a/fs/nfsd/nfscache.c
+++ b/fs/nfsd/nfscache.c
@@ -206,7 +206,6 @@ void nfsd_reply_cache_shutdown(struct nfsd_net *nn)
struct svc_cacherep *rp;
unsigned int i;
- nfsd_reply_cache_stats_destroy(nn);
unregister_shrinker(&nn->nfsd_reply_cache_shrinker);
for (i = 0; i < nn->drc_hashsize; i++) {
@@ -217,6 +216,7 @@ void nfsd_reply_cache_shutdown(struct nfsd_net *nn)
rp, nn);
}
}
+ nfsd_reply_cache_stats_destroy(nn);
kvfree(nn->drc_hashtbl);
nn->drc_hashtbl = NULL;
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 16920e4512bd..0621c2faf242 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -1535,20 +1535,25 @@ static int __init init_nfsd(void)
retval = create_proc_exports_entry();
if (retval)
goto out_free_lockd;
- retval = register_filesystem(&nfsd_fs_type);
- if (retval)
- goto out_free_exports;
retval = register_pernet_subsys(&nfsd_net_ops);
if (retval < 0)
- goto out_free_filesystem;
+ goto out_free_exports;
retval = register_cld_notifier();
if (retval)
+ goto out_free_subsys;
+ retval = nfsd4_create_laundry_wq();
+ if (retval)
+ goto out_free_cld;
+ retval = register_filesystem(&nfsd_fs_type);
+ if (retval)
goto out_free_all;
return 0;
out_free_all:
+ nfsd4_destroy_laundry_wq();
+out_free_cld:
+ unregister_cld_notifier();
+out_free_subsys:
unregister_pernet_subsys(&nfsd_net_ops);
-out_free_filesystem:
- unregister_filesystem(&nfsd_fs_type);
out_free_exports:
remove_proc_entry("fs/nfs/exports", NULL);
remove_proc_entry("fs/nfs", NULL);
@@ -1566,6 +1571,8 @@ out_free_slabs:
static void __exit exit_nfsd(void)
{
+ unregister_filesystem(&nfsd_fs_type);
+ nfsd4_destroy_laundry_wq();
unregister_cld_notifier();
unregister_pernet_subsys(&nfsd_net_ops);
nfsd_drc_slab_free();
@@ -1575,7 +1582,6 @@ static void __exit exit_nfsd(void)
nfsd_lockd_shutdown();
nfsd4_free_slabs();
nfsd4_exit_pnfs();
- unregister_filesystem(&nfsd_fs_type);
}
MODULE_AUTHOR("Olaf Kirch <okir@monad.swb.de>");
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
index 4fc1fd639527..847b482155ae 100644
--- a/fs/nfsd/nfsd.h
+++ b/fs/nfsd/nfsd.h
@@ -162,6 +162,8 @@ void nfs4_state_shutdown_net(struct net *net);
int nfs4_reset_recoverydir(char *recdir);
char * nfs4_recoverydir(void);
bool nfsd4_spo_must_allow(struct svc_rqst *rqstp);
+int nfsd4_create_laundry_wq(void);
+void nfsd4_destroy_laundry_wq(void);
#else
static inline int nfsd4_init_slabs(void) { return 0; }
static inline void nfsd4_free_slabs(void) { }
@@ -175,6 +177,8 @@ static inline bool nfsd4_spo_must_allow(struct svc_rqst *rqstp)
{
return false;
}
+static inline int nfsd4_create_laundry_wq(void) { return 0; };
+static inline void nfsd4_destroy_laundry_wq(void) {};
#endif
/*
@@ -336,6 +340,7 @@ void nfsd_lockd_shutdown(void);
#define COMPOUND_ERR_SLACK_SPACE 16 /* OP_SETATTR */
#define NFSD_LAUNDROMAT_MINTIMEOUT 1 /* seconds */
+#define NFSD_COURTESY_CLIENT_TIMEOUT (24 * 60 * 60) /* seconds */
/*
* The following attributes are currently not supported by the NFSv4 server:
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 95457cfd37fc..f3d6313914ed 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -149,6 +149,7 @@ struct nfs4_delegation {
/* For recall: */
int dl_retries;
struct nfsd4_callback dl_recall;
+ bool dl_recalled;
};
#define cb_to_delegation(cb) \
@@ -283,6 +284,28 @@ struct nfsd4_sessionid {
#define HEXDIR_LEN 33 /* hex version of 16 byte md5 of cl_name plus '\0' */
/*
+ * State Meaning Where set
+ * --------------------------------------------------------------------------
+ * | NFSD4_ACTIVE | Confirmed, active | Default |
+ * |------------------- ----------------------------------------------------|
+ * | NFSD4_COURTESY | Courtesy state. | nfs4_get_client_reaplist |
+ * | | Lease/lock/share | |
+ * | | reservation conflict | |
+ * | | can cause Courtesy | |
+ * | | client to be expired | |
+ * |------------------------------------------------------------------------|
+ * | NFSD4_EXPIRABLE | Courtesy client to be| nfs4_laundromat |
+ * | | expired by Laundromat| try_to_expire_client |
+ * | | due to conflict | |
+ * |------------------------------------------------------------------------|
+ */
+enum {
+ NFSD4_ACTIVE = 0,
+ NFSD4_COURTESY,
+ NFSD4_EXPIRABLE,
+};
+
+/*
* struct nfs4_client - one per client. Clientids live here.
*
* The initial object created by an NFS client using SETCLIENTID (for NFSv4.0)
@@ -385,6 +408,9 @@ struct nfs4_client {
struct list_head async_copies; /* list of async copies */
spinlock_t async_lock; /* lock for async copies */
atomic_t cl_cb_inflight; /* Outstanding callbacks */
+
+ unsigned int cl_state;
+ atomic_t cl_delegs_in_recall;
};
/* struct nfs4_client_reset
@@ -702,4 +728,9 @@ extern void nfsd4_client_record_remove(struct nfs4_client *clp);
extern int nfsd4_client_record_check(struct nfs4_client *clp);
extern void nfsd4_record_grace_done(struct nfsd_net *nn);
+static inline bool try_to_expire_client(struct nfs4_client *clp)
+{
+ cmpxchg(&clp->cl_state, NFSD4_COURTESY, NFSD4_EXPIRABLE);
+ return clp->cl_state == NFSD4_EXPIRABLE;
+}
#endif /* NFSD4_STATE_H */
diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
index 242fa123e0e9..a60ead3b227a 100644
--- a/fs/nfsd/trace.h
+++ b/fs/nfsd/trace.h
@@ -692,12 +692,6 @@ DEFINE_CLID_EVENT(confirmed_r);
/*
* from fs/nfsd/filecache.h
*/
-TRACE_DEFINE_ENUM(NFSD_FILE_HASHED);
-TRACE_DEFINE_ENUM(NFSD_FILE_PENDING);
-TRACE_DEFINE_ENUM(NFSD_FILE_BREAK_READ);
-TRACE_DEFINE_ENUM(NFSD_FILE_BREAK_WRITE);
-TRACE_DEFINE_ENUM(NFSD_FILE_REFERENCED);
-
#define show_nf_flags(val) \
__print_flags(val, "|", \
{ 1 << NFSD_FILE_HASHED, "HASHED" }, \
@@ -784,6 +778,34 @@ TRACE_EVENT(nfsd_file_acquire,
__entry->nf_file, __entry->status)
);
+TRACE_EVENT(nfsd_file_open,
+ TP_PROTO(struct nfsd_file *nf, __be32 status),
+ TP_ARGS(nf, status),
+ TP_STRUCT__entry(
+ __field(unsigned int, nf_hashval)
+ __field(void *, nf_inode) /* cannot be dereferenced */
+ __field(int, nf_ref)
+ __field(unsigned long, nf_flags)
+ __field(unsigned long, nf_may)
+ __field(void *, nf_file) /* cannot be dereferenced */
+ ),
+ TP_fast_assign(
+ __entry->nf_hashval = nf->nf_hashval;
+ __entry->nf_inode = nf->nf_inode;
+ __entry->nf_ref = refcount_read(&nf->nf_ref);
+ __entry->nf_flags = nf->nf_flags;
+ __entry->nf_may = nf->nf_may;
+ __entry->nf_file = nf->nf_file;
+ ),
+ TP_printk("hash=0x%x inode=%p ref=%d flags=%s may=%s file=%p",
+ __entry->nf_hashval,
+ __entry->nf_inode,
+ __entry->nf_ref,
+ show_nf_flags(__entry->nf_flags),
+ show_nfsd_may_flags(__entry->nf_may),
+ __entry->nf_file)
+)
+
DECLARE_EVENT_CLASS(nfsd_file_search_class,
TP_PROTO(struct inode *inode, unsigned int hash, int found),
TP_ARGS(inode, hash, found),
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index c22ad0532e8e..840e3af63a6f 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -827,14 +827,23 @@ retry:
return err;
}
+/**
+ * nfsd_open_verified - Open a regular file for the filecache
+ * @rqstp: RPC request
+ * @fhp: NFS filehandle of the file to open
+ * @may_flags: internal permission flags
+ * @filp: OUT: open "struct file *"
+ *
+ * Returns an nfsstat value in network byte order.
+ */
__be32
-nfsd_open_verified(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
- int may_flags, struct file **filp)
+nfsd_open_verified(struct svc_rqst *rqstp, struct svc_fh *fhp, int may_flags,
+ struct file **filp)
{
__be32 err;
validate_process_creds();
- err = __nfsd_open(rqstp, fhp, type, may_flags, filp);
+ err = __nfsd_open(rqstp, fhp, S_IFREG, may_flags, filp);
validate_process_creds();
return err;
}
@@ -849,17 +858,11 @@ nfsd_splice_actor(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
struct splice_desc *sd)
{
struct svc_rqst *rqstp = sd->u.data;
- struct page **pp = rqstp->rq_next_page;
- struct page *page = buf->page;
- if (rqstp->rq_res.page_len == 0) {
- svc_rqst_replace_page(rqstp, page);
+ svc_rqst_replace_page(rqstp, buf->page);
+ if (rqstp->rq_res.page_len == 0)
rqstp->rq_res.page_base = buf->offset;
- } else if (page != pp[-1]) {
- svc_rqst_replace_page(rqstp, page);
- }
rqstp->rq_res.page_len += sd->len;
-
return sd->len;
}
@@ -1187,14 +1190,26 @@ out:
return err;
}
-static __be32
-nfsd_create_setattr(struct svc_rqst *rqstp, struct svc_fh *resfhp,
- struct iattr *iap)
+/**
+ * nfsd_create_setattr - Set a created file's attributes
+ * @rqstp: RPC transaction being executed
+ * @fhp: NFS filehandle of parent directory
+ * @resfhp: NFS filehandle of new object
+ * @iap: requested attributes of new object
+ *
+ * Returns nfs_ok on success, or an nfsstat in network byte order.
+ */
+__be32
+nfsd_create_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ struct svc_fh *resfhp, struct iattr *iap)
{
+ __be32 status;
+
/*
- * Mode has already been set earlier in create:
+ * Mode has already been set by file creation.
*/
iap->ia_valid &= ~ATTR_MODE;
+
/*
* Setting uid/gid works only for root. Irix appears to
* send along the gid on create when it tries to implement
@@ -1202,10 +1217,31 @@ nfsd_create_setattr(struct svc_rqst *rqstp, struct svc_fh *resfhp,
*/
if (!uid_eq(current_fsuid(), GLOBAL_ROOT_UID))
iap->ia_valid &= ~(ATTR_UID|ATTR_GID);
+
+ /*
+ * Callers expect new file metadata to be committed even
+ * if the attributes have not changed.
+ */
if (iap->ia_valid)
- return nfsd_setattr(rqstp, resfhp, iap, 0, (time64_t)0);
- /* Callers expect file metadata to be committed here */
- return nfserrno(commit_metadata(resfhp));
+ status = nfsd_setattr(rqstp, resfhp, iap, 0, (time64_t)0);
+ else
+ status = nfserrno(commit_metadata(resfhp));
+
+ /*
+ * Transactional filesystems had a chance to commit changes
+ * for both parent and child simultaneously making the
+ * following commit_metadata a noop in many cases.
+ */
+ if (!status)
+ status = nfserrno(commit_metadata(fhp));
+
+ /*
+ * Update the new filehandle to pick up the new attributes.
+ */
+ if (!status)
+ status = fh_update(resfhp);
+
+ return status;
}
/* HPUX client sometimes creates a file in mode 000, and sets size to 0.
@@ -1232,7 +1268,6 @@ nfsd_create_locked(struct svc_rqst *rqstp, struct svc_fh *fhp,
struct dentry *dentry, *dchild;
struct inode *dirp;
__be32 err;
- __be32 err2;
int host_err;
dentry = fhp->fh_dentry;
@@ -1305,22 +1340,8 @@ nfsd_create_locked(struct svc_rqst *rqstp, struct svc_fh *fhp,
if (host_err < 0)
goto out_nfserr;
- err = nfsd_create_setattr(rqstp, resfhp, iap);
+ err = nfsd_create_setattr(rqstp, fhp, resfhp, iap);
- /*
- * nfsd_create_setattr already committed the child. Transactional
- * filesystems had a chance to commit changes for both parent and
- * child simultaneously making the following commit_metadata a
- * noop.
- */
- err2 = nfserrno(commit_metadata(fhp));
- if (err2)
- err = err2;
- /*
- * Update the file handle to get the new inode info.
- */
- if (!err)
- err = fh_update(resfhp);
out:
dput(dchild);
return err;
@@ -1376,172 +1397,6 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
}
/*
- * NFSv3 and NFSv4 version of nfsd_create
- */
-__be32
-do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
- char *fname, int flen, struct iattr *iap,
- struct svc_fh *resfhp, int createmode, u32 *verifier,
- bool *truncp, bool *created)
-{
- struct dentry *dentry, *dchild = NULL;
- struct inode *dirp;
- __be32 err;
- int host_err;
- __u32 v_mtime=0, v_atime=0;
-
- err = nfserr_perm;
- if (!flen)
- goto out;
- err = nfserr_exist;
- if (isdotent(fname, flen))
- goto out;
- if (!(iap->ia_valid & ATTR_MODE))
- iap->ia_mode = 0;
- err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_EXEC);
- if (err)
- goto out;
-
- dentry = fhp->fh_dentry;
- dirp = d_inode(dentry);
-
- host_err = fh_want_write(fhp);
- if (host_err)
- goto out_nfserr;
-
- fh_lock_nested(fhp, I_MUTEX_PARENT);
-
- /*
- * Compose the response file handle.
- */
- dchild = lookup_one_len(fname, dentry, flen);
- host_err = PTR_ERR(dchild);
- if (IS_ERR(dchild))
- goto out_nfserr;
-
- /* If file doesn't exist, check for permissions to create one */
- if (d_really_is_negative(dchild)) {
- err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_CREATE);
- if (err)
- goto out;
- }
-
- err = fh_compose(resfhp, fhp->fh_export, dchild, fhp);
- if (err)
- goto out;
-
- if (nfsd_create_is_exclusive(createmode)) {
- /* solaris7 gets confused (bugid 4218508) if these have
- * the high bit set, as do xfs filesystems without the
- * "bigtime" feature. So just clear the high bits. If this is
- * ever changed to use different attrs for storing the
- * verifier, then do_open_lookup() will also need to be fixed
- * accordingly.
- */
- v_mtime = verifier[0]&0x7fffffff;
- v_atime = verifier[1]&0x7fffffff;
- }
-
- if (d_really_is_positive(dchild)) {
- err = 0;
-
- switch (createmode) {
- case NFS3_CREATE_UNCHECKED:
- if (! d_is_reg(dchild))
- goto out;
- else if (truncp) {
- /* in nfsv4, we need to treat this case a little
- * differently. we don't want to truncate the
- * file now; this would be wrong if the OPEN
- * fails for some other reason. furthermore,
- * if the size is nonzero, we should ignore it
- * according to spec!
- */
- *truncp = (iap->ia_valid & ATTR_SIZE) && !iap->ia_size;
- }
- else {
- iap->ia_valid &= ATTR_SIZE;
- goto set_attr;
- }
- break;
- case NFS3_CREATE_EXCLUSIVE:
- if ( d_inode(dchild)->i_mtime.tv_sec == v_mtime
- && d_inode(dchild)->i_atime.tv_sec == v_atime
- && d_inode(dchild)->i_size == 0 ) {
- if (created)
- *created = true;
- break;
- }
- fallthrough;
- case NFS4_CREATE_EXCLUSIVE4_1:
- if ( d_inode(dchild)->i_mtime.tv_sec == v_mtime
- && d_inode(dchild)->i_atime.tv_sec == v_atime
- && d_inode(dchild)->i_size == 0 ) {
- if (created)
- *created = true;
- goto set_attr;
- }
- fallthrough;
- case NFS3_CREATE_GUARDED:
- err = nfserr_exist;
- }
- fh_drop_write(fhp);
- goto out;
- }
-
- if (!IS_POSIXACL(dirp))
- iap->ia_mode &= ~current_umask();
-
- host_err = vfs_create(&init_user_ns, dirp, dchild, iap->ia_mode, true);
- if (host_err < 0) {
- fh_drop_write(fhp);
- goto out_nfserr;
- }
- if (created)
- *created = true;
-
- nfsd_check_ignore_resizing(iap);
-
- if (nfsd_create_is_exclusive(createmode)) {
- /* Cram the verifier into atime/mtime */
- iap->ia_valid = ATTR_MTIME|ATTR_ATIME
- | ATTR_MTIME_SET|ATTR_ATIME_SET;
- /* XXX someone who knows this better please fix it for nsec */
- iap->ia_mtime.tv_sec = v_mtime;
- iap->ia_atime.tv_sec = v_atime;
- iap->ia_mtime.tv_nsec = 0;
- iap->ia_atime.tv_nsec = 0;
- }
-
- set_attr:
- err = nfsd_create_setattr(rqstp, resfhp, iap);
-
- /*
- * nfsd_create_setattr already committed the child
- * (and possibly also the parent).
- */
- if (!err)
- err = nfserrno(commit_metadata(fhp));
-
- /*
- * Update the filehandle to get the new inode info.
- */
- if (!err)
- err = fh_update(resfhp);
-
- out:
- fh_unlock(fhp);
- if (dchild && !IS_ERR(dchild))
- dput(dchild);
- fh_drop_write(fhp);
- return err;
-
- out_nfserr:
- err = nfserrno(host_err);
- goto out;
-}
-
-/*
* Read a symlink. On entry, *lenp must contain the maximum path length that
* fits into the buffer. On return, it contains the true length.
* N.B. After this call fhp needs an fh_put
diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h
index ccb87b2864f6..26347d76f44a 100644
--- a/fs/nfsd/vfs.h
+++ b/fs/nfsd/vfs.h
@@ -69,10 +69,8 @@ __be32 nfsd_create(struct svc_rqst *, struct svc_fh *,
char *name, int len, struct iattr *attrs,
int type, dev_t rdev, struct svc_fh *res);
__be32 nfsd_access(struct svc_rqst *, struct svc_fh *, u32 *, u32 *);
-__be32 do_nfsd_create(struct svc_rqst *, struct svc_fh *,
- char *name, int len, struct iattr *attrs,
- struct svc_fh *res, int createmode,
- u32 *verifier, bool *truncp, bool *created);
+__be32 nfsd_create_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ struct svc_fh *resfhp, struct iattr *iap);
__be32 nfsd_commit(struct svc_rqst *rqst, struct svc_fh *fhp,
u64 offset, u32 count, __be32 *verf);
#ifdef CONFIG_NFSD_V4
@@ -88,7 +86,7 @@ __be32 nfsd_setxattr(struct svc_rqst *rqstp, struct svc_fh *fhp,
int nfsd_open_break_lease(struct inode *, int);
__be32 nfsd_open(struct svc_rqst *, struct svc_fh *, umode_t,
int, struct file **);
-__be32 nfsd_open_verified(struct svc_rqst *, struct svc_fh *, umode_t,
+__be32 nfsd_open_verified(struct svc_rqst *, struct svc_fh *,
int, struct file **);
__be32 nfsd_splice_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
struct file *file, loff_t offset,
@@ -159,10 +157,4 @@ static inline __be32 fh_getattr(const struct svc_fh *fh, struct kstat *stat)
AT_STATX_SYNC_AS_STAT));
}
-static inline int nfsd_create_is_exclusive(int createmode)
-{
- return createmode == NFS3_CREATE_EXCLUSIVE
- || createmode == NFS4_CREATE_EXCLUSIVE4_1;
-}
-
#endif /* LINUX_NFSD_VFS_H */
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index 846ab6df9d48..7b744011f2d3 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -273,6 +273,7 @@ struct nfsd4_open {
bool op_truncate; /* used during processing */
bool op_created; /* used during processing */
struct nfs4_openowner *op_openowner; /* used during processing */
+ struct file *op_filp; /* used during processing */
struct nfs4_file *op_file; /* used during processing */
struct nfs4_ol_stateid *op_stp; /* used during processing */
struct nfs4_clnt_odstate *op_odstate; /* used during processing */
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c
index e1392a9b8ceb..a8abe2296514 100644
--- a/fs/ntfs/file.c
+++ b/fs/ntfs/file.c
@@ -1772,11 +1772,11 @@ static ssize_t ntfs_perform_write(struct file *file, struct iov_iter *i,
last_vcn = -1;
do {
VCN vcn;
- pgoff_t idx, start_idx;
+ pgoff_t start_idx;
unsigned ofs, do_pages, u;
size_t copied;
- start_idx = idx = pos >> PAGE_SHIFT;
+ start_idx = pos >> PAGE_SHIFT;
ofs = pos & ~PAGE_MASK;
bytes = PAGE_SIZE - ofs;
do_pages = 1;
diff --git a/fs/ntfs3/super.c b/fs/ntfs3/super.c
index 5781b9e8e3d8..0c6de6287737 100644
--- a/fs/ntfs3/super.c
+++ b/fs/ntfs3/super.c
@@ -668,9 +668,11 @@ static u32 format_size_gb(const u64 bytes, u32 *mb)
static u32 true_sectors_per_clst(const struct NTFS_BOOT *boot)
{
- return boot->sectors_per_clusters <= 0x80
- ? boot->sectors_per_clusters
- : (1u << (0 - boot->sectors_per_clusters));
+ if (boot->sectors_per_clusters <= 0x80)
+ return boot->sectors_per_clusters;
+ if (boot->sectors_per_clusters >= 0xf4) /* limit shift to 2MB max */
+ return 1U << (0 - boot->sectors_per_clusters);
+ return -EINVAL;
}
/*
@@ -713,6 +715,8 @@ static int ntfs_init_from_boot(struct super_block *sb, u32 sector_size,
/* cluster size: 512, 1K, 2K, 4K, ... 2M */
sct_per_clst = true_sectors_per_clst(boot);
+ if ((int)sct_per_clst < 0)
+ goto out;
if (!is_power_of_2(sct_per_clst))
goto out;
diff --git a/fs/ocfs2/dlm/dlmdebug.c b/fs/ocfs2/dlm/dlmdebug.c
index d442cf5dda8a..be5e9ed7da8d 100644
--- a/fs/ocfs2/dlm/dlmdebug.c
+++ b/fs/ocfs2/dlm/dlmdebug.c
@@ -541,7 +541,7 @@ static void *lockres_seq_start(struct seq_file *m, loff_t *pos)
struct debug_lockres *dl = m->private;
struct dlm_ctxt *dlm = dl->dl_ctxt;
struct dlm_lock_resource *oldres = dl->dl_res;
- struct dlm_lock_resource *res = NULL;
+ struct dlm_lock_resource *res = NULL, *iter;
struct list_head *track_list;
spin_lock(&dlm->track_lock);
@@ -556,11 +556,11 @@ static void *lockres_seq_start(struct seq_file *m, loff_t *pos)
}
}
- list_for_each_entry(res, track_list, tracking) {
- if (&res->tracking == &dlm->tracking_list)
- res = NULL;
- else
- dlm_lockres_get(res);
+ list_for_each_entry(iter, track_list, tracking) {
+ if (&iter->tracking != &dlm->tracking_list) {
+ dlm_lockres_get(iter);
+ res = iter;
+ }
break;
}
spin_unlock(&dlm->track_lock);
diff --git a/fs/ocfs2/dlm/dlmunlock.c b/fs/ocfs2/dlm/dlmunlock.c
index 61103b2d69fb..7318e4794ef9 100644
--- a/fs/ocfs2/dlm/dlmunlock.c
+++ b/fs/ocfs2/dlm/dlmunlock.c
@@ -392,9 +392,9 @@ int dlm_unlock_lock_handler(struct o2net_msg *msg, u32 len, void *data,
struct dlm_ctxt *dlm = data;
struct dlm_unlock_lock *unlock = (struct dlm_unlock_lock *)msg->buf;
struct dlm_lock_resource *res = NULL;
- struct dlm_lock *lock = NULL;
+ struct dlm_lock *lock = NULL, *iter;
enum dlm_status status = DLM_NORMAL;
- int found = 0, i;
+ int i;
struct dlm_lockstatus *lksb = NULL;
int ignore;
u32 flags;
@@ -437,7 +437,6 @@ int dlm_unlock_lock_handler(struct o2net_msg *msg, u32 len, void *data,
}
queue=&res->granted;
- found = 0;
spin_lock(&res->spinlock);
if (res->state & DLM_LOCK_RES_RECOVERING) {
spin_unlock(&res->spinlock);
@@ -461,21 +460,21 @@ int dlm_unlock_lock_handler(struct o2net_msg *msg, u32 len, void *data,
}
for (i=0; i<3; i++) {
- list_for_each_entry(lock, queue, list) {
- if (lock->ml.cookie == unlock->cookie &&
- lock->ml.node == unlock->node_idx) {
- dlm_lock_get(lock);
- found = 1;
+ list_for_each_entry(iter, queue, list) {
+ if (iter->ml.cookie == unlock->cookie &&
+ iter->ml.node == unlock->node_idx) {
+ dlm_lock_get(iter);
+ lock = iter;
break;
}
}
- if (found)
+ if (lock)
break;
/* scan granted -> converting -> blocked queues */
queue++;
}
spin_unlock(&res->spinlock);
- if (!found) {
+ if (!lock) {
status = DLM_IVLOCKID;
goto not_found;
}
@@ -505,7 +504,7 @@ int dlm_unlock_lock_handler(struct o2net_msg *msg, u32 len, void *data,
dlm_kick_thread(dlm, res);
not_found:
- if (!found)
+ if (!lock)
mlog(ML_ERROR, "failed to find lock to unlock! "
"cookie=%u:%llu\n",
dlm_get_lock_cookie_node(be64_to_cpu(unlock->cookie)),
diff --git a/fs/ocfs2/dlmfs/userdlm.c b/fs/ocfs2/dlmfs/userdlm.c
index 29f183a15798..617c92e7b925 100644
--- a/fs/ocfs2/dlmfs/userdlm.c
+++ b/fs/ocfs2/dlmfs/userdlm.c
@@ -433,6 +433,11 @@ again:
}
spin_lock(&lockres->l_lock);
+ if (lockres->l_flags & USER_LOCK_IN_TEARDOWN) {
+ spin_unlock(&lockres->l_lock);
+ status = -EAGAIN;
+ goto bail;
+ }
/* We only compare against the currently granted level
* here. If the lock is blocked waiting on a downconvert,
@@ -595,7 +600,7 @@ int user_dlm_destroy_lock(struct user_lock_res *lockres)
spin_lock(&lockres->l_lock);
if (lockres->l_flags & USER_LOCK_IN_TEARDOWN) {
spin_unlock(&lockres->l_lock);
- return 0;
+ goto bail;
}
lockres->l_flags |= USER_LOCK_IN_TEARDOWN;
@@ -609,22 +614,30 @@ int user_dlm_destroy_lock(struct user_lock_res *lockres)
}
if (lockres->l_ro_holders || lockres->l_ex_holders) {
+ lockres->l_flags &= ~USER_LOCK_IN_TEARDOWN;
spin_unlock(&lockres->l_lock);
goto bail;
}
status = 0;
if (!(lockres->l_flags & USER_LOCK_ATTACHED)) {
+ /*
+ * lock is never requested, leave USER_LOCK_IN_TEARDOWN set
+ * to avoid new lock request coming in.
+ */
spin_unlock(&lockres->l_lock);
goto bail;
}
- lockres->l_flags &= ~USER_LOCK_ATTACHED;
lockres->l_flags |= USER_LOCK_BUSY;
spin_unlock(&lockres->l_lock);
status = ocfs2_dlm_unlock(conn, &lockres->l_lksb, DLM_LKF_VALBLK);
if (status) {
+ spin_lock(&lockres->l_lock);
+ lockres->l_flags &= ~USER_LOCK_IN_TEARDOWN;
+ lockres->l_flags &= ~USER_LOCK_BUSY;
+ spin_unlock(&lockres->l_lock);
user_log_dlm_error("ocfs2_dlm_unlock", status, lockres);
goto bail;
}
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index 5739dc301569..bb116c39b581 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -125,6 +125,7 @@ struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 blkno, unsigned flags,
struct inode *inode = NULL;
struct super_block *sb = osb->sb;
struct ocfs2_find_inode_args args;
+ journal_t *journal = osb->journal->j_journal;
trace_ocfs2_iget_begin((unsigned long long)blkno, flags,
sysfile_type);
@@ -171,11 +172,10 @@ struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 blkno, unsigned flags,
* part of the transaction - the inode could have been reclaimed and
* now it is reread from disk.
*/
- if (osb->journal) {
+ if (journal) {
transaction_t *transaction;
tid_t tid;
struct ocfs2_inode_info *oi = OCFS2_I(inode);
- journal_t *journal = osb->journal->j_journal;
read_lock(&journal->j_state_lock);
if (journal->j_running_transaction)
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index 1887a2708709..fa87d89cf754 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -810,22 +810,20 @@ void ocfs2_set_journal_params(struct ocfs2_super *osb)
write_unlock(&journal->j_state_lock);
}
-int ocfs2_journal_init(struct ocfs2_super *osb, int *dirty)
+/*
+ * alloc & initialize skeleton for journal structure.
+ * ocfs2_journal_init() will make fs have journal ability.
+ */
+int ocfs2_journal_alloc(struct ocfs2_super *osb)
{
- int status = -1;
- struct inode *inode = NULL; /* the journal inode */
- journal_t *j_journal = NULL;
- struct ocfs2_journal *journal = NULL;
- struct ocfs2_dinode *di = NULL;
- struct buffer_head *bh = NULL;
- int inode_lock = 0;
+ int status = 0;
+ struct ocfs2_journal *journal;
- /* initialize our journal structure */
journal = kzalloc(sizeof(struct ocfs2_journal), GFP_KERNEL);
if (!journal) {
mlog(ML_ERROR, "unable to alloc journal\n");
status = -ENOMEM;
- goto done;
+ goto bail;
}
osb->journal = journal;
journal->j_osb = osb;
@@ -839,6 +837,21 @@ int ocfs2_journal_init(struct ocfs2_super *osb, int *dirty)
INIT_WORK(&journal->j_recovery_work, ocfs2_complete_recovery);
journal->j_state = OCFS2_JOURNAL_FREE;
+bail:
+ return status;
+}
+
+int ocfs2_journal_init(struct ocfs2_super *osb, int *dirty)
+{
+ int status = -1;
+ struct inode *inode = NULL; /* the journal inode */
+ journal_t *j_journal = NULL;
+ struct ocfs2_journal *journal = osb->journal;
+ struct ocfs2_dinode *di = NULL;
+ struct buffer_head *bh = NULL;
+ int inode_lock = 0;
+
+ BUG_ON(!journal);
/* already have the inode for our journal */
inode = ocfs2_get_system_file_inode(osb, JOURNAL_SYSTEM_INODE,
osb->slot_num);
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h
index 8dcb2f2cadbc..969d0aa28718 100644
--- a/fs/ocfs2/journal.h
+++ b/fs/ocfs2/journal.h
@@ -154,6 +154,7 @@ int ocfs2_compute_replay_slots(struct ocfs2_super *osb);
* Journal Control:
* Initialize, Load, Shutdown, Wipe a journal.
*
+ * ocfs2_journal_alloc - Initialize skeleton for journal structure.
* ocfs2_journal_init - Initialize journal structures in the OSB.
* ocfs2_journal_load - Load the given journal off disk. Replay it if
* there's transactions still in there.
@@ -167,6 +168,7 @@ int ocfs2_compute_replay_slots(struct ocfs2_super *osb);
* ocfs2_start_checkpoint - Kick the commit thread to do a checkpoint.
*/
void ocfs2_set_journal_params(struct ocfs2_super *osb);
+int ocfs2_journal_alloc(struct ocfs2_super *osb);
int ocfs2_journal_init(struct ocfs2_super *osb, int *dirty);
void ocfs2_journal_shutdown(struct ocfs2_super *osb);
int ocfs2_journal_wipe(struct ocfs2_journal *journal,
diff --git a/fs/ocfs2/quota_local.c b/fs/ocfs2/quota_local.c
index b1a8b046f4c2..5022b3e9bfcd 100644
--- a/fs/ocfs2/quota_local.c
+++ b/fs/ocfs2/quota_local.c
@@ -921,19 +921,19 @@ static struct ocfs2_quota_chunk *ocfs2_find_free_entry(struct super_block *sb,
{
struct mem_dqinfo *info = sb_dqinfo(sb, type);
struct ocfs2_mem_dqinfo *oinfo = info->dqi_priv;
- struct ocfs2_quota_chunk *chunk;
+ struct ocfs2_quota_chunk *chunk = NULL, *iter;
struct ocfs2_local_disk_chunk *dchunk;
int found = 0, len;
- list_for_each_entry(chunk, &oinfo->dqi_chunk, qc_chunk) {
+ list_for_each_entry(iter, &oinfo->dqi_chunk, qc_chunk) {
dchunk = (struct ocfs2_local_disk_chunk *)
- chunk->qc_headerbh->b_data;
+ iter->qc_headerbh->b_data;
if (le32_to_cpu(dchunk->dqc_free) > 0) {
- found = 1;
+ chunk = iter;
break;
}
}
- if (!found)
+ if (!chunk)
return NULL;
if (chunk->qc_num < oinfo->dqi_chunks - 1) {
diff --git a/fs/ocfs2/reservations.c b/fs/ocfs2/reservations.c
index 769e466887b0..a9d1296d736d 100644
--- a/fs/ocfs2/reservations.c
+++ b/fs/ocfs2/reservations.c
@@ -198,7 +198,7 @@ void ocfs2_resv_set_type(struct ocfs2_alloc_reservation *resv,
resv->r_flags |= flags;
}
-int ocfs2_resmap_init(struct ocfs2_super *osb,
+void ocfs2_resmap_init(struct ocfs2_super *osb,
struct ocfs2_reservation_map *resmap)
{
memset(resmap, 0, sizeof(*resmap));
@@ -207,8 +207,6 @@ int ocfs2_resmap_init(struct ocfs2_super *osb,
resmap->m_reservations = RB_ROOT;
/* m_bitmap_len is initialized to zero by the above memset. */
INIT_LIST_HEAD(&resmap->m_lru);
-
- return 0;
}
static void ocfs2_resv_mark_lru(struct ocfs2_reservation_map *resmap,
diff --git a/fs/ocfs2/reservations.h b/fs/ocfs2/reservations.h
index 677c50663595..ec8101ef5717 100644
--- a/fs/ocfs2/reservations.h
+++ b/fs/ocfs2/reservations.h
@@ -73,15 +73,10 @@ void ocfs2_resv_discard(struct ocfs2_reservation_map *resmap,
/**
* ocfs2_resmap_init() - Initialize fields of a reservations bitmap
+ * @osb: struct ocfs2_super to be saved in resmap
* @resmap: struct ocfs2_reservation_map to initialize
- * @obj: unused for now
- * @ops: unused for now
- * @max_bitmap_bytes: Maximum size of the bitmap (typically blocksize)
- *
- * Only possible return value other than '0' is -ENOMEM for failure to
- * allocation mirror bitmap.
*/
-int ocfs2_resmap_init(struct ocfs2_super *osb,
+void ocfs2_resmap_init(struct ocfs2_super *osb,
struct ocfs2_reservation_map *resmap);
/**
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 477cdf94122e..f7298816d8d9 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -989,28 +989,27 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
if (!ocfs2_parse_options(sb, data, &parsed_options, 0)) {
status = -EINVAL;
- goto read_super_error;
+ goto out;
}
/* probe for superblock */
status = ocfs2_sb_probe(sb, &bh, &sector_size, &stats);
if (status < 0) {
mlog(ML_ERROR, "superblock probe failed!\n");
- goto read_super_error;
+ goto out;
}
status = ocfs2_initialize_super(sb, bh, sector_size, &stats);
- osb = OCFS2_SB(sb);
- if (status < 0) {
- mlog_errno(status);
- goto read_super_error;
- }
brelse(bh);
bh = NULL;
+ if (status < 0)
+ goto out;
+
+ osb = OCFS2_SB(sb);
if (!ocfs2_check_set_options(sb, &parsed_options)) {
status = -EINVAL;
- goto read_super_error;
+ goto out_super;
}
osb->s_mount_opt = parsed_options.mount_opt;
osb->s_atime_quantum = parsed_options.atime_quantum;
@@ -1027,7 +1026,7 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
status = ocfs2_verify_userspace_stack(osb, &parsed_options);
if (status)
- goto read_super_error;
+ goto out_super;
sb->s_magic = OCFS2_SUPER_MAGIC;
@@ -1041,7 +1040,7 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
status = -EACCES;
mlog(ML_ERROR, "Readonly device detected but readonly "
"mount was not specified.\n");
- goto read_super_error;
+ goto out_super;
}
/* You should not be able to start a local heartbeat
@@ -1050,7 +1049,7 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
status = -EROFS;
mlog(ML_ERROR, "Local heartbeat specified on readonly "
"device.\n");
- goto read_super_error;
+ goto out_super;
}
status = ocfs2_check_journals_nolocks(osb);
@@ -1059,9 +1058,7 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
mlog(ML_ERROR, "Recovery required on readonly "
"file system, but write access is "
"unavailable.\n");
- else
- mlog_errno(status);
- goto read_super_error;
+ goto out_super;
}
ocfs2_set_ro_flag(osb, 1);
@@ -1077,10 +1074,8 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
}
status = ocfs2_verify_heartbeat(osb);
- if (status < 0) {
- mlog_errno(status);
- goto read_super_error;
- }
+ if (status < 0)
+ goto out_super;
osb->osb_debug_root = debugfs_create_dir(osb->uuid_str,
ocfs2_debugfs_root);
@@ -1094,15 +1089,14 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
status = ocfs2_mount_volume(sb);
if (status < 0)
- goto read_super_error;
+ goto out_debugfs;
if (osb->root_inode)
inode = igrab(osb->root_inode);
if (!inode) {
status = -EIO;
- mlog_errno(status);
- goto read_super_error;
+ goto out_dismount;
}
osb->osb_dev_kset = kset_create_and_add(sb->s_id, NULL,
@@ -1110,7 +1104,7 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
if (!osb->osb_dev_kset) {
status = -ENOMEM;
mlog(ML_ERROR, "Unable to create device kset %s.\n", sb->s_id);
- goto read_super_error;
+ goto out_dismount;
}
/* Create filecheck sysfs related directories/files at
@@ -1119,14 +1113,13 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
status = -ENOMEM;
mlog(ML_ERROR, "Unable to create filecheck sysfs directory at "
"/sys/fs/ocfs2/%s/filecheck.\n", sb->s_id);
- goto read_super_error;
+ goto out_dismount;
}
root = d_make_root(inode);
if (!root) {
status = -ENOMEM;
- mlog_errno(status);
- goto read_super_error;
+ goto out_dismount;
}
sb->s_root = root;
@@ -1178,17 +1171,21 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
return status;
-read_super_error:
- brelse(bh);
-
- if (status)
- mlog_errno(status);
+out_dismount:
+ atomic_set(&osb->vol_state, VOLUME_DISABLED);
+ wake_up(&osb->osb_mount_event);
+ ocfs2_dismount_volume(sb, 1);
+ goto out;
- if (osb) {
- atomic_set(&osb->vol_state, VOLUME_DISABLED);
- wake_up(&osb->osb_mount_event);
- ocfs2_dismount_volume(sb, 1);
- }
+out_debugfs:
+ debugfs_remove_recursive(osb->osb_debug_root);
+out_super:
+ ocfs2_release_system_inodes(osb);
+ kfree(osb->recovery_map);
+ ocfs2_delete_osb(osb);
+ kfree(osb);
+out:
+ mlog_errno(status);
return status;
}
@@ -1803,11 +1800,10 @@ static int ocfs2_get_sector(struct super_block *sb,
static int ocfs2_mount_volume(struct super_block *sb)
{
int status = 0;
- int unlock_super = 0;
struct ocfs2_super *osb = OCFS2_SB(sb);
if (ocfs2_is_hard_readonly(osb))
- goto leave;
+ goto out;
mutex_init(&osb->obs_trim_fs_mutex);
@@ -1817,44 +1813,56 @@ static int ocfs2_mount_volume(struct super_block *sb)
if (status == -EBADR && ocfs2_userspace_stack(osb))
mlog(ML_ERROR, "couldn't mount because cluster name on"
" disk does not match the running cluster name.\n");
- goto leave;
+ goto out;
}
status = ocfs2_super_lock(osb, 1);
if (status < 0) {
mlog_errno(status);
- goto leave;
+ goto out_dlm;
}
- unlock_super = 1;
/* This will load up the node map and add ourselves to it. */
status = ocfs2_find_slot(osb);
if (status < 0) {
mlog_errno(status);
- goto leave;
+ goto out_super_lock;
}
/* load all node-local system inodes */
status = ocfs2_init_local_system_inodes(osb);
if (status < 0) {
mlog_errno(status);
- goto leave;
+ goto out_super_lock;
}
status = ocfs2_check_volume(osb);
if (status < 0) {
mlog_errno(status);
- goto leave;
+ goto out_system_inodes;
}
status = ocfs2_truncate_log_init(osb);
- if (status < 0)
+ if (status < 0) {
mlog_errno(status);
+ goto out_system_inodes;
+ }
-leave:
- if (unlock_super)
- ocfs2_super_unlock(osb, 1);
+ ocfs2_super_unlock(osb, 1);
+ return 0;
+out_system_inodes:
+ if (osb->local_alloc_state == OCFS2_LA_ENABLED)
+ ocfs2_shutdown_local_alloc(osb);
+ ocfs2_release_system_inodes(osb);
+ /* before journal shutdown, we should release slot_info */
+ ocfs2_free_slot_info(osb);
+ ocfs2_journal_shutdown(osb);
+out_super_lock:
+ ocfs2_super_unlock(osb, 1);
+out_dlm:
+ ocfs2_dlm_shutdown(osb, 0);
+out:
return status;
}
@@ -2022,7 +2030,7 @@ static int ocfs2_initialize_super(struct super_block *sb,
if (!osb) {
status = -ENOMEM;
mlog_errno(status);
- goto bail;
+ goto out;
}
sb->s_fs_info = osb;
@@ -2083,7 +2091,7 @@ static int ocfs2_initialize_super(struct super_block *sb,
mlog(ML_ERROR, "Invalid number of node slots (%u)\n",
osb->max_slots);
status = -EINVAL;
- goto bail;
+ goto out;
}
ocfs2_orphan_scan_init(osb);
@@ -2092,7 +2100,7 @@ static int ocfs2_initialize_super(struct super_block *sb,
if (status) {
mlog(ML_ERROR, "Unable to initialize recovery state\n");
mlog_errno(status);
- goto bail;
+ goto out;
}
init_waitqueue_head(&osb->checkpoint_event);
@@ -2110,17 +2118,13 @@ static int ocfs2_initialize_super(struct super_block *sb,
init_waitqueue_head(&osb->osb_mount_event);
- status = ocfs2_resmap_init(osb, &osb->osb_la_resmap);
- if (status) {
- mlog_errno(status);
- goto bail;
- }
+ ocfs2_resmap_init(osb, &osb->osb_la_resmap);
osb->vol_label = kmalloc(OCFS2_MAX_VOL_LABEL_LEN, GFP_KERNEL);
if (!osb->vol_label) {
mlog(ML_ERROR, "unable to alloc vol label\n");
status = -ENOMEM;
- goto bail;
+ goto out_recovery_map;
}
osb->slot_recovery_generations =
@@ -2129,7 +2133,7 @@ static int ocfs2_initialize_super(struct super_block *sb,
if (!osb->slot_recovery_generations) {
status = -ENOMEM;
mlog_errno(status);
- goto bail;
+ goto out_vol_label;
}
init_waitqueue_head(&osb->osb_wipe_event);
@@ -2139,7 +2143,7 @@ static int ocfs2_initialize_super(struct super_block *sb,
if (!osb->osb_orphan_wipes) {
status = -ENOMEM;
mlog_errno(status);
- goto bail;
+ goto out_slot_recovery_gen;
}
osb->osb_rf_lock_tree = RB_ROOT;
@@ -2155,13 +2159,13 @@ static int ocfs2_initialize_super(struct super_block *sb,
mlog(ML_ERROR, "couldn't mount because of unsupported "
"optional features (%x).\n", i);
status = -EINVAL;
- goto bail;
+ goto out_orphan_wipes;
}
if (!sb_rdonly(osb->sb) && (i = OCFS2_HAS_RO_COMPAT_FEATURE(osb->sb, ~OCFS2_FEATURE_RO_COMPAT_SUPP))) {
mlog(ML_ERROR, "couldn't mount RDWR because of "
"unsupported optional features (%x).\n", i);
status = -EINVAL;
- goto bail;
+ goto out_orphan_wipes;
}
if (ocfs2_clusterinfo_valid(osb)) {
@@ -2182,7 +2186,7 @@ static int ocfs2_initialize_super(struct super_block *sb,
"cluster stack label (%s) \n",
osb->osb_cluster_stack);
status = -EINVAL;
- goto bail;
+ goto out_orphan_wipes;
}
memcpy(osb->osb_cluster_name,
OCFS2_RAW_SB(di)->s_cluster_info.ci_cluster,
@@ -2195,6 +2199,15 @@ static int ocfs2_initialize_super(struct super_block *sb,
get_random_bytes(&osb->s_next_generation, sizeof(u32));
+ /*
+ * FIXME
+ * This should be done in ocfs2_journal_init(), but any inode
+ * writes back operation will cause the filesystem to crash.
+ */
+ status = ocfs2_journal_alloc(osb);
+ if (status < 0)
+ goto out_orphan_wipes;
+
INIT_WORK(&osb->dquot_drop_work, ocfs2_drop_dquot_refs);
init_llist_head(&osb->dquot_drop_list);
@@ -2208,7 +2221,7 @@ static int ocfs2_initialize_super(struct super_block *sb,
mlog(ML_ERROR, "Volume has invalid cluster size (%d)\n",
osb->s_clustersize);
status = -EINVAL;
- goto bail;
+ goto out_journal;
}
total_blocks = ocfs2_clusters_to_blocks(osb->sb,
@@ -2220,14 +2233,14 @@ static int ocfs2_initialize_super(struct super_block *sb,
mlog(ML_ERROR, "Volume too large "
"to mount safely on this system");
status = -EFBIG;
- goto bail;
+ goto out_journal;
}
if (ocfs2_setup_osb_uuid(osb, di->id2.i_super.s_uuid,
sizeof(di->id2.i_super.s_uuid))) {
mlog(ML_ERROR, "Out of memory trying to setup our uuid.\n");
status = -ENOMEM;
- goto bail;
+ goto out_journal;
}
strlcpy(osb->vol_label, di->id2.i_super.s_label,
@@ -2247,7 +2260,7 @@ static int ocfs2_initialize_super(struct super_block *sb,
if (!osb->osb_dlm_debug) {
status = -ENOMEM;
mlog_errno(status);
- goto bail;
+ goto out_uuid_str;
}
atomic_set(&osb->vol_state, VOLUME_INIT);
@@ -2256,7 +2269,7 @@ static int ocfs2_initialize_super(struct super_block *sb,
status = ocfs2_init_global_system_inodes(osb);
if (status < 0) {
mlog_errno(status);
- goto bail;
+ goto out_dlm_out;
}
/*
@@ -2267,7 +2280,7 @@ static int ocfs2_initialize_super(struct super_block *sb,
if (!inode) {
status = -EINVAL;
mlog_errno(status);
- goto bail;
+ goto out_system_inodes;
}
osb->bitmap_blkno = OCFS2_I(inode)->ip_blkno;
@@ -2280,16 +2293,39 @@ static int ocfs2_initialize_super(struct super_block *sb,
status = ocfs2_init_slot_info(osb);
if (status < 0) {
mlog_errno(status);
- goto bail;
+ goto out_system_inodes;
}
osb->ocfs2_wq = alloc_ordered_workqueue("ocfs2_wq", WQ_MEM_RECLAIM);
if (!osb->ocfs2_wq) {
status = -ENOMEM;
mlog_errno(status);
+ goto out_slot_info;
}
-bail:
+ return status;
+
+out_slot_info:
+ ocfs2_free_slot_info(osb);
+out_system_inodes:
+ ocfs2_release_system_inodes(osb);
+out_dlm_out:
+ ocfs2_put_dlm_debug(osb->osb_dlm_debug);
+out_uuid_str:
+ kfree(osb->uuid_str);
+out_journal:
+ kfree(osb->journal);
+out_orphan_wipes:
+ kfree(osb->osb_orphan_wipes);
+out_slot_recovery_gen:
+ kfree(osb->slot_recovery_generations);
+out_vol_label:
+ kfree(osb->vol_label);
+out_recovery_map:
+ kfree(osb->recovery_map);
+out:
+ kfree(osb);
+ sb->s_fs_info = NULL;
return status;
}
@@ -2483,6 +2519,12 @@ static void ocfs2_delete_osb(struct ocfs2_super *osb)
kfree(osb->osb_orphan_wipes);
kfree(osb->slot_recovery_generations);
+ /* FIXME
+ * This belongs in journal shutdown, but because we have to
+ * allocate osb->journal at the middle of ocfs2_initialize_super(),
+ * we free it here.
+ */
+ kfree(osb->journal);
kfree(osb->local_alloc_copy);
kfree(osb->uuid_str);
kfree(osb->vol_label);
diff --git a/fs/open.c b/fs/open.c
index 1315253e0247..be849dcca032 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -834,16 +834,15 @@ static int do_dentry_open(struct file *f,
if ((f->f_mode & FMODE_WRITE) &&
likely(f->f_op->write || f->f_op->write_iter))
f->f_mode |= FMODE_CAN_WRITE;
+ if (f->f_mapping->a_ops && f->f_mapping->a_ops->direct_IO)
+ f->f_mode |= FMODE_CAN_ODIRECT;
f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
file_ra_state_init(&f->f_ra, f->f_mapping->host->i_mapping);
- /* NB: we're sure to have correct a_ops only after f_op->open */
- if (f->f_flags & O_DIRECT) {
- if (!f->f_mapping->a_ops || !f->f_mapping->a_ops->direct_IO)
- return -EINVAL;
- }
+ if ((f->f_flags & O_DIRECT) && !(f->f_mode & FMODE_CAN_ODIRECT))
+ return -EINVAL;
/*
* XXX: Huge page cache doesn't support writing yet. Drop all page
@@ -981,6 +980,48 @@ struct file *dentry_open(const struct path *path, int flags,
}
EXPORT_SYMBOL(dentry_open);
+/**
+ * dentry_create - Create and open a file
+ * @path: path to create
+ * @flags: O_ flags
+ * @mode: mode bits for new file
+ * @cred: credentials to use
+ *
+ * Caller must hold the parent directory's lock, and have prepared
+ * a negative dentry, placed in @path->dentry, for the new file.
+ *
+ * Caller sets @path->mnt to the vfsmount of the filesystem where
+ * the new file is to be created. The parent directory and the
+ * negative dentry must reside on the same filesystem instance.
+ *
+ * On success, returns a "struct file *". Otherwise a ERR_PTR
+ * is returned.
+ */
+struct file *dentry_create(const struct path *path, int flags, umode_t mode,
+ const struct cred *cred)
+{
+ struct file *f;
+ int error;
+
+ validate_creds(cred);
+ f = alloc_empty_file(flags, cred);
+ if (IS_ERR(f))
+ return f;
+
+ error = vfs_create(mnt_user_ns(path->mnt),
+ d_inode(path->dentry->d_parent),
+ path->dentry, mode, true);
+ if (!error)
+ error = vfs_open(path, f);
+
+ if (unlikely(error)) {
+ fput(f);
+ return ERR_PTR(error);
+ }
+ return f;
+}
+EXPORT_SYMBOL(dentry_create);
+
struct file *open_with_fake_path(const struct path *path, int flags,
struct inode *inode, const struct cred *cred)
{
diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
index fa125feed0ff..9d69b4dbb8c4 100644
--- a/fs/overlayfs/file.c
+++ b/fs/overlayfs/file.c
@@ -82,11 +82,8 @@ static int ovl_change_flags(struct file *file, unsigned int flags)
if (((flags ^ file->f_flags) & O_APPEND) && IS_APPEND(inode))
return -EPERM;
- if (flags & O_DIRECT) {
- if (!file->f_mapping->a_ops ||
- !file->f_mapping->a_ops->direct_IO)
- return -EINVAL;
- }
+ if ((flags & O_DIRECT) && !(file->f_mode & FMODE_CAN_ODIRECT))
+ return -EINVAL;
if (file->f_op->check_flags) {
err = file->f_op->check_flags(flags);
@@ -306,8 +303,7 @@ static ssize_t ovl_read_iter(struct kiocb *iocb, struct iov_iter *iter)
ret = -EINVAL;
if (iocb->ki_flags & IOCB_DIRECT &&
- (!real.file->f_mapping->a_ops ||
- !real.file->f_mapping->a_ops->direct_IO))
+ !(real.file->f_mode & FMODE_CAN_ODIRECT))
goto out_fdput;
old_cred = ovl_override_creds(file_inode(file)->i_sb);
@@ -367,8 +363,7 @@ static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter)
ret = -EINVAL;
if (iocb->ki_flags & IOCB_DIRECT &&
- (!real.file->f_mapping->a_ops ||
- !real.file->f_mapping->a_ops->direct_IO))
+ !(real.file->f_mode & FMODE_CAN_ODIRECT))
goto out_fdput;
if (!ovl_should_sync(OVL_FS(inode->i_sb)))
diff --git a/fs/pipe.c b/fs/pipe.c
index e140ea150bbb..74ae9fafd25a 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -653,7 +653,7 @@ pipe_poll(struct file *filp, poll_table *wait)
unsigned int head, tail;
/* Epoll has some historical nasty semantics, this enables them */
- pipe->poll_usage = 1;
+ WRITE_ONCE(pipe->poll_usage, true);
/*
* Reading pipe state only -- no need for acquiring the semaphore.
@@ -1245,30 +1245,33 @@ unsigned int round_pipe_size(unsigned long size)
/*
* Resize the pipe ring to a number of slots.
+ *
+ * Note the pipe can be reduced in capacity, but only if the current
+ * occupancy doesn't exceed nr_slots; if it does, EBUSY will be
+ * returned instead.
*/
int pipe_resize_ring(struct pipe_inode_info *pipe, unsigned int nr_slots)
{
struct pipe_buffer *bufs;
unsigned int head, tail, mask, n;
- /*
- * We can shrink the pipe, if arg is greater than the ring occupancy.
- * Since we don't expect a lot of shrink+grow operations, just free and
- * allocate again like we would do for growing. If the pipe currently
- * contains more buffers than arg, then return busy.
- */
- mask = pipe->ring_size - 1;
- head = pipe->head;
- tail = pipe->tail;
- n = pipe_occupancy(pipe->head, pipe->tail);
- if (nr_slots < n)
- return -EBUSY;
-
bufs = kcalloc(nr_slots, sizeof(*bufs),
GFP_KERNEL_ACCOUNT | __GFP_NOWARN);
if (unlikely(!bufs))
return -ENOMEM;
+ spin_lock_irq(&pipe->rd_wait.lock);
+ mask = pipe->ring_size - 1;
+ head = pipe->head;
+ tail = pipe->tail;
+
+ n = pipe_occupancy(head, tail);
+ if (nr_slots < n) {
+ spin_unlock_irq(&pipe->rd_wait.lock);
+ kfree(bufs);
+ return -EBUSY;
+ }
+
/*
* The pipe array wraps around, so just start the new one at zero
* and adjust the indices.
@@ -1300,6 +1303,8 @@ int pipe_resize_ring(struct pipe_inode_info *pipe, unsigned int nr_slots)
pipe->tail = tail;
pipe->head = head;
+ spin_unlock_irq(&pipe->rd_wait.lock);
+
/* This might have made more room for writers */
wake_up_interruptible(&pipe->wr_wait);
return 0;
diff --git a/fs/proc/base.c b/fs/proc/base.c
index c1031843cc6a..8dfa36a99c74 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -3154,6 +3154,22 @@ static int proc_pid_patch_state(struct seq_file *m, struct pid_namespace *ns,
}
#endif /* CONFIG_LIVEPATCH */
+#ifdef CONFIG_KSM
+static int proc_pid_ksm_merging_pages(struct seq_file *m, struct pid_namespace *ns,
+ struct pid *pid, struct task_struct *task)
+{
+ struct mm_struct *mm;
+
+ mm = get_task_mm(task);
+ if (mm) {
+ seq_printf(m, "%lu\n", mm->ksm_merging_pages);
+ mmput(mm);
+ }
+
+ return 0;
+}
+#endif /* CONFIG_KSM */
+
#ifdef CONFIG_STACKLEAK_METRICS
static int proc_stack_depth(struct seq_file *m, struct pid_namespace *ns,
struct pid *pid, struct task_struct *task)
@@ -3285,6 +3301,9 @@ static const struct pid_entry tgid_base_stuff[] = {
#ifdef CONFIG_SECCOMP_CACHE_DEBUG
ONE("seccomp_cache", S_IRUSR, proc_pid_seccomp_cache),
#endif
+#ifdef CONFIG_KSM
+ ONE("ksm_merging_pages", S_IRUSR, proc_pid_ksm_merging_pages),
+#endif
};
static int proc_tgid_base_readdir(struct file *file, struct dir_context *ctx)
@@ -3618,6 +3637,9 @@ static const struct pid_entry tid_base_stuff[] = {
#ifdef CONFIG_SECCOMP_CACHE_DEBUG
ONE("seccomp_cache", S_IRUSR, proc_pid_seccomp_cache),
#endif
+#ifdef CONFIG_KSM
+ ONE("ksm_merging_pages", S_IRUSR, proc_pid_ksm_merging_pages),
+#endif
};
static int proc_tid_base_readdir(struct file *file, struct dir_context *ctx)
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index f2132407e133..587b91d9d998 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -448,6 +448,9 @@ static struct proc_dir_entry *__proc_create(struct proc_dir_entry **parent,
proc_set_user(ent, (*parent)->uid, (*parent)->gid);
ent->proc_dops = &proc_misc_dentry_ops;
+ /* Revalidate everything under /proc/${pid}/net */
+ if ((*parent)->proc_dops == &proc_net_dentry_ops)
+ pde_force_lookup(ent);
out:
return ent;
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index 982e694aae77..dff921f7ca33 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -479,10 +479,15 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos)
* the previous entry, search for a matching entry.
*/
if (!m || start < m->addr || start >= m->addr + m->size) {
- list_for_each_entry(m, &kclist_head, list) {
- if (start >= m->addr &&
- start < m->addr + m->size)
+ struct kcore_list *iter;
+
+ m = NULL;
+ list_for_each_entry(iter, &kclist_head, list) {
+ if (start >= iter->addr &&
+ start < iter->addr + iter->size) {
+ m = iter;
break;
+ }
}
}
@@ -492,12 +497,11 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos)
page_offline_freeze();
}
- if (&m->list == &kclist_head) {
+ if (!m) {
if (clear_user(buffer, tsz)) {
ret = -EFAULT;
goto out;
}
- m = NULL; /* skip the list anchor */
goto skip;
}
diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
index 6fa761c9cc78..6e89f0e2fd20 100644
--- a/fs/proc/meminfo.c
+++ b/fs/proc/meminfo.c
@@ -86,6 +86,13 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
show_val_kb(m, "SwapTotal: ", i.totalswap);
show_val_kb(m, "SwapFree: ", i.freeswap);
+#ifdef CONFIG_ZSWAP
+ seq_printf(m, "Zswap: %8lu kB\n",
+ (unsigned long)(zswap_pool_total_size >> 10));
+ seq_printf(m, "Zswapped: %8lu kB\n",
+ (unsigned long)atomic_read(&zswap_stored_pages) <<
+ (PAGE_SHIFT - 10));
+#endif
show_val_kb(m, "Dirty: ",
global_node_page_state(NR_FILE_DIRTY));
show_val_kb(m, "Writeback: ",
diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c
index e1cfeda397f3..913e5acefbb6 100644
--- a/fs/proc/proc_net.c
+++ b/fs/proc/proc_net.c
@@ -376,6 +376,9 @@ static __net_init int proc_net_ns_init(struct net *net)
proc_set_user(netd, uid, gid);
+ /* Seed dentry revalidation for /proc/${pid}/net */
+ pde_force_lookup(netd);
+
err = -EEXIST;
net_statd = proc_net_mkdir(net, "stat", netd);
if (!net_statd)
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index c29f39c01a9a..021e83fe831f 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -19,6 +19,9 @@
#include <linux/kmemleak.h>
#include "internal.h"
+#define list_for_each_table_entry(entry, table) \
+ for ((entry) = (table); (entry)->procname; (entry)++)
+
static const struct dentry_operations proc_sys_dentry_operations;
static const struct file_operations proc_sys_file_operations;
static const struct inode_operations proc_sys_inode_operations;
@@ -215,15 +218,19 @@ static void init_header(struct ctl_table_header *head,
INIT_HLIST_HEAD(&head->inodes);
if (node) {
struct ctl_table *entry;
- for (entry = table; entry->procname; entry++, node++)
+
+ list_for_each_table_entry(entry, table) {
node->header = head;
+ node++;
+ }
}
}
static void erase_header(struct ctl_table_header *head)
{
struct ctl_table *entry;
- for (entry = head->ctl_table; entry->procname; entry++)
+
+ list_for_each_table_entry(entry, head->ctl_table)
erase_entry(head, entry);
}
@@ -248,7 +255,7 @@ static int insert_header(struct ctl_dir *dir, struct ctl_table_header *header)
err = insert_links(header);
if (err)
goto fail_links;
- for (entry = header->ctl_table; entry->procname; entry++) {
+ list_for_each_table_entry(entry, header->ctl_table) {
err = insert_entry(header, entry);
if (err)
goto fail;
@@ -978,7 +985,6 @@ static struct ctl_dir *new_dir(struct ctl_table_set *set,
table = (struct ctl_table *)(node + 1);
new_name = (char *)(table + 2);
memcpy(new_name, name, namelen);
- new_name[namelen] = '\0';
table[0].procname = new_name;
table[0].mode = S_IFDIR|S_IRUGO|S_IXUGO;
init_header(&new->header, set->dir.header.root, set, node, table);
@@ -1130,35 +1136,36 @@ static int sysctl_check_table_array(const char *path, struct ctl_table *table)
static int sysctl_check_table(const char *path, struct ctl_table *table)
{
+ struct ctl_table *entry;
int err = 0;
- for (; table->procname; table++) {
- if (table->child)
- err |= sysctl_err(path, table, "Not a file");
+ list_for_each_table_entry(entry, table) {
+ if (entry->child)
+ err |= sysctl_err(path, entry, "Not a file");
- if ((table->proc_handler == proc_dostring) ||
- (table->proc_handler == proc_dointvec) ||
- (table->proc_handler == proc_douintvec) ||
- (table->proc_handler == proc_douintvec_minmax) ||
- (table->proc_handler == proc_dointvec_minmax) ||
- (table->proc_handler == proc_dou8vec_minmax) ||
- (table->proc_handler == proc_dointvec_jiffies) ||
- (table->proc_handler == proc_dointvec_userhz_jiffies) ||
- (table->proc_handler == proc_dointvec_ms_jiffies) ||
- (table->proc_handler == proc_doulongvec_minmax) ||
- (table->proc_handler == proc_doulongvec_ms_jiffies_minmax)) {
- if (!table->data)
- err |= sysctl_err(path, table, "No data");
- if (!table->maxlen)
- err |= sysctl_err(path, table, "No maxlen");
+ if ((entry->proc_handler == proc_dostring) ||
+ (entry->proc_handler == proc_dointvec) ||
+ (entry->proc_handler == proc_douintvec) ||
+ (entry->proc_handler == proc_douintvec_minmax) ||
+ (entry->proc_handler == proc_dointvec_minmax) ||
+ (entry->proc_handler == proc_dou8vec_minmax) ||
+ (entry->proc_handler == proc_dointvec_jiffies) ||
+ (entry->proc_handler == proc_dointvec_userhz_jiffies) ||
+ (entry->proc_handler == proc_dointvec_ms_jiffies) ||
+ (entry->proc_handler == proc_doulongvec_minmax) ||
+ (entry->proc_handler == proc_doulongvec_ms_jiffies_minmax)) {
+ if (!entry->data)
+ err |= sysctl_err(path, entry, "No data");
+ if (!entry->maxlen)
+ err |= sysctl_err(path, entry, "No maxlen");
else
- err |= sysctl_check_table_array(path, table);
+ err |= sysctl_check_table_array(path, entry);
}
- if (!table->proc_handler)
- err |= sysctl_err(path, table, "No proc_handler");
+ if (!entry->proc_handler)
+ err |= sysctl_err(path, entry, "No proc_handler");
- if ((table->mode & (S_IRUGO|S_IWUGO)) != table->mode)
- err |= sysctl_err(path, table, "bogus .mode 0%o",
- table->mode);
+ if ((entry->mode & (S_IRUGO|S_IWUGO)) != entry->mode)
+ err |= sysctl_err(path, entry, "bogus .mode 0%o",
+ entry->mode);
}
return err;
}
@@ -1174,7 +1181,7 @@ static struct ctl_table_header *new_links(struct ctl_dir *dir, struct ctl_table
name_bytes = 0;
nr_entries = 0;
- for (entry = table; entry->procname; entry++) {
+ list_for_each_table_entry(entry, table) {
nr_entries++;
name_bytes += strlen(entry->procname) + 1;
}
@@ -1191,14 +1198,16 @@ static struct ctl_table_header *new_links(struct ctl_dir *dir, struct ctl_table
node = (struct ctl_node *)(links + 1);
link_table = (struct ctl_table *)(node + nr_entries);
link_name = (char *)&link_table[nr_entries + 1];
+ link = link_table;
- for (link = link_table, entry = table; entry->procname; link++, entry++) {
+ list_for_each_table_entry(entry, table) {
int len = strlen(entry->procname) + 1;
memcpy(link_name, entry->procname, len);
link->procname = link_name;
link->mode = S_IFLNK|S_IRWXUGO;
link->data = link_root;
link_name += len;
+ link++;
}
init_header(links, dir->header.root, dir->header.set, node, link_table);
links->nreg = nr_entries;
@@ -1213,7 +1222,7 @@ static bool get_links(struct ctl_dir *dir,
struct ctl_table *entry, *link;
/* Are there links available for every entry in table? */
- for (entry = table; entry->procname; entry++) {
+ list_for_each_table_entry(entry, table) {
const char *procname = entry->procname;
link = find_entry(&head, dir, procname, strlen(procname));
if (!link)
@@ -1226,7 +1235,7 @@ static bool get_links(struct ctl_dir *dir,
}
/* The checks passed. Increase the registration count on the links */
- for (entry = table; entry->procname; entry++) {
+ list_for_each_table_entry(entry, table) {
const char *procname = entry->procname;
link = find_entry(&head, dir, procname, strlen(procname));
head->nreg++;
@@ -1329,7 +1338,7 @@ struct ctl_table_header *__register_sysctl_table(
struct ctl_node *node;
int nr_entries = 0;
- for (entry = table; entry->procname; entry++)
+ list_for_each_table_entry(entry, table)
nr_entries++;
header = kzalloc(sizeof(struct ctl_table_header) +
@@ -1456,7 +1465,7 @@ static int count_subheaders(struct ctl_table *table)
if (!table || !table->procname)
return 1;
- for (entry = table; entry->procname; entry++) {
+ list_for_each_table_entry(entry, table) {
if (entry->child)
nr_subheaders += count_subheaders(entry->child);
else
@@ -1475,7 +1484,7 @@ static int register_leaf_sysctl_tables(const char *path, char *pos,
int nr_dirs = 0;
int err = -ENOMEM;
- for (entry = table; entry->procname; entry++) {
+ list_for_each_table_entry(entry, table) {
if (entry->child)
nr_dirs++;
else
@@ -1492,7 +1501,9 @@ static int register_leaf_sysctl_tables(const char *path, char *pos,
goto out;
ctl_table_arg = files;
- for (new = files, entry = table; entry->procname; entry++) {
+ new = files;
+
+ list_for_each_table_entry(entry, table) {
if (entry->child)
continue;
*new = *entry;
@@ -1516,7 +1527,7 @@ static int register_leaf_sysctl_tables(const char *path, char *pos,
}
/* Recurse into the subdirectories. */
- for (entry = table; entry->procname; entry++) {
+ list_for_each_table_entry(entry, table) {
char *child_pos;
if (!entry->child)
@@ -1670,7 +1681,7 @@ static void put_links(struct ctl_table_header *header)
if (IS_ERR(core_parent))
return;
- for (entry = header->ctl_table; entry->procname; entry++) {
+ list_for_each_table_entry(entry, header->ctl_table) {
struct ctl_table_header *link_head;
struct ctl_table *link;
const char *name = entry->procname;
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index f46060eb91b5..2d04e3470d4c 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -1421,6 +1421,8 @@ static pagemap_entry_t pte_to_pagemap_entry(struct pagemapread *pm,
migration = is_migration_entry(entry);
if (is_pfn_swap_entry(entry))
page = pfn_swap_entry_to_page(entry);
+ if (pte_marker_entry_uffd_wp(entry))
+ flags |= PM_UFFD_WP;
}
if (page && !PageAnon(page))
@@ -1556,10 +1558,15 @@ static int pagemap_hugetlb_range(pte_t *ptep, unsigned long hmask,
if (page_mapcount(page) == 1)
flags |= PM_MMAP_EXCLUSIVE;
+ if (huge_pte_uffd_wp(pte))
+ flags |= PM_UFFD_WP;
+
flags |= PM_PRESENT;
if (pm->show_pfn)
frame = pte_pfn(pte) +
((addr & ~hmask) >> PAGE_SHIFT);
+ } else if (pte_swp_uffd_wp_any(pte)) {
+ flags |= PM_UFFD_WP;
}
for (; addr != end; addr += PAGE_SIZE) {
@@ -1873,8 +1880,6 @@ static int gather_hugetlb_stats(pte_t *pte, unsigned long hmask,
return 0;
page = pte_page(huge_pte);
- if (!page)
- return 0;
md = walk->private;
gather_stats(page, md, pte_dirty(huge_pte), 1);
diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c
index 6f1b8ddc6f7a..4eaeb645e759 100644
--- a/fs/proc/vmcore.c
+++ b/fs/proc/vmcore.c
@@ -26,6 +26,7 @@
#include <linux/vmalloc.h>
#include <linux/pagemap.h>
#include <linux/uaccess.h>
+#include <linux/uio.h>
#include <linux/cc_platform.h>
#include <asm/io.h>
#include "internal.h"
@@ -128,9 +129,8 @@ static int open_vmcore(struct inode *inode, struct file *file)
}
/* Reads a page from the oldmem device from given offset. */
-ssize_t read_from_oldmem(char *buf, size_t count,
- u64 *ppos, int userbuf,
- bool encrypted)
+ssize_t read_from_oldmem(struct iov_iter *iter, size_t count,
+ u64 *ppos, bool encrypted)
{
unsigned long pfn, offset;
size_t nr_bytes;
@@ -152,29 +152,23 @@ ssize_t read_from_oldmem(char *buf, size_t count,
/* If pfn is not ram, return zeros for sparse dump files */
if (!pfn_is_ram(pfn)) {
- tmp = 0;
- if (!userbuf)
- memset(buf, 0, nr_bytes);
- else if (clear_user(buf, nr_bytes))
- tmp = -EFAULT;
+ tmp = iov_iter_zero(nr_bytes, iter);
} else {
if (encrypted)
- tmp = copy_oldmem_page_encrypted(pfn, buf,
+ tmp = copy_oldmem_page_encrypted(iter, pfn,
nr_bytes,
- offset,
- userbuf);
+ offset);
else
- tmp = copy_oldmem_page(pfn, buf, nr_bytes,
- offset, userbuf);
+ tmp = copy_oldmem_page(iter, pfn, nr_bytes,
+ offset);
}
- if (tmp < 0) {
+ if (tmp < nr_bytes) {
srcu_read_unlock(&vmcore_cb_srcu, idx);
- return tmp;
+ return -EFAULT;
}
*ppos += nr_bytes;
count -= nr_bytes;
- buf += nr_bytes;
read += nr_bytes;
++pfn;
offset = 0;
@@ -203,7 +197,12 @@ void __weak elfcorehdr_free(unsigned long long addr)
*/
ssize_t __weak elfcorehdr_read(char *buf, size_t count, u64 *ppos)
{
- return read_from_oldmem(buf, count, ppos, 0, false);
+ struct kvec kvec = { .iov_base = buf, .iov_len = count };
+ struct iov_iter iter;
+
+ iov_iter_kvec(&iter, READ, &kvec, 1, count);
+
+ return read_from_oldmem(&iter, count, ppos, false);
}
/*
@@ -211,7 +210,13 @@ ssize_t __weak elfcorehdr_read(char *buf, size_t count, u64 *ppos)
*/
ssize_t __weak elfcorehdr_read_notes(char *buf, size_t count, u64 *ppos)
{
- return read_from_oldmem(buf, count, ppos, 0, cc_platform_has(CC_ATTR_MEM_ENCRYPT));
+ struct kvec kvec = { .iov_base = buf, .iov_len = count };
+ struct iov_iter iter;
+
+ iov_iter_kvec(&iter, READ, &kvec, 1, count);
+
+ return read_from_oldmem(&iter, count, ppos,
+ cc_platform_has(CC_ATTR_MEM_ENCRYPT));
}
/*
@@ -228,29 +233,14 @@ int __weak remap_oldmem_pfn_range(struct vm_area_struct *vma,
/*
* Architectures which support memory encryption override this.
*/
-ssize_t __weak
-copy_oldmem_page_encrypted(unsigned long pfn, char *buf, size_t csize,
- unsigned long offset, int userbuf)
+ssize_t __weak copy_oldmem_page_encrypted(struct iov_iter *iter,
+ unsigned long pfn, size_t csize, unsigned long offset)
{
- return copy_oldmem_page(pfn, buf, csize, offset, userbuf);
-}
-
-/*
- * Copy to either kernel or user space
- */
-static int copy_to(void *target, void *src, size_t size, int userbuf)
-{
- if (userbuf) {
- if (copy_to_user((char __user *) target, src, size))
- return -EFAULT;
- } else {
- memcpy(target, src, size);
- }
- return 0;
+ return copy_oldmem_page(iter, pfn, csize, offset);
}
#ifdef CONFIG_PROC_VMCORE_DEVICE_DUMP
-static int vmcoredd_copy_dumps(void *dst, u64 start, size_t size, int userbuf)
+static int vmcoredd_copy_dumps(struct iov_iter *iter, u64 start, size_t size)
{
struct vmcoredd_node *dump;
u64 offset = 0;
@@ -263,14 +253,13 @@ static int vmcoredd_copy_dumps(void *dst, u64 start, size_t size, int userbuf)
if (start < offset + dump->size) {
tsz = min(offset + (u64)dump->size - start, (u64)size);
buf = dump->buf + start - offset;
- if (copy_to(dst, buf, tsz, userbuf)) {
+ if (copy_to_iter(buf, tsz, iter) < tsz) {
ret = -EFAULT;
goto out_unlock;
}
size -= tsz;
start += tsz;
- dst += tsz;
/* Leave now if buffer filled already */
if (!size)
@@ -326,33 +315,28 @@ out_unlock:
/* Read from the ELF header and then the crash dump. On error, negative value is
* returned otherwise number of bytes read are returned.
*/
-static ssize_t __read_vmcore(char *buffer, size_t buflen, loff_t *fpos,
- int userbuf)
+static ssize_t __read_vmcore(struct iov_iter *iter, loff_t *fpos)
{
ssize_t acc = 0, tmp;
size_t tsz;
u64 start;
struct vmcore *m = NULL;
- if (buflen == 0 || *fpos >= vmcore_size)
+ if (!iov_iter_count(iter) || *fpos >= vmcore_size)
return 0;
- /* trim buflen to not go beyond EOF */
- if (buflen > vmcore_size - *fpos)
- buflen = vmcore_size - *fpos;
+ iov_iter_truncate(iter, vmcore_size - *fpos);
/* Read ELF core header */
if (*fpos < elfcorebuf_sz) {
- tsz = min(elfcorebuf_sz - (size_t)*fpos, buflen);
- if (copy_to(buffer, elfcorebuf + *fpos, tsz, userbuf))
+ tsz = min(elfcorebuf_sz - (size_t)*fpos, iov_iter_count(iter));
+ if (copy_to_iter(elfcorebuf + *fpos, tsz, iter) < tsz)
return -EFAULT;
- buflen -= tsz;
*fpos += tsz;
- buffer += tsz;
acc += tsz;
/* leave now if filled buffer already */
- if (buflen == 0)
+ if (!iov_iter_count(iter))
return acc;
}
@@ -373,35 +357,32 @@ static ssize_t __read_vmcore(char *buffer, size_t buflen, loff_t *fpos,
/* Read device dumps */
if (*fpos < elfcorebuf_sz + vmcoredd_orig_sz) {
tsz = min(elfcorebuf_sz + vmcoredd_orig_sz -
- (size_t)*fpos, buflen);
+ (size_t)*fpos, iov_iter_count(iter));
start = *fpos - elfcorebuf_sz;
- if (vmcoredd_copy_dumps(buffer, start, tsz, userbuf))
+ if (vmcoredd_copy_dumps(iter, start, tsz))
return -EFAULT;
- buflen -= tsz;
*fpos += tsz;
- buffer += tsz;
acc += tsz;
/* leave now if filled buffer already */
- if (!buflen)
+ if (!iov_iter_count(iter))
return acc;
}
#endif /* CONFIG_PROC_VMCORE_DEVICE_DUMP */
/* Read remaining elf notes */
- tsz = min(elfcorebuf_sz + elfnotes_sz - (size_t)*fpos, buflen);
+ tsz = min(elfcorebuf_sz + elfnotes_sz - (size_t)*fpos,
+ iov_iter_count(iter));
kaddr = elfnotes_buf + *fpos - elfcorebuf_sz - vmcoredd_orig_sz;
- if (copy_to(buffer, kaddr, tsz, userbuf))
+ if (copy_to_iter(kaddr, tsz, iter) < tsz)
return -EFAULT;
- buflen -= tsz;
*fpos += tsz;
- buffer += tsz;
acc += tsz;
/* leave now if filled buffer already */
- if (buflen == 0)
+ if (!iov_iter_count(iter))
return acc;
}
@@ -409,19 +390,17 @@ static ssize_t __read_vmcore(char *buffer, size_t buflen, loff_t *fpos,
if (*fpos < m->offset + m->size) {
tsz = (size_t)min_t(unsigned long long,
m->offset + m->size - *fpos,
- buflen);
+ iov_iter_count(iter));
start = m->paddr + *fpos - m->offset;
- tmp = read_from_oldmem(buffer, tsz, &start,
- userbuf, cc_platform_has(CC_ATTR_MEM_ENCRYPT));
+ tmp = read_from_oldmem(iter, tsz, &start,
+ cc_platform_has(CC_ATTR_MEM_ENCRYPT));
if (tmp < 0)
return tmp;
- buflen -= tsz;
*fpos += tsz;
- buffer += tsz;
acc += tsz;
/* leave now if filled buffer already */
- if (buflen == 0)
+ if (!iov_iter_count(iter))
return acc;
}
}
@@ -429,15 +408,14 @@ static ssize_t __read_vmcore(char *buffer, size_t buflen, loff_t *fpos,
return acc;
}
-static ssize_t read_vmcore(struct file *file, char __user *buffer,
- size_t buflen, loff_t *fpos)
+static ssize_t read_vmcore(struct kiocb *iocb, struct iov_iter *iter)
{
- return __read_vmcore((__force char *) buffer, buflen, fpos, 1);
+ return __read_vmcore(iter, &iocb->ki_pos);
}
/*
* The vmcore fault handler uses the page cache and fills data using the
- * standard __vmcore_read() function.
+ * standard __read_vmcore() function.
*
* On s390 the fault handler is used for memory regions that can't be mapped
* directly with remap_pfn_range().
@@ -447,9 +425,10 @@ static vm_fault_t mmap_vmcore_fault(struct vm_fault *vmf)
#ifdef CONFIG_S390
struct address_space *mapping = vmf->vma->vm_file->f_mapping;
pgoff_t index = vmf->pgoff;
+ struct iov_iter iter;
+ struct kvec kvec;
struct page *page;
loff_t offset;
- char *buf;
int rc;
page = find_or_create_page(mapping, index, GFP_KERNEL);
@@ -457,8 +436,11 @@ static vm_fault_t mmap_vmcore_fault(struct vm_fault *vmf)
return VM_FAULT_OOM;
if (!PageUptodate(page)) {
offset = (loff_t) index << PAGE_SHIFT;
- buf = __va((page_to_pfn(page) << PAGE_SHIFT));
- rc = __read_vmcore(buf, PAGE_SIZE, &offset, 0);
+ kvec.iov_base = page_address(page);
+ kvec.iov_len = PAGE_SIZE;
+ iov_iter_kvec(&iter, READ, &kvec, 1, PAGE_SIZE);
+
+ rc = __read_vmcore(&iter, &offset);
if (rc < 0) {
unlock_page(page);
put_page(page);
@@ -708,7 +690,7 @@ static int mmap_vmcore(struct file *file, struct vm_area_struct *vma)
static const struct proc_ops vmcore_proc_ops = {
.proc_open = open_vmcore,
- .proc_read = read_vmcore,
+ .proc_read_iter = read_vmcore,
.proc_lseek = default_llseek,
.proc_mmap = mmap_vmcore,
};
diff --git a/fs/smbfs_common/smb2pdu.h b/fs/smbfs_common/smb2pdu.h
index 0507aecfc669..2cab413fffee 100644
--- a/fs/smbfs_common/smb2pdu.h
+++ b/fs/smbfs_common/smb2pdu.h
@@ -1244,6 +1244,106 @@ struct file_zero_data_information {
__le64 BeyondFinalZero;
} __packed;
+/* See MS-FSCC 2.3.7 */
+struct duplicate_extents_to_file {
+ __u64 PersistentFileHandle; /* source file handle, opaque endianness */
+ __u64 VolatileFileHandle;
+ __le64 SourceFileOffset;
+ __le64 TargetFileOffset;
+ __le64 ByteCount; /* Bytes to be copied */
+} __packed;
+
+/* See MS-FSCC 2.3.8 */
+#define DUPLICATE_EXTENTS_DATA_EX_SOURCE_ATOMIC 0x00000001
+struct duplicate_extents_to_file_ex {
+ __u64 PersistentFileHandle; /* source file handle, opaque endianness */
+ __u64 VolatileFileHandle;
+ __le64 SourceFileOffset;
+ __le64 TargetFileOffset;
+ __le64 ByteCount; /* Bytes to be copied */
+ __le32 Flags;
+ __le32 Reserved;
+} __packed;
+
+
+/* See MS-FSCC 2.3.20 */
+struct fsctl_get_integrity_information_rsp {
+ __le16 ChecksumAlgorithm;
+ __le16 Reserved;
+ __le32 Flags;
+ __le32 ChecksumChunkSizeInBytes;
+ __le32 ClusterSizeInBytes;
+} __packed;
+
+/* See MS-FSCC 2.3.55 */
+struct fsctl_query_file_regions_req {
+ __le64 FileOffset;
+ __le64 Length;
+ __le32 DesiredUsage;
+ __le32 Reserved;
+} __packed;
+
+/* DesiredUsage flags see MS-FSCC 2.3.56.1 */
+#define FILE_USAGE_INVALID_RANGE 0x00000000
+#define FILE_USAGE_VALID_CACHED_DATA 0x00000001
+#define FILE_USAGE_NONCACHED_DATA 0x00000002
+
+struct file_region_info {
+ __le64 FileOffset;
+ __le64 Length;
+ __le32 DesiredUsage;
+ __le32 Reserved;
+} __packed;
+
+/* See MS-FSCC 2.3.56 */
+struct fsctl_query_file_region_rsp {
+ __le32 Flags;
+ __le32 TotalRegionEntryCount;
+ __le32 RegionEntryCount;
+ __u32 Reserved;
+ struct file_region_info Regions[];
+} __packed;
+
+/* See MS-FSCC 2.3.58 */
+struct fsctl_query_on_disk_vol_info_rsp {
+ __le64 DirectoryCount;
+ __le64 FileCount;
+ __le16 FsFormatMajVersion;
+ __le16 FsFormatMinVersion;
+ __u8 FsFormatName[24];
+ __le64 FormatTime;
+ __le64 LastUpdateTime;
+ __u8 CopyrightInfo[68];
+ __u8 AbstractInfo[68];
+ __u8 FormatImplInfo[68];
+ __u8 LastModifyImplInfo[68];
+} __packed;
+
+/* See MS-FSCC 2.3.73 */
+struct fsctl_set_integrity_information_req {
+ __le16 ChecksumAlgorithm;
+ __le16 Reserved;
+ __le32 Flags;
+} __packed;
+
+/* See MS-FSCC 2.3.75 */
+struct fsctl_set_integrity_info_ex_req {
+ __u8 EnableIntegrity;
+ __u8 KeepState;
+ __u16 Reserved;
+ __le32 Flags;
+ __u8 Version;
+ __u8 Reserved2[7];
+} __packed;
+
+/* Integrity ChecksumAlgorithm choices for above */
+#define CHECKSUM_TYPE_NONE 0x0000
+#define CHECKSUM_TYPE_CRC64 0x0002
+#define CHECKSUM_TYPE_UNCHANGED 0xFFFF /* set only */
+
+/* Integrity flags for above */
+#define FSCTL_INTEGRITY_FLAG_CHECKSUM_ENFORCEMENT_OFF 0x00000001
+
/* Reparse structures - see MS-FSCC 2.1.2 */
/* struct fsctl_reparse_info_req is empty, only response structs (see below) */
@@ -1304,13 +1404,6 @@ struct validate_negotiate_info_rsp {
__le16 Dialect; /* Dialect in use for the connection */
} __packed;
-struct duplicate_extents_to_file {
- __u64 PersistentFileHandle; /* source file handle, opaque endianness */
- __u64 VolatileFileHandle;
- __le64 SourceFileOffset;
- __le64 TargetFileOffset;
- __le64 ByteCount; /* Bytes to be copied */
-} __packed;
/* Possible InfoType values */
#define SMB2_O_INFO_FILE 0x01
@@ -1419,6 +1512,7 @@ struct smb2_query_info_rsp {
* PDU query infolevel structure definitions
*/
+/* See MS-FSCC 2.3.52 */
struct file_allocated_range_buffer {
__le64 file_offset;
__le64 length;
diff --git a/fs/smbfs_common/smbfsctl.h b/fs/smbfs_common/smbfsctl.h
index d51939c43ad7..edd7fc2a7921 100644
--- a/fs/smbfs_common/smbfsctl.h
+++ b/fs/smbfs_common/smbfsctl.h
@@ -88,21 +88,27 @@
#define FSCTL_READ_RAW_ENCRYPTED 0x000900E3 /* BB add struct */
#define FSCTL_READ_FILE_USN_DATA 0x000900EB /* BB add struct */
#define FSCTL_WRITE_USN_CLOSE_RECORD 0x000900EF /* BB add struct */
+#define FSCTL_MARK_HANDLE 0x000900FC /* BB add struct */
#define FSCTL_SIS_COPYFILE 0x00090100 /* BB add struct */
#define FSCTL_RECALL_FILE 0x00090117 /* BB add struct */
#define FSCTL_QUERY_SPARING_INFO 0x00090138 /* BB add struct */
+#define FSCTL_QUERY_ON_DISK_VOLUME_INFO 0x0009013C
#define FSCTL_SET_ZERO_ON_DEALLOC 0x00090194 /* BB add struct */
#define FSCTL_SET_SHORT_NAME_BEHAVIOR 0x000901B4 /* BB add struct */
#define FSCTL_GET_INTEGRITY_INFORMATION 0x0009027C
+#define FSCTL_QUERY_FILE_REGIONS 0x00090284
#define FSCTL_GET_REFS_VOLUME_DATA 0x000902D8 /* See MS-FSCC 2.3.24 */
#define FSCTL_SET_INTEGRITY_INFORMATION_EXT 0x00090380
#define FSCTL_GET_RETRIEVAL_POINTERS_AND_REFCOUNT 0x000903d3
#define FSCTL_GET_RETRIEVAL_POINTER_COUNT 0x0009042b
#define FSCTL_REFS_STREAM_SNAPSHOT_MANAGEMENT 0x00090440
#define FSCTL_QUERY_ALLOCATED_RANGES 0x000940CF
+#define FSCTL_OFFLOAD_READ 0x00094264 /* BB add struct */
+#define FSCTL_OFFLOAD_WRITE 0x00098268 /* BB add struct */
#define FSCTL_SET_DEFECT_MANAGEMENT 0x00098134 /* BB add struct */
#define FSCTL_FILE_LEVEL_TRIM 0x00098208 /* BB add struct */
#define FSCTL_DUPLICATE_EXTENTS_TO_FILE 0x00098344
+#define FSCTL_DUPLICATE_EXTENTS_TO_FILE_EX 0x000983E8
#define FSCTL_SIS_LINK_FILES 0x0009C104
#define FSCTL_SET_INTEGRITY_INFORMATION 0x0009C280
#define FSCTL_PIPE_PEEK 0x0011400C /* BB add struct */
diff --git a/fs/sysv/super.c b/fs/sysv/super.c
index d1def0771a40..3365a30dc1e0 100644
--- a/fs/sysv/super.c
+++ b/fs/sysv/super.c
@@ -312,7 +312,9 @@ static int complete_read_super(struct super_block *sb, int silent, int size)
sbi->s_firstinodezone = 2;
flavour_setup[sbi->s_type](sbi, &sb->s_max_links);
-
+ if (sbi->s_firstdatazone < sbi->s_firstinodezone)
+ return 0;
+
sbi->s_ndatazones = sbi->s_nzones - sbi->s_firstdatazone;
sbi->s_inodes_per_block = bsize >> 6;
sbi->s_inodes_per_block_1 = (bsize >> 6)-1;
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index aa0c47cb0d16..e943370107d0 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -29,6 +29,7 @@
#include <linux/ioctl.h>
#include <linux/security.h>
#include <linux/hugetlb.h>
+#include <linux/swapops.h>
int sysctl_unprivileged_userfaultfd __read_mostly;
@@ -249,9 +250,10 @@ static inline bool userfaultfd_huge_must_wait(struct userfaultfd_ctx *ctx,
/*
* Lockless access: we're in a wait_event so it's ok if it
- * changes under us.
+ * changes under us. PTE markers should be handled the same as none
+ * ptes here.
*/
- if (huge_pte_none(pte))
+ if (huge_pte_none_mostly(pte))
ret = true;
if (!huge_pte_write(pte) && (reason & VM_UFFD_WP))
ret = true;
@@ -330,9 +332,10 @@ static inline bool userfaultfd_must_wait(struct userfaultfd_ctx *ctx,
pte = pte_offset_map(pmd, address);
/*
* Lockless access: we're in a wait_event so it's ok if it
- * changes under us.
+ * changes under us. PTE markers should be handled the same as none
+ * ptes here.
*/
- if (pte_none(*pte))
+ if (pte_none_mostly(*pte))
ret = true;
if (!pte_write(*pte) && (reason & VM_UFFD_WP))
ret = true;
@@ -1255,24 +1258,6 @@ static __always_inline int validate_range(struct mm_struct *mm,
return 0;
}
-static inline bool vma_can_userfault(struct vm_area_struct *vma,
- unsigned long vm_flags)
-{
- /* FIXME: add WP support to hugetlbfs and shmem */
- if (vm_flags & VM_UFFD_WP) {
- if (is_vm_hugetlb_page(vma) || vma_is_shmem(vma))
- return false;
- }
-
- if (vm_flags & VM_UFFD_MINOR) {
- if (!(is_vm_hugetlb_page(vma) || vma_is_shmem(vma)))
- return false;
- }
-
- return vma_is_anonymous(vma) || is_vm_hugetlb_page(vma) ||
- vma_is_shmem(vma);
-}
-
static int userfaultfd_register(struct userfaultfd_ctx *ctx,
unsigned long arg)
{
@@ -1954,6 +1939,9 @@ static int userfaultfd_api(struct userfaultfd_ctx *ctx,
#ifndef CONFIG_HAVE_ARCH_USERFAULTFD_WP
uffdio_api.features &= ~UFFD_FEATURE_PAGEFAULT_FLAG_WP;
#endif
+#ifndef CONFIG_PTE_MARKER_UFFD_WP
+ uffdio_api.features &= ~UFFD_FEATURE_WP_HUGETLBFS_SHMEM;
+#endif
uffdio_api.ioctls = UFFD_API_IOCTLS;
ret = -EFAULT;
if (copy_to_user(buf, &uffdio_api, sizeof(uffdio_api)))